nx86inl.pas 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 specialisation of the generic code generator inline node.
    It overrides the type check, first pass and second pass hooks of
    tcginlinenode for the intrinsics handled in this unit. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { hook called by the round/trunc/frac first pass handlers; the
      implementation in this unit is intentionally empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    { type checking of the cpu specific intrinsics }
    function pass_typecheck_cpu:tnode;override;

    { first pass overrides, needed so that the code generator
      will actually generate these nodes }
    function first_cpu: tnode;override;
    function first_pi: tnode ; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real : tnode; override;
    function first_int_real : tnode; override;
    function first_minmax: tnode; override;

    { node simplification }
    function simplify(forinline : boolean) : tnode; override;

    { second pass overrides which emit the code for these nodes }
    procedure pass_generate_code_cpu;override;
    procedure second_IncludeExclude;override;
    procedure second_AndOrXorShiftRot_assign;override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch;override;
    procedure second_abs_long;override;
    procedure second_popcnt;override;
    procedure second_fma;override;
    procedure second_frac_real;override;
    procedure second_int_real;override;
    procedure second_high;override;
    procedure second_minmax;override;
  private
    { evaluates lnode and makes its value available in an x87 register }
    procedure load_fpu_location(lnode: tnode);
  end;
  74. implementation
  75. uses
  76. systems,
  77. globtype,globals,
  78. verbose,compinnr,fmodule,
  79. defutil,
  80. aasmbase,aasmdata,aasmcpu,
  81. symconst,symtype,symdef,symcpu,
  82. ncnv,
  83. htypechk,
  84. cgbase,pass_1,pass_2,
  85. cpuinfo,cpubase,nutils,
  86. ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
  87. tgobj,
  88. cga,cgutils,cgx86,cgobj,hlcgobj,cutils;
  89. {*****************************************************************************
  90. TX86INLINENODE
  91. *****************************************************************************}
  { Hook invoked from the round/trunc/frac first pass handlers.
    Nothing to do here: it only makes a difference for x86_64. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
    end;
  { Type checks the x86 specific intrinsics: verifies the parameter count
    where one is expected and assigns resultdef.  Always returns nil for the
    intrinsics handled here; everything that is neither listed below nor
    provided by x86mmtype.inc is passed on to the inherited implementation. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port input: one parameter, result width follows the intrinsic }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;
        { port output: two parameters, no result }
        in_x86_outportb, in_x86_outportw, in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;
        in_x86_cli, in_x86_sti:
          resultdef:=voidtype;
        { segment register reads: 16 bit result on i8086, 32 bit otherwise }
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
          resultdef:={$ifdef i8086}u16inttype{$else i8086}s32inttype{$endif i8086};
        { include automatically generated code }
        {$i x86mmtype.inc}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass of the x86 specific intrinsics: records the expected result
    location.  Intrinsics that are neither listed here nor covered by
    x86mmfirst.inc are handled by the inherited implementation. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics delivering a value in an integer register }
        in_x86_inportb, in_x86_inportw, in_x86_inportl,
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
          expectloc:=LOC_REGISTER;
        { intrinsics without a result }
        in_x86_outportb, in_x86_outportw, in_x86_outportl,
        in_x86_cli, in_x86_sti:
          expectloc:=LOC_VOID;
        { include automatically generated code }
        {$i x86mmfirst.inc}
        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type; otherwise the inherited
    implementation decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        Result:=inherited first_pi
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { First pass for arctan: handled inline (result in an FPU register) only
    when the best real type is the 80 bit extended type.
    On i8086 targets the inherited implementation is additionally used for
    CPUs older than the 386: FPATAN's range is limited to (0 <= value < 1)
    on the 8087 and 80287, so the RTL helper is needed on these FPUs. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype<cpu_386) or
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype<>s80real) then
        Result:=inherited first_arctan_real
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { First pass for abs(real): always handled inline.  The result is expected
    in an MM register when the vector FPU is used for the result type and in
    an x87 register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    const
      { indexed by the result of use_vectorfpu }
      result_loc : array[boolean] of tcgloc = (LOC_FPUREGISTER,LOC_MMREGISTER);
    begin
      Result:=nil;
      expectloc:=result_loc[use_vectorfpu(resultdef)];
    end;
  { First pass for sqr(real): always handled inline.  The result is expected
    in an MM register when the vector FPU is used for the result type and in
    an x87 register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    const
      { indexed by the result of use_vectorfpu }
      result_loc : array[boolean] of tcgloc = (LOC_FPUREGISTER,LOC_MMREGISTER);
    begin
      Result:=nil;
      expectloc:=result_loc[use_vectorfpu(resultdef)];
    end;
  { First pass for sqrt(real): always handled inline.  The result is expected
    in an MM register when the vector FPU is used for the result type and in
    an x87 register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    const
      { indexed by the result of use_vectorfpu }
      result_loc : array[boolean] of tcgloc = (LOC_FPUREGISTER,LOC_MMREGISTER);
    begin
      Result:=nil;
      expectloc:=result_loc[use_vectorfpu(resultdef)];
    end;
  { First pass for ln: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type; otherwise the inherited
    implementation decides. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        Result:=inherited first_ln_real
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { First pass for cos: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type.  On i8086 targets the
    inherited implementation is also used for CPUs older than the 386,
    because FCOS is 387+. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype<cpu_386) or
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype<>s80real) then
        Result:=inherited first_cos_real
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { First pass for sin: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type.  On i8086 targets the
    inherited implementation is also used for CPUs older than the 386,
    because FSIN is 387+. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
      if
{$ifdef i8086}
         (current_settings.cputype<cpu_386) or
{$endif i8086}
         (tfloatdef(pbestrealtype^).floattype<>s80real) then
        Result:=inherited first_sin_real
      else
        begin
          Result:=nil;
          expectloc:=LOC_FPUREGISTER;
        end;
    end;
  { First pass for round: always handled inline.  The result is expected in
    memory, except on x86_64 when the operand type uses the vector FPU, in
    which case it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      Result:=nil;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for trunc.  When optimising for size the inherited
    implementation is used (on x86_64 only if the operand type does not use
    the vector FPU).  Otherwise the node is handled inline: the result is
    expected in memory, or in an integer register for vector FPU operands
    on x86_64. }
  function tx86inlinenode.first_trunc_real: tnode;
    var
      use_inherited : boolean;
    begin
      maybe_remove_round_trunc_typeconv;

      use_inherited:=cs_opt_size in current_settings.optimizerswitches;
{$ifdef x86_64}
      use_inherited:=use_inherited and not(use_vectorfpu(left.resultdef));
{$endif x86_64}
      if use_inherited then
        begin
          Result:=inherited first_trunc_real;
          exit;
        end;

      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for popcnt.  Handled inline (result in an integer register)
    when the selected CPU type has the POPCNT capability; on i386 this
    excludes 64 bit operands.  i8086 targets and all remaining cases use the
    inherited implementation. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifdef i8086}
      Result:=inherited first_popcnt;
{$else i8086}
      if not(CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         or is_64bit(left.resultdef)
{$endif i386}
         then
        Result:=inherited first_popcnt
      else
        begin
          Result:=nil;
          expectloc:=LOC_REGISTER;
        end;
{$endif i8086}
    end;
  { First pass for fma.  Except on i8086, single and double results are
    handled inline (result in an MM register) when the selected FPU type
    offers FMA or FMA4; everything else goes to the inherited
    implementation. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=nil;
          expectloc:=LOC_MMREGISTER;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac.  Single and double results are handled inline
    (result in an MM register) when the FPU type is at least SSE4.1;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;

      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int.  Single and double results are handled inline
    (result in an MM register) when the FPU type is at least SSE4.1;
    otherwise the inherited implementation is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;

      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for min/max.
    - Floating point (not on i8086): single/double results are handled in an
      MM register; on i386 this additionally requires an FPU type of at
      least SSE (single) respectively SSE2 (double).
    - Integer: 32 bit results (and 64 bit results on x86_64) are handled in
      an integer register; outside x86_64 the selected CPU type must have
      the CMOV capability.
    Everything else is left to the inherited implementation. }
  function tx86inlinenode.first_minmax: tnode;
    begin
      Result:=nil;
{$ifndef i8086}
      if
{$ifdef i386}
         ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
         ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
         is_double(resultdef) or is_single(resultdef)
{$endif i386}
         then
        begin
          expectloc:=LOC_MMREGISTER;
          exit;
        end;
{$endif not i8086}
      if
{$ifndef x86_64}
         (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif not x86_64}
         (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
         ) then
        expectloc:=LOC_REGISTER
      else
        Result:=inherited first_minmax;
    end;
  { Simplification hook.  With an FPU type of at least SSE4.1, an int() whose
    operand is a non-explicit type conversion of a single or double value is
    rewritten to work on the unconverted operand: the conversion node is
    dropped and a re-typechecked copy of this node, typed like the operand,
    is returned.  All other cases are delegated to the inherited
    implementation. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      if (current_settings.fputype>=fpu_sse41) and
         (inlinenumber=in_int_real) and
         (left.nodetype=typeconvn) and
         not(nf_explicit in left.flags) then
        begin
          operand:=ttypeconvnode(left).left;
          if (operand.resultdef.typ=floatdef) and
             (is_double(operand.resultdef) or is_single(operand.resultdef)) then
            begin
              { detach the operand first so that freeing the type
                conversion does not take it along }
              ttypeconvnode(left).left:=nil;
              left.free;
              left:=operand;
              Result:=self.getcopy;
              tinlinenode(Result).resultdef:=operand.resultdef;
              typecheckpass(Result);
              exit;
            end;
        end;
      Result:=inherited simplify(forinline);
    end;
  { Second pass of the x86 specific intrinsics: port I/O, CLI/STI, segment
    register reads and whatever x86mmsecond.inc contributes.  Unhandled
    intrinsics are forwarded to the inherited implementation.

    NOTE(review): paraarray, i, op and the helpers GetConstInt,
    GetParameters, location_force_mmxreg and location_make_ref are not used
    by the code visible in this procedure; presumably they serve the
    included x86mmsecond.inc, so their names and signatures are kept. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN instruction reading into dreg (AL/AX/EAX) and copies the
      value to a freshly allocated result register.  A constant port number
      in 0..255 is encoded as immediate, anything else is passed in DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode : tnode;
        list : TAsmList;
      begin
        portnode:=left;
        secondpass(portnode);
        list:=current_asmdata.CurrAsmList;
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          begin
            hlcg.getcpuregister(list,dreg);
            list.concat(taicpu.op_const_reg(A_IN,dsize,portnode.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(list,NR_DX);
            hlcg.a_load_loc_reg(list,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            hlcg.getcpuregister(list,dreg);
            list.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(list,NR_DX);
          end;
        { common tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(list,resultdef);
        hlcg.ungetcpuregister(list,dreg);
        hlcg.a_load_reg_reg(list,dtype,resultdef,dreg,location.register);
      end;

    { Loads the data operand into dreg (AL/AX/EAX) and emits an OUT
      instruction.  A constant port number in 0..255 is encoded as
      immediate, anything else is passed in DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode, valnode : tnode;
        list : TAsmList;
      begin
        valnode:=tcallparanode(left).left;
        portnode:=tcallparanode(tcallparanode(left).right).left;
        secondpass(valnode);
        secondpass(portnode);
        list:=current_asmdata.CurrAsmList;
        hlcg.getcpuregister(list,dreg);
        hlcg.a_load_loc_reg(list,valnode.resultdef,dtype,valnode.location,dreg);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          list.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnode.location.value))
        else
          begin
            hlcg.getcpuregister(list,NR_DX);
            hlcg.a_load_loc_reg(list,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            list.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(list,NR_DX);
          end;
        hlcg.ungetcpuregister(list,dreg);
      end;

    { Copies the given segment register into a new integer register which
      becomes the location of this node. }
    procedure get_segreg(segreg:tregister);
      var
        ressize : tcgsize;
      begin
        ressize:=def_cgsize(resultdef);
        location_reset(location,LOC_REGISTER,ressize);
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[ressize],segreg,location.register));
      end;

    { Returns the value of an integer constant node; for any other node an
      error message is issued and 0 is returned. }
    function GetConstInt(n: tnode): longint;
      begin
        if is_constintnode(n) then
          Result:=tordconstnode(n).value.svalue
        else
          begin
            Result:=0;
            Message(type_e_constant_expr_expected);
          end;
      end;

    { Stores the parameter nodes in paraarray[1..count].  The parameter
      chain starting at left is walked while the array is filled from index
      count down to 1.  A single parameter that is not wrapped in a
      tcallparanode is stored directly. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and not(left is tcallparanode) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        for idx:=count downto 1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
          end;
      end;

    { Makes sure l lives in an MMX register.  A LOC_CMMXREGISTER location is
      accepted unchanged when maybeconst is set. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference.  Existing references are left
      alone; any other location is (if necessary after forcing it into a
      32 bit register) replaced by a reference using that register as
      base. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);
        {$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Second pass of the and/or/xor/shift/rotate assignment intrinsics.
    The only special case handled here (not on i8086, level 2 optimisation
    enabled, CPU type with the BMI2 capability): "x and= ((1 shl y) - 1)" on
    a 32 bit (x86_64: also 64 bit) destination is emitted as a single BZHI.
    Everything else is generated by the inherited implementation. }
  procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
{$ifndef i8086}
    var
      opsize : tcgsize;
      opsuffix : topsize;
      masknode, bitcount, destnode: TNode;
      scratch: TRegister;
{$endif not i8086}
    begin
{$ifndef i8086}
      if cs_opt_level2 in current_settings.optimizerswitches then
        begin
          { name the operands once; saves a lot of typecasting below }
          masknode:=tcallparanode(left).left;
          destnode:=tcallparanode(tcallparanode(left).right).left;
          opsize:=def_cgsize(destnode.resultdef);

          { BMI2: is the mask operand of the form "((1 shl y) - 1)"? }
          if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
             (inlinenumber=in_and_assign_x_y) and
             (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
             (masknode.nodetype=subn) and
             (taddnode(masknode).right.nodetype=ordconstn) and
             (tordconstnode(taddnode(masknode).right).value=1) and
             (taddnode(masknode).left.nodetype=shln) and
             (tshlshrnode(taddnode(masknode).left).left.nodetype=ordconstn) and
             (tordconstnode(tshlshrnode(taddnode(masknode).left).left).value=1) then
            begin
              { the subtract and shift nodes are skipped completely, only
                the shift count is evaluated }
              bitcount:=tshlshrnode(taddnode(masknode).left).right;
{$ifdef x86_64}
              { the shift count is sometimes extended to 64 bit
                unnecessarily; bring it back to the destination type }
              if (bitcount.nodetype=typeconvn) and
                 (opsize in [OS_32, OS_S32]) and
                 (def_cgsize(TTypeConvNode(bitcount).resultdef) in [OS_64, OS_S64]) then
                begin
                  bitcount.resultdef:=destnode.resultdef;
                  node_reset_flags(bitcount,[],[tnf_pass1_done]);
                  { we shouldn't be getting any new errors }
                  if do_firstpass(bitcount) then
                    InternalError(2022110202);
                  { keep the tree consistent in case the first pass
                    replaced the node }
                  tshlshrnode(taddnode(masknode).left).right:=bitcount;
                end;
{$endif x86_64}
              secondpass(bitcount);
              secondpass(destnode);

              { the bit count has to be in a register }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,
                bitcount.location,bitcount.resultdef,destnode.resultdef,false);

              opsuffix:=TCGSize2OpSize[opsize];
              case destnode.location.loc of
                LOC_REFERENCE,
                LOC_CREFERENCE:
                  begin
                    { BZHI can only write to a register, so go through a
                      scratch register and store the result afterwards }
                    scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    emit_reg_ref_reg(A_BZHI,opsuffix,bitcount.location.register,
                      destnode.location.reference,scratch);
                    emit_reg_ref(A_MOV,opsuffix,scratch,destnode.location.reference);
                  end;
                LOC_REGISTER,
                LOC_CREGISTER:
                  emit_reg_reg_reg(A_BZHI,opsuffix,bitcount.location.register,
                    destnode.location.register,destnode.location.register);
                else
                  InternalError(2022102120);
              end;
              Exit;
            end;
        end;
{$endif not i8086}
      inherited second_AndOrXorShiftRot_assign;
    end;
  { Second pass for pi: emits FLDPI and reports the value in the FPU result
    register, accounting for the additional x87 stack entry. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Evaluates lnode and loads its value into an FPU register.  The location
    of this node is set up as LOC_FPUREGISTER using NR_FPU_RESULT_REG; for a
    value coming from an MM register the location is instead taken over from
    lnode and forced into an FPU register.  Any location kind not listed
    below raises an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    var
      list : TAsmList;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      list:=current_asmdata.CurrAsmList;
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to load }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(list,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(list,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            location:=lnode.location;
            hlcg.location_force_fpureg(list,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Second pass for arctan: loads the operand into the FPU, then emits FLD1
    followed by FPATAN.  FPATAN computes arctan(ST(1)/ST(0)), so with 1.0 on
    top of the stack the result is arctan(operand). }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for abs(real).
    x87 path: load the operand and emit FABS.
    Vector FPU path: AND the operand with the external mask symbol
    FPC_ABSMASK_SINGLE or FPC_ABSMASK_DOUBLE (prefixed with the target's C
    prefix).  With AVX the three operand form writes to a newly allocated
    result register, without AVX the operand register is modified in place
    and becomes the result. }
  procedure tx86inlinenode.second_abs_real;

    { whether the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      maskref : treference;
      masksym : tasmsymbol;
      maskname : string;
      sse_op, avx_op : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;

      { pick mask symbol and opcodes matching the float type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            sse_op:=A_ANDPS;
            avx_op:=A_VANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            sse_op:=A_ANDPD;
            avx_op:=A_VANDPD;
          end;
        else
          internalerror(200506081);
      end;

      masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(maskref,masksym,0,4,[]);
      current_module.add_extern_asmsym(masksym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avx_op,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(
          sse_op,S_XMM,maskref,location.register));
    end;
  { Second pass for round.
    x86_64 with a vector FPU operand: the operand is forced into an MM
    register and converted into a new 64 bit integer register with
    (V)CVTSS2SI or (V)CVTSD2SI, depending on the operand size and on
    whether AVX is used.
    Otherwise: the operand is loaded into the x87 FPU and stored to a
    temporary with FISTP, followed by FWAIT; the temporary is the result
    location. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvt_op : TAsmOp;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          case left.location.size of
            OS_F32:
              if UseAVX then
                cvt_op:=A_VCVTSS2SI
              else
                cvt_op:=A_CVTSS2SI;
            OS_F64:
              if UseAVX then
                cvt_op:=A_VCVTSD2SI
              else
                cvt_op:=A_CVTSD2SI;
            else
              { distinct error numbers for the AVX and the SSE variant }
              if UseAVX then
                internalerror(2007031402)
              else
                internalerror(2007031404);
          end;
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(cvt_op,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      { FISTP pops the x87 stack }
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Generates the code for the truncate-to-integer inline on a floating point
    operand. The result is always described as a signed 64 bit value (OS_S64).

    Three strategies are used:
      - x86_64, vector FPU operand: (V)CVTTSS2SI/(V)CVTTSD2SI into an integer
        register;
      - x87 with fputype>=fpu_sse3: FISTTP into a temp;
      - plain x87: the FPU control word is saved, a modified copy is loaded
        for the FISTP store, and the saved word is reloaded afterwards. }
  procedure tx86inlinenode.second_trunc_real;
    var
      savedcw,trunccw : treference;
{$ifdef x86_64}
      cvtop : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { select the truncating conversion opcode }
          cvtop:=A_NONE;
          if UseAVX then
            case left.location.size of
              OS_F32:
                cvtop:=A_VCVTTSS2SI;
              OS_F64:
                cvtop:=A_VCVTTSD2SI;
              else
                internalerror(2007031401);
            end
          else
            case left.location.size of
              OS_F32:
                cvtop:=A_CVTTSS2SI;
              OS_F64:
                cvtop:=A_CVTTSD2SI;
              else
                internalerror(2007031403);
            end;
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        end
      else
{$endif x86_64}
        if (current_settings.fputype>=fpu_sse3) then
          begin
            { FISTTP always truncates, so the control word is left alone }
            load_fpu_location(left);
            location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
            tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
            emit_ref(A_FISTTP,S_IQ,location.reference);
            tcgx86(cg).dec_fpu_stack;
          end
        else
          begin
            { two 16 bit temps: the control word to restore and the modified copy }
            tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,savedcw);
            tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunccw);
{$ifdef i8086}
            if current_settings.cputype<=cpu_286 then
              begin
                emit_ref(A_FSTCW,S_NO,trunccw);
                emit_ref(A_FSTCW,S_NO,savedcw);
                emit_none(A_FWAIT,S_NO);
              end
            else
{$endif i8086}
              begin
                emit_ref(A_FNSTCW,S_NO,trunccw);
                emit_ref(A_FNSTCW,S_NO,savedcw);
              end;
            { set bits 8..11 of the copy (x87 precision and rounding control
              fields; rounding control 11b selects round toward zero) }
            emit_const_ref(A_OR,S_W,$0f00,trunccw);
            load_fpu_location(left);
            emit_ref(A_FLDCW,S_NO,trunccw);
            location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
            tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
            emit_ref(A_FISTP,S_IQ,location.reference);
            tcgx86(cg).dec_fpu_stack;
            { switch back to the control word that was active before }
            emit_ref(A_FLDCW,S_NO,savedcw);
            emit_none(A_FWAIT,S_NO);
            tg.UnGetTemp(current_asmdata.CurrAsmList,savedcw);
            tg.UnGetTemp(current_asmdata.CurrAsmList,trunccw);
          end;
    end;
  { Generates the code for squaring a floating point operand (x*x).

    Without vector FPU support for the result type, the operand is loaded via
    load_fpu_location and ST0 is multiplied with itself. Otherwise the result
    is produced in a new mm register by a scalar multiply of the operand with
    itself. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          { x87: st0:=st0*st0 }
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { two operand form: copy the operand into the result register first,
            then multiply the result register with itself }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          { three operand form: result:=left*left without a separate copy }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates the code for the square root of a floating point operand.

    With vector FPU support for the result type, the operand is forced into an
    mm register and (V)SQRTSS/(V)SQRTSD writes the result to a new mm
    register. Otherwise the operand is loaded via load_fpu_location and FSQRT
    is emitted. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrtop : tasmop;
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          sqrtop:=A_NONE;
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  sqrtop:=A_VSQRTSS;
                s64real:
                  sqrtop:=A_VSQRTSD;
                else
                  internalerror(200510031);
              end;
              { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
              { using left.location.register here as 2nd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg_reg(sqrtop,S_NO,left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  sqrtop:=A_SQRTSS;
                s64real:
                  sqrtop:=A_SQRTSD;
                else
                  internalerror(2005100303);
              end;
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(sqrtop,S_NO,left.location.register,location.register));
            end;
        end
      else
        begin
          load_fpu_location(left);
          { the operand's temp (if any) is no longer needed once it is loaded }
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          emit_none(A_FSQRT,S_NO);
        end;
    end;
  { Generates the x87 code for the natural logarithm of the operand.

    After the operand has been loaded via load_fpu_location, the sequence
    FLDLN2 / FXCH / FYL2X is emitted: FLDLN2 pushes ln(2), FXCH swaps it below
    the operand and FYL2X computes st1*log2(st0), i.e. ln(2)*log2(x). }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      { push ln(2) }
      emit_none(A_FLDLN2,S_NO);
      { bring the operand back to the top of the stack }
      emit_none(A_FXCH,S_NO);
      { st1*log2(st0), pops one stack entry }
      emit_none(A_FYL2X,S_NO);
    end;
  { Generates the x87 code for the cosine of the operand using FCOS.

    When compiling for i8086 and the selected CPU is older than the 386, the
    inherited implementation is used instead, because FCOS only exists on the
    387 and later. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FCOS,S_NO);
    end;
  { Generates the x87 code for the sine of the operand using FSIN.

    When compiling for i8086 and the selected CPU is older than the 386, the
    inherited implementation is used instead, because FSIN only exists on the
    387 and later. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FSIN,S_NO);
    end;
  { Generates the code for the prefetch inline.

    If the operand ends up as a memory reference, its address is loaded into
    an integer register and a PREFETCHNTA on that address is emitted. For any
    other operand location nothing is emitted.

    On i386/i8086 the whole sequence (including the second pass of the
    operand) is only generated if the selected CPU is at least a Pentium 3. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: switch it off for the
            duration of the second pass of the operand ... }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { ... and switch it back on afterwards. The original code passed
            "false" here as well, which made the restore a no-op. }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { compute the effective address once and prefetch through it }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates the code for abs() on an integer operand.

    CPUs without CMOV (only possible on i8086/i386): branch free sequence
      result:=value; value:=value sar (bits-1); result:=(result xor value)-value
    CPUs with CMOV:
      tmp:=-value; result:=value; if not sign flag then result:=tmp

    If overflow checking is enabled, FPC_OVERFLOW is called when the
    SUB resp. NEG sets the overflow flag. }
  procedure tx86inlinenode.second_abs_long;

    { Emits "jump over the call to FPC_OVERFLOW if the overflow flag is clear"
      when overflow checking is enabled; emits nothing otherwise. }
    procedure emit_overflow_check;
      var
        skiplab : TAsmLabel;
      begin
        if cs_check_overflow in current_settings.localswitches then
          begin
            current_asmdata.getjumplabel(skiplab);
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,skiplab);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,skiplab);
          end;
      end;

    var
      negreg : tregister;
      opsize : tcgsize;
      cmovinstr : taicpu;
    begin
      opsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { maybeconst=false: the operand register is overwritten below }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { turn the operand register into a sign mask (0 or -1) }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          emit_overflow_check;
        end
      else
{$endif i8086 or i386}
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { the flags set by NEG are consumed by the overflow check and the CMOV }
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
          emit_overflow_check;
          cmovinstr:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
          cmovinstr.condition:=C_NS;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          current_asmdata.CurrAsmList.concat(cmovinstr);
        end;
    end;
  1040. {*****************************************************************************
  1041. INCLUDE/EXCLUDE GENERIC HANDLING
  1042. *****************************************************************************}
  { Generates the code for include(set,element) / exclude(set,element).

    The first call parameter is the set, the second one the element.

    - Constant element: the bit mask is computed at compile time and applied
      with OR (include) resp. AND with the inverted mask (exclude), either on
      the memory location (adjusted to the affected chunk) or on the register.
    - Variable element: BTS (include) resp. BTR (exclude) is emitted with the
      element in a register.

    The inherited (generic) implementation is used on i8086 for CPUs older
    than the 386, and on the other targets when not optimizing for size and a
    non-constant element is applied to a set expected in memory. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
      { short names for the two arguments }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
{$else i8086}
      { bts on memory locations is very slow, so even the default code is faster }
      if not(cs_opt_size in current_settings.optimizerswitches) and
         (elemnode.expectloc<>LOC_CONSTANT) and
         (setnode.expectloc=LOC_REFERENCE) then
{$endif i8086}
        begin
          inherited;
          exit;
        end;

      { small sets are handled in their own size, everything else in 32 bit chunks }
      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size);
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the chunk that contains the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CSUBSETREG,
            LOC_CREGISTER :
              hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,setnode.resultdef,mask,setnode.location);
            else
              internalerror(200405022);
          end;
        end
      else
        begin
          { the bit test instructions do not take 8 bit operands: widen to 32 bit }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end;
    end;
  { Generates the code for the population count inline using POPCNT.

    The operation is carried out in the unsigned variant of the operand size,
    with 8 bit operands widened to 16 bit. The operand may stay in memory if
    it already has the right size; otherwise it is forced into a register.
    For a result type of one byte the result register is narrowed to 8 bit
    afterwards. }
  procedure tx86inlinenode.second_popcnt;
    var
      opsize: tcgsize;
    begin
      secondpass(left);

      opsize:=tcgsize2unsigned[left.location.size];
      { there is no 8 bit popcnt }
      if opsize=OS_8 then
        opsize:=OS_16;

      { operand must be a register or memory location of exactly opsize }
      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) or
         (left.location.size<>opsize) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);

      location_reset(location,LOC_REGISTER,opsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);

      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
      end;

      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  { Generates the code for the fused multiply-add inline:
      result := paraarray[1]*paraarray[2] + paraarray[3]

    Unary minus nodes directly on the arguments are not evaluated; instead
    they are folded into the choice of the instruction (VFMADD, VFMSUB,
    VFNMADD, VFNMSUB). At most one argument is used as memory operand, the
    others are forced into mm registers.

    Raises internalerror(2014032301) if the selected FPU type has neither the
    FMA nor the FMA4 capability (and always on i8086). }
  procedure tx86inlinenode.second_fma;
{$ifndef i8086}
    const
      { indexed by: product negated, third operand negated, float type and
        operand arrangement (0..2: 231 form, 3: 213 form) }
      fmaops : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { positive product }
          (
            { positive third operand }
            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { negative third operand }
            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { negative product }
          (
            { positive third operand }
            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { negative third operand }
            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );
    var
      paraarray : array[1..3] of tnode;
      memop,
      accu,
      factor,
      i : integer;
      negop3,
      negproduct,
      gotmem : boolean;
{$endif i8086}
    begin
{$ifndef i8086}
      if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[3]:=tcallparanode(parameters).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the unused unary minus nodes are not released here, they are kept
            and released together with the other nodes, only no code is
            generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  { a minus on either factor flips the sign of the product }
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;

          { in case parameters come on the FPU stack, we have to pop them in reverse order as we
            called secondpass }
          for i:=3 downto 1 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                  begin
                    memop:=i;
                    gotmem:=true;
                  end
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              case memop of
                1,2:
                  begin
                    { one factor is in memory: result:=op3, then
                      result:=factor(mem)*factor(reg)+result }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                      paraarray[3].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(fmaops[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                  end;
                3:
                  begin
                    { the addend is in memory: result:=op1, then
                      result:=op2*result+op3(mem) }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                      paraarray[1].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(fmaops[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[3].location.reference,paraarray[2].location.register,location.register);
                  end
                else
                  internalerror(2014041301);
              end;
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register

                accu: argument copied into the result register
                factor: the other factor if accu is a factor itself }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  accu:=1;
                  factor:=2;
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  accu:=2;
                  factor:=1;
                end
              else
                begin
                  accu:=3;
                  factor:=0;
                end;

              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accu].resultdef,resultdef,
                paraarray[accu].location.register,location.register,mms_movescalar);

              if accu=3 then
                { result holds the addend: result:=op1*op2+result }
                emit_reg_reg_reg(fmaops[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
                  paraarray[1].location.register,paraarray[2].location.register,location.register)
              else
                { result holds one factor: result:=factor*result+op3 }
                emit_reg_reg_reg(fmaops[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                  paraarray[3].location.register,paraarray[factor].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  { Generates the code for the fractional part of a floating point operand.

    Only available with vector FPU support for the result type, otherwise
    internalerror(2017052101) is raised.

    AVX512DQ: a single VREDUCESS/VREDUCESD with immediate 3.
    AVX:      result:=round(x) with immediate 3, then result:=x-result.
    SSE:      result:=x; tmp:=round(x) with immediate 3; result:=result-tmp.

    If operand and result float types differ, the result register is
    converted to the result type at the end. }
  procedure tx86inlinenode.second_frac_real;
    var
      extrareg : TRegister;
      roundop,
      subop : tasmop;
{$ifndef i8086}
      reduceop : tasmop;
{$endif not i8086}
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          roundop:=A_NONE;
          subop:=A_NONE;
          if UseAVX then
            begin
              { select the opcodes for the operand's float type }
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_VROUNDSS;
                    subop:=A_VSUBSS;
{$ifndef i8086}
                    reduceop:=A_VREDUCESS;
{$endif not i8086}
                  end;
                s64real:
                  begin
                    roundop:=A_VROUNDSD;
                    subop:=A_VSUBSD;
{$ifndef i8086}
                    reduceop:=A_VREDUCESD;
{$endif not i8086}
                  end;
                else
                  internalerror(2017052102);
              end;
{$ifndef i8086}
              if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_const_reg_reg_reg(reduceop,S_NO,3,left.location.register,left.location.register,location.register))
              else
{$endif not i8086}
                begin
                  { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg_reg(subop,S_NO,location.register,left.location.register,location.register));
                end;
            end
          else
            begin
              { two operand instructions: an extra register receives the
                rounded value, the result register starts as a copy of x }
              extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_ROUNDSS;
                    subop:=A_SUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_ROUNDSD;
                    subop:=A_SUBSD;
                  end;
                else
                  internalerror(2017052103);
              end;
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,extrareg));
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(subop,S_NO,extrareg,location.register));
            end;
          { convert to the result type if it differs from the operand type }
          if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
        end
      else
        internalerror(2017052101);
    end;
  { Generates the code for the integer part of a floating point operand, kept
    as a floating point value.

    Only available with vector FPU support for the result type, otherwise
    internalerror(2017052107) is raised. The operand is forced into an mm
    register and (V)ROUNDSS/(V)ROUNDSD with immediate 3 writes the result to a
    new mm register. }
  procedure tx86inlinenode.second_int_real;
    var
      roundop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          roundop:=A_NONE;
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
            end;
        end;
    end;
  { Generates the code for high() of a dynamic array.

    Raises Internalerror(2019122809) if the operand is not a dynamic array.

    The array pointer is decremented by one with SUB. For a nil pointer this
    yields -1 (the result for an empty array) and sets the carry flag, so the
    load of the stored value can be skipped with a single conditional jump.
    Otherwise the value is loaded from offset -ossinttype.size relative to
    the original pointer into the same register. }
  procedure tx86inlinenode.second_high;
    var
      nillab : tasmlabel;
      resreg : tregister;
      highref : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);
      { length in dynamic arrays is at offset -sizeof(pint) }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(nillab);
      { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
        flag is set in this case, so we can jump depending on it

        when loading the actual high value, we have to take care later of the decreased value

        do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
        we are using is not working as dec does not touch the carry flag }
      current_asmdata.CurrAsmList.concat(
        taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,nillab);
      { the "+1" compensates for the decrement above; the reference is created
        without volatility flags, as any volatility of the operand refers to
        the array pointer and not to the data it points to }
      hlcg.reference_reset_base(highref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { reuse the register that held the array pointer for the result, so the
        -1 computed for a nil array is kept when the load is skipped }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,highref,resreg);
      cg.a_label(current_asmdata.CurrAsmList,nillab);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  { Emits the code for the inlined Min/Max intrinsics.

    Floating point path (not compiled for i8086): taken when the result is a
    single or a double; on i386 this additionally requires
    fputype>=fpu_sse (single) or fputype>=fpu_sse2 (double).  A scalar
    MINSx/MAXSx instruction (or its V-prefixed form when UseAVX is set) is
    emitted and the result is left in a freshly allocated MM register.

    Integer path: taken for 32 bit integer results (and 64 bit integer results
    on x86_64); when not compiling for x86_64 the selected CPU type must have
    CPUX86_HAS_CMOV.  A CMP followed by a CMOVcc is emitted and the result is
    left in a freshly allocated integer register.

    Any other combination raises internalerror 2020120503. }
  procedure tx86inlinenode.second_minmax;
{$ifndef i8086}
    const
      { indexed by [is a max operation, use the AVX form, float type] }
      oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
        (
          (
            (A_MINSS,A_MINSD),
            (A_VMINSS,A_VMINSD)
          ),
          (
            (A_MAXSS,A_MAXSD),
            (A_VMAXSS,A_VMAXSD)
          )
        );
{$endif i8086}
    var
{$ifndef i8086}
      memop : integer;
      gotmem : boolean;
      op: TAsmOp;
{$endif i8086}
      i : integer;
      paraarray : array[1..2] of tnode;
      instr: TAiCpu;
      opsize: topsize;
{$ifdef x86_64}
      { only needed to materialise 64 bit constants that do not fit into a
        signed 32 bit immediate }
      tmpreg: TRegister;
{$endif x86_64}
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        is_single(resultdef) or is_double(resultdef)
{$endif i386}
        then
        begin
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: the first parameter found in
            memory is remembered in memop, every other parameter that is not
            already in an MM register is loaded into one }
          gotmem:=false;
          memop:=0;
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
            one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
          if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
              gotmem:=false;
            end;

          op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              if UseAVX then
                { three operand form: memory operand, register operand, destination }
                case memop of
                  1:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[1].location.reference,paraarray[2].location.register,location.register);
                  2:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[2].location.reference,paraarray[1].location.register,location.register);
                  else
                    internalerror(2020120504);
                end
              else
                { two operand form: copy the register operand into the
                  destination first, then combine it with the memory operand }
                case memop of
                  1:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                        paraarray[2].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[1].location.reference,location.register);
                    end;
                  2:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                        paraarray[1].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[2].location.reference,location.register);
                    end;
                  else
                    internalerror(2020120601);
                end;
            end
          else
            begin
              { both operands are in MM registers }
              if UseAVX then
                emit_reg_reg_reg(op,S_NO,
                  paraarray[2].location.register,paraarray[1].location.register,location.register)
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg(op,S_NO,
                    paraarray[2].location.register,location.register)
                end;
            end;
        end
      else
{$endif i8086}
      if
{$ifndef x86_64}
        (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
        (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
        ) then
        begin
          { paraarray[1] is the right-hand side }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          if paraarray[2].location.loc = LOC_CONSTANT then
            begin
              { Swap the parameters so the constant is on the right }
              paraarray[2]:=paraarray[1];
              paraarray[1]:=tcallparanode(parameters).paravalue;
            end;

          { the result register starts out holding paraarray[1]; the CMOVcc
            built below conditionally replaces it with paraarray[2] }
          location_reset(location,LOC_REGISTER,paraarray[1].location.size);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,paraarray[1].location,location.register);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

{$ifdef x86_64}
          if is_64bitint(resultdef) then
            opsize := S_Q
          else
{$endif x86_64}
            opsize := S_L;

          { Try to use references as is, unless they would trigger internal
            error 200502052.  The reference part of the location is only
            inspected when the location really is a reference, so that no
            inactive variant field of the location record is read. }
          if (cs_create_pic in current_settings.moduleswitches) and
            (paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and
            Assigned(paraarray[2].location.reference.symbol) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
              paraarray[2].resultdef,paraarray[2].resultdef,true);

          { emit the CMP now and build (but do not yet emit) the matching
            CMOVcc; its condition is filled in afterwards }
          case paraarray[1].location.loc of
            LOC_CONSTANT:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
{$ifdef x86_64}
                    { x86_64 only supports signed 32 bits constants directly }
                    if (opsize=S_Q) and
                      ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
                      begin
                        tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                        hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
                          tmpreg,paraarray[2].location.reference));
                      end
                    else
{$endif x86_64}
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_CMP,opsize,
                        paraarray[1].location.value,paraarray[2].location.reference));

                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
{$ifdef x86_64}
                    { x86_64 only supports signed 32 bits constants directly }
                    if (opsize=S_Q) and
                      ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
                      begin
                        tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                        hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
                          tmpreg,paraarray[2].location.register));
                      end
                    else
{$endif x86_64}
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,opsize,
                        paraarray[1].location.value,paraarray[2].location.register));

                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121907);
              end;
            LOC_REFERENCE,LOC_CREFERENCE:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    { The reference has already been stored at location.register, so use that }
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
                      location.register,paraarray[2].location.reference));

                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_CMP,opsize,
                      paraarray[1].location.reference,paraarray[2].location.register));

                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121906);
              end;
            LOC_REGISTER,LOC_CREGISTER:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
                      paraarray[1].location.register,paraarray[2].location.reference));

                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
                      paraarray[1].location.register,paraarray[2].location.register));

                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121905);
              end;
            else
              InternalError(2021121904);
          end;

          { pick the condition under which paraarray[2] replaces the value
            already held in the result register: signed variants use L/G,
            unsigned variants use B/A }
          case inlinenumber of
            in_min_longint,
            in_min_int64:
              instr.condition := C_L;
            in_min_dword,
            in_min_qword:
              instr.condition := C_B;
            in_max_longint,
            in_max_int64:
              instr.condition := C_G;
            in_max_dword,
            in_max_qword:
              instr.condition := C_A;
            else
              Internalerror(2021121903);
          end;

          current_asmdata.CurrAsmList.concat(instr);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        end
      else
        internalerror(2020120503);
    end;
  1699. end.