nx86inl.pas 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { Inline node for the x86 family. It overrides the type-check, first-pass
    and second-pass hooks of tcginlinenode listed below. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { Hook called by first_round_real, first_trunc_real and first_frac_real;
      the implementation in this unit is empty. }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu: tnode; override;

    { first pass override
      so that the code generator will actually generate
      these nodes.
    }
    function first_cpu: tnode; override;
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real: tnode; override;
    function first_int_real: tnode; override;

    function simplify(forinline: boolean): tnode; override;

    { second pass override to generate these nodes }
    procedure pass_generate_code_cpu; override;
    procedure second_IncludeExclude; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_popcnt; override;
    procedure second_fma; override;
    procedure second_frac_real; override;
    procedure second_int_real; override;
    procedure second_high; override;
  private
    { Evaluates lnode and makes this node's location an FPU register
      holding its value. }
    procedure load_fpu_location(lnode: tnode);
  end;
  71. implementation
  72. uses
  73. systems,
  74. globtype,globals,
  75. verbose,compinnr,fmodule,
  76. defutil,
  77. aasmbase,aasmdata,aasmcpu,
  78. symconst,symtype,symdef,symcpu,
  79. ncnv,
  80. htypechk,
  81. cgbase,pass_1,pass_2,
  82. cpuinfo,cpubase,nutils,
  83. ncal,ncgutil,nld,
  84. tgobj,
  85. cga,cgutils,cgx86,cgobj,hlcgobj;
  86. {*****************************************************************************
  87. TX86INLINENODE
  88. *****************************************************************************}
  { Hook invoked from the round/trunc/frac first-pass handlers before they
    pick an expected location. Intentionally empty here. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { only makes a difference for x86_64 }
    end;
  { Type-checks the x86-only intrinsics (port I/O, CLI/STI and segment
    register reads) and sets their result type. Always returns nil for those;
    every other inline number is forwarded to the inherited implementation. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { statements: no result value }
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        { port reads: one parameter, result type depends on the width }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { segment register reads }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86-only intrinsics: intrinsics that yield a value
    expect a register, the statement-like ones expect no location at all.
    Unknown inline numbers are handled by the inherited method. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;
        { result delivered in an integer register }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;
        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi: generated inline (result in an FPU register) only
    when the best real type is s80real; otherwise the inherited handling
    is used. }
  function tx86inlinenode.first_pi: tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_pi;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for arctan: generated inline (result in an FPU register) when
    the best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_arctan_real: tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for abs on reals: always generated inline, in an MM register
    when the vector FPU is used for the result type and in an FPU register
    otherwise. }
  function tx86inlinenode.first_abs_real: tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for sqr on reals: always generated inline, in an MM register
    when the vector FPU is used for the result type and in an FPU register
    otherwise. }
  function tx86inlinenode.first_sqr_real: tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for sqrt on reals: always generated inline, in an MM register
    when the vector FPU is used for the result type and in an FPU register
    otherwise. }
  function tx86inlinenode.first_sqrt_real: tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for ln: generated inline (result in an FPU register) only
    when the best real type is s80real; otherwise the inherited handling
    is used. }
  function tx86inlinenode.first_ln_real: tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_ln_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for cos: generated inline (result in an FPU register) when
    the best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_cos_real: tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for sin: generated inline (result in an FPU register) when
    the best real type is s80real, otherwise left to the inherited method. }
  function tx86inlinenode.first_sin_real: tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for round: always generated inline. The result is expected
    in memory, except on x86_64 with a vector-FPU operand where it is
    expected in an integer register. }
  function tx86inlinenode.first_round_real: tnode;
    begin
      Result:=nil;
      { must run before left.resultdef is inspected below }
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for trunc. When optimising for size (and, on x86_64, the
    operand does not use the vector FPU) the inherited handling is used;
    otherwise the node is generated inline with the same expected location
    rules as round. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      { must run before left.resultdef is inspected below }
      maybe_remove_round_trunc_typeconv;
      if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
         and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
         then
        begin
          Result:=inherited first_trunc_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for popcnt: generated inline (result in a register) when the
    selected CPU type has the POPCNT capability and, on i386, the operand is
    not 64 bit. Everything else, including all of i8086, is delegated to the
    inherited method. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
         then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for fma: generated inline (result in an MM register) when
    the selected CPU type has FMA or FMA4 and the result is single or
    double. Everything else, including all of i8086, is delegated to the
    inherited method. }
  function tx86inlinenode.first_fma: tnode;
    begin
{$ifndef i8086}
      if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac: generated inline (result in an MM register) when
    the FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited handling is used. }
  function tx86inlinenode.first_frac_real: tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (is_double(resultdef) or is_single(resultdef))) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int: generated inline (result in an MM register) when
    the FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited handling is used. }
  function tx86inlinenode.first_int_real: tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (is_double(resultdef) or is_single(resultdef))) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { Simplification hook. For int() with an FPU type of at least SSE4.1 whose
    operand is a non-explicit type conversion of a single or double value,
    the conversion node is dropped and a re-typechecked copy of this node,
    typed like the unconverted operand, is returned. All other cases are
    forwarded to the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      convsource : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
             not(nf_explicit in left.flags) and
             (ttypeconvnode(left).left.resultdef.typ=floatdef) and
             (is_double(ttypeconvnode(left).left.resultdef) or
              is_single(ttypeconvnode(left).left.resultdef))) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach its operand first so that
        freeing the conversion node does not free the operand as well }
      convsource:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=convsource;

      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=convsource.resultdef;
      typecheckpass(Result);
    end;
  { Emits code for the x86-only intrinsics handled by this node: port input
    and output, CLI/STI and reading a segment register. Any other inline
    number is passed on to the inherited code generator. }
  procedure tx86inlinenode.pass_generate_code_cpu;

    { Reads a port into dreg with IN and copies the value into a newly
      allocated result register. A constant port number in 0..255 is used
      as an immediate operand; any other port number is loaded into DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port: tnode;
      begin
        port:=left;
        secondpass(port);
        if (port.location.loc=LOC_CONSTANT) and
           (port.location.value>=0) and
           (port.location.value<=255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,port.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { common tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Loads the data operand into dreg and writes it to the port with OUT.
      The port number is taken from the second entry of the parameter list
      and the data from the first; the data is evaluated first. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port, data: tnode;
      begin
        port:=tcallparanode(tcallparanode(left).right).left;
        data:=tcallparanode(left).left;
        secondpass(data);
        secondpass(port);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,data.resultdef,dtype,data.location,dreg);
        if (port.location.loc=LOC_CONSTANT) and
           (port.location.value>=0) and
           (port.location.value<=255) then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,port.location.value))
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Copies segment register segreg into a newly allocated integer
      register, which becomes this node's location. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    begin
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);
        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);
        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Generates pi with FLDPI; the result is located in the FPU result
    register and the tracked FPU stack depth is increased by one. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Evaluates lnode and loads its value into an FPU register, which becomes
    the location of this node. Raises internalerror(309991) when lnode ends
    up in a location kind that is not handled here. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to load }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the operand's location and force it into an FPU register }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Generates arctan on the x87: loads the operand, pushes 1.0 with FLD1
    and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      { operand goes onto the FPU stack first }
      load_fpu_location(left);
      { then the constant 1.0, followed by the actual instruction }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Generates abs for reals. On the vector FPU the operand is ANDed with the
    external mask symbol FPC_ABSMASK_SINGLE or FPC_ABSMASK_DOUBLE; on the
    x87 FABS is used. Raises internalerror(200506081) for a vector-FPU
    result that is neither s32real nor s64real. }
  procedure tx86inlinenode.second_abs_real;

    { true when the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      maskref : treference;
      masksym : tasmsymbol;
      maskname : string;
      sseop,avxop : tasmop;
    begin
      { x87 path }
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          { three-operand form: the result goes to a register of its own }
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        { two-operand form: the operand register is modified in place }
        location:=left.location;

      { pick mask symbol and opcodes for the float type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            avxop:=A_VANDPS;
            sseop:=A_ANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            avxop:=A_VANDPD;
            sseop:=A_ANDPD;
          end;
        else
          internalerror(200506081);
      end;

      masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(maskref,masksym,0,4,[]);
      current_module.add_extern_asmsym(masksym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,maskref,location.register));
    end;
  { Generates round. On x86_64 with a vector-FPU operand a (V)CVTSS2SI or
    (V)CVTSD2SI into a 64 bit integer register is emitted; otherwise the
    operand is loaded onto the x87 stack and stored to a temp with FISTP.
    Raises internalerror(2007031402) for an unexpected operand size. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvtop : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { select the conversion opcode for operand size and AVX usage }
          case left.location.size of
            OS_F32:
              if UseAVX then
                cvtop:=A_VCVTSS2SI
              else
                cvtop:=A_CVTSS2SI;
            OS_F64:
              if UseAVX then
                cvtop:=A_VCVTSD2SI
              else
                cvtop:=A_CVTSD2SI;
            else
              internalerror(2007031402);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        end
      else
{$endif x86_64}
        begin
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          { FISTP pops the value, so the tracked stack depth goes down }
          emit_ref(A_FISTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          emit_none(A_FWAIT,S_NO);
        end;
    end;
  { Generates trunc. Three strategies are used:
      - x86_64 with a vector-FPU operand: (V)CVTTSS2SI / (V)CVTTSD2SI into a
        64 bit integer register;
      - FPU type >= SSE3: x87 load followed by FISTTP into a temp;
      - otherwise: the x87 control word is saved, modified, used for a FISTP
        into a temp and restored afterwards.
    Raises internalerror(2007031401) for an unexpected operand size. }
  procedure tx86inlinenode.second_trunc_real;
    var
      oldcw,newcw : treference;
{$ifdef x86_64}
      cvtop : tasmop;
{$endif x86_64}
    begin
{$ifdef x86_64}
      { NOTE(review): left.location is inspected here before secondpass(left)
        has been called in this procedure - confirm this is intended }
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { select the truncating conversion opcode }
          case left.location.size of
            OS_F32:
              if UseAVX then
                cvtop:=A_VCVTTSS2SI
              else
                cvtop:=A_CVTTSS2SI;
            OS_F64:
              if UseAVX then
                cvtop:=A_VCVTTSD2SI
              else
                cvtop:=A_CVTTSD2SI;
            else
              internalerror(2007031401);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        end
      else
{$endif x86_64}
        begin
          if (current_settings.fputype>=fpu_sse3) then
            begin
              load_fpu_location(left);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
            end
          else
            begin
              { two 2-byte temps: the original control word and the patched copy }
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
              if current_settings.cputype<=cpu_286 then
                begin
                  emit_ref(A_FSTCW,S_NO,newcw);
                  emit_ref(A_FSTCW,S_NO,oldcw);
                  emit_none(A_FWAIT,S_NO);
                end
              else
{$endif i8086}
                begin
                  emit_ref(A_FNSTCW,S_NO,newcw);
                  emit_ref(A_FNSTCW,S_NO,oldcw);
                end;
              { set bits 8..11 of the copy (x87 precision and rounding control) }
              emit_const_ref(A_OR,S_W,$0f00,newcw);
              load_fpu_location(left);
              emit_ref(A_FLDCW,S_NO,newcw);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
              { back to the caller's control word }
              emit_ref(A_FLDCW,S_NO,oldcw);
              emit_none(A_FWAIT,S_NO);
              tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
              tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
            end;
        end;
    end;
  { Generates sqr for reals as a multiplication of the operand with itself:
    a scalar MM multiply on the vector FPU, FMUL ST0,ST0 on the x87. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      { x87 path }
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          { three-operand multiply straight into the result register }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end
      else
        begin
          { an operand living in an x87 register is moved to an MM register first }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          { copy the operand into the result register and multiply it with itself }
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates sqrt for reals: (V)SQRTSS / (V)SQRTSD on the vector FPU,
    FSQRT on the x87. Raises internalerror(200510031) for a vector-FPU
    result that is neither s32real nor s64real. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrtop : tasmop;
    begin
      { x87 path }
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

      case tfloatdef(resultdef).floattype of
        s32real:
          if UseAVX then
            sqrtop:=A_VSQRTSS
          else
            sqrtop:=A_SQRTSS;
        s64real:
          if UseAVX then
            sqrtop:=A_VSQRTSD
          else
            sqrtop:=A_SQRTSD;
        else
          internalerror(200510031);
      end;

      if UseAVX then
        { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
        { using left.location.register here as 2nd parameter is crucial to break dependency chains }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtop,S_NO,left.location.register,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtop,S_NO,left.location.register,location.register));
    end;
  { Generates ln on the x87: loads the operand, pushes the FLDLN2 constant,
    swaps the two top stack entries with FXCH and emits FYL2X. }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      { constant on top of the operand ... }
      emit_none(A_FLDLN2,S_NO);
      { ... then exchanged, so the operand is on top for FYL2X }
      emit_none(A_FXCH,S_NO);
      emit_none(A_FYL2X,S_NO);
    end;
  { Generates cos on the x87 with FCOS; on i8086 with a CPU type below 386
    the inherited code generation is used instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          inherited second_cos_real;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FCOS,S_NO);
    end;
  { Generates sin on the x87 with FSIN; on i8086 with a CPU type below 386
    the inherited code generation is used instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype<cpu_386 then
        begin
          inherited second_sin_real;
          exit;
        end;
{$endif i8086}
      load_fpu_location(left);
      emit_none(A_FSIN,S_NO);
    end;
  { Generates a PREFETCHNTA for the address of the operand. On i386/i8086
    nothing is generated unless the CPU type is at least Pentium3. Operands
    that are not located in memory produce no code.

    While the operand is evaluated the cs_checkpointer local switch is turned
    off on its subtree, so that no pointer check is generated for an address
    that is only prefetched. The switch is turned back on afterwards; the
    previous code passed "false" a second time, which left it disabled. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch that was disabled above }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { load the effective address into a register and prefetch
                  through a simple base-register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates branch-free code for abs() of an integer value.

    With CMOV available: result := x; tmp := -x; if the negation is not
    negative, tmp is conditionally moved into the result.
    Without CMOV (only checked on i8086/i386 builds): the sign mask
    m := x sar (bits-1) is used to compute (x xor m) - m. }
  procedure tx86inlinenode.second_abs_long;
    var
      negated : tregister;
      valsize : tcgsize;
      cmov : taicpu;
    begin
      { both code paths start by sizing and evaluating the operand }
      valsize:=def_cgsize(left.resultdef);
      secondpass(left);
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          { left's register gets overwritten with the sign mask below,
            so a constant register is not acceptable here (maybeconst=false) }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
          { result := x }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,location.register);
          { mask := x sar (bitsize-1): all ones for negative x, zero otherwise }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,valsize,tcgsize2size[valsize]*8-1,left.location.register);
          { result := (result xor mask) - mask }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,valsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,valsize,left.location.register,location.register);
        end
      else
{$endif i8086 or i386}
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negated:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,valsize);
          { two copies of x: one to negate, one as the default result }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,negated);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,valsize,valsize,left.location.register,location.register);
          { NEG sets the sign flag according to -x ... }
          emit_reg(A_NEG,tcgsize2opsize[valsize],negated);
          { ... and CMOVNS takes -x only if it is not negative }
          cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[valsize],negated,location.register);
          cmov.condition:=C_NS;
          current_asmdata.CurrAsmList.concat(cmov);
        end;
    end;
  831. {*****************************************************************************
  832. INCLUDE/EXCLUDE GENERIC HANDLING
  833. *****************************************************************************}
  { Code generation for include(set,elem) / exclude(set,elem).

    - constant element: the bit mask is computed at compile time and applied
      with OR (include) or AND-with-inverted-mask (exclude);
    - variable element: BTS (include) or BTR (exclude) is emitted with the
      element in a register.
    On i8086 builds with a cpu type below cpu_386 the inherited
    implementation is used. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      bitreg,
      destreg : tregister;
      setbase : aint;
      bitsperop,mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
        begin
          inherited;
          exit;
        end;
{$endif i8086}
      { small sets are handled in their own size, everything else in 32 bit units }
      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size);
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=8*tcgsize2size[opsize];

      { first parameter: the set, second parameter: the element }
      secondpass(tcallparanode(left).left);
      secondpass(tcallparanode(tcallparanode(left).right).left);
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc<>LOC_CONSTANT then
        begin
          { element only known at run time -> bit test and set/reset }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              { the 8 bit case is widened to 32 bit for BTS/BTR }
              opdef:=u32inttype;
              opsize:=OS_32;
            end;

          { determine asm operator }
          if inlinenumber<>in_include_x_y then
            asmop:=A_BTR
          else
            asmop:=A_BTS;

          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;

          if setnode.location.loc<>LOC_REFERENCE then
            begin
              { second argument can't be an 8 bit register either }
              destreg:=setnode.location.register;
              if orgsize in [OS_8,OS_S8] then
                destreg:=cg.makeregsize(current_asmdata.CurrAsmList,destreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,destreg);
            end
          else
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference);
        end
      else
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);

          { determine operator }
          if inlinenumber<>in_include_x_y then
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end
          else
            cgop:=OP_OR;

          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the unit that contains the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CREGISTER :
              cg.a_op_const_reg(current_asmdata.CurrAsmList,cgop,setnode.location.size,mask,setnode.location.register);
            else
              internalerror(200405022);
          end;
        end;
    end;
  { Emits a POPCNT instruction for "left"; the result is placed in a newly
    allocated integer register. The operation is done on the unsigned
    variant of the operand size, 8 bit operands are widened to 16 bit. }
  procedure tx86inlinenode.second_popcnt;
    var
      cntsize : tcgsize;
      operand_usable : boolean;
    begin
      secondpass(left);

      cntsize:=tcgsize2unsigned[left.location.size];
      { no 8 bit popcnt }
      if cntsize=OS_8 then
        cntsize:=OS_16;

      { the operand can be used as is if it is a register or memory location
        which already has the operation size }
      operand_usable:=(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) and
        (left.location.size=cntsize);
      if not(operand_usable) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cntsize),true);

      location_reset(location,LOC_REGISTER,cntsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cntsize);

      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[cntsize],left.location.reference,location.register);
      end;
    end;
  { Code generation for the fused multiply-add intrinsic using the scalar
    VFMADD/VFMSUB/VFNMADD/VFNMSUB instructions.

    paraarray[1] and paraarray[2] are the factors, paraarray[3] is the addend.
    Unary minus nodes on the parameters are not evaluated; instead the
    matching negating instruction variant is selected via the op table.

    The op table is indexed by
      [product negated, third operand negated, float type, form]
    where form 1..3 is the number of the parameter used as memory operand and
    forms 0/3 are also used for the register-only variants.

    Raises internalerror(2014032301) if neither FMA nor FMA4 is available
    (always on i8086 builds). }
  procedure tx86inlinenode.second_fma;
    const
      op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
         { positive product }
         (
          { positive third operand }
          ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
           (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
          ),
          { negative third operand }
          ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
           (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
          )
         ),
         { negative product }
         (
          { positive third operand }
          ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
           (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
          ),
          { negative third operand }
          ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
           (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
          )
         )
        );
    var
      paraarray : array[1..3] of tnode;
      para : tcallparanode;
      memop,
      accidx,
      srcidx,
      facidx,
      form,
      i : integer;
      negop3,
      negproduct : boolean;
    begin
{$ifndef i8086}
      if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;

          { the parameter list is linked starting with the last parameter }
          para:=tcallparanode(parameters);
          paraarray[3]:=para.paravalue;
          para:=tcallparanode(para.nextpara);
          paraarray[2]:=para.paravalue;
          para:=tcallparanode(para.nextpara);
          paraarray[1]:=para.paravalue;

          { check if a neg. node can be removed: a negated factor flips the
            sign of the product, a negated third operand turns the add into
            a subtract.

            The skipped unary minus nodes are not released here, they are kept
            and released together with the other nodes, only no code is
            generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: the first parameter located
            in memory is kept there (memop<>0), everything else which is not
            yet in an mm register is loaded into one }
          memop:=0;
          for i:=1 to 3 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (memop=0) and (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  memop:=i
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if memop<>0 then
            begin
              { accidx: parameter copied into the destination register,
                facidx: parameter used as register source operand,
                the memory operand is paraarray[memop] }
              case memop of
                1,2:
                  begin
                    accidx:=3;
                    facidx:=3-memop;
                  end;
                3:
                  begin
                    accidx:=1;
                    facidx:=2;
                  end
                else
                  internalerror(2014041301);
              end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                paraarray[memop].location.reference,paraarray[facidx].location.register,location.register);
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=1;
                  srcidx:=3;
                  facidx:=2;
                  form:=3;
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  accidx:=2;
                  srcidx:=3;
                  facidx:=1;
                  form:=3;
                end
              else
                begin
                  accidx:=3;
                  srcidx:=1;
                  facidx:=2;
                  form:=0;
                end;
              hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[accidx].resultdef,resultdef,
                paraarray[accidx].location.register,location.register,mms_movescalar);
              emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,form],S_NO,
                paraarray[srcidx].location.register,paraarray[facidx].location.register,location.register);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  { Code generation for frac() on the vector FPU: the argument is rounded
    with rounding-control immediate 3 (truncate) using (V)ROUNDSS/(V)ROUNDSD
    and the rounded value is then combined with the argument by a
    (V)SUBSS/(V)SUBSD.

    Raises an internalerror if the result type is not handled by the vector
    FPU or is neither s32real nor s64real. }
  procedure tx86inlinenode.second_frac_real;
    var
      truncreg : TRegister;
      roundop,
      subop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_VROUNDSS;
                    subop:=A_VSUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_VROUNDSD;
                    subop:=A_VSUBSD;
                  end;
                else
                  internalerror(2017052102);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(subop,S_NO,location.register,left.location.register,location.register));
            end
          else
            begin
              { two-operand SSE form: the result register starts out as a copy
                of the argument, the truncated value goes to a scratch register }
              truncreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_ROUNDSS;
                    subop:=A_SUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_ROUNDSD;
                    subop:=A_SUBSD;
                  end;
                else
                  internalerror(2017052103);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,truncreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(subop,S_NO,truncreg,location.register));
            end;
        end
      else
        internalerror(2017052101);
    end;
  { Code generation for int() on the vector FPU: a single
    (V)ROUNDSS/(V)ROUNDSD with rounding-control immediate 3 (truncate)
    writes the result into a newly allocated mm register.

    Raises an internalerror if the result type is not handled by the vector
    FPU or is neither s32real nor s64real. }
  procedure tx86inlinenode.second_int_real;
    var
      roundop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
            end;
        end
      else
        internalerror(2017052107);
    end;
  { Code generation for high() of a dynamic array.

    The array pointer is decremented by one. For a nil array this yields -1
    in the register and sets the carry flag, in which case the load is
    skipped and -1 remains as result. Otherwise the value located
    ossinttype.size bytes in front of the array data is loaded.

    Raises internalerror(2019122801) if "left" is not a dynamic array. }
  procedure tx86inlinenode.second_high;
    var
      skiplabel : tasmlabel;
      resreg : tregister;
      hdrref : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122801);

      { the pointer register is modified below, so it must not be a constant register }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(skiplabel);

      { by subtracting 1 here, we get the -1 into the register we need if the
        dyn. array is nil, and the carry flag is set in this case, so we can
        jump depending on it }
      hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,left.resultdef,1,left.location.register);
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,skiplabel);

      { the value is read from offset -ossinttype.size relative to the
        original pointer; the +1 compensates for the decrement above.
        The reference is created without any volatility flags. }
      hlcg.reference_reset_base(hdrref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);

      { re-use the register which held the array pointer for the result, so
        the -1 computed above is kept if the load is skipped }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,hdrref,resreg);

      cg.a_label(current_asmdata.CurrAsmList,skiplabel);

      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  1201. end.