nx86inl.pas 75 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 flavour of the code generator inline node (tcginlinenode).  It
    overrides the first-pass and second-pass handlers of the intrinsics that
    this unit implements itself. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { hook invoked by the round/trunc/frac first-pass handlers; the
      implementation in this unit is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu: tnode; override;

    { first pass overrides, so that the code generator will actually
      generate these nodes }
    function first_cpu: tnode; override;
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real: tnode; override;
    function first_int_real: tnode; override;
    function first_minmax: tnode; override;
    function simplify(forinline : boolean) : tnode; override;

    { second pass overrides, emitting the code for these nodes }
    procedure pass_generate_code_cpu; override;
    procedure second_IncludeExclude; override;
    procedure second_AndOrXorShiftRot_assign; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_popcnt; override;
    procedure second_fma; override;
    procedure second_frac_real; override;
    procedure second_int_real; override;
    procedure second_high; override;
    procedure second_minmax; override;
  private
    { runs the second pass on lnode and brings its value into the x87
      result register }
    procedure load_fpu_location(lnode: tnode);
  end;
  74. implementation
  75. uses
  76. systems,
  77. globtype,globals,
  78. verbose,compinnr,fmodule,
  79. defutil,
  80. aasmbase,aasmdata,aasmcpu,
  81. symconst,symtype,symdef,symcpu,
  82. ncnv,
  83. htypechk,
  84. cgbase,pass_1,pass_2,
  85. cpuinfo,cpubase,nutils,
  86. ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
  87. tgobj,
  88. cga,cgutils,cgx86,cgobj,hlcgobj,cutils;
  89. {*****************************************************************************
  90. TX86INLINENODE
  91. *****************************************************************************}
  { Default implementation of the virtual hook: does nothing.  It is called
    from first_round_real, first_trunc_real and first_frac_real. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { intentionally empty: only makes a difference for x86_64 }
    end;
  { Type-check pass for the x86-specific intrinsics: validates the parameter
    count where needed and sets resultdef.  Intrinsics not listed here (nor in
    the included x86mmtype.inc) are forwarded to the inherited handler.
    Returns nil unless the inherited handler returns something else. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads: one parameter, result type depends on the intrinsic }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { port writes: two parameters, no result }
        in_x86_outportb, in_x86_outportw, in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        in_x86_cli, in_x86_sti:
          resultdef:=voidtype;

        { segment register reads: 16 bit result on i8086, 32 bit otherwise }
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
  {$ifdef i8086}
          resultdef:=u16inttype;
  {$else i8086}
          resultdef:=s32inttype;
  {$endif i8086}

        { include automatically generated code }
        {$i x86mmtype.inc}
        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86-specific intrinsics: only records the expected
    result location.  Anything not handled here or in the included
    x86mmfirst.inc is delegated to the inherited handler. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics that produce a value in an integer register }
        in_x86_inportb, in_x86_inportw, in_x86_inportl,
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
          expectloc:=LOC_REGISTER;

        { intrinsics without a result }
        in_x86_outportb, in_x86_outportw, in_x86_outportl,
        in_x86_cli, in_x86_sti:
          expectloc:=LOC_VOID;

        { include automatically generated code }
        {$i x86mmfirst.inc}
        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for Pi: handled inline (x87 register result) only when the
    best real type is the 80 bit extended type; otherwise the inherited
    handler decides. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_pi;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for ArcTan: handled inline on the x87 when the best real type
    is s80real, otherwise (and on pre-386 i8086 targets) left to the
    inherited handler. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
  {$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
  {$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_arctan_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for Abs on reals: always handled inline; the result lives in
    an MM register when the vector FPU is used for resultdef, else in an x87
    register. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Sqr on reals: always handled inline; MM register result
    when the vector FPU is used for resultdef, x87 register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Sqrt on reals: always handled inline; MM register result
    when the vector FPU is used for resultdef, x87 register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for Ln: handled inline on the x87 only when the best real type
    is s80real; otherwise the inherited handler decides. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_ln_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for Cos: handled inline on the x87 when the best real type is
    s80real, otherwise (and on pre-386 i8086 targets) left to the inherited
    handler. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
  {$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
  {$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_cos_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for Sin: handled inline on the x87 when the best real type is
    s80real, otherwise (and on pre-386 i8086 targets) left to the inherited
    handler. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
  {$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
  {$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited first_sin_real;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for Round: always handled inline.  On x86_64 with a vector FPU
    operand the result is expected in an integer register, in every other
    case in memory. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      Result:=nil;
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER
      else
  {$endif x86_64}
        expectloc:=LOC_REFERENCE;
    end;
  { First pass for Trunc.  When optimising for size (and, on x86_64, only if
    the operand is not handled by the vector FPU) the inherited handler is
    used; otherwise the node is generated inline with the same expected
    locations as Round. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      maybe_remove_round_trunc_typeconv;
      if (cs_opt_size in current_settings.optimizerswitches)
  {$ifdef x86_64}
         and not(use_vectorfpu(left.resultdef))
  {$endif x86_64}
         then
        begin
          Result:=inherited first_trunc_real;
          exit;
        end;

      Result:=nil;
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER
      else
  {$endif x86_64}
        expectloc:=LOC_REFERENCE;
    end;
  { First pass for PopCnt: handled inline (register result) when the selected
    CPU type has POPCNT and, on i386, the operand is not 64 bit.  On i8086
    and in all remaining cases the inherited handler is used. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
  {$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
  {$ifdef i386}
         and not is_64bit(left.resultdef)
  {$endif i386}
         then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
  {$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for FMA: handled inline (MM register result) when the selected
    FPU type offers FMA or FMA4 and the result is single or double; on i8086
    and otherwise the inherited handler is used. }
  function tx86inlinenode.first_fma : tnode;
    begin
  {$ifndef i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
  {$endif i8086}
      Result:=inherited first_fma;
    end;
  { First pass for Frac: handled inline (MM register result) for single and
    double results when the FPU type is at least SSE4.1; otherwise the
    inherited handler is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (is_double(resultdef) or is_single(resultdef))) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for Int: handled inline (MM register result) for single and
    double results when the FPU type is at least SSE4.1; otherwise the
    inherited handler is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if not((current_settings.fputype>=fpu_sse41) and
             (is_double(resultdef) or is_single(resultdef))) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for Min/Max.
    - Floating point (not on i8086): MM register result for single/double; on
      i386 this additionally requires an FPU type of at least SSE (single)
      or SSE2 (double).
    - Integer: register result for 32 bit integers (and 64 bit integers on
      x86_64); outside x86_64 the selected CPU type must have CMOV.
    Everything else goes to the inherited handler. }
  function tx86inlinenode.first_minmax: tnode;
    begin
  {$ifndef i8086}
      if
  {$ifdef i386}
         ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
         ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  {$else i386}
         ((is_double(resultdef)) or (is_single(resultdef)))
  {$endif i386}
         then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
  {$endif i8086}

      if
  {$ifndef x86_64}
         (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  {$endif x86_64}
         (
  {$ifdef x86_64}
           is_64bitint(resultdef) or
  {$endif x86_64}
           is_32bitint(resultdef)
         ) then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;

      Result:=inherited first_minmax;
    end;
  { Simplification hook.  With an FPU type of at least SSE4.1, an Int() whose
    operand is a non-explicit type conversion of a single or double value is
    rewritten to work on the unconverted operand: the conversion node is
    removed and a re-typechecked copy of this node, carrying the operand's
    type as result type, is returned.  All other cases are forwarded to the
    inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      unconverted : tnode;
    begin
      { the order of the tests matters: left must be known to be a
        typeconvn before it is cast to ttypeconvnode }
      if not((current_settings.fputype>=fpu_sse41) and
             (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
             not(nf_explicit in left.flags) and
             (ttypeconvnode(left).left.resultdef.typ=floatdef) and
             ((is_double(ttypeconvnode(left).left.resultdef)) or
              (is_single(ttypeconvnode(left).left.resultdef)))) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { detach the operand from the conversion node, then drop the
        conversion node itself }
      unconverted:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=unconverted;

      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=unconverted.resultdef;
      typecheckpass(Result);
    end;
  { Second pass (code generation) for the x86-specific intrinsics: port I/O,
    CLI/STI and segment register reads.  Further cases come from the included
    x86mmsecond.inc; anything else is passed to the inherited handler.

    NOTE(review): paraarray, i, op and the helpers GetConstInt, GetParameters,
    location_force_mmxreg and location_make_ref are not referenced by the code
    visible here; presumably they are used by x86mmsecond.inc - confirm before
    removing or renaming any of them. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN instruction reading into dreg (operand size dsize, value
      type dtype) and moves the value into a fresh integer register that
      becomes this node's location. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode: tnode;
      begin
        portnode:=left;
        secondpass(portnode);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          begin
            { constant port in 0..255: pass it as an immediate operand }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnode.location.value,dreg));
          end
        else
          begin
            { any other port number is loaded into DX first }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;

        { common tail: copy the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Emits an OUT instruction writing dreg (operand size dsize, value type
      dtype).  The data node is tcallparanode(left).left, the port number
      node is the left of the following parameter. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode, datanode: tnode;
      begin
        portnode:=tcallparanode(tcallparanode(left).right).left;
        datanode:=tcallparanode(left).left;
        secondpass(datanode);
        secondpass(portnode);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,datanode.resultdef,dtype,datanode.location,dreg);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          { constant port in 0..255: pass it as an immediate operand }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnode.location.value))
        else
          begin
            { any other port number is loaded into DX first }
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Moves segment register segreg into a fresh integer register, which
      becomes this node's location. }
    procedure get_segreg(segreg:tregister);
      begin
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
      end;

    { Returns the value of n if it is a constant integer node; otherwise
      reports type_e_constant_expr_expected and returns 0. }
    function GetConstInt(n: tnode): longint;
      begin
        if is_constintnode(n) then
          Result:=tordconstnode(n).value.svalue
        else
          begin
            Message(type_e_constant_expr_expected);
            Result:=0;
          end;
      end;

    { Fills paraarray[1..count] from the parameter list hanging off left.
      The list is walked via nextpara while the array index runs from count
      down to 1.  A single parameter that is not wrapped in a tcallparanode
      is stored directly. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and
           (not (left is tcallparanode)) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        idx:=count;
        while idx>=1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
            dec(idx);
          end;
      end;

    { Makes sure l is held in an MMX register.  A LOC_CMMXREGISTER location
      is left untouched when maybeconst is set. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference.  Reference locations are kept as
      they are; a register location becomes a reference using that register
      as base; every other location is first forced into a 32 bit unsigned
      register. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);

        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);

        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));

        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);

        {$i x86mmsecond.inc}
        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Code generation for the "x op= y" intrinsics.  Only one pattern is
    special-cased (not on i8086, optimisation level 2, selected CPU type has
    BMI2):

        x and= ((1 shl n) - 1)

    with a 32 bit (or, on x86_64, 64 bit) x is emitted as a single BZHI.
    Every other case is handled by the inherited implementation. }
  procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
  {$ifndef i8086}
    var
      opsize : tcgsize;
      maskexpr, bitcountnode, targetnode: TNode;
      resultreg: TRegister;
  {$endif i8086}
    begin
  {$ifndef i8086}
      if (cs_opt_level2 in current_settings.optimizerswitches) then
        begin
          { name the operands once to avoid repeating the typecasts:
            maskexpr is the value operand, targetnode the variable that is
            modified }
          maskexpr := tcallparanode(left).left;
          targetnode := tcallparanode(tcallparanode(left).right).left;
          opsize := def_cgsize(targetnode.resultdef);

          { BMI2 optimisations }
          if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
             (inlinenumber=in_and_assign_x_y) then
            begin
              { if the value operand is "((1 shl n) - 1)", the whole
                operation can be done by BZHI }
              if (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
                 (maskexpr.nodetype = subn) and
                 (taddnode(maskexpr).right.nodetype = ordconstn) and
                 (tordconstnode(taddnode(maskexpr).right).value = 1) and
                 (taddnode(maskexpr).left.nodetype = shln) and
                 (tshlshrnode(taddnode(maskexpr).left).left.nodetype = ordconstn) and
                 (tordconstnode(tshlshrnode(taddnode(maskexpr).left).left).value = 1) then
                begin
                  { skip the subtract and shift nodes completely: only the
                    shift count is evaluated }
                  bitcountnode := tshlshrnode(taddnode(maskexpr).left).right;
  {$ifdef x86_64}
                  { the shift count is sometimes extended to 64 bit
                    unnecessarily }
                  if (bitcountnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
                     (def_cgsize(TTypeConvNode(bitcountnode).resultdef) in [OS_64, OS_S64]) then
                    begin
                      { convert to the 32 bit type and redo the first pass }
                      bitcountnode.resultdef:=targetnode.resultdef;
                      node_reset_flags(bitcountnode,[],[tnf_pass1_done]);
                      { we shouldn't be getting any new errors }
                      if do_firstpass(bitcountnode) then
                        InternalError(2022110202);
                      { keep the tree consistent in case the first pass
                        replaced the node }
                      tshlshrnode(taddnode(maskexpr).left).right:=bitcountnode;
                    end;
  {$endif x86_64}
                  secondpass(bitcountnode);
                  secondpass(targetnode);

                  { the bit count has to be in a register }
                  hlcg.location_force_reg(
                    current_asmdata.CurrAsmList,
                    bitcountnode.location,
                    bitcountnode.resultdef,
                    targetnode.resultdef,
                    false
                  );

                  case targetnode.location.loc of
                    LOC_REFERENCE,
                    LOC_CREFERENCE:
                      begin
                        { BZHI can only write to a register, so go through a
                          temporary register and store it back }
                        resultreg := cg.getintregister(current_asmdata.CurrAsmList,opsize);
                        emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], bitcountnode.location.register, targetnode.location.reference, resultreg);
                        emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], resultreg, targetnode.location.reference);
                      end;
                    LOC_REGISTER,
                    LOC_CREGISTER:
                      emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], bitcountnode.location.register, targetnode.location.register, targetnode.location.register);
                    else
                      InternalError(2022102120);
                  end;
                  Exit;
                end;
            end;
        end;
  {$endif not i8086}
      inherited second_AndOrXorShiftRot_assign;
    end;
  { Code generation for Pi: emits FLDPI, records the extra x87 stack entry
    and reports the FPU result register as location. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Runs the second pass on lnode and loads its value into an FPU register,
    which becomes this node's location.

    - LOC_FPUREGISTER: nothing is emitted here
    - LOC_CFPUREGISTER, LOC_(C)REFERENCE: loaded into NR_FPU_RESULT_REG
    - LOC_(C)MMREGISTER: the operand's location is taken over and forced
      into an FPU register
    Any other location is an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { no load is emitted for this case }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Code generation for ArcTan on the x87: loads the operand, pushes 1.0
    (FLD1) and emits FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { FLD1 provides the second operand for FPATAN }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Code generation for Abs on reals.

    Vector FPU: the operand is ANDed with the mask stored under the external
    symbol FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE.  With AVX the
    three-operand form writes into a new register, otherwise the operand
    register is modified in place and reused as result.
    x87: the operand is loaded and FABS is emitted. }
  procedure tx86inlinenode.second_abs_real;

    { decides whether the mask symbol is referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      href : treference;
      sym : tasmsymbol;
      maskname : string;
      avxop, sseop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;

      { select mask symbol and opcodes for the float type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            avxop:=A_VANDPS;
            sseop:=A_ANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            avxop:=A_VANDPD;
            sseop:=A_ANDPD;
          end;
        else
          internalerror(200506081);
      end;

      sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(href,sym,0,4,[]);
      current_module.add_extern_asmsym(sym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,href,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,href,location.register));
    end;
  { Code generation for Round.

    x86_64 with a vector FPU operand: (V)CVTSS2SI / (V)CVTSD2SI into a 64 bit
    integer register.
    Otherwise: the operand is loaded onto the x87 stack and stored with
    FISTP into a temp, which becomes the (reference) location. }
  procedure tx86inlinenode.second_round_real;
    begin
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          if UseAVX then
            { AVX encodings }
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031402);
            end
          else
            { SSE encodings }
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031404);
            end;
        end
      else
  {$endif x86_64}
        begin
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          { FISTP pops the x87 stack, hence the dec_fpu_stack }
          emit_ref(A_FISTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          emit_none(A_FWAIT,S_NO);
        end;
    end;
  { Code generation for Trunc.

    x86_64 with a vector FPU operand (unless the operand sits in an x87
    register and the FPU type is at least SSE3): (V)CVTTSS2SI / (V)CVTTSD2SI
    into a 64 bit integer register.
    x87 with an FPU type of at least SSE3: FISTTP into a temp.
    x87 otherwise: the control word is saved, a copy with bits $0f00 set is
    loaded, the value is stored with FISTP and the saved control word is
    restored. }
  procedure tx86inlinenode.second_trunc_real;
    var
      saved_cw,trunc_cw : treference;
    begin
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          if UseAVX then
            { AVX encodings }
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031401);
            end
          else
            { SSE encodings }
            case left.location.size of
              OS_F32:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
              OS_F64:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
              else
                internalerror(2007031403);
            end;
        end
      else
  {$endif x86_64}
        begin
          if current_settings.fputype<fpu_sse3 then
            begin
              { two 2-byte temps: the original control word and the
                modified copy }
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,saved_cw);
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunc_cw);
  {$ifdef i8086}
              if current_settings.cputype<=cpu_286 then
                begin
                  emit_ref(A_FSTCW,S_NO,trunc_cw);
                  emit_ref(A_FSTCW,S_NO,saved_cw);
                  emit_none(A_FWAIT,S_NO);
                end
              else
  {$endif i8086}
                begin
                  emit_ref(A_FNSTCW,S_NO,trunc_cw);
                  emit_ref(A_FNSTCW,S_NO,saved_cw);
                end;
              { set bits 8..11 of the copy (per the x87 control word layout
                these are the precision and rounding control fields) }
              emit_const_ref(A_OR,S_W,$0f00,trunc_cw);
              load_fpu_location(left);
              emit_ref(A_FLDCW,S_NO,trunc_cw);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
              { restore the original control word }
              emit_ref(A_FLDCW,S_NO,saved_cw);
              emit_none(A_FWAIT,S_NO);
              tg.UnGetTemp(current_asmdata.CurrAsmList,saved_cw);
              tg.UnGetTemp(current_asmdata.CurrAsmList,trunc_cw);
            end
          else
            begin
              { FISTTP needs no control word juggling }
              load_fpu_location(left);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
            end;
        end;
    end;
  { Code generation for Sqr on reals.

    Vector FPU: the operand is multiplied with itself into a new MM
    register; with AVX via the three-operand form, otherwise by copying the
    operand into the result register and multiplying that register by
    itself.
    x87: the operand is loaded and ST0 is multiplied by ST0. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            left.location.register,left.location.register,location.register,mms_movescalar);
        end
      else
        begin
          { an operand held in an x87 register is moved to an MM register
            before it is loaded into the result register }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,
            location.register,location.register,mms_movescalar);
        end;
    end;
  { Code generation for Sqrt on reals.

    Vector FPU: (V)SQRTSS / (V)SQRTSD from the operand's MM register into a
    new MM register.
    x87: the operand is loaded and FSQRT is emitted. }
  procedure tx86inlinenode.second_sqrt_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if UseAVX then
        { Notes for both AVX variants:
          - S_NO is used instead of S_XMM, regardless of the register size,
            as the size of the memory location is 32/64 bit
          - using left.location.register as 2nd parameter is crucial to
            break dependency chains }
        case tfloatdef(resultdef).floattype of
          s32real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,
              left.location.register,left.location.register,location.register));
          s64real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,
              left.location.register,left.location.register,location.register));
          else
            internalerror(200510031);
        end
      else
        case tfloatdef(resultdef).floattype of
          s32real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
          s64real:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
          else
            internalerror(2005100303);
        end;
    end;
  { Generates x87 code for ln(x). The emitted sequence is
      fldln2 ; fxch ; fyl2x
    i.e. ln(2) is pushed, swapped below the operand, and fyl2x then
    computes st(1)*log2(st(0)) = ln(2)*log2(x). }
  procedure tx86inlinenode.second_ln_real;
    begin
      { bring the operand onto the FPU stack }
      load_fpu_location(left);
      { push the constant ln(2) }
      emit_none(A_FLDLN2,S_NO);
      { operand back on top, ln(2) below it }
      emit_none(A_FXCH,S_NO);
      emit_none(A_FYL2X,S_NO);
    end;
  { Generates x87 code for cos(x) using the fcos instruction. On i8086
    targets with a cpu type below cpu_386 the inherited implementation is
    used instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Generates x87 code for sin(x) using the fsin instruction. On i8086
    targets with a cpu type below cpu_386 the inherited implementation is
    used instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Generates a prefetchnta for the memory location designated by left.
    On i386/i8086 nothing is emitted unless the selected cpu type is at
    least cpu_Pentium3. Operands that do not end up in a (C)REFERENCE
    location are silently ignored. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: temporarily clear the
            switch on the operand tree while its code is generated }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { restore the switch that was cleared above (the previous code
            passed false here again, which made this call a no-op) }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { compute the effective address into a register and prefetch
                  through a plain base-register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates code for abs() on an integer operand.
    i8086/i386 only: 64 bit operands are delegated to the inherited
    implementation, and cpus without CMOV get a branch free
    sar/xor/sub sequence. All other cases use neg followed by cmovns. }
  procedure tx86inlinenode.second_abs_long;

    { When overflow checking is enabled, emits a jump on F_NO over a call
      to FPC_OVERFLOW. Relies on the flags set by the instruction emitted
      just before. }
    procedure emit_overflow_check;
      var
        skiplab : TAsmLabel;
      begin
        if not(cs_check_overflow in current_settings.localswitches) then
          exit;
        current_asmdata.getjumplabel(skiplab);
        cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,skiplab);
        cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
        cg.a_label(current_asmdata.CurrAsmList,skiplab);
      end;

    var
      negreg : tregister;
      opsize : tcgsize;
      cmov : taicpu;
    begin
{$if defined(i8086) or defined(i386)}
      if is_64bitint(resultdef) then
        inherited
      else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          { the result starts out as a copy of the operand }
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { arithmetic shift by (bits-1) turns the operand register into a sign mask;
            result := (value xor mask) - mask }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          emit_overflow_check;
        end
      else
{$endif i8086 or i386}
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { two copies of the operand: one gets negated, the other is the result }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
          emit_overflow_check;
          { take the negated copy when the sign flag is clear after the neg }
          cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
          cmov.condition:=C_NS;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          current_asmdata.CurrAsmList.concat(cmov);
        end;
    end;
  1042. {*****************************************************************************
  1043. INCLUDE/EXCLUDE GENERIC HANDLING
  1044. *****************************************************************************}
  { Generates code for include(s,e) / exclude(s,e).
    A constant element is handled with an or / and of a bit mask, a
    variable element with bts / btr. Some combinations are delegated to
    the inherited generic implementation (see the first condition). }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize: tcgsize;
    begin
      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
{$else i8086}
      { bts on memory locations is very slow, so even the default code is faster }
      if not(cs_opt_size in current_settings.optimizerswitches) and (elemnode.expectloc<>LOC_CONSTANT) and
         (setnode.expectloc=LOC_REFERENCE) then
{$endif i8086}
        begin
          inherited;
          exit;
        end;

      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;

      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { bit mask of the element inside its operand sized chunk }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { include sets the bit, exclude clears it }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the chunk which holds the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CSUBSETREG,
            LOC_CREGISTER :
              hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,setnode.resultdef,mask,setnode.location);
            else
              internalerror(200405022);
          end;
        end
      else
        begin
          { 8 bit operands are widened to 32 bit for the bit test instruction }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end;
    end;
  { Generates a popcnt instruction for left. 8 bit operands are processed
    as 16 bit values; if the result type is one byte wide, the result
    register is narrowed to 8 bit afterwards. }
  procedure tx86inlinenode.second_popcnt;
    var
      opsize: tcgsize;
    begin
      secondpass(left);

      { no 8 bit popcnt: use the 16 bit form instead }
      if tcgsize2unsigned[left.location.size]=OS_8 then
        opsize:=OS_16
      else
        opsize:=tcgsize2unsigned[left.location.size];

      { the operand can be used as is only if it is a register or memory
        location which already has the operation size }
      if not((left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) and
             (left.location.size=opsize)) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);

      location_reset(location,LOC_REGISTER,opsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);

      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
      end;

      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  { Generates a scalar fused multiply-add for fma(a,b,c) = a*b+c.
    Unary minus nodes directly on the operands are folded into the choice
    of the instruction (fmadd/fmsub/fnmadd/fnmsub). At most one operand is
    used as memory operand. Raises internal error 2014032301 if the
    selected fpu type supports neither FMA nor FMA4 (always on i8086). }
  procedure tx86inlinenode.second_fma;
{$ifndef i8086}
    const
      { indices: product negated, third operand negated, float type,
        operand form (1..3: number of the memory operand, 0/3 are also
        used for the register-only forms below) }
      op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { positive product }
          (
            { positive third operand }
            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { negative third operand }
            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { negative product }
          (
            { positive third operand }
            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { negative third operand }
            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );

    var
      paraarray : array[1..3] of tnode;
      memop,
      i : integer;
      negop3,
      negproduct,
      gotmem : boolean;
{$endif i8086}
    begin
{$ifndef i8086}
      if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          { the parameter list is walked from its head: the head holds the
            third operand, the last entry the first one }
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[3]:=tcallparanode(parameters).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the skipped unary minus nodes are not released here, they are
            kept and released together with the other nodes, only no code
            is generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  { a minus on the addend selects the "sub" variant }
                  negop3:=true
                else
                  { a minus on either factor flips the sign of the product }
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed }
          gotmem:=false;
          memop:=0;

          { in case parameters come on the FPU stack, we have to pop them in reverse order as we
            called secondpass }
          for i:=3 downto 1 do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              case memop of
                1,2:
                  begin
                    { one factor is in memory: the destination starts as the
                      addend, the other factor (number 3-memop) is the
                      register operand }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                      paraarray[3].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                  end;
                3:
                  begin
                    { the addend is in memory: the destination starts as the
                      first factor }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                      paraarray[1].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                      paraarray[3].location.reference,paraarray[2].location.register,location.register);
                  end
                else
                  internalerror(2014041301);
              end;
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[2].location.register,location.register);
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                    paraarray[2].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[1].location.register,location.register);
                end
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                    paraarray[3].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
                    paraarray[1].location.register,paraarray[2].location.register,location.register);
                end;
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  { Generates vector unit code for frac(x) as x minus x rounded with
    immediate 3 (truncation), or as a single vreduce instruction when
    AVX-512DQ is available. Raises internal error 2017052101 if the result
    type is not handled by the vector unit. }
  procedure tx86inlinenode.second_frac_real;
    var
      truncreg : TRegister;
      roundop,
      subop : tasmop;
{$ifndef i8086}
      reduceop : tasmop;
{$endif not i8086}
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              { pick the instructions matching the operand type }
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
{$ifndef i8086}
                    reduceop:=A_VREDUCESS;
{$endif not i8086}
                    roundop:=A_VROUNDSS;
                    subop:=A_VSUBSS;
                  end;
                s64real:
                  begin
{$ifndef i8086}
                    reduceop:=A_VREDUCESD;
{$endif not i8086}
                    roundop:=A_VROUNDSD;
                    subop:=A_VSUBSD;
                  end;
                else
                  internalerror(2017052102);
              end;
{$ifndef i8086}
              if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(reduceop,S_NO,3,left.location.register,left.location.register,location.register))
              else
{$endif not i8086}
                begin
                  { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(subop,S_NO,location.register,left.location.register,location.register));
                end;
            end
          else
            begin
              { SSE: the result register receives x, truncreg the rounded
                value which is then subtracted }
              truncreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    roundop:=A_ROUNDSS;
                    subop:=A_SUBSS;
                  end;
                s64real:
                  begin
                    roundop:=A_ROUNDSD;
                    subop:=A_SUBSD;
                  end;
                else
                  internalerror(2017052103);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,truncreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(subop,S_NO,truncreg,location.register));
            end;
          { convert in place if operand and result float types differ }
          if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
        end
      else
        internalerror(2017052101);
    end;
  { Generates vector unit code for int(x) using a scalar round instruction
    with immediate 3 (truncation). Raises internal error 2017052107 if the
    result type is not handled by the vector unit. }
  procedure tx86inlinenode.second_int_real;
    var
      roundop : tasmop;
    begin
      if use_vectorfpu(resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,left.location.register,left.location.register,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,left.location.register,location.register));
            end;
        end
      else
        internalerror(2017052107);
    end;
  { Generates code for high() of a dynamic array. A nil array yields -1
    without touching memory; otherwise the value is loaded from the word
    located ossinttype.size bytes in front of the array data.
    Raises internal error 2019122809 for any other operand type. }
  procedure tx86inlinenode.second_high;
    var
      nillab : tasmlabel;
      resreg : tregister;
      hdrref : treference;
      ressize : tcgsize;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);
      ressize:=def_cgsize(resultdef);

      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(nillab);

      { subtracting 1 from the array pointer turns nil into -1, which is
        the wanted result for an empty array, and sets the carry flag in
        exactly this case so that we can branch on it.
        The code generator helpers are deliberately not used here: they
        might emit dec instead of sub, and dec does not touch the carry
        flag }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,nillab);

      { the +1 in the offset compensates for the decrement done above; the
        reference is created with an empty volatility set }
      hlcg.reference_reset_base(hdrref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);

      { reuse the register that held the array pointer for the result, so
        that the -1 produced for a nil array is kept when the load is
        skipped }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,ressize);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,hdrref,resreg);
      cg.a_label(current_asmdata.CurrAsmList,nillab);

      location_reset(location,LOC_REGISTER,ressize);
      location.register:=resreg;
    end;
  1444. procedure tx86inlinenode.second_minmax;
  1445. {$ifndef i8086}
  1446. const
  1447. oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
  1448. (
  1449. (
  1450. (A_MINSS,A_MINSD),
  1451. (A_VMINSS,A_VMINSD)
  1452. ),
  1453. (
  1454. (A_MAXSS,A_MAXSD),
  1455. (A_VMAXSS,A_VMAXSD)
  1456. )
  1457. );
  1458. {$endif i8086}
  1459. var
  1460. {$ifndef i8086}
  1461. memop : integer;
  1462. gotmem : boolean;
  1463. op: TAsmOp;
  1464. {$endif i8086}
  1465. i : integer;
  1466. paraarray : array[1..2] of tnode;
  1467. instr: TAiCpu;
  1468. opsize: topsize;
  1469. finalval: TCgInt;
  1470. tmpreg: TRegister;
  1471. begin
  1472. {$ifndef i8086}
  1473. if
  1474. {$ifdef i386}
  1475. ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
  1476. ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
  1477. {$else i386}
  1478. is_single(resultdef) or is_double(resultdef)
  1479. {$endif i386}
  1480. then
  1481. begin
  1482. paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1483. paraarray[2]:=tcallparanode(parameters).paravalue;
  1484. for i:=low(paraarray) to high(paraarray) do
  1485. secondpass(paraarray[i]);
  1486. { only one memory operand is allowed }
  1487. gotmem:=false;
  1488. memop:=0;
  1489. for i:=low(paraarray) to high(paraarray) do
  1490. begin
  1491. if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1492. begin
  1493. if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
  1494. begin
  1495. memop:=i;
  1496. gotmem:=true;
  1497. end
  1498. else
  1499. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
  1500. end;
  1501. end;
  1502. { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
  1503. one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
  1504. if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
  1505. begin
  1506. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
  1507. gotmem:=false;
  1508. end;
  1509. op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
  1510. location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
  1511. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  1512. if gotmem then
  1513. begin
  1514. if UseAVX then
  1515. case memop of
  1516. 1:
  1517. emit_ref_reg_reg(op,S_NO,
  1518. paraarray[1].location.reference,paraarray[2].location.register,location.register);
  1519. 2:
  1520. emit_ref_reg_reg(op,S_NO,
  1521. paraarray[2].location.reference,paraarray[1].location.register,location.register);
  1522. else
  1523. internalerror(2020120504);
  1524. end
  1525. else
  1526. case memop of
  1527. 1:
  1528. begin
  1529. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
  1530. paraarray[2].location.register,location.register,mms_movescalar);
  1531. emit_ref_reg(op,S_NO,
  1532. paraarray[1].location.reference,location.register);
  1533. end;
  1534. 2:
  1535. begin
  1536. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1537. paraarray[1].location.register,location.register,mms_movescalar);
  1538. emit_ref_reg(op,S_NO,
  1539. paraarray[2].location.reference,location.register);
  1540. end;
  1541. else
  1542. internalerror(2020120601);
  1543. end;
  1544. end
  1545. else
  1546. begin
  1547. if UseAVX then
  1548. emit_reg_reg_reg(op,S_NO,
  1549. paraarray[2].location.register,paraarray[1].location.register,location.register)
  1550. else
  1551. begin
  1552. hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
  1553. paraarray[1].location.register,location.register,mms_movescalar);
  1554. emit_reg_reg(op,S_NO,
  1555. paraarray[2].location.register,location.register)
  1556. end;
  1557. end;
  1558. end
  1559. else
  1560. {$endif i8086}
  1561. if
  1562. {$ifndef x86_64}
  1563. (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
  1564. {$endif x86_64}
  1565. (
  1566. {$ifdef x86_64}
  1567. is_64bitint(resultdef) or
  1568. {$endif x86_64}
  1569. is_32bitint(resultdef)
  1570. ) then
  1571. begin
  1572. { paraarray[1] is the right-hand side }
  1573. paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
  1574. paraarray[2]:=tcallparanode(parameters).paravalue;
  1575. for i:=low(paraarray) to high(paraarray) do
  1576. secondpass(paraarray[i]);
  1577. if paraarray[2].location.loc = LOC_CONSTANT then
  1578. begin
  1579. { Swap the parameters so the constant is on the right }
  1580. paraarray[2]:=paraarray[1];
  1581. paraarray[1]:=tcallparanode(parameters).paravalue;
  1582. end;
  1583. if not(paraarray[1].location.loc in [LOC_CONSTANT,LOC_REFERENCE,LOC_CREFERENCE,LOC_REGISTER,LOC_CREGISTER]) then
  1584. hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[1].location,
  1585. paraarray[1].resultdef,paraarray[1].resultdef,true);
  1586. if not(paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE,LOC_REGISTER,LOC_CREGISTER]) then
  1587. hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
  1588. paraarray[2].resultdef,paraarray[2].resultdef,true);
  1589. location_reset(location,LOC_REGISTER,paraarray[1].location.size);
  1590. location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
  1591. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,paraarray[1].location,location.register);
  1592. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  1593. {$ifdef x86_64}
  1594. if is_64bitint(resultdef) then
  1595. opsize := S_Q
  1596. else
  1597. {$endif x86_64}
  1598. opsize := S_L;
  1599. { Try to use references as is, unless they would trigger internal
  1600. error 200502052 }
  1601. if (cs_create_pic in current_settings.moduleswitches) and
  1602. (paraarray[1].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and
  1603. Assigned(paraarray[1].location.reference.symbol) then
  1604. hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[1].location,
  1605. paraarray[1].resultdef,paraarray[1].resultdef,true);
  1606. { Try to use references as is, unless they would trigger internal
  1607. error 200502052 }
  1608. if (cs_create_pic in current_settings.moduleswitches) and
  1609. (paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and
  1610. Assigned(paraarray[2].location.reference.symbol) then
  1611. hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
  1612. paraarray[2].resultdef,paraarray[2].resultdef,true);
  1613. case paraarray[1].location.loc of
  1614. LOC_CONSTANT:
  1615. case paraarray[2].location.loc of
  1616. LOC_REFERENCE,LOC_CREFERENCE:
  1617. begin
  1618. {$ifdef x86_64}
  1619. { x86_64 only supports signed 32 bits constants directly }
  1620. if (opsize=S_Q) and
  1621. ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
  1622. begin
  1623. tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
  1624. hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
  1625. emit_reg_ref(A_CMP,opsize,tmpreg,paraarray[2].location.reference);
  1626. end
  1627. else
  1628. {$endif x86_64}
  1629. emit_const_ref(A_CMP,opsize,paraarray[1].location.value,paraarray[2].location.reference);
  1630. emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
  1631. instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
  1632. end;
  1633. LOC_REGISTER,LOC_CREGISTER:
  1634. begin
  1635. {$ifdef x86_64}
  1636. { x86_64 only supports signed 32 bits constants directly }
  1637. if (opsize=S_Q) and
  1638. ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
  1639. begin
  1640. tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
  1641. hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
  1642. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
  1643. tmpreg,paraarray[2].location.register));
  1644. end
  1645. else
  1646. {$endif x86_64}
  1647. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,opsize,
  1648. paraarray[1].location.value,paraarray[2].location.register));
  1649. instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
  1650. current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
  1651. end;
  1652. else
  1653. InternalError(2021121907);
  1654. end;
  1655. LOC_REFERENCE,LOC_CREFERENCE:
  1656. case paraarray[2].location.loc of
  1657. LOC_REFERENCE,LOC_CREFERENCE:
  1658. begin
  1659. { The reference has already been stored at location.register, so use that }
  1660. emit_reg_ref(A_CMP,opsize,location.register,paraarray[2].location.reference);
  1661. emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
  1662. instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
  1663. end;
  1664. LOC_REGISTER,LOC_CREGISTER:
  1665. begin
  1666. emit_ref_reg(A_CMP,opsize,paraarray[1].location.reference,paraarray[2].location.register);
  1667. instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
  1668. current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
  1669. end;
  1670. else
  1671. InternalError(2021121906);
  1672. end;
  1673. LOC_REGISTER,LOC_CREGISTER:
  1674. case paraarray[2].location.loc of
  1675. LOC_REFERENCE,LOC_CREFERENCE:
  1676. begin
  1677. emit_reg_ref(A_CMP,opsize,paraarray[1].location.register,paraarray[2].location.reference);
  1678. emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
  1679. instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
  1680. end;
  1681. LOC_REGISTER,LOC_CREGISTER:
  1682. begin
  1683. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
  1684. paraarray[1].location.register,paraarray[2].location.register));
  1685. instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
  1686. current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
  1687. end;
  1688. else
  1689. InternalError(2021121905);
  1690. end;
  1691. else
  1692. InternalError(2021121904);
  1693. end;
  1694. case inlinenumber of
  1695. in_min_longint,
  1696. in_min_int64:
  1697. instr.condition := C_L;
  1698. in_min_dword,
  1699. in_min_qword:
  1700. instr.condition := C_B;
  1701. in_max_longint,
  1702. in_max_int64:
  1703. instr.condition := C_G;
  1704. in_max_dword,
  1705. in_max_qword:
  1706. instr.condition := C_A;
  1707. else
  1708. Internalerror(2021121903);
  1709. end;
  1710. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  1711. end
  1712. else
  1713. internalerror(2020120503);
  1714. end;
  1715. end.