nllvminl.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  type
    { Inline node class for the LLVM code generator: overrides a selection of
      the first pass and second pass handlers inherited from tcginlinenode. }
    tllvminlinenode = class(tcginlinenode)
     protected
      { strips an implicit type conversion from a floating point argument
        (see the implementation for the rationale) }
      procedure maybe_remove_round_trunc_typeconv;

      { first pass handlers }
      function first_get_frame: tnode; override;
      function first_abs_real: tnode; override;
      function first_bitscan: tnode; override;
      function first_fma: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_popcnt: tnode; override;
     public
      { second pass handlers }
      procedure second_length; override;
      procedure second_high; override;
      procedure second_sqr_real; override;
      procedure second_trunc_real; override;
    end;
  41. implementation
  42. uses
  43. verbose,globals,globtype,constexp,cutils,
  44. aasmbase, aasmdata,
  45. symconst,symtype,symdef,defutil,
  46. compinnr,
  47. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  48. pass_2,
  49. cgbase,cgutils,tgobj,hlcgobj,
  50. cpubase,
  51. llvmbase,aasmllvm;
  { Drops an implicit type conversion wrapped around a floating point
    argument, so that "left" becomes the original, unconverted operand. }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      conv: ttypeconvnode;
      operand: tnode;
    begin
      { the prototype of trunc()/round() in the system unit is declared
        with valreal as parameter type, so the argument will always be
        extended -> remove the typeconversion to extended if any; not done
        in ninl, because there are other code generators that assume that
        the parameter to trunc has been converted to valreal (e.g. PowerPC).

        (copy from code in nx64inl, should be refactored)
      }

      { nothing to do unless left is an implicit type conversion ... }
      if left.nodetype<>typeconvn then
        exit;
      if nf_explicit in left.flags then
        exit;
      conv:=ttypeconvnode(left);
      { ... whose operand already is a floating point value }
      if conv.left.resultdef.typ<>floatdef then
        exit;

      { get rid of the type conversion, so the use_vectorfpu will be
        applied to the original type; detach the operand first so that
        freeing the conversion node does not free the operand as well }
      operand:=conv.left;
      conv.left:=nil;
      conv.free;
      left:=operand;
    end;
  { Replaces get_frame by a call to the 'llvm_frameaddress' helper with the
    integer constant 0 as its only argument. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      levelpara: tcallparanode;
    begin
      { NOTE(review): 0 presumably selects the current frame -- confirm
        against the declaration of llvm_frameaddress }
      levelpara:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',levelpara);
    end;
  { in general, generate regular (non-intrinsic) expressions rather than
    intrinsics: according to LLVM's "Performance Tips for Frontend Authors",
    "The optimizer is quite good at reasoning about general control flow and
    arithmetic, it is not anywhere near as strong at reasoning about the
    various intrinsics. If profitable for code generation purposes, the
    optimizer will likely form the intrinsics itself late in the optimization
    pipeline." }
  { Expands abs() of a floating point value into an explicit node tree:

      argtemp:=left;
      if argtemp>=0 then
        abstemp:=argtemp
      else
        abstemp:=-argtemp;

    and returns the statement block, whose value is abstemp. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      abstemp: ttempcreatenode;
      stat: tstatementnode;
      isnonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      result:=internalstatements(stat);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stat,argtemp);
      addstatement(stat,abstemp);

      { argtemp:=left }
      addstatement(stat,
        cassignmentnode.create(ctemprefnode.create(argtemp),left)
      );

      { argtemp>=0 }
      isnonnegative:=
        caddnode.create(
          gten,
          ctemprefnode.create(argtemp),
          crealconstnode.create(0.0,left.resultdef)
        );
      { abstemp:=argtemp }
      keepvalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          ctemprefnode.create(argtemp)
        );
      { abstemp:=-argtemp }
      negatevalue:=
        cassignmentnode.create(
          ctemprefnode.create(abstemp),
          cunaryminusnode.create(ctemprefnode.create(argtemp))
        );
      addstatement(stat,cifnode.create(isnonnegative,keepvalue,negatevalue));

      addstatement(stat,ctempdeletenode.create(argtemp));
      addstatement(stat,ctempdeletenode.create_normal_temp(abstemp));
      { the value of the block is abstemp }
      addstatement(stat,ctemprefnode.create(abstemp));

      { left has been moved into the new tree }
      left:=nil;
    end;
  { Expands a bit scan (in_bsr_x uses LLVM_CTLZ, everything else LLVM_CTTZ)
    into:

      if left<>0 then
        result:=llvm_ctlz/cttz(unsigned(left),true)
      else
        result:=255;

    For in_bsr_x the count is additionally xor'ed with (bit size-1). }
  function tllvminlinenode.first_bitscan: tnode;
    var
      argdef: tdef;
      restemp,
      argtemp: ttempcreatenode;
      stat: tstatementnode;
      block: tblocknode;
      bitindex,
      paras: tnode;
      intrinsic: string[15];
      reversescan: boolean;
    begin
      reversescan:=inlinenumber=in_bsr_x;
      if reversescan then
        intrinsic:='LLVM_CTLZ'
      else
        intrinsic:='LLVM_CTTZ';

      argdef:=left.resultdef;
      block:=internalstatements(stat);
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stat,restemp);
      { left is used twice below (intrinsic argument and zero test), so let
        the helper replace it by a temp if needed; argtemp stays nil if no
        temp was created }
      argtemp:=maybereplacewithtemp(left,block,stat,argdef.size,true);

      { parameter chain: the constant 1 (llvmbool1type) followed by the
        argument converted to its unsigned counterpart.
        NOTE(review): the constant is presumably LLVM's "result is undefined
        for zero" flag, which is why zero is handled separately -- confirm }
      paras:=
        ccallparanode.create(
          ctypeconvnode.create_explicit(left,get_unsigned_inttype(argdef)),nil
        );
      paras:=
        ccallparanode.create(cordconstnode.create(1,llvmbool1type,false),paras);
      bitindex:=ccallnode.createintern(intrinsic,paras);

      { ctlz returns the number of leading zero bits, while bsr returns the bit
        number of the first non-zero bit (with the least significant bit as 0)
        -> invert result }
      if reversescan then
        bitindex:=
          caddnode.create(xorn,
            bitindex,
            genintconstnode(argdef.size*8-1)
          );

      addstatement(stat,
        cifnode.create(
          caddnode.create(unequaln,left.getcopy,genintconstnode(0)),
          { left<>0: result is the computed bit index }
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            bitindex
          ),
          { left=0: result is 255 }
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            genintconstnode(255)
          )
        )
      );

      if assigned(argtemp) then
        addstatement(stat,ctempdeletenode.create(argtemp));
      addstatement(stat,ctempdeletenode.create_normal_temp(restemp));
      { the value of the block is restemp }
      addstatement(stat,ctemprefnode.create(restemp));

      { left has been moved into the new tree }
      left:=nil;
      result:=block;
    end;
  { Replaces a fused multiply-add inline by a call to an LLVM helper.

    With cs_opt_fastmath the type specific llvm_fma_* routine is called with
    the original parameters. Otherwise LLVM_EXPERIMENTAL_CONSTRAINED_FMA is
    called, with two extra metadata string parameters ('round.dynamic' and
    'fpexcept.strict') chained in front of the original parameters.

    Raises an internal error for an unexpected inline number. }
  function tllvminlinenode.first_fma: tnode;
    var
      callname: string[40];
      paras: tnode;
    begin
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          { same routine for all types; the case only validates the inline
            number }
          case inlinenumber of
            in_fma_single,
            in_fma_double,
            in_fma_extended,
            in_fma_float128:
              callname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
            else
              internalerror(2019122811);
          end;
          { build the chain inside out: 'fpexcept.strict' -> 'round.dynamic'
            -> original parameters }
          paras:=
            ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
              left
            );
          paras:=
            ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
              paras
            );
        end
      else
        begin
          case inlinenumber of
            in_fma_single:
              callname:='llvm_fma_f32';
            in_fma_double:
              callname:='llvm_fma_f64';
            in_fma_extended:
              callname:='llvm_fma_f80';
            in_fma_float128:
              callname:='llvm_fma_f128';
            else
              internalerror(2018122101);
          end;
          paras:=left;
        end;
      result:=ccallnode.createintern(callname,paras);
      { left has been moved into the call node }
      left:=nil;
    end;
  { sqr() of a real is not rewritten (result is nil); only the expected
    location is chosen, depending on whether the operand type uses the
    vector fpu. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      result:=nil;
      if not use_vectorfpu(left.resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { Replaces sqrt() of a real by a call to an LLVM helper.

    With cs_opt_fastmath the type specific llvm_sqrt_* routine is called.
    Otherwise LLVM_EXPERIMENTAL_CONSTRAINED_SQRT is called, with two extra
    metadata string parameters ('round.dynamic' and 'fpexcept.strict')
    chained in front of the operand.

    Raises an internal error if the operand is not a float or has an
    unexpected float type. }
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      callname: string[40];
      paras: tnode;
    begin
      if left.resultdef.typ<>floatdef then
        internalerror(2018121601);
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          { same routine for all types; the case only validates the float
            type }
          case tfloatdef(left.resultdef).floattype of
            s32real,
            s64real,
            s80real,sc80real,
            s128real:
              callname:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
            else
              internalerror(2019122810);
          end;
          { build the chain inside out: 'fpexcept.strict' -> 'round.dynamic'
            -> operand }
          paras:=ccallparanode.create(left,nil);
          paras:=
            ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
              paras
            );
          paras:=
            ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
              paras
            );
        end
      else
        begin
          case tfloatdef(left.resultdef).floattype of
            s32real:
              callname:='llvm_sqrt_f32';
            s64real:
              callname:='llvm_sqrt_f64';
            s80real,sc80real:
              callname:='llvm_sqrt_f80';
            s128real:
              callname:='llvm_sqrt_f128';
            else
              internalerror(2018121602);
          end;
          paras:=ccallparanode.create(left,nil);
        end;
      result:=ccallnode.createintern(callname,paras);
      { left has been moved into the call node }
      left:=nil;
    end;
  { Handles trunc() in this node only when cs_opt_fastmath is enabled; in
    all other cases the inherited implementation is used. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      { fptosi is undefined if the value is out of range -> only generate
        in case of fastmath }
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited first_trunc_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      { keep this node, it is handled by second_trunc_real }
      result:=nil;
    end;
  { Replaces popcnt by a call to 'LLVM_CTPOP' on the operand, with the
    call's result converted to this node's result type. }
  function tllvminlinenode.first_popcnt: tnode;
    var
      popcall: tnode;
    begin
      popcall:=ccallnode.createintern('LLVM_CTPOP',ccallparanode.create(left,nil));
      result:=ctypeconvnode.create(popcall,resultdef);
      { left has been moved into the call node }
      left:=nil;
    end;
  { Generates code for length(): reuses second_high and, for dynamic arrays,
    adds one to the value it produced. }
  procedure tllvminlinenode.second_length;
    var
      lenreg: tregister;
    begin
      second_high;
      { Dynamic arrays do not have their length attached but their maximum
        index; nothing more to do for the other types }
      if not is_dynamic_array(left.resultdef) then
        exit;
      { length = high+1, computed into a fresh register }
      lenreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,location.register,lenreg);
      location.register:=lenreg;
    end;
  { Generates code that loads the length/high field of a string or dynamic
    array.

    Shortstrings: the result is a reference to the start of the string,
    retyped to the result type (the length byte).

    Other types: the operand is treated as a pointer; if it is nil the
    result is -1 for dynamic arrays and 0 otherwise, else the field stored
    just before the data is loaded (and halved for widestrings). }
  procedure tllvminlinenode.second_high;
    var
      list: TAsmList;
      donelab, nillab: tasmlabel;
      resreg: tregister;
      lenref: treference;
      lendef: tdef;
      lenptrdef,
      resptrdef: tpointerdef;
      iswide: boolean;
      nilvalue: longint;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          resptrdef:=cpointerdef.getreusable(resultdef);
          { typecast the shortstring reference into a length byte reference }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          resreg:=hlcg.getaddressregister(list,resptrdef);
          hlcg.a_loadaddr_ref_reg(list,left.resultdef,resptrdef,left.location.reference,resreg);
          hlcg.reference_reset_base(location.reference,resptrdef,resreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      { length in ansi/wide strings and high in dynamic arrays is at offset
        -sizeof(sizeint), for widestrings it's at -4 }
      iswide:=is_widestring(left.resultdef);
      if iswide then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(list,left.location,
        left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(nillab);
      current_asmdata.getjumplabel(donelab);
      { nil pointer -> no length field to load }
      hlcg.a_cmp_const_reg_label(list,lenptrdef,OC_EQ,0,left.location.register,nillab);

      { volatility of the ansistring/widestring refers to the volatility of the
        string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      resreg:=hlcg.getintregister(list,resultdef);
      hlcg.a_load_ref_reg(list,lendef,resultdef,lenref,resreg);
      { the loaded widestring field is halved
        (presumably a byte count -> character count; confirm) }
      if iswide then
        hlcg.a_op_const_reg(list,OP_SHR,resultdef,1,resreg);
      hlcg.a_jmp_always(list,donelab);

      { nil case: high of an empty dynamic array is -1, length of an empty
        string is 0 }
      hlcg.a_label(list,nillab);
      if is_dynamic_array(left.resultdef) then
        nilvalue:=-1
      else
        nilvalue:=0;
      hlcg.a_load_const_reg(list,resultdef,nilvalue,resreg);

      hlcg.a_label(list,donelab);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  { Generates code for sqr() of a real: forces the operand into an mm or fpu
    register (according to expectloc) and emits an fmul of that register
    with itself into a newly allocated result register. }
  procedure tllvminlinenode.second_sqr_real;
    var
      list: TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;
      location.loc:=expectloc;
      if expectloc<>LOC_MMREGISTER then
        begin
          hlcg.location_force_fpureg(list,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(list,resultdef);
        end
      else
        begin
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(list,resultdef);
        end;
      { result:=left*left }
      list.concat(
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        )
      );
    end;
  { Generates code for trunc() of a real: forces the operand into an mm or
    fpu register and emits an fptosi instruction that converts it into a
    newly allocated register of the result type. }
  procedure tllvminlinenode.second_trunc_real;
    var
      list: TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;
      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(list,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(list,resultdef);
      list.concat(
        taillvm.op_reg_size_reg_size(la_fptosi,location.register,left.resultdef,left.location.register,resultdef)
      );
    end;
  begin
    { make tllvminlinenode the class referenced by cinlinenode }
    cinlinenode:=tllvminlinenode;
  end.