nllvminl.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  type
    { Inline node used by the LLVM code generator. It descends from
      tcginlinenode and overrides a number of its first-pass handlers and
      second-pass code generators; it is installed as cinlinenode by this
      unit's initialization code. }
    tllvminlinenode = class(tcginlinenode)
     protected
      { first pass: handlers that replace the inline node by another node
        tree (or set up expectloc so that the second pass can handle it) }
      function first_get_frame: tnode; override;
      function first_abs_real: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_fma: tnode; override;
      function first_bitscan: tnode; override;
      function first_popcnt: tnode; override;
      { helper for first_trunc_real: strips an implicit floating point type
        conversion from the argument }
      procedure maybe_remove_round_trunc_typeconv;
     public
      { second pass: direct code generation }
      procedure second_length; override;
      procedure second_sqr_real; override;
      procedure second_trunc_real; override;
    end;
  40. implementation
  41. uses
  42. verbose,globals,globtype,constexp,
  43. aasmbase, aasmdata,
  44. symconst,symtype,symdef,defutil,
  45. compinnr,
  46. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  47. pass_2,
  48. cgbase,cgutils,tgobj,hlcgobj,
  49. cpubase,
  50. llvmbase,aasmllvm;
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      conv: ttypeconvnode;
      operand: tnode;
    begin
      { trunc()/round() are declared in the system unit with a valreal
        parameter, so the argument always gets converted to extended. Drop
        that conversion again when it is an implicit float->float one, so
        that use_vectorfpu is evaluated against the original type. This is
        not done in ninl because other code generators (e.g. PowerPC) rely on
        the parameter having been converted to valreal.
        (copied from nx64inl, should be refactored) }

      { only type conversion nodes are candidates }
      if left.nodetype<>typeconvn then
        exit;
      { conversions written explicitly by the programmer must be kept }
      if nf_explicit in left.flags then
        exit;
      conv:=ttypeconvnode(left);
      { the converted operand itself has to be a floating point value }
      if conv.left.resultdef.typ<>floatdef then
        exit;

      { detach the operand before freeing the conversion node, then let it
        take the conversion node's place }
      operand:=conv.left;
      conv.left:=nil;
      conv.free;
      left:=operand;
    end;
  function tllvminlinenode.first_get_frame: tnode;
    var
      levelpara: tnode;
    begin
      { replace get_frame by a call to the 'llvm_frameaddress' helper with
        the integer constant 0 as its only argument
        (NOTE(review): presumably the frame level, 0 = current frame --
        confirm against the helper's declaration) }
      levelpara:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',levelpara);
    end;
  { In general, generate regular expression nodes (ordinary arithmetic and
    control flow) rather than intrinsics: according to LLVM's "Performance
    Tips for Frontend Authors", "The optimizer is quite good at reasoning
    about general control flow and arithmetic, it is not anywhere near as
    strong at reasoning about the various intrinsics. If profitable for code
    generation purposes, the optimizer will likely form the intrinsics itself
    late in the optimization pipeline." }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      restemp: ttempcreatenode;
      stmts: tstatementnode;
      nonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      { Expands abs(left) into the following statement block:

          argtemp:=left;
          if argtemp>=0 then
            restemp:=argtemp
          else
            restemp:=-argtemp;
          <value of the block is restemp>
      }
      result:=internalstatements(stmts);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stmts,argtemp);
      addstatement(stmts,restemp);

      { argtemp:=left (the left node itself is moved into the assignment) }
      addstatement(stmts,
        cassignmentnode.create(ctemprefnode.create(argtemp),left));

      { argtemp>=0.0 }
      nonnegative:=
        caddnode.create(gten,
          ctemprefnode.create(argtemp),
          crealconstnode.create(0.0,left.resultdef));
      { restemp:=argtemp }
      keepvalue:=
        cassignmentnode.create(
          ctemprefnode.create(restemp),
          ctemprefnode.create(argtemp));
      { restemp:=-argtemp }
      negatevalue:=
        cassignmentnode.create(
          ctemprefnode.create(restemp),
          cunaryminusnode.create(ctemprefnode.create(argtemp)));
      addstatement(stmts,cifnode.create(nonnegative,keepvalue,negatevalue));

      addstatement(stmts,ctempdeletenode.create(argtemp));
      addstatement(stmts,ctempdeletenode.create_normal_temp(restemp));
      { the final statement yields restemp as the value of the block }
      addstatement(stmts,ctemprefnode.create(restemp));

      { left has been reused inside the new tree }
      left:=nil;
    end;
  function tllvminlinenode.first_bitscan: tnode;
    var
      argdef: tdef;
      restemp,
      argtemp: ttempcreatenode;
      stmts: tstatementnode;
      body: tblocknode;
      unsignedarg,
      bitcount: tnode;
      helpername: string[15];
      isbsr: boolean;
    begin
      { Expands the bit scan into:

          if left<>0 then
            result:=llvm_ctlz/cttz(unsigned(left),true)
          else
            result:=255;
      }
      isbsr:=inlinenumber=in_bsr_x;
      if isbsr then
        helpername:='LLVM_CTLZ'
      else
        helpername:='LLVM_CTTZ';

      argdef:=left.resultdef;
      body:=internalstatements(stmts);
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stmts,restemp);
      { left is used twice below (in the call and in the zero test); if a
        temp was created for it, it gets deleted again further down }
      argtemp:=maybereplacewithtemp(left,body,stmts,left.resultdef.size,true);

      { helper call on the argument reinterpreted as unsigned, together with
        a constant 1 of type llvmbool1type (the "true" of the pseudo-code) }
      unsignedarg:=ctypeconvnode.create_explicit(left,get_unsigned_inttype(argdef));
      bitcount:=
        ccallnode.createintern(
          helpername,
          ccallparanode.create(cordconstnode.create(1,llvmbool1type,false),
            ccallparanode.create(unsignedarg,nil)));

      { ctlz returns the number of leading zero bits, while bsr returns the
        bit number of the first non-zero bit (with the least significant bit
        as 0) -> invert result by xor'ing with (bitsize-1) }
      if isbsr then
        bitcount:=
          caddnode.create(xorn,
            bitcount,
            genintconstnode(argdef.size*8-1));

      { if left<>0 then restemp:=bitcount else restemp:=255 }
      addstatement(stmts,
        cifnode.create(
          caddnode.create(unequaln,left.getcopy,genintconstnode(0)),
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            bitcount),
          cassignmentnode.create(
            ctemprefnode.create(restemp),
            genintconstnode(255))));

      if assigned(argtemp) then
        addstatement(stmts,ctempdeletenode.create(argtemp));
      addstatement(stmts,ctempdeletenode.create_normal_temp(restemp));
      { the final statement yields restemp as the value of the block }
      addstatement(stmts,ctemprefnode.create(restemp));

      { left has been reused inside the new tree }
      left:=nil;
      result:=body;
    end;
  function tllvminlinenode.first_fma: tnode;
    var
      helpername: string[15];
    begin
      { select the helper that matches the precision of the fma inline;
        any other inline number reaching this handler is a compiler bug }
      case inlinenumber of
        in_fma_single:
          helpername:='llvm_fma_f32';
        in_fma_double:
          helpername:='llvm_fma_f64';
        in_fma_extended:
          helpername:='llvm_fma_f80';
        in_fma_float128:
          helpername:='llvm_fma_f128';
        else
          internalerror(2018122101);
      end;

      { left is handed over unchanged as the call's parameters and is
        therefore owned by the new call node from here on }
      result:=ccallnode.createintern(helpername,left);
      left:=nil;
    end;
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      { sqr() is handled in the second pass (second_sqr_real); here only the
        expected result location is chosen, based on whether the operand's
        type uses the vector fpu }
      if not use_vectorfpu(left.resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
      { nil = the node is not replaced by another one }
      result:=nil;
    end;
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      helpername: string[20];
      floatkind: tfloattype;
    begin
      { the operand must be a floating point value }
      if left.resultdef.typ<>floatdef then
        internalerror(2018121601);

      { select the helper that matches the operand's floating point type }
      floatkind:=tfloatdef(left.resultdef).floattype;
      case floatkind of
        s32real:
          helpername:='llvm_sqrt_f32';
        s64real:
          helpername:='llvm_sqrt_f64';
        s80real,sc80real:
          helpername:='llvm_sqrt_f80';
        s128real:
          helpername:='llvm_sqrt_f128';
        else
          internalerror(2018121602);
      end;

      { sqrt(left) becomes a call to the helper with left as its only
        parameter; left is owned by the new call node from here on }
      result:=ccallnode.createintern(helpername,ccallparanode.create(left,nil));
      left:=nil;
    end;
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      { fptosi is undefined if the value is out of range -> only generate it
        in case of fastmath; otherwise fall back to the inherited handling }
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited;
          exit;
        end;

      { fastmath: strip the implicit conversion of the argument and let
        second_trunc_real generate the code (nil = node is not replaced) }
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      result:=nil;
    end;
  function tllvminlinenode.first_popcnt: tnode;
    var
      popcall: tnode;
    begin
      { popcnt(left) becomes a call to the 'LLVM_CTPOP' helper whose result
        is converted to this node's result type; left is owned by the new
        call node from here on }
      popcall:=ccallnode.createintern('LLVM_CTPOP',ccallparanode.create(left,nil));
      result:=ctypeconvnode.create(popcall,resultdef);
      left:=nil;
    end;
  procedure tllvminlinenode.second_length;
    var
      donelab, nillab: tasmlabel;
      valreg: tregister;
      lenref, resulttmp: treference;
      lendef,
      lenptrdef,
      resptrdef: tdef;
    begin
      secondpass(left);

      { --- shortstrings: the length is the byte at the start of the string --- }
      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          { typecast the shortstring reference into a length byte reference:
            take the string's address as a pointer to the result type and
            make the result location a reference based on that pointer }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          resptrdef:=cpointerdef.getreusable(resultdef);
          valreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,resptrdef);
          hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.resultdef,resptrdef,left.location.reference,valreg);
          hlcg.reference_reset_base(location.reference,resptrdef,valreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      { --- ansi/wide strings and dynamic arrays ---
        the result is first written to a temp on both paths (nil / non-nil)
        and loaded into a register after the paths have joined again }
      tg.gethltemp(current_asmdata.CurrAsmList,resultdef,resultdef.size,tt_normal,resulttmp);

      { length in ansi/wide strings and high in dynamic arrays is at offset
        -sizeof(sizeint), for widestrings it's at -4 }
      if is_widestring(left.resultdef) then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
        left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(nillab);
      current_asmdata.getjumplabel(donelab);

      { a nil pointer has no length field to load -> handled at nillab }
      hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,lenptrdef,OC_EQ,0,left.location.register,nillab);

      { non-nil: load the field stored lendef.size bytes before the data.
        volatility of the ansistring/widestring refers to the volatility of
        the string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,lendef,resultdef,lenref,valreg);
      { widestrings: the loaded value is shifted right by one }
      if is_widestring(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,resultdef,1,valreg);
      { Dynamic arrays do not have their length attached but their maximum index }
      if is_dynamic_array(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,valreg);
      hlcg.a_load_reg_ref(current_asmdata.CurrAsmList,resultdef,resultdef,valreg,resulttmp);
      hlcg.a_jmp_always(current_asmdata.CurrAsmList,donelab);

      { nil: the length is 0 }
      hlcg.a_label(current_asmdata.CurrAsmList,nillab);
      hlcg.a_load_const_ref(current_asmdata.CurrAsmList,resultdef,0,resulttmp);

      { join point: fetch the result from the temp into a fresh register }
      hlcg.a_label(current_asmdata.CurrAsmList,donelab);
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,resultdef,resultdef,resulttmp,valreg);
      tg.ungettemp(current_asmdata.CurrAsmList,resulttmp);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=valreg;
    end;
  procedure tllvminlinenode.second_sqr_real;
    begin
      secondpass(left);
      { the result lives in the kind of register chosen by first_sqr_real }
      location.loc:=expectloc;
      if expectloc<>LOC_MMREGISTER then
        begin
          { fpu register operand and result }
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          { mm register (scalar) operand and result }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end;

      { sqr(x) = x*x: emit an fmul with the operand register used for both
        source operands }
      current_asmdata.CurrAsmList.concat(
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register));
    end;
  procedure tllvminlinenode.second_trunc_real;
    var
      convins: taillvm;
    begin
      secondpass(left);

      { get the floating point operand into a register of the kind that
        matches its type }
      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);

      { the truncated value ends up in a register suitable for resultdef }
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(current_asmdata.CurrAsmList,resultdef);

      { emit the fptosi conversion from left's type to the result type }
      convins:=taillvm.op_reg_size_reg_size(la_fptosi,location.register,left.resultdef,left.location.register,resultdef);
      current_asmdata.CurrAsmList.concat(convins);
    end;
  initialization
    { register the LLVM implementation as the inline node class to use }
    cinlinenode:=tllvminlinenode;
  end.