nllvminl.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  23. type
  { inline node for the LLVM code generator: overrides a number of first pass
    and second pass handlers of tcginlinenode (implemented further down in
    this unit) }
  tllvminlinenode = class(tcginlinenode)
   protected
    { drops an implicit type conversion that wraps a floating point operand
      of trunc()/round(), so the operand keeps its original float type }
    procedure maybe_remove_round_trunc_typeconv;

    { first pass handlers }
    function first_get_frame: tnode; override;
    function first_abs_real: tnode; override;
    function first_fma: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_trunc_real: tnode; override;
   public
    { second pass (code generation) handlers }
    procedure second_length; override;
    procedure second_sqr_real; override;
    procedure second_trunc_real; override;
  end;
  38. implementation
  39. uses
  40. verbose,globals,globtype,constexp,
  41. aasmbase, aasmdata,
  42. symconst,symtype,symdef,defutil,
  43. compinnr,
  44. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  45. pass_2,
  46. cgbase,cgutils,tgobj,hlcgobj,
  47. cpubase,
  48. llvmbase,aasmllvm;
  { Removes an implicit type conversion around the operand of trunc()/round().

    The prototype of trunc()/round() in the system unit is declared with
    valreal as parameter type, so the argument will always have been converted
    to extended. If "left" is such a conversion, is not explicit and converts
    from a float type, it is replaced by its own operand and the conversion
    node is freed. This is not done in ninl, because other code generators
    assume that the parameter to trunc has been converted to valreal
    (e.g. PowerPC).
    (copy from code in nx64inl, should be refactored) }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      convnode: ttypeconvnode;
      operand: tnode;
    begin
      { only implicit type conversions are candidates }
      if left.nodetype<>typeconvn then
        exit;
      if nf_explicit in left.flags then
        exit;
      convnode:=ttypeconvnode(left);
      operand:=convnode.left;
      { the converted value must itself be a floating point value }
      if operand.resultdef.typ<>floatdef then
        exit;
      { get rid of the type conversion, so that use_vectorfpu will be applied
        to the original type; detach the operand first so that freeing the
        conversion node does not free the operand along with it }
      convnode.left:=nil;
      convnode.free;
      left:=operand;
    end;
  { Replaces get_frame by a call to the internal helper 'llvm_frameaddress'
    with the integer constant 0 as its only argument.
    NOTE(review): presumably this helper wraps the llvm.frameaddress intrinsic
    and 0 selects the current function's frame - confirm against the helper's
    declaration. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      levelpara: tnode;
    begin
      levelpara:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',levelpara);
    end;
  { In general, generate ordinary expressions rather than intrinsics: according
    to LLVM's "Performance Tips for Frontend Authors", "The optimizer is quite
    good at reasoning about general control flow and arithmetic, it is not
    anywhere near as strong at reasoning about the various intrinsics. If
    profitable for code generation purposes, the optimizer will likely form
    the intrinsics itself late in the optimization pipeline." }
  { Rewrites abs() of a real value into an explicit statement block:

      argtemp:=left;
      if argtemp>=0 then
        abstemp:=argtemp
      else
        abstemp:=-argtemp;
      <value of the block is abstemp>

    Returns the new block; "left" is reused inside it and therefore set to
    nil afterwards.

    NOTE(review): with IEEE comparison semantics -0.0>=0.0 is true, so a
    negative zero operand is passed through unchanged - confirm that this is
    acceptable. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      abstemp: ttempcreatenode;
      block: tstatementnode;
      isnonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      result:=internalstatements(block);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(block,argtemp);
      addstatement(block,abstemp);

      { argtemp:=left }
      addstatement(block,cassignmentnode.create(ctemprefnode.create(argtemp),left));

      { argtemp>=0 }
      isnonnegative:=caddnode.create(gten,
        ctemprefnode.create(argtemp),
        crealconstnode.create(0.0,left.resultdef));
      { abstemp:=argtemp }
      keepvalue:=cassignmentnode.create(
        ctemprefnode.create(abstemp),
        ctemprefnode.create(argtemp));
      { abstemp:=-argtemp }
      negatevalue:=cassignmentnode.create(
        ctemprefnode.create(abstemp),
        cunaryminusnode.create(ctemprefnode.create(argtemp)));
      addstatement(block,cifnode.create(isnonnegative,keepvalue,negatevalue));

      addstatement(block,ctempdeletenode.create(argtemp));
      addstatement(block,ctempdeletenode.create_normal_temp(abstemp));
      { the final statement of the block is the reference to abstemp, i.e. the
        returned value }
      addstatement(block,ctemprefnode.create(abstemp));

      { the operand now lives inside the new block }
      left:=nil;
    end;
  { Replaces an fma inline node by a call to the internal helper that matches
    the inline number (llvm_fma_f32/f64/f80/f128). The existing parameter
    list in "left" is handed over to the call node unchanged; any other
    inline number triggers internalerror 2018122101. }
  function tllvminlinenode.first_fma: tnode;
    var
      helpername: string[15];
      paras: tnode;
    begin
      case inlinenumber of
        in_fma_single:
          helpername:='llvm_fma_f32';
        in_fma_double:
          helpername:='llvm_fma_f64';
        in_fma_extended:
          helpername:='llvm_fma_f80';
        in_fma_float128:
          helpername:='llvm_fma_f128';
        else
          internalerror(2018122101);
      end;
      { the parameters are reused by the call node }
      paras:=left;
      left:=nil;
      result:=ccallnode.createintern(helpername,paras);
    end;
  { First pass for sqr() of a real: the node is kept (result is nil) and only
    the expected location is chosen: an MM register if use_vectorfpu holds
    for the operand type, an FPU register otherwise. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      result:=nil;
      expectloc:=LOC_FPUREGISTER;
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { Replaces sqrt() of a real by a call to the internal helper matching the
    operand's float type (llvm_sqrt_f32/f64/f80/f128), with the operand as
    the only parameter. A non-float operand triggers internalerror
    2018121601, an unhandled float type internalerror 2018121602. }
  function tllvminlinenode.first_sqrt_real: tnode;
    var
      helpername: string[20];
      para: tnode;
    begin
      if left.resultdef.typ<>floatdef then
        internalerror(2018121601);
      case tfloatdef(left.resultdef).floattype of
        s32real:
          helpername:='llvm_sqrt_f32';
        s64real:
          helpername:='llvm_sqrt_f64';
        s80real,sc80real:
          helpername:='llvm_sqrt_f80';
        s128real:
          helpername:='llvm_sqrt_f128';
        else
          internalerror(2018121602);
      end;
      { the operand is reused as the call's parameter }
      para:=ccallparanode.create(left,nil);
      left:=nil;
      result:=ccallnode.createintern(helpername,para);
    end;
  { First pass for trunc() of a real.

    fptosi is undefined if the value is out of range -> only generate it in
    case of fastmath. Without fastmath the inherited handling is used; with
    fastmath the node is kept (result is nil) and will deliver its value in a
    register. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      result:=nil;
    end;
  { Generates code for length().

    Shortstrings: the operand must be in memory (internalerror 2014080806
    otherwise); the result location is a reference to the start of the
    string, retyped as a pointer to the result type.

    Everything else (pointer-based operands): if the pointer is nil the result
    is 0, otherwise the value stored at offset -lendef.size in front of the
    data is loaded and adjusted. Both paths store into a temp that is loaded
    into the final result register after the join label. }
  procedure tllvminlinenode.second_length;
    var
      donelab, emptylab: tasmlabel;
      valreg: tregister;
      lenref, resulttemp: treference;
      lendef,
      lenptrdef,
      resptrdef: tdef;
    begin
      secondpass(left);

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          { typecast the shortstring reference into a length byte reference }
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          resptrdef:=cpointerdef.getreusable(resultdef);
          valreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,resptrdef);
          hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.resultdef,resptrdef,left.location.reference,valreg);
          hlcg.reference_reset_base(location.reference,resptrdef,valreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      tg.gethltemp(current_asmdata.CurrAsmList,resultdef,resultdef.size,tt_normal,resulttemp);
      { length in ansi/wide strings and high in dynamic arrays is stored right
        in front of the data: a 32 bit value for widestrings, an ossinttype
        value otherwise }
      if is_widestring(left.resultdef) then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
        left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(emptylab);
      current_asmdata.getjumplabel(donelab);
      { nil pointer -> length 0 }
      hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,lenptrdef,OC_EQ,0,left.location.register,emptylab);

      { volatility of the ansistring/widestring refers to the volatility of the
        string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,lendef,resultdef,lenref,valreg);
      { NOTE(review): presumably the stored widestring length is in bytes,
        hence the shift right by one - confirm }
      if is_widestring(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,resultdef,1,valreg);
      { Dynamic arrays do not have their length attached but their maximum index }
      if is_dynamic_array(left.resultdef) then
        hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,valreg);
      hlcg.a_load_reg_ref(current_asmdata.CurrAsmList,resultdef,resultdef,valreg,resulttemp);
      hlcg.a_jmp_always(current_asmdata.CurrAsmList,donelab);

      { nil case: store 0 }
      hlcg.a_label(current_asmdata.CurrAsmList,emptylab);
      hlcg.a_load_const_ref(current_asmdata.CurrAsmList,resultdef,0,resulttemp);

      { join point: fetch the result from the temp into a fresh register }
      hlcg.a_label(current_asmdata.CurrAsmList,donelab);
      valreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,resultdef,resultdef,resulttemp,valreg);
      tg.ungettemp(current_asmdata.CurrAsmList,resulttemp);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=valreg;
    end;
  { Generates code for sqr() of a real: the operand is forced into an MM or
    FPU register (depending on the location chosen in the first pass) and
    multiplied with itself by an fmul instruction whose result goes into a
    newly allocated register of the same kind. }
  procedure tllvminlinenode.second_sqr_real;
    begin
      secondpass(left);
      { initialise the complete result location (including its size) instead
        of only overwriting the loc field, like second_trunc_real does }
      location_reset(location,expectloc,def_cgsize(resultdef));
      if expectloc=LOC_MMREGISTER then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end;
      { result:=left*left }
      current_asmdata.CurrAsmList.concat(
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        )
      );
    end;
  { Generates code for trunc() of a real by means of a single fptosi
    instruction: the operand is forced into an FPU or MM register (the latter
    if use_vectorfpu holds for its type) and converted into a newly allocated
    register of the result type. }
  procedure tllvminlinenode.second_trunc_real;
    var
      convins: taillvm;
    begin
      secondpass(left);
      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(current_asmdata.CurrAsmList,resultdef);
      { result register := fptosi <operand type> <operand register> to <result type> }
      convins:=taillvm.op_reg_size_reg_size(la_fptosi,
        location.register,left.resultdef,
        left.location.register,resultdef);
      current_asmdata.CurrAsmList.concat(convins);
    end;
  277. begin
  278. cinlinenode:=tllvminlinenode;
  279. end.