nllvminl.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. {
  2. Copyright (c) 2014 by Jonas Maebe
  3. Generate LLVM bytecode for inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nllvminl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,
  22. ncginl;
  type
    { Inline node class for the LLVM code generator; it overrides a number of
      first-pass and second-pass handlers inherited from tcginlinenode. }
    tllvminlinenode = class(tcginlinenode)
     protected
      { removes an implicit type conversion applied to a floating point
        operand from "left" (used for trunc) }
      procedure maybe_remove_round_trunc_typeconv;

      { replaces the node by a call to 'llvm_frameaddress' with argument 0 }
      function first_get_frame: tnode; override;
      { expands abs() on a real into an explicit compare/negate statement
        block }
      function first_abs_real: tnode; override;
      { only selects the expected location (MM or FPU register) }
      function first_sqr_real: tnode; override;
      { handled by this class only when cs_opt_fastmath is enabled, otherwise
        deferred to the inherited implementation }
      function first_trunc_real: tnode; override;
     public
      { generates the code for length() }
      procedure second_length; override;
      { emits an la_fmul of the operand with itself }
      procedure second_sqr_real; override;
      { emits an la_fptosi conversion of the operand }
      procedure second_trunc_real; override;
    end;
  36. implementation
  37. uses
  38. verbose,globals,globtype,constexp,
  39. aasmbase, aasmdata,
  40. symconst,symtype,symdef,defutil,
  41. nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
  42. pass_2,
  43. cgbase,cgutils,tgobj,hlcgobj,
  44. cpubase,
  45. llvmbase,aasmllvm;
  { Strips an implicit type conversion that wraps a floating point operand of
    trunc()/round().

    The prototype of trunc()/round() in the system unit is declared with
    valreal as parameter type, so the argument will always have been
    converted to extended. This is not undone in ninl, because there are
    other code generators that assume that the parameter to trunc has been
    converted to valreal (e.g. PowerPC).
    (copy from code in nx64inl, should be refactored) }
  procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
    var
      conv: ttypeconvnode;
    begin
      { nothing to do unless the operand is an implicit type conversion ... }
      if (left.nodetype<>typeconvn) or
         (nf_explicit in left.flags) then
        exit;
      conv:=ttypeconvnode(left);
      { ... whose own operand is a floating point value }
      if conv.left.resultdef.typ<>floatdef then
        exit;

      { get rid of the type conversion, so the use_vectorfpu will be
        applied to the original type; the operand is detached from the
        conversion node before that node is freed }
      left:=conv.left;
      conv.left:=nil;
      conv.free;
    end;
  { First pass for get_frame: returns a call node for the internal routine
    'llvm_frameaddress', with the integer constant 0 as its single
    parameter. }
  function tllvminlinenode.first_get_frame: tnode;
    var
      zeroarg: tnode;
    begin
      zeroarg:=ccallparanode.create(genintconstnode(0),nil);
      result:=ccallnode.createintern('llvm_frameaddress',zeroarg);
    end;
  { in general, generate ordinary expressions (plain arithmetic and control
    flow) rather than intrinsics: according to the "Performance Tips for
    Frontend Authors", "The optimizer is quite good at reasoning about
    general control flow and arithmetic, it is not anywhere near as strong at
    reasoning about the various intrinsics. If profitable for code generation
    purposes, the optimizer will likely form the intrinsics itself late in
    the optimization pipeline." }
  { First pass for abs() on a real. The node is replaced by a statement
    block equivalent to

      argtemp:=left;
      if argtemp>=0 then
        restemp:=argtemp
      else
        restemp:=-argtemp;

    whose value is restemp. "left" is moved into the new tree and therefore
    cleared at the end.

    NOTE(review): with this comparison a negative zero takes the "then"
    branch, so it is presumably returned with its sign intact -- confirm
    whether that matters for callers. }
  function tllvminlinenode.first_abs_real: tnode;
    var
      argtemp,
      restemp: ttempcreatenode;
      stat: tstatementnode;
      nonnegative,
      keepvalue,
      negatevalue: tnode;
    begin
      result:=internalstatements(stat);

      argtemp:=ctempcreatenode.create(left.resultdef,left.resultdef.size,tt_persistent,true);
      { assigned twice -> will be spilled if put in register }
      restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
      addstatement(stat,argtemp);
      addstatement(stat,restemp);

      { argtemp:=left }
      addstatement(stat,
        cassignmentnode.create(ctemprefnode.create(argtemp),left));

      { argtemp>=0 }
      nonnegative:=caddnode.create(gten,
        ctemprefnode.create(argtemp),
        crealconstnode.create(0.0,left.resultdef));
      { restemp:=argtemp }
      keepvalue:=cassignmentnode.create(
        ctemprefnode.create(restemp),
        ctemprefnode.create(argtemp));
      { restemp:=-argtemp }
      negatevalue:=cassignmentnode.create(
        ctemprefnode.create(restemp),
        cunaryminusnode.create(ctemprefnode.create(argtemp)));
      addstatement(stat,cifnode.create(nonnegative,keepvalue,negatevalue));

      addstatement(stat,ctempdeletenode.create(argtemp));
      addstatement(stat,ctempdeletenode.create_normal_temp(restemp));
      { return restemp }
      addstatement(stat,ctemprefnode.create(restemp));

      { the operand has been reused in the new tree }
      left:=nil;
    end;
  { First pass for sqr() on a real: only records the expected result
    location (MM register if use_vectorfpu accepts the operand type, FPU
    register otherwise) and returns nil. }
  function tllvminlinenode.first_sqr_real: tnode;
    begin
      if not use_vectorfpu(left.resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
      result:=nil;
    end;
  { First pass for trunc() on a real.

    fptosi is undefined if the value is out of range -> only generate it in
    case of fastmath; in all other cases the inherited implementation
    decides what to do. }
  function tllvminlinenode.first_trunc_real: tnode;
    begin
      if not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          result:=inherited;
          exit;
        end;

      { fastmath: keep the node, operate on the original operand type and
        deliver the result in an integer register }
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_REGISTER;
      result:=nil;
    end;
  { Second pass for length().

    Shortstrings: the result is a reference to the string's memory location,
    retyped to the result type (the length byte).

    All other operand types: the operand is loaded as a pointer. If it is
    nil the result is 0, otherwise the value stored just in front of the
    data is loaded and adjusted. Both paths store into a temp that is read
    back into a register after the join label. }
  procedure tllvminlinenode.second_length;
    var
      list: TAsmList;
      donelab, emptylab: tasmlabel;
      reg: tregister;
      lenref, resulttemp: treference;
      lendef, lenptrdef, resptrdef: tdef;
      widestr: boolean;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;

      if is_shortstring(left.resultdef) then
        begin
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            internalerror(2014080806);
          { typecast the shortstring reference into a length byte reference }
          resptrdef:=cpointerdef.getreusable(resultdef);
          location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
          reg:=hlcg.getaddressregister(list,resptrdef);
          hlcg.a_loadaddr_ref_reg(list,left.resultdef,resptrdef,left.location.reference,reg);
          hlcg.reference_reset_base(location.reference,resptrdef,reg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
          exit;
        end;

      tg.gethltemp(list,resultdef,resultdef.size,tt_normal,resulttemp);

      { length in ansi/wide strings and high in dynamic arrays is at offset
        -sizeof(sizeint), for widestrings it's at -4 }
      widestr:=is_widestring(left.resultdef);
      if widestr then
        lendef:=u32inttype
      else
        lendef:=ossinttype;
      lenptrdef:=cpointerdef.getreusable(lendef);

      hlcg.location_force_reg(list,left.location,left.resultdef,lenptrdef,true);
      current_asmdata.getjumplabel(emptylab);
      current_asmdata.getjumplabel(donelab);
      { nil pointer -> length 0 }
      hlcg.a_cmp_const_reg_label(list,lenptrdef,OC_EQ,0,left.location.register,emptylab);

      { volatility of the ansistring/widestring refers to the volatility of the
        string pointer, not of the string data }
      hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
      reg:=hlcg.getintregister(list,resultdef);
      hlcg.a_load_ref_reg(list,lendef,resultdef,lenref,reg);
      { widestrings: the loaded value is shifted right by one }
      if widestr then
        hlcg.a_op_const_reg(list,OP_SHR,resultdef,1,reg);
      { Dynamic arrays do not have their length attached but their maximum index }
      if is_dynamic_array(left.resultdef) then
        hlcg.a_op_const_reg(list,OP_ADD,resultdef,1,reg);
      hlcg.a_load_reg_ref(list,resultdef,resultdef,reg,resulttemp);
      hlcg.a_jmp_always(list,donelab);

      hlcg.a_label(list,emptylab);
      hlcg.a_load_const_ref(list,resultdef,0,resulttemp);

      hlcg.a_label(list,donelab);
      reg:=hlcg.getintregister(list,resultdef);
      hlcg.a_load_ref_reg(list,resultdef,resultdef,resulttemp,reg);
      tg.ungettemp(list,resulttemp);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=reg;
    end;
  { Second pass for sqr() on a real: forces the operand into a register of
    the kind selected by first_sqr_real (expectloc) and emits an la_fmul of
    that register with itself into a freshly allocated result register. }
  procedure tllvminlinenode.second_sqr_real;
    begin
      secondpass(left);
      { initialise the complete result location (kind and size) instead of
        only overwriting its "loc" field, in the same way as
        second_trunc_real and the register path of second_length do }
      location_reset(location,expectloc,def_cgsize(resultdef));
      if expectloc=LOC_MMREGISTER then
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end;
      { result := left * left }
      current_asmdata.CurrAsmList.concat(
        taillvm.op_reg_size_reg_reg(la_fmul,
          location.register,resultdef,
          left.location.register,left.location.register
        )
      );
    end;
  { Second pass for trunc() on a real: forces the operand into an MM or FPU
    register (depending on use_vectorfpu for its type) and emits an
    la_fptosi that converts it into a new register of the result type.
    NOTE(review): presumably only reached when first_trunc_real kept the
    node, i.e. with fastmath enabled -- confirm against the inherited
    first pass. }
  procedure tllvminlinenode.second_trunc_real;
    var
      list: TAsmList;
    begin
      secondpass(left);
      list:=current_asmdata.CurrAsmList;

      if not use_vectorfpu(left.resultdef) then
        hlcg.location_force_fpureg(list,left.location,left.resultdef,true)
      else
        hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);

      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=hlcg.getregisterfordef(list,resultdef);
      list.concat(
        taillvm.op_reg_size_reg_size(la_fptosi,location.register,left.resultdef,left.location.register,resultdef)
      );
    end;
  begin
    { unit initialisation: make cinlinenode refer to the LLVM-specific
      inline node class declared in this unit }
    cinlinenode:=tllvminlinenode;
  end.