ncpuset.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. {
  2. Copyright (c) 2015 by Jonas Maebe
  3. Generate AArch64 assembler for in set/case nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit ncpuset;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
  type
    { Case-statement code generator for AArch64.  It derives from
      tcgcasenode and overrides four of its hooks; everything else is
      inherited unchanged. }
    taarch64casenode = class(tcgcasenode)
    protected
      { sets max_linear_list to 10; max_dist is not modified }
      procedure optimizevalues(var max_linear_list: int64; var max_dist: qword); override;
      { always answers true }
      function has_jumptable: boolean; override;
      { emits a chain of subtract/compare + conditional branches that
        walks the label tree rooted at hp }
      procedure genlinearlist(hp: pcaselabel); override;
      { emits an indexed, table-based indirect branch plus the table
        itself for the label tree rooted at hp }
      procedure genjumptable(hp: pcaselabel; min_, max_: int64); override;
    end;
  30. implementation
  31. uses
  32. systems,
  33. verbose,globals,constexp,cutils,
  34. symconst,symdef,defutil,
  35. paramgr,
  36. cpuinfo,
  37. pass_2,cgcpu,
  38. ncon,
  39. tgobj,ncgutil,rgobj,aasmcpu,
  40. procinfo,
  41. cgutils;
  42. {*****************************************************************************
  43. TAARCH64CASENODE
  44. *****************************************************************************}
  { Adjusts the case-lowering limits for AArch64.

    max_linear_list  in/out; overwritten with 10.
    max_dist         in/out; not modified by this override. }
  procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
    const
      { value stored into max_linear_list on this target }
      aarch64_max_linear_list = 10;
    begin
      max_linear_list:=aarch64_max_linear_list;
    end;
  { Returns true unconditionally: this node type implements
    genjumptable, see below in this unit. }
  function taarch64casenode.has_jumptable: boolean;
    begin
      result:=true;
    end;
  { Emits the "linear list" lowering of a case statement: the label tree
    rooted at hp is walked in order (less, node, greater) and for every
    label a subtract/compare plus a conditional branch to the label's
    block is appended to current_asmdata.CurrAsmList.  A final
    unconditional branch to elselabel handles values that matched nothing.

    hregister is modified in place: each step subtracts the distance to
    the next label bound, so after a step the register holds the case
    value relative to that bound.

    When the selector is signed and the smallest label is negative, the
    work is delegated to genlinearcmplist instead. }
  procedure taarch64casenode.genlinearlist(hp : pcaselabel);
    var
      { true until the first (smallest) label has been emitted }
      at_first_label : boolean;
      { true when the previously emitted label was a range low..high }
      prev_was_range : boolean;
      { bound that hregister is currently relative to }
      prev_bound : TConstExprInt;
      { flag conditions for "below" / "below or equal", picked to match
        the signedness of the selector }
      fl_below,fl_below_eq : tresflags;
      { cmp_size: size of the selector; sub_size: 32/64-bit size used for
        the subtractions }
      cmp_size, sub_size: tcgsize;

    { hregister := hregister - delta, using sub_size.  The boolean
      argument is passed as true (presumably "set flags", since every
      caller branches on flags right afterwards - confirm against cgobj).
      sub_size rather than cmp_size is used to avoid unnecessary sign
      extensions; this path is only reached when genlinearcmplist was not
      selected, i.e. not for a signed selector with a negative label. }
    procedure sub_from_hregister(delta: aint);
      var
        ov_dummy: tlocation;
      begin
        cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,sub_size,delta,hregister,hregister,
          true,ov_dummy);
      end;

    { emits the code for one label node and, recursively, its subtrees }
    procedure genitem(entry : pcaselabel);
      begin
        if assigned(entry^.less) then
          genitem(entry^.less);

        { smallest label only: if it lies above the minimum of the
          selector type, values below it go straight to the else branch }
        if at_first_label and (entry^._low>get_min_value(left.resultdef)) then
          cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,cmp_size,jmp_lt,aint(entry^._low.svalue),hregister,elselabel);

        if entry^._low<>entry^._high then
          begin
            { ---- range label low..high ---- }
            if not at_first_label then
              begin
                { rebase hregister onto the low bound of this range }
                sub_from_hregister(aint(entry^._low.svalue - prev_bound.svalue));
                { the "below low" branch can be skipped only when this
                  range directly follows a previous range }
                if (aint(entry^._low.svalue - prev_bound.svalue) <> 1) or
                   (not prev_was_range) then
                  cg.a_jmp_flags(current_asmdata.CurrAsmList,fl_below,elselabel);
              end
            else if (entry^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
              { first label: rebase hregister onto its low bound if needed }
              sub_from_hregister(aint(entry^._low.svalue));

            { rebase onto the high bound; "below or equal" now means the
              value was inside low..high }
            sub_from_hregister(aint(entry^._high.svalue - entry^._low.svalue));
            cg.a_jmp_flags(current_asmdata.CurrAsmList,fl_below_eq,blocklabel(entry^.blockid));
            prev_bound:=entry^._high;
            prev_was_range:=true;
          end
        else
          begin
            { ---- single-value label ---- }
            if entry^._low-prev_bound=0 then
              { hregister is already relative to this value }
              cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, cmp_size, OC_EQ,0,hregister,blocklabel(entry^.blockid))
            else
              begin
                sub_from_hregister(aint(entry^._low.svalue-prev_bound.svalue));
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(entry^.blockid));
              end;
            prev_bound:=entry^._low;
            prev_was_range:=false;
          end;

        at_first_label:=false;

        if assigned(entry^.greater) then
          genitem(entry^.greater);
      end;

    begin
      cmp_size:=def_cgsize(opsize);
      { sub-word selectors are handled with 32-bit subtractions }
      case cmp_size of
        OS_64,OS_S64:
          sub_size:=OS_64;
        OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
          sub_size:=OS_32;
        else
          Internalerror(2019090902);
      end;

      if not with_sign then
        begin
          fl_below:=F_CC;
          fl_below_eq:=F_LS;
        end
      else
        begin
          fl_below:=F_LT;
          fl_below_eq:=F_LE;
        end;

      { signed selector with a negative label: use the compare-based list }
      if with_sign and (min_label<0) then
        begin
          genlinearcmplist(hp);
          exit;
        end;

      prev_bound:=0;
      prev_was_range:=false;
      at_first_label:=true;
      genitem(hp);
      { nothing matched }
      cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
    end;
  { Emits the jump-table lowering of a case statement.

    hp    root of the label tree; it is walked in order via less/greater.
    min_  value subtracted from the selector so that table index 0
          corresponds to it.
    max_  together with min_ gives the highest valid index (max_-min_),
          used for the range check.

    Code appended to current_asmdata.CurrAsmList:
      hregister := hregister - min_
      [unless jumptable_no_range] branch to elselabel if above max_-min_
      load the 32-bit signed slot  table[index]
      add the table address and branch to the result (BR)

    The table itself goes to current_procinfo.aktlocaldata.  Every slot
    is emitted with Create_rel_sym(kind, tablelabel, target), which is why
    the table address is added back before branching. }
  procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
    var
      { upper bound of the most recently emitted label; starts at min_ }
      last: TConstExprInt;
      tablelabel: TAsmLabel;
      basereg,indexreg,jumpreg: TRegister;
      href: TReference;
      opcgsize: tcgsize;
      sectype: TAsmSectiontype;
      jtitemconsttype: taiconst_type;

    { Appends the table slots for label node t and its subtrees to list.

      Slots are counted by distance instead of iterating over absolute
      label values: the former "while i<=t^._low.svalue-1" and
      "while i<=t^._high.svalue ... inc(i)" loops wrapped around when a
      label was low(int64) or high(int64) and then never terminated.  The
      number of emitted slots is unchanged in every other case. }
    procedure genitem(list:TAsmList;t : pcaselabel);
      var
        n : int64;
      begin
        if assigned(t^.less) then
          genitem(list,t^.less);
        { fill a possible hole between the previous label and this one with
          else-slots; the count is <= 0 (no iterations) for the first label
          and for adjacent labels }
        for n:=1 to t^._low.svalue-last.svalue-1 do
          list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
        { one slot per value in _low.._high, all pointing at this block }
        for n:=0 to t^._high.svalue-t^._low.svalue do
          list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
        last:=t^._high;
        if assigned(t^.greater) then
          genitem(list,t^.greater);
      end;

    begin
      if not(target_info.system in systems_darwin) then
        jtitemconsttype:=aitconst_32bit
      else
        { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
        jtitemconsttype:=aitconst_darwin_dwarf_delta32;

      last:=min_;
      opcgsize:=def_cgsize(opsize);
      { a <= x <= b <-> unsigned(x-a) <= (b-a) }
      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
      if not(jumptable_no_range) then
        begin
          { case expr greater than max_ => goto elselabel }
          cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
          { the former "min_:=0" here was a dead store: min_ is a value
            parameter that is not read again below }
        end;
      { local label in order to avoid using GOT }
      current_asmdata.getlabel(tablelabel,alt_data);
      { widen the index to address size }
      indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
      cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
      { load table address }
      reference_reset_symbol(href,tablelabel,0,4,[]);
      basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
      { load table slot, 32-bit sign extended; the index is shifted left
        by 2 because every slot is 4 bytes wide }
      reference_reset_base(href,basereg,0,href.temppos,4,[]);
      href.index:=indexreg;
      href.shiftmode:=SM_LSL;
      href.shiftimm:=2;
      jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
      { add table address }
      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
      { and finally jump }
      current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));

      { generate jump table }
      if target_info.system=system_aarch64_win64 then
        begin
          { for Windows we need to make sure that the jump table is located in the
            same section as the corresponding code: clang generates an
            ABSOLUTE32 relocation that cannot be handled correctly, and armasm64
            rejects the difference entries because the symbols are located in
            different sections }
          sectype:=sec_code;
          new_section(current_procinfo.aktlocaldata,sectype,lower(current_procinfo.procdef.mangledname),getprocalign);
        end
      else
        begin
          if not(target_info.system in systems_darwin) then
            sectype:=sec_rodata
          else
            begin
              { on Mac OS X, dead code stripping ("smart linking") happens based on
                global symbols: every global/static symbol (symbols that do not
                start with "L") marks the start of a new "subsection" that is
                discarded by the linker if there are no references to this symbol.
                This means that if you put the jump table in the rodata section, it
                will become part of the block of data associated with the previous
                non-L-label in the rodata section and stay or be thrown away
                depending on whether that block of data is referenced. Therefore,
                jump tables must be added in the code section and since aktlocaldata
                is inserted right after the routine, it will become part of the
                same subsection that contains the routine's code }
              sectype:=sec_code;
            end;
          new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
        end;

      if target_info.system in systems_darwin then
        begin
          { additionally, these tables are now marked via ".data_region jt32"
            and ".end_data_region" }
          current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
        end;
      current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
      genitem(current_procinfo.aktlocaldata,hp);
      if target_info.system in systems_darwin then
        current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
    end;
  { unit start-up: store taarch64casenode in the ccasenode class
    reference }
  initialization
    ccasenode:=taarch64casenode;
  end.