nx64set.pas 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86_64 assembler for in set/case nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx64set;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. constexp,
  22. globtype,
  23. nset,nx86set;
  type
    { Case node for x86_64. It derives from tx86casenode and overrides the
      two methods below; both are implemented in this unit. }
    tx8664casenode = class(tx86casenode)
      { Adds 9 to max_linear_list; max_dist is not modified. }
      procedure optimizevalues(var max_linear_list: int64; var max_dist: qword); override;
      { Emits the jump-table dispatch code and the table data for the label
        tree hp, whose label values lie in min_..max_. }
      procedure genjumptable(hp: pcaselabel; min_, max_: int64); override;
    end;
  29. implementation
  30. uses
  31. systems,cpuinfo,
  32. verbose,globals,
  33. defutil,cutils,
  34. aasmbase,aasmtai,aasmdata,aasmcpu,
  35. cgbase,
  36. cpubase,procinfo,
  37. cga,cgutils,cgobj,cgx86;
  38. {*****************************************************************************
  39. TX8664CASENODE
  40. *****************************************************************************}
  { Adjusts the tuning values handed in by the caller for x86_64.

    max_linear_list - raised by 9
    max_dist        - left exactly as received }
  procedure tx8664casenode.optimizevalues(var max_linear_list:int64;var max_dist:qword);
    const
      extra_linear_entries = 9;
    begin
      max_linear_list:=max_linear_list+extra_linear_entries;
    end;
  { Emits a position-independent jump table for a case statement.

    A position-independent table is always generated: its entries are 32-bit
    values relative to the table's own label, which makes the table half the
    size at the price of two extra instructions (these should not cause more
    slowdown than pipeline thrashing would).

    hp         - root of the case label tree; it is walked in order through
                 its less/greater links
    min_, max_ - lowest and highest label value of the case statement }
  procedure tx8664casenode.genjumptable(hp : pcaselabel; min_,max_ : int64);
    const
      { largest number of values outside min_..max_ that may be padded into
        the table instead of emitting a range check }
      max_pad_entries = 32;
    var
      prev_high: TConstExprInt;
      type_lo, type_hi: TConstExprInt;
      tbl_label: TAsmLabel;
      tbl_reg,idx_reg,dest_reg: TRegister;
      mem_ref: TReference;
      out_list: TAsmList;
      idx_size: tcgsize;
      tbl_section: TAsmSectiontype;
      entry_type: taiconst_type;
      pad_table: Boolean;
      pad_limit, label_span, fill_val, first_label : aint;

    { Appends one table entry: the position of target relative to the table
      label, written to whatever list out_list currently refers to. }
    procedure add_entry(target : TAsmSymbol);
      begin
        out_list.concat(Tai_const.Create_rel_sym(entry_type,tbl_label,target));
      end;

    { In-order walk of the label tree rooted at t. Gaps between consecutive
      labels are filled with entries for elselabel; prev_high carries the
      upper bound of the label handled last. }
    procedure genitem(t : pcaselabel);
      var
        cur : TConstExprInt;
      begin
        if assigned(t^.less) then
          genitem(t^.less);
        { values between the previous label and this one go to the else branch }
        cur:=prev_high+1;
        while cur<=t^._low-1 do
          begin
            add_entry(elselabel);
            cur:=cur+1;
          end;
        { one entry per value covered by this label, all pointing at its block }
        cur:=t^._low;
        while cur<=t^._high do
          begin
            add_entry(blocklabel(t^.blockid));
            cur:=cur+1;
          end;
        prev_high:=t^._high;
        if assigned(t^.greater) then
          genitem(t^.greater);
      end;

    begin
      type_lo:=0;
      type_hi:=0;
      pad_table:=False;
      first_label:=min_;
      prev_high:=min_;
      out_list:=current_asmdata.CurrAsmList;
      idx_size:=def_cgsize(opsize);

      if target_info.system in systems_darwin then
        { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
        entry_type:=aitconst_darwin_dwarf_delta32
      else
        entry_type:=aitconst_32bit;

      if not jumptable_no_range then
        begin
          getrange(left.resultdef,type_lo,type_hi);
          label_span:=aint(max_)-aint(min_);

          { When optimizing for size the padding limit is additionally capped
            at twice TrueCount, which limits the size of jump tables for
            small enumerations. }
          pad_limit:=max_pad_entries;
          if cs_opt_size in current_settings.optimizerswitches then
            pad_limit:=min(max_pad_entries, TrueCount shl 1);

          { If only a few values of the case expression's type fall outside
            min_..max_, almost every possible value is covered by a label.
            It is then cheaper to drop the range check and give the remaining
            values table entries that point at elselabel. }
          pad_table:=((type_hi - type_lo) - label_span <= pad_limit);
          if pad_table then
            { the table now starts at the lowest value of the type }
            min_:=type_lo.svalue
          else
            begin
              { a <= x <= b <-> unsigned(x-a) <= (b-a) }
              cg.a_op_const_reg(out_list,OP_SUB,idx_size,aint(min_),hregister);
              { case expression above max_ => jump to elselabel }
              cg.a_cmp_const_reg_label(out_list,idx_size,OC_A,label_span,hregister,elselabel);
              min_:=0;
              { the offset applied above means the index must not be sign
                extended when it is loaded into the index register }
              idx_size:=tcgsize2unsigned[idx_size];
            end;
        end;

      { a local label is used so that the GOT is not needed }
      current_asmdata.getlabel(tbl_label,alt_data);
      idx_reg:=cg.makeregsize(out_list,hregister,OS_ADDR);
      cg.a_load_reg_reg(out_list,idx_size,OS_ADDR,hregister,idx_reg);
      { table address -> tbl_reg }
      reference_reset_symbol(mem_ref,tbl_label,0,4,[]);
      tbl_reg:=cg.getaddressregister(out_list);
      cg.a_loadaddr_ref_reg(out_list,mem_ref,tbl_reg);
      { fetch the table slot as a sign-extended 32-bit value; the displacement
        compensates for a table that does not start at value 0 }
      reference_reset_base(mem_ref,tbl_reg,-aint(min_)*4,ctempposinvalid,4,[]);
      mem_ref.index:=idx_reg;
      mem_ref.scalefactor:=4;
      dest_reg:=cg.getaddressregister(out_list);
      cg.a_load_ref_reg(out_list,OS_S32,OS_ADDR,mem_ref,dest_reg);
      { slot value + table address = jump target }
      reference_reset_base(mem_ref,tbl_reg,0,ctempposinvalid,sizeof(pint),[]);
      mem_ref.index:=dest_reg;
      mem_ref.scalefactor:=1;
      cg.a_loadaddr_ref_reg(out_list,mem_ref,dest_reg);
      { and finally jump }
      emit_reg(A_JMP,S_NO,dest_reg);

      { Pick the section that receives the table.
        On Mac OS X, dead code stripping ("smart linking") works on global
        symbols: each global/static symbol (one that does not start with "L")
        opens a new "subsection", and the linker discards that subsection
        when nothing references the symbol. A jump table placed in the rodata
        section would belong to the data block of the preceding non-L label
        there and would be kept or dropped together with that block.
        Therefore the table goes into the code section on Darwin: aktlocaldata
        is inserted right after the routine, so the table ends up in the same
        subsection as the routine's code. }
      if target_info.system in systems_darwin then
        tbl_section:=sec_code
      else
        tbl_section:=sec_rodata;

      { from here on entries are appended to the routine's local data }
      out_list:=current_procinfo.aktlocaldata;
      new_section(out_list,tbl_section,current_procinfo.procdef.mangledname,4);
      out_list.concat(Tai_label.Create(tbl_label));

      if pad_table then
        begin
          { pad the values below the first label }
          fill_val:=type_lo.svalue;
          while fill_val < first_label do
            begin
              add_entry(elselabel);
              Inc(fill_val);
            end;
        end;

      genitem(hp);

      if pad_table then
        begin
          { pad the values above max_; lowering the upper bound by one instead
            of adding one to max_ averts the risk of an overflow }
          fill_val:=max_;
          type_hi:=type_hi - 1;
          while fill_val <= type_hi.svalue do
            begin
              add_entry(elselabel);
              Inc(fill_val);
            end;
        end;
    end;
  { Unit initialization: stores tx8664casenode in ccasenode, a variable that
    is declared outside this unit. }
  initialization
    ccasenode:=tx8664casenode;
  end.