nwasmset.pas 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. {
  2. Copyright (c) 1998-2002, 2021 by Florian Klaempfl and Nikolay Nikolov
  3. Generate WebAssembly code for in/case nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nwasmset;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nset,ncgset,
  22. aasmbase;
  23. type
  24. { twasminnode }
  25. twasminnode = class(tcginnode)
  26. protected
  27. function checkgenjumps(out setparts: Tsetparts; out numparts: byte; out use_small: boolean): boolean;override;
  28. end;
  29. { twasmcasenode }
  30. twasmcasenode = class(tcgcasenode)
  31. private
  32. function GetBranchLabel(Block: TNode; out _Label: TAsmLabel): Boolean;
  33. protected
  34. procedure genlinearlist(hp : pcaselabel);override;
  35. procedure genlinearcmplist(hp : pcaselabel);override;
  36. public
  37. procedure pass_generate_code;override;
  38. end;
  39. implementation
  40. uses
  41. globtype,globals,
  42. cpubase,
  43. cgbase,cgutils,
  44. aasmdata,aasmcpu,
  45. hlcgobj,hlcgcpu,
  46. nbas,
  47. symtype,
  48. pass_2,defutil,verbose,constexp;
  49. {*****************************************************************************
  50. TWASMINNODE
  51. *****************************************************************************}
  52. function twasminnode.checkgenjumps(out setparts: Tsetparts; out numparts: byte; out use_small: boolean): boolean;
  53. begin
  54. { call inherited to initialize use_small }
  55. inherited;
  56. result:=false;
  57. end;
  58. {*****************************************************************************
  59. TWASMCASENODE
  60. *****************************************************************************}
  61. function twasmcasenode.GetBranchLabel(Block: TNode; out _Label: TAsmLabel): Boolean;
  62. begin
  63. Result := True;
  64. if not Assigned(Block) then
  65. begin
  66. { Block doesn't exist / is empty }
  67. _Label := endlabel;
  68. Exit;
  69. end;
  70. { These optimisations aren't particularly debugger friendly }
  71. if not (cs_opt_level2 in current_settings.optimizerswitches) then
  72. begin
  73. Result := False;
  74. current_asmdata.getjumplabel(_Label);
  75. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_block));
  76. Exit;
  77. end;
  78. while Assigned(Block) do
  79. begin
  80. case Block.nodetype of
  81. nothingn:
  82. begin
  83. _Label := endlabel;
  84. Exit;
  85. end;
  86. blockn:
  87. begin
  88. Block := TBlockNode(Block).Left;
  89. Continue;
  90. end;
  91. statementn:
  92. begin
  93. { If the right node is assigned, then it's a compound block
  94. that can't be simplified, so fall through, set Result to
  95. False and make a new label }
  96. if Assigned(TStatementNode(Block).right) then
  97. Break;
  98. Block := TStatementNode(Block).Left;
  99. Continue;
  100. end;
  101. else
  102. ;
  103. end;
  104. Break;
  105. end;
  106. { Create unique label }
  107. Result := False;
  108. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_block));
  109. current_asmdata.getjumplabel(_Label);
  110. end;
  111. procedure twasmcasenode.genlinearlist(hp: pcaselabel);
  112. var
  113. first : boolean;
  114. last : TConstExprInt;
  115. scratch_reg: tregister;
  116. newsize: tcgsize;
  117. newdef: tdef;
  118. procedure gensub(value:tcgint);
  119. begin
  120. { here, since the sub and cmp are separate we need
  121. to move the result before subtract to help
  122. the register allocator
  123. }
  124. hlcg.a_load_reg_reg(current_asmdata.CurrAsmList, opsize, opsize, hregister, scratch_reg);
  125. hlcg.a_op_const_reg(current_asmdata.CurrAsmList, OP_SUB, opsize, value, hregister);
  126. end;
  127. procedure genitem(t : pcaselabel);
  128. begin
  129. if assigned(t^.less) then
  130. genitem(t^.less);
  131. { do we need to test the first value? }
  132. if first and (t^._low>get_min_value(left.resultdef)) then
  133. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_lt,tcgint(t^._low.svalue),hregister,elselabel);
  134. if t^._low=t^._high then
  135. begin
  136. if t^._low-last=0 then
  137. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_EQ,0,hregister,blocklabel(t^.blockid))
  138. else
  139. begin
  140. gensub(tcgint(t^._low.svalue-last.svalue));
  141. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,
  142. OC_EQ,tcgint(t^._low.svalue-last.svalue),scratch_reg,blocklabel(t^.blockid));
  143. end;
  144. last:=t^._low;
  145. end
  146. else
  147. begin
  148. { it begins with the smallest label, if the value }
  149. { is even smaller then jump immediately to the }
  150. { ELSE-label }
  151. if first then
  152. begin
  153. { have we to ajust the first value ? }
  154. if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
  155. gensub(tcgint(t^._low.svalue));
  156. end
  157. else
  158. begin
  159. { if there is no unused label between the last and the }
  160. { present label then the lower limit can be checked }
  161. { immediately. else check the range in between: }
  162. gensub(tcgint(t^._low.svalue-last.svalue));
  163. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize,jmp_lt,tcgint(t^._low.svalue-last.svalue),scratch_reg,elselabel);
  164. end;
  165. gensub(tcgint(t^._high.svalue-t^._low.svalue));
  166. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,jmp_le,tcgint(t^._high.svalue-t^._low.svalue),scratch_reg,blocklabel(t^.blockid));
  167. last:=t^._high;
  168. end;
  169. first:=false;
  170. if assigned(t^.greater) then
  171. genitem(t^.greater);
  172. end;
  173. begin
  174. { do we need to generate cmps? }
  175. if (with_sign and (min_label<0)) then
  176. genlinearcmplist(hp)
  177. else
  178. begin
  179. { sign/zero extend the value to a full register before starting to
  180. subtract values, so that on platforms that don't have
  181. subregisters of the same size as the value we don't generate
  182. sign/zero-extensions after every subtraction
  183. make newsize always signed, since we only do this if the size in
  184. bytes of the register is larger than the original opsize, so
  185. the value can always be represented by a larger signed type }
  186. newsize:=tcgsize2signed[reg_cgsize(hregister)];
  187. if tcgsize2size[newsize]>opsize.size then
  188. begin
  189. newdef:=cgsize_orddef(newsize);
  190. scratch_reg:=hlcg.getintregister(current_asmdata.CurrAsmList,newdef);
  191. hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,newdef,hregister,scratch_reg);
  192. hregister:=scratch_reg;
  193. opsize:=newdef;
  194. end;
  195. if (labelcnt>1) or not(cs_opt_level1 in current_settings.optimizerswitches) then
  196. begin
  197. last:=0;
  198. first:=true;
  199. scratch_reg:=hlcg.getintregister(current_asmdata.CurrAsmList,opsize);
  200. genitem(hp);
  201. end
  202. else
  203. begin
  204. { If only one label exists, we can greatly simplify the checks to a simple comparison }
  205. if hp^._low=hp^._high then
  206. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, OC_EQ, tcgint(hp^._low.svalue), hregister,blocklabel(hp^.blockid))
  207. else
  208. begin
  209. scratch_reg:=hlcg.getintregister(current_asmdata.CurrAsmList,opsize);
  210. gensub(tcgint(hp^._low.svalue));
  211. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, OC_BE, tcgint(hp^._high.svalue-hp^._low.svalue), hregister,blocklabel(hp^.blockid))
  212. end;
  213. end;
  214. current_asmdata.CurrAsmList.concat(taicpu.op_sym(a_br,elselabel));
  215. end;
  216. end;
  217. procedure twasmcasenode.genlinearcmplist(hp : pcaselabel);
  218. var
  219. last : TConstExprInt;
  220. lastwasrange: boolean;
  221. procedure genitem(t : pcaselabel);
  222. begin
  223. if assigned(t^.less) then
  224. genitem(t^.less);
  225. if t^._low=t^._high then
  226. begin
  227. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, OC_EQ, tcgint(t^._low.svalue),hregister, blocklabel(t^.blockid));
  228. { Reset last here, because we've only checked for one value and need to compare
  229. for the next range both the lower and upper bound }
  230. lastwasrange := false;
  231. end
  232. else
  233. begin
  234. { it begins with the smallest label, if the value }
  235. { is even smaller then jump immediately to the }
  236. { ELSE-label }
  237. if not lastwasrange or (t^._low-last>1) then
  238. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, jmp_lt, tcgint(t^._low.svalue), hregister, elselabel);
  239. thlcgwasm(hlcg).a_cmp_const_reg_label(current_asmdata.CurrAsmList, opsize, jmp_le, tcgint(t^._high.svalue), hregister, blocklabel(t^.blockid));
  240. last:=t^._high;
  241. lastwasrange := true;
  242. end;
  243. if assigned(t^.greater) then
  244. genitem(t^.greater);
  245. end;
  246. begin
  247. last:=0;
  248. lastwasrange:=false;
  249. genitem(hp);
  250. current_asmdata.CurrAsmList.concat(taicpu.op_sym(a_br,elselabel));
  251. end;
  252. procedure twasmcasenode.pass_generate_code;
  253. var
  254. oldflowcontrol: tflowcontrol;
  255. ShortcutElse: Boolean;
  256. i: Integer;
  257. begin
  258. location_reset(location,LOC_VOID,OS_NO);
  259. oldflowcontrol := flowcontrol;
  260. include(flowcontrol,fc_inflowcontrol);
  261. current_asmdata.getjumplabel(endlabel);
  262. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_block));
  263. { Do some optimisation to deal with empty else blocks }
  264. ShortcutElse := GetBranchLabel(elseblock, elselabel);
  265. for i:=blocks.count-1 downto 0 do
  266. with pcaseblock(blocks[i])^ do
  267. shortcut := GetBranchLabel(statement, blocklabel);
  268. with_sign:=is_signed(left.resultdef);
  269. if with_sign then
  270. begin
  271. jmp_gt:=OC_GT;
  272. jmp_lt:=OC_LT;
  273. jmp_le:=OC_LTE;
  274. end
  275. else
  276. begin
  277. jmp_gt:=OC_A;
  278. jmp_lt:=OC_B;
  279. jmp_le:=OC_BE;
  280. end;
  281. secondpass(left);
  282. if (left.expectloc=LOC_JUMP)<>
  283. (left.location.loc=LOC_JUMP) then
  284. internalerror(2006050501);
  285. { determines the size of the operand }
  286. opsize:=left.resultdef;
  287. { copy the case expression to a register }
  288. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opsize,false);
  289. {$if not defined(cpu64bitalu)}
  290. if def_cgsize(opsize) in [OS_S64,OS_64] then
  291. begin
  292. hregister:=left.location.register64.reglo;
  293. hregister2:=left.location.register64.reghi;
  294. end
  295. else
  296. {$endif not cpu64bitalu and not cpuhighleveltarget}
  297. hregister:=left.location.register;
  298. { we need the min_label always to choose between }
  299. { cmps and subs/decs }
  300. min_label:=case_get_min(labels);
  301. { Generate the jumps }
  302. {$if not defined(cpu64bitalu)}
  303. if def_cgsize(opsize) in [OS_64,OS_S64] then
  304. genlinearcmplist(labels)
  305. else
  306. {$endif not cpu64bitalu and not cpuhighleveltarget}
  307. begin
  308. //if cs_opt_level1 in current_settings.optimizerswitches then
  309. // begin
  310. // { procedures are empirically passed on }
  311. // { consumption can also be calculated }
  312. // { but does it pay on the different }
  313. // { processors? }
  314. // { moreover can the size only be appro- }
  315. // { ximated as it is not known if rel8, }
  316. // { rel16 or rel32 jumps are used }
  317. //
  318. // max_label := case_get_max(labels);
  319. //
  320. // { can we omit the range check of the jump table ? }
  321. // getrange(left.resultdef,lv,hv);
  322. // jumptable_no_range:=(lv=min_label) and (hv=max_label);
  323. //
  324. // distv:=max_label-min_label;
  325. // if distv>=0 then
  326. // dist:=distv.uvalue
  327. // else
  328. // dist:=asizeuint(-distv.svalue);
  329. //
  330. // { optimize for size ? }
  331. // if cs_opt_size in current_settings.optimizerswitches then
  332. // begin
  333. // if has_jumptable and
  334. // (min_label>=int64(low(aint))) and
  335. // (max_label<=high(aint)) and
  336. // not((labelcnt<=2) or
  337. // (distv.svalue<0) or
  338. // (dist>3*labelcnt)) then
  339. // begin
  340. // { if the labels less or more a continuum then }
  341. // genjumptable(labels,min_label.svalue,max_label.svalue);
  342. // end
  343. // else
  344. // begin
  345. // { a linear list is always smaller than a jump tree }
  346. // genlinearlist(labels);
  347. // end;
  348. // end
  349. // else
  350. // begin
  351. // max_dist:=4*labelcoverage;
  352. //
  353. // { Don't allow jump tables to get too large }
  354. // if max_dist>4*labelcnt then
  355. // max_dist:=min(max_dist,2048);
  356. //
  357. // if jumptable_no_range then
  358. // max_linear_list:=4
  359. // else
  360. // max_linear_list:=2;
  361. //
  362. // { allow processor specific values }
  363. // optimizevalues(max_linear_list,max_dist);
  364. //
  365. // if (labelcnt<=max_linear_list) then
  366. // genlinearlist(labels)
  367. // else
  368. // begin
  369. // if (has_jumptable) and
  370. // (dist<max_dist) and
  371. // (min_label>=int64(low(aint))) and
  372. // (max_label<=high(aint)) then
  373. // genjumptable(labels,min_label.svalue,max_label.svalue)
  374. // { value has been determined on an i7-4770 using a random case with random values
  375. // if more values are known, this can be handled depending on the target CPU
  376. //
  377. // Testing on a Core 2 Duo E6850 as well as on a Raspi3 showed also, that 64 is
  378. // a good value }
  379. // else if labelcnt>=64 then
  380. // genjmptree(labels)
  381. // else
  382. // genlinearlist(labels);
  383. // end;
  384. // end;
  385. // end
  386. //else
  387. { it's always not bad }
  388. genlinearlist(labels);
  389. end;
  390. { generate the instruction blocks }
  391. for i:=0 to blocks.count-1 do with pcaseblock(blocks[i])^ do
  392. begin
  393. { If the labels are not equal, then the block label has been shortcut to point elsewhere,
  394. so there's no need to implement it }
  395. if not shortcut then
  396. begin
  397. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_end_block));
  398. hlcg.a_label(current_asmdata.CurrAsmList,blocklabel);
  399. secondpass(statement);
  400. { don't come back to case line }
  401. current_filepos:=current_asmdata.CurrAsmList.getlasttaifilepos^;
  402. current_asmdata.CurrAsmList.concat(taicpu.op_sym(a_br,endlabel));
  403. end;
  404. end;
  405. { ...and the else block }
  406. if not ShortcutElse then
  407. begin
  408. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_end_block));
  409. hlcg.a_label(current_asmdata.CurrAsmList,elselabel);
  410. end;
  411. if Assigned(elseblock) then
  412. begin
  413. secondpass(elseblock);
  414. end;
  415. current_asmdata.CurrAsmList.concat(taicpu.op_none(a_end_block));
  416. hlcg.a_label(current_asmdata.CurrAsmList,endlabel);
  417. flowcontrol := oldflowcontrol + (flowcontrol - [fc_inflowcontrol]);
  418. end;
  419. begin
  420. cinnode:=twasminnode;
  421. ccasenode:=twasmcasenode;
  422. end.