2
0

rgcpu.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. {
  2. Copyright (c) 2019 by Dmitry Boyarintsev
  3. This unit implements the WebAssembly specific class for the register
  4. allocator
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************}
  17. unit rgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cclasses,
  22. aasmbase,aasmcpu,aasmtai,aasmdata,
  23. cgbase,cgutils, procinfo,
  24. cpubase,
  25. rgobj;
  26. type
  27. tspilltemps = array[tregistertype] of ^Tspill_temp_list;
  28. { trgcpu }
  29. trgcpu=class(trgobj)
  30. protected
  31. class procedure do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps);
  32. class procedure remove_dummy_load_stores(list: TAsmList; headertai: tai);
  33. public
  34. { performs the register allocation for *all* register types }
  35. class procedure do_all_register_allocation(list: TAsmList; headertai: tai);
  36. end;
  37. implementation
  38. uses
  39. verbose,cutils,
  40. globtype,globals,
  41. cgobj,
  42. tgobj,
  43. symtype,symdef,symcpu;
  44. { trgcpu }
  45. class procedure trgcpu.do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps);
  46. var
  47. l: longint;
  48. reg: tregister;
  49. begin
  50. { WebAssebly instructions never have more than one memory (virtual register)
  51. operand, so there is no danger of superregister conflicts }
  52. for l:=0 to instr.ops-1 do
  53. if (instr.oper[l]^.typ=top_reg) and (instr.oper[l]^.reg<>NR_LOCAL_FRAME_POINTER_REG) then
  54. begin
  55. reg:=instr.oper[l]^.reg;
  56. instr.loadref(l,spilltemps[getregtype(reg)]^[getsupreg(reg)]);
  57. end;
  58. end;
  59. class procedure trgcpu.remove_dummy_load_stores(list: TAsmList; headertai: tai);
  60. type
  61. taitypeset = set of taitype;
  62. function nextskipping(p: tai; const skip: taitypeset): tai;
  63. begin
  64. result:=p;
  65. if not assigned(result) then
  66. exit;
  67. repeat
  68. result:=tai(result.next);
  69. until not assigned(result) or
  70. not(result.typ in skip);
  71. end;
  72. function issimpleregstore(p: tai; var reg: tregister; doubleprecisionok: boolean): boolean;
  73. const
  74. simplestoressp = [a_f32_store];
  75. simplestoresdp = [a_f64_store];
  76. begin
  77. result:=
  78. assigned(p) and
  79. (p.typ=ait_instruction) and
  80. ((taicpu(p).opcode in simplestoressp) or
  81. (doubleprecisionok and
  82. (taicpu(p).opcode in simplestoresdp))) and
  83. ((reg=NR_NO) or
  84. (taicpu(p).oper[0]^.typ=top_reg) and
  85. (taicpu(p).oper[0]^.reg=reg));
  86. if result and
  87. (reg=NR_NO) then
  88. reg:=taicpu(p).oper[0]^.reg;
  89. end;
  90. function issimpleregload(p: tai; var reg: tregister; doubleprecisionok: boolean): boolean;
  91. const
  92. simpleloadssp = [a_f32_load];
  93. simpleloadsdp = [a_f64_load];
  94. begin
  95. result:=
  96. assigned(p) and
  97. (p.typ=ait_instruction) and
  98. ((taicpu(p).opcode in simpleloadssp) or
  99. (doubleprecisionok and
  100. (taicpu(p).opcode in simpleloadsdp))) and
  101. ((reg=NR_NO) or
  102. (taicpu(p).oper[0]^.typ=top_reg) and
  103. (taicpu(p).oper[0]^.reg=reg));
  104. if result and
  105. (reg=NR_NO) then
  106. reg:=taicpu(p).oper[0]^.reg;
  107. end;
  108. function isregallocoftyp(p: tai; typ: TRegAllocType;var reg: tregister): boolean;
  109. begin
  110. result:=
  111. assigned(p) and
  112. (p.typ=ait_regalloc) and
  113. (tai_regalloc(p).ratype=typ);
  114. if result then
  115. if reg=NR_NO then
  116. reg:=tai_regalloc(p).reg
  117. else
  118. result:=tai_regalloc(p).reg=reg;
  119. end;
  120. function regininstruction(p: tai; reg: tregister): boolean;
  121. var
  122. sr: tsuperregister;
  123. i: longint;
  124. begin
  125. result:=false;
  126. if p.typ<>ait_instruction then
  127. exit;
  128. sr:=getsupreg(reg);
  129. for i:=0 to taicpu(p).ops-1 do
  130. case taicpu(p).oper[0]^.typ of
  131. top_reg:
  132. if (getsupreg(taicpu(p).oper[0]^.reg)=sr) then
  133. exit(true);
  134. top_ref:
  135. begin
  136. if (getsupreg(taicpu(p).oper[0]^.ref^.base)=sr) then
  137. exit(true);
  138. if (getsupreg(taicpu(p).oper[0]^.ref^.index)=sr) then
  139. exit(true);
  140. end;
  141. else
  142. ;
  143. end;
  144. end;
  145. function try_remove_store_dealloc_load(var p: tai): boolean;
  146. var
  147. dealloc,
  148. load: tai;
  149. reg: tregister;
  150. begin
  151. result:=false;
  152. { check for:
  153. store regx
  154. dealloc regx
  155. load regx
  156. and remove. We don't have to check that the load/store
  157. types match, because they have to for this to be
  158. valid WebAssembly code }
  159. dealloc:=nextskipping(p,[ait_comment,ait_tempalloc]);
  160. load:=nextskipping(dealloc,[ait_comment,ait_tempalloc]);
  161. reg:=NR_NO;
  162. if issimpleregstore(p,reg,true) and
  163. isregallocoftyp(dealloc,ra_dealloc,reg) and
  164. issimpleregload(load,reg,true) then
  165. begin
  166. { remove the whole sequence: the store }
  167. list.remove(p);
  168. p.free;
  169. p:=Tai(load.next);
  170. { the load }
  171. list.remove(load);
  172. load.free;
  173. result:=true;
  174. end;
  175. end;
  176. function try_swap_store_x_load(var p: tai): boolean;
  177. var
  178. insertpos,
  179. storex,
  180. deallocy,
  181. loady,
  182. deallocx,
  183. loadx: tai;
  184. swapxy: taicpu;
  185. regx, regy: tregister;
  186. begin
  187. result:=false;
  188. { check for:
  189. alloc regx (optional)
  190. store regx (p)
  191. dealloc regy
  192. load regy
  193. dealloc regx
  194. load regx
  195. and change to
  196. dealloc regy
  197. load regy
  198. swap
  199. alloc regx (if it existed)
  200. store regx
  201. dealloc regx
  202. load regx
  203. This will create opportunities to remove the store/load regx
  204. (and possibly also for regy)
  205. }
  206. //regx:=NR_NO;
  207. //regy:=NR_NO;
  208. //if not issimpleregstore(p,regx,false) then
  209. // exit;
  210. //storex:=p;
  211. //deallocy:=nextskipping(storex,[ait_comment,ait_tempalloc]);
  212. //loady:=nextskipping(deallocy,[ait_comment,ait_tempalloc]);
  213. //deallocx:=nextskipping(loady,[ait_comment,ait_tempalloc]);
  214. //loadx:=nextskipping(deallocx,[ait_comment,ait_tempalloc]);
  215. //if not assigned(loadx) then
  216. // exit;
  217. //if not issimpleregload(loady,regy,false) then
  218. // exit;
  219. //if not issimpleregload(loadx,regx,false) then
  220. // exit;
  221. //if not isregallocoftyp(deallocy,ra_dealloc,regy) then
  222. // exit;
  223. //if not isregallocoftyp(deallocx,ra_dealloc,regx) then
  224. // exit;
  225. //insertpos:=tai(p.previous);
  226. //if not assigned(insertpos) or
  227. // not isregallocoftyp(insertpos,ra_alloc,regx) then
  228. // insertpos:=storex;
  229. //list.remove(deallocy);
  230. //list.insertbefore(deallocy,insertpos);
  231. //list.remove(loady);
  232. //list.insertbefore(loady,insertpos);
  233. //swapxy:=taicpu.op_none(a_swap);
  234. //swapxy.fileinfo:=taicpu(loady).fileinfo;
  235. //list.insertbefore(swapxy,insertpos);
  236. //result:=true;
  237. end;
  238. var
  239. p,next,nextnext: tai;
  240. reg: tregister;
  241. removedsomething: boolean;
  242. begin
  243. repeat
  244. removedsomething:=false;
  245. p:=headertai;
  246. while assigned(p) do
  247. begin
  248. case p.typ of
  249. ait_regalloc:
  250. begin
  251. reg:=NR_NO;
  252. next:=nextskipping(p,[ait_comment,ait_tempalloc]);
  253. nextnext:=nextskipping(next,[ait_comment,ait_regalloc]);
  254. if assigned(nextnext) then
  255. begin
  256. { remove
  257. alloc reg
  258. dealloc reg
  259. (can appear after optimisations, necessary to prevent
  260. useless stack slot allocations) }
  261. if isregallocoftyp(p,ra_alloc,reg) and
  262. isregallocoftyp(next,ra_dealloc,reg) and
  263. not regininstruction(nextnext,reg) then
  264. begin
  265. list.remove(p);
  266. p.free;
  267. p:=tai(next.next);
  268. list.remove(next);
  269. next.free;
  270. removedsomething:=true;
  271. continue;
  272. end;
  273. end;
  274. end;
  275. ait_instruction:
  276. begin
  277. if try_remove_store_dealloc_load(p) or
  278. try_swap_store_x_load(p) then
  279. begin
  280. removedsomething:=true;
  281. continue;
  282. end;
  283. end;
  284. else
  285. ;
  286. end;
  287. p:=tai(p.next);
  288. end;
  289. until not removedsomething;
  290. end;
  291. class procedure trgcpu.do_all_register_allocation(list: TAsmList; headertai: tai);
  292. var
  293. spill_temps : tspilltemps;
  294. templist : TAsmList;
  295. intrg,
  296. fprg : trgcpu;
  297. p,q : tai;
  298. size : longint;
  299. insbefore : TLinkedListItem;
  300. lastins : TLinkedListItem;
  301. //locavail : array[TWasmBasicType] of tlocalalloc; // used or not
  302. ra : tai_regalloc;
  303. idx : integer;
  304. fidx : integer;
  305. pidx : integer;
  306. t: treftemppos;
  307. def: tdef;
  308. begin
  309. { Since there are no actual registers, we simply spill everything. We
  310. use tt_regallocator temps, which are not used by the temp allocator
  311. during code generation, so that we cannot accidentally overwrite
  312. any temporary values }
  313. { get references to all register allocators }
  314. intrg:=trgcpu(cg.rg[R_INTREGISTER]);
  315. fprg:=trgcpu(cg.rg[R_FPUREGISTER]);
  316. { determine the live ranges of all registers }
  317. intrg.insert_regalloc_info_all(list);
  318. fprg.insert_regalloc_info_all(list);
  319. { Don't do the actual allocation when -sr is passed }
  320. if (cs_no_regalloc in current_settings.globalswitches) then
  321. exit;
  322. { remove some simple useless store/load sequences }
  323. remove_dummy_load_stores(list,headertai);
  324. { allocate room to store the virtual register -> temp mapping }
  325. spill_temps[R_INTREGISTER]:=allocmem(sizeof(treference)*intrg.maxreg);
  326. spill_temps[R_FPUREGISTER]:=allocmem(sizeof(treference)*fprg.maxreg);
  327. { List to insert temp allocations into }
  328. templist:=TAsmList.create;
  329. { allocate/replace all registers }
  330. p:=headertai;
  331. insbefore := nil;
  332. while assigned(p) do
  333. begin
  334. case p.typ of
  335. ait_regalloc:
  336. begin
  337. ra := tai_regalloc(p);
  338. case getregtype(ra.reg) of
  339. R_INTREGISTER:
  340. case getsubreg(ra.reg) of
  341. R_SUBD:
  342. begin
  343. size:=4;
  344. def:=s32inttype;
  345. end;
  346. R_SUBQ:
  347. begin
  348. size:=8;
  349. def:=s64inttype;
  350. end;
  351. else
  352. internalerror(2020120803);
  353. end;
  354. R_ADDRESSREGISTER:
  355. begin
  356. size:=4;
  357. def:=voidpointertype;
  358. end;
  359. R_FPUREGISTER:
  360. case getsubreg(ra.reg) of
  361. R_SUBFS:
  362. begin
  363. size:=4;
  364. def:=s32floattype;
  365. end;
  366. R_SUBFD:
  367. begin
  368. size:=8;
  369. def:=s64floattype;
  370. end;
  371. else
  372. internalerror(2020120804);
  373. end;
  374. else
  375. internalerror(2010122912);
  376. end;
  377. case ra.ratype of
  378. ra_alloc :
  379. tg.gethltemp(templist,def,
  380. size,
  381. tt_regallocator,spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)]);
  382. ra_dealloc :
  383. begin
  384. tg.ungettemp(templist,spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)]);
  385. { don't invalidate the temp reference, may still be used one instruction
  386. later }
  387. end;
  388. else
  389. ;
  390. end;
  391. { insert the tempallocation/free at the right place }
  392. { remove the register allocation info for the register
  393. (p.previous is valid because we just inserted the temp
  394. allocation/free before p) }
  395. q:=Tai(p.previous);
  396. list.remove(p);
  397. p.free;
  398. p:=q;
  399. end;
  400. ait_instruction:
  401. do_spill_replace_all(list,taicpu(p),spill_temps);
  402. else
  403. ;
  404. end;
  405. p:=Tai(p.next);
  406. end;
  407. if templist.count>0 then
  408. list.insertListBefore(nil, templist);
  409. freemem(spill_temps[R_INTREGISTER]);
  410. freemem(spill_temps[R_FPUREGISTER]);
  411. templist.free;
  412. end;
  413. end.