rgcpu.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. {
  2. Copyright (c) 2019 by Dmitry Boyarintsev
  3. This unit implements the WebAssembly specific class for the register
  4. allocator
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************}
  17. unit rgcpu;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cclasses,
  22. aasmbase,aasmcpu,aasmtai,aasmdata,
  23. cgbase,cgutils, procinfo,
  24. cpubase,
  25. rgobj, tgcpu;
  26. type
  27. tspilltemps = array[tregistertype] of ^Tspill_temp_list;
  28. { trgcpu }
  29. trgcpu=class(trgobj)
  30. protected
  31. class procedure do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps);
  32. class procedure remove_dummy_load_stores(list: TAsmList; headertai: tai);
  33. public
  34. { performs the register allocation for *all* register types }
  35. class procedure do_all_register_allocation(list: TAsmList; headertai: tai);
  36. end;
  37. implementation
  38. uses
  39. verbose,cutils,
  40. globtype,globals,
  41. cgobj,
  42. tgobj;
  43. { trgcpu }
  44. class procedure trgcpu.do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps);
  45. var
  46. l: longint;
  47. reg: tregister;
  48. begin
  49. { WebAssebly instructions never have more than one memory (virtual register)
  50. operand, so there is no danger of superregister conflicts }
  51. for l:=0 to instr.ops-1 do
  52. if instr.oper[l]^.typ=top_reg then
  53. begin
  54. reg:=instr.oper[l]^.reg;
  55. instr.loadref(l,spilltemps[getregtype(reg)]^[getsupreg(reg)]);
  56. end;
  57. end;
  58. class procedure trgcpu.remove_dummy_load_stores(list: TAsmList; headertai: tai);
  59. type
  60. taitypeset = set of taitype;
  61. function nextskipping(p: tai; const skip: taitypeset): tai;
  62. begin
  63. result:=p;
  64. if not assigned(result) then
  65. exit;
  66. repeat
  67. result:=tai(result.next);
  68. until not assigned(result) or
  69. not(result.typ in skip);
  70. end;
  71. function issimpleregstore(p: tai; var reg: tregister; doubleprecisionok: boolean): boolean;
  72. const
  73. simplestoressp = [a_f32_store];
  74. simplestoresdp = [a_f64_store];
  75. begin
  76. result:=
  77. assigned(p) and
  78. (p.typ=ait_instruction) and
  79. ((taicpu(p).opcode in simplestoressp) or
  80. (doubleprecisionok and
  81. (taicpu(p).opcode in simplestoresdp))) and
  82. ((reg=NR_NO) or
  83. (taicpu(p).oper[0]^.typ=top_reg) and
  84. (taicpu(p).oper[0]^.reg=reg));
  85. if result and
  86. (reg=NR_NO) then
  87. reg:=taicpu(p).oper[0]^.reg;
  88. end;
  89. function issimpleregload(p: tai; var reg: tregister; doubleprecisionok: boolean): boolean;
  90. const
  91. simpleloadssp = [a_f32_load];
  92. simpleloadsdp = [a_f64_load];
  93. begin
  94. result:=
  95. assigned(p) and
  96. (p.typ=ait_instruction) and
  97. ((taicpu(p).opcode in simpleloadssp) or
  98. (doubleprecisionok and
  99. (taicpu(p).opcode in simpleloadsdp))) and
  100. ((reg=NR_NO) or
  101. (taicpu(p).oper[0]^.typ=top_reg) and
  102. (taicpu(p).oper[0]^.reg=reg));
  103. if result and
  104. (reg=NR_NO) then
  105. reg:=taicpu(p).oper[0]^.reg;
  106. end;
  107. function isregallocoftyp(p: tai; typ: TRegAllocType;var reg: tregister): boolean;
  108. begin
  109. result:=
  110. assigned(p) and
  111. (p.typ=ait_regalloc) and
  112. (tai_regalloc(p).ratype=typ);
  113. if result then
  114. if reg=NR_NO then
  115. reg:=tai_regalloc(p).reg
  116. else
  117. result:=tai_regalloc(p).reg=reg;
  118. end;
  119. function regininstruction(p: tai; reg: tregister): boolean;
  120. var
  121. sr: tsuperregister;
  122. i: longint;
  123. begin
  124. result:=false;
  125. if p.typ<>ait_instruction then
  126. exit;
  127. sr:=getsupreg(reg);
  128. for i:=0 to taicpu(p).ops-1 do
  129. case taicpu(p).oper[0]^.typ of
  130. top_reg:
  131. if (getsupreg(taicpu(p).oper[0]^.reg)=sr) then
  132. exit(true);
  133. top_ref:
  134. begin
  135. if (getsupreg(taicpu(p).oper[0]^.ref^.base)=sr) then
  136. exit(true);
  137. if (getsupreg(taicpu(p).oper[0]^.ref^.index)=sr) then
  138. exit(true);
  139. end;
  140. else
  141. ;
  142. end;
  143. end;
  144. function try_remove_store_dealloc_load(var p: tai): boolean;
  145. var
  146. dealloc,
  147. load: tai;
  148. reg: tregister;
  149. begin
  150. result:=false;
  151. { check for:
  152. store regx
  153. dealloc regx
  154. load regx
  155. and remove. We don't have to check that the load/store
  156. types match, because they have to for this to be
  157. valid WebAssembly code }
  158. dealloc:=nextskipping(p,[ait_comment,ait_tempalloc]);
  159. load:=nextskipping(dealloc,[ait_comment,ait_tempalloc]);
  160. reg:=NR_NO;
  161. if issimpleregstore(p,reg,true) and
  162. isregallocoftyp(dealloc,ra_dealloc,reg) and
  163. issimpleregload(load,reg,true) then
  164. begin
  165. { remove the whole sequence: the store }
  166. list.remove(p);
  167. p.free;
  168. p:=Tai(load.next);
  169. { the load }
  170. list.remove(load);
  171. load.free;
  172. result:=true;
  173. end;
  174. end;
  175. function try_swap_store_x_load(var p: tai): boolean;
  176. var
  177. insertpos,
  178. storex,
  179. deallocy,
  180. loady,
  181. deallocx,
  182. loadx: tai;
  183. swapxy: taicpu;
  184. regx, regy: tregister;
  185. begin
  186. result:=false;
  187. { check for:
  188. alloc regx (optional)
  189. store regx (p)
  190. dealloc regy
  191. load regy
  192. dealloc regx
  193. load regx
  194. and change to
  195. dealloc regy
  196. load regy
  197. swap
  198. alloc regx (if it existed)
  199. store regx
  200. dealloc regx
  201. load regx
  202. This will create opportunities to remove the store/load regx
  203. (and possibly also for regy)
  204. }
  205. //regx:=NR_NO;
  206. //regy:=NR_NO;
  207. //if not issimpleregstore(p,regx,false) then
  208. // exit;
  209. //storex:=p;
  210. //deallocy:=nextskipping(storex,[ait_comment,ait_tempalloc]);
  211. //loady:=nextskipping(deallocy,[ait_comment,ait_tempalloc]);
  212. //deallocx:=nextskipping(loady,[ait_comment,ait_tempalloc]);
  213. //loadx:=nextskipping(deallocx,[ait_comment,ait_tempalloc]);
  214. //if not assigned(loadx) then
  215. // exit;
  216. //if not issimpleregload(loady,regy,false) then
  217. // exit;
  218. //if not issimpleregload(loadx,regx,false) then
  219. // exit;
  220. //if not isregallocoftyp(deallocy,ra_dealloc,regy) then
  221. // exit;
  222. //if not isregallocoftyp(deallocx,ra_dealloc,regx) then
  223. // exit;
  224. //insertpos:=tai(p.previous);
  225. //if not assigned(insertpos) or
  226. // not isregallocoftyp(insertpos,ra_alloc,regx) then
  227. // insertpos:=storex;
  228. //list.remove(deallocy);
  229. //list.insertbefore(deallocy,insertpos);
  230. //list.remove(loady);
  231. //list.insertbefore(loady,insertpos);
  232. //swapxy:=taicpu.op_none(a_swap);
  233. //swapxy.fileinfo:=taicpu(loady).fileinfo;
  234. //list.insertbefore(swapxy,insertpos);
  235. //result:=true;
  236. end;
  237. var
  238. p,next,nextnext: tai;
  239. reg: tregister;
  240. removedsomething: boolean;
  241. begin
  242. repeat
  243. removedsomething:=false;
  244. p:=headertai;
  245. while assigned(p) do
  246. begin
  247. case p.typ of
  248. ait_regalloc:
  249. begin
  250. reg:=NR_NO;
  251. next:=nextskipping(p,[ait_comment,ait_tempalloc]);
  252. nextnext:=nextskipping(next,[ait_comment,ait_regalloc]);
  253. if assigned(nextnext) then
  254. begin
  255. { remove
  256. alloc reg
  257. dealloc reg
  258. (can appear after optimisations, necessary to prevent
  259. useless stack slot allocations) }
  260. if isregallocoftyp(p,ra_alloc,reg) and
  261. isregallocoftyp(next,ra_dealloc,reg) and
  262. not regininstruction(nextnext,reg) then
  263. begin
  264. list.remove(p);
  265. p.free;
  266. p:=tai(next.next);
  267. list.remove(next);
  268. next.free;
  269. removedsomething:=true;
  270. continue;
  271. end;
  272. end;
  273. end;
  274. ait_instruction:
  275. begin
  276. if try_remove_store_dealloc_load(p) or
  277. try_swap_store_x_load(p) then
  278. begin
  279. removedsomething:=true;
  280. continue;
  281. end;
  282. end;
  283. else
  284. ;
  285. end;
  286. p:=tai(p.next);
  287. end;
  288. until not removedsomething;
  289. end;
  290. function registertobastype(const reg: TRegister): TWasmBasicType;
  291. begin
  292. case getregtype(reg) of
  293. R_INTREGISTER:
  294. if getsubreg(reg)=R_SUBD then
  295. registertobastype:=wbt_i32
  296. else
  297. registertobastype:=wbt_i64;
  298. R_ADDRESSREGISTER:
  299. registertobastype:=wbt_i32;
  300. R_FPUREGISTER:
  301. if getsubreg(reg)=R_SUBFS then
  302. registertobastype:=wbt_f32
  303. else
  304. registertobastype:=wbt_f64
  305. else
  306. internalerror(2010122912);
  307. end;
  308. end;
  309. class procedure trgcpu.do_all_register_allocation(list: TAsmList; headertai: tai);
  310. var
  311. spill_temps : tspilltemps;
  312. templist : TAsmList;
  313. intrg,
  314. fprg : trgcpu;
  315. p,q : tai;
  316. size : longint;
  317. insbefore : TLinkedListItem;
  318. lastins : TLinkedListItem;
  319. //locavail : array[TWasmBasicType] of tlocalalloc; // used or not
  320. wbt : TWasmBasicType;
  321. ra : tai_regalloc;
  322. idx : integer;
  323. fidx : integer;
  324. pidx : integer;
  325. t: treftemppos;
  326. begin
  327. { Since there are no actual registers, we simply spill everything. We
  328. use tt_regallocator temps, which are not used by the temp allocator
  329. during code generation, so that we cannot accidentally overwrite
  330. any temporary values }
  331. { get references to all register allocators }
  332. intrg:=trgcpu(cg.rg[R_INTREGISTER]);
  333. fprg:=trgcpu(cg.rg[R_FPUREGISTER]);
  334. { determine the live ranges of all registers }
  335. intrg.insert_regalloc_info_all(list);
  336. fprg.insert_regalloc_info_all(list);
  337. { Don't do the actual allocation when -sr is passed }
  338. if (cs_no_regalloc in current_settings.globalswitches) then
  339. exit;
  340. { remove some simple useless store/load sequences }
  341. remove_dummy_load_stores(list,headertai);
  342. { allocate room to store the virtual register -> temp mapping }
  343. spill_temps[R_INTREGISTER]:=allocmem(sizeof(treference)*intrg.maxreg);
  344. spill_temps[R_FPUREGISTER]:=allocmem(sizeof(treference)*fprg.maxreg);
  345. { List to insert temp allocations into }
  346. templist:=TAsmList.create;
  347. { allocate/replace all registers }
  348. p:=headertai;
  349. insbefore := nil;
  350. while assigned(p) do
  351. begin
  352. case p.typ of
  353. ait_regalloc:
  354. begin
  355. ra := tai_regalloc(p);
  356. wbt := registertobastype(ra.reg);
  357. case getregtype(ra.reg) of
  358. R_INTREGISTER:
  359. if getsubreg(ra.reg)=R_SUBD then
  360. size:=4
  361. else
  362. size:=8;
  363. R_ADDRESSREGISTER:
  364. size:=4;
  365. R_FPUREGISTER:
  366. if getsubreg(ra.reg)=R_SUBFS then
  367. size:=4
  368. else
  369. size:=8;
  370. else
  371. internalerror(2010122912);
  372. end;
  373. case ra.ratype of
  374. ra_alloc :
  375. begin
  376. ttgwasm(tg).allocLocalVarToRef(wbt, spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)]);
  377. (*wasmloc.
  378. pidx := fidx;
  379. idx := wasmloc.alloc(wbt);
  380. if idx<0 then
  381. internalerror(201909173); // ran out of local variables! ... must be dynamic
  382. //spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)].temppos := idx;
  383. //spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)].isfloat := true;
  384. //tg.gettemp(templist,
  385. //size,1,
  386. //tt_regallocator,spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)]);
  387. t.val:=idx;
  388. reference_reset_base(spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)],current_procinfo.framepointer,idx,t,size,[]);
  389. wasm
  390. if fidx<>pidx then // new local variable allocated
  391. templist.Concat( tai_local.create(wbt));*)
  392. end;
  393. ra_dealloc :
  394. begin
  395. ttgwasm(tg).deallocLocalVar(wbt, spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)].offset);
  396. //tg.ungettemp(templist,spill_temps[getregtype(ra.reg)]^[getsupreg(ra.reg)]);
  397. { don't invalidate the temp reference, may still be used one instruction
  398. later }
  399. end;
  400. else
  401. ;
  402. end;
  403. { insert the tempallocation/free at the right place }
  404. { remove the register allocation info for the register
  405. (p.previous is valid because we just inserted the temp
  406. allocation/free before p) }
  407. q:=Tai(p.previous);
  408. list.remove(p);
  409. p.free;
  410. p:=q;
  411. end;
  412. ait_instruction:
  413. do_spill_replace_all(list,taicpu(p),spill_temps);
  414. else
  415. ;
  416. end;
  417. p:=Tai(p.next);
  418. end;
  419. if templist.count>0 then
  420. list.insertListBefore(nil, templist);
  421. freemem(spill_temps[R_INTREGISTER]);
  422. freemem(spill_temps[R_FPUREGISTER]);
  423. templist.free;
  424. end;
  425. end.