watscanner.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. { This file is part of wasmbin - a collection of WebAssembly binary utils.
  2. Copyright (C) 2019, 2020 Dmitry Boyarintsev <[email protected]>
  3. Copyright (C) 2020 by the Free Pascal development team
  4. This source is free software; you can redistribute it and/or modify it under
  5. the terms of the GNU General Public License as published by the Free
  6. Software Foundation; either version 2 of the License, or (at your option)
  7. any later version.
  8. This code is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11. details.
  12. A copy of the GNU General Public License is available on the World Wide Web
  13. at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
  14. to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
  15. Boston, MA 02110-1335, USA.
  16. }
  17. unit watscanner;
  18. {$mode delphi}{$H+}
  19. interface
  20. uses
  21. SysUtils, Classes, parseutils, wasmtext;
  22. type
  23. TWatToken = (weNone, weError,
  24. weIdent,
  25. weString, weNumber, weOpenBrace, weCloseBrace,
  26. weAsmSymbol,
  27. weInstr,
  28. weFunc,
  29. weParam, weResult,
  30. weModule, weMut, weFuncRef,
  31. wei32, wei64,
  32. wef32, wef64,
  33. weType,
  34. weImport, weGlobal, weTable, weMemory, weLocal, weExport,
  35. weElem, weData, weOffset, weAlign, weEqual
  36. );
  37. // used only for weNumber
  38. TWatNumberFormat = (
  39. wnfNo, // other than number
  40. wnfInteger, // 00
  41. wnfHex, // 0xABC
  42. wnfFloat, // 0.000
  43. wnfFloatHex // 0x000.bced
  44. );
  45. THexStr = record
  46. num : QWord;
  47. frac : QWord;
  48. exp : integer;
  49. isNeg : Boolean;
  50. end;
  51. { TWatScanner }
  52. TWatScanner = class(TObject)
  53. protected
  54. procedure DoComment(ofs: Integer; const cmt: string); virtual;
  55. function CommentIsSymbol(const cmt: string): Boolean;
  56. public
  57. buf : string;
  58. idx : integer;
  59. instrCode : byte;
  60. ofs : integer;
  61. token : TWatToken;
  62. numformat : TWatNumberFormat;
  63. resText : string;
  64. asmCmd : string;
  65. skipAsmSym : Boolean;
  66. procedure SetSource(const abuf: string);
  67. function Next: Boolean;
  68. function resInt32(const def: integer=-1): Integer;
  69. function resWasmString: string;
  70. end;
  71. const
  72. // see Identifiers of Textual format
  73. IdStart = '$';
  74. IdBody = AlphaNumChars
  75. + [ '!' ,'#' ,'$' ,'%' ,'&' ,'''' ,'*'
  76. ,'+' ,'-' ,'.' ,'/' ,':' ,'<' ,'='
  77. ,'>' ,'?' ,'@' ,'\' ,'^' ,'_' ,'`'
  78. ,'|' ,'~'];
  79. GrammarChars = AlphaNumChars+['.','_'
  80. ,'/' // some old instructions are like that: "f32.reinterpret/i32"
  81. ];
  82. procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
  83. const
  84. KEY_MODULE = 'module';
  85. KEY_FUNC = 'func';
  86. KEY_FUNCREF = 'funcref';
  87. KEY_I32 = 'i32';
  88. KEY_I64 = 'i64';
  89. KEY_F32 = 'f32';
  90. KEY_F64 = 'f64';
  91. KEY_PARAM = 'param';
  92. KEY_RESULT = 'result';
  93. KEY_MUT = 'mut';
  94. KEY_TYPE = 'type';
  95. KEY_IMPORT = 'import';
  96. KEY_GLOBAL = 'global';
  97. KEY_TABLE = 'table';
  98. KEY_MEMORY = 'memory';
  99. KEY_LOCAL = 'local';
  100. KEY_EXPORT = 'export';
  101. KEY_ELEM = 'elem';
  102. KEY_DATA = 'data';
  103. KEY_OFFSET = 'offset';
  104. function ScanString(const buf: string; var idx: integer): string;
  105. function HexFloatStrToHexStr(const t: string; out hexStr: THexStr): Boolean;
  106. function HexFracToSingle(const num, frac: QWord; exp: Integer; isNeg: Boolean): Single;
  107. function HexFloatStrToSingle(const hexstr: string): Single;
  108. function HexFracToDouble(const num, frac: QWord; exp: Integer; neg: Boolean): Double;
  109. function HexFloatStrToDouble(const hexstr: string): double;
  110. implementation
  111. procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
  112. begin
  113. instByte:=0;
  114. entity:=weError;
  115. if txt='' then Exit;
  116. case txt[1] of
  117. 'a':
  118. if txt='anyfunc' then entity:=weFuncRef
  119. else if txt = 'align' then entity:=weAlign
  120. else if TextToInst(txt, instByte) then entity:=weInstr;
  121. 'd':
  122. if txt=KEY_DATA then entity:=weData
  123. else if TextToInst(txt, instByte) then entity:=weInstr;
  124. 'e':
  125. if txt=KEY_EXPORT then entity:=weExport
  126. else if txt=KEY_ELEM then entity:=weElem
  127. else if TextToInst(txt, instByte) then entity:=weInstr;
  128. 'i':
  129. if txt=KEY_I32 then entity:=wei32
  130. else if txt=KEY_I64 then entity:=wei64
  131. else if txt=KEY_IMPORT then entity:=weImport
  132. else if TextToInst(txt, instByte) then entity:=weInstr;
  133. 'g':
  134. if txt=KEY_GLOBAL then entity:=weGlobal
  135. else if TextToInst(txt, instByte) then entity:=weInstr;
  136. 'f':
  137. if txt=KEY_FUNC then entity:=weFunc
  138. else if txt=KEY_FUNCREF then entity:=weFuncRef
  139. else if txt=KEY_F32 then entity:=wef32
  140. else if txt=KEY_F64 then entity:=wef64
  141. else if TextToInst(txt, instByte) then entity:=weInstr;
  142. 'l':
  143. if txt=KEY_LOCAL then entity:=weLocal
  144. else if TextToInst(txt, instByte) then entity:=weInstr;
  145. 'm':
  146. if txt=KEY_MODULE then entity:=weModule
  147. else if txt = KEY_MUT then entity:=weMut
  148. else if txt = KEY_MEMORY then entity:=weMemory
  149. else if TextToInst(txt, instByte) then entity:=weInstr;
  150. 'o':
  151. if txt=KEY_OFFSET then entity:=weOffset
  152. else if TextToInst(txt, instByte) then entity:=weInstr;
  153. 'p':
  154. if txt=KEY_PARAM then entity:=weParam
  155. else if TextToInst(txt, instByte) then entity:=weInstr;
  156. 'r':
  157. if txt=KEY_RESULT then entity:=weResult
  158. else if TextToInst(txt, instByte) then entity:=weInstr;
  159. 't':
  160. if txt=KEY_TYPE then entity:=weType
  161. else if txt=KEY_TABLE then entity:=weTable
  162. else if TextToInst(txt, instByte) then entity:=weInstr;
  163. else
  164. if TextToInst(txt, instByte) then entity:=weInstr;
  165. end;
  166. end;
  167. { TWatScanner }
  168. procedure TWatScanner.DoComment(ofs: Integer; const cmt: string);
  169. begin
  170. end;
  171. function TWatScanner.CommentIsSymbol(const cmt: string): Boolean;
  172. var
  173. i: integer;
  174. t: string;
  175. v: string;
  176. begin
  177. Result := false;
  178. if (Pos(';;',cmt)<>1) then Exit;
  179. i:=3;
  180. ScanWhile(cmt, i, SpaceChars);
  181. if (i>length(cmt)) or (cmt[i]<>'.') then Exit;
  182. inc(i);
  183. t := AnsiLowerCase(ScanTo(cmt, i, SpaceChars));
  184. ScanWhile(cmt, i, SpaceChars);
  185. v := ScanTo(cmt, i, SpaceChars);
  186. asmCmd := t;
  187. resText := v;
  188. Result := true;
  189. end;
  190. procedure TWatScanner.SetSource(const abuf: string);
  191. begin
  192. buf:=abuf;
  193. idx:=1;
  194. end;
  195. function ScanString(const buf: string; var idx: integer): string;
  196. var
  197. j : integer;
  198. begin
  199. if buf[idx]<>'"' then begin
  200. Result:='';
  201. Exit;
  202. end;
  203. j:=idx;
  204. inc(idx);
  205. while (buf[idx]<>'"') and (idx<length(buf)) do begin
  206. if buf[idx]='\' then inc(idx);
  207. inc(idx);
  208. end;
  209. inc(idx);
  210. Result:=Copy(buf, j, idx-j);
  211. end;
  212. function TWatScanner.Next: Boolean;
  213. var
  214. cmt : string;
  215. done: boolean;
  216. fmt : TCNumberFormat;
  217. si : integer;
  218. begin
  219. numformat := wnfNo;
  220. Result := idx<=length(buf);
  221. if not Result then Exit;
  222. done:=false;
  223. resText:='';
  224. while not done do begin
  225. ScanWhile(buf, idx, SpaceEolnChars);
  226. Result := idx<=length(buf);
  227. if not Result then Exit;
  228. ofs:=idx;
  229. if (idx<length(buf)) and (buf[idx] in [';','(']) and (buf[idx+1]=';') then begin
  230. if (buf[idx]=';') then begin
  231. // comment until the end of the line
  232. cmt := ScanTo(buf, idx, EoLnChars);
  233. ScanWhile(buf, idx, EoLnChars);
  234. end else
  235. // comment until the ;)
  236. cmt := ScanToSubstr(buf, idx, ';)');
  237. if not skipAsmSym and CommentIsSymbol(cmt) then begin
  238. token:=weAsmSymbol;
  239. done:=true;
  240. end else
  241. DoComment(ofs, cmt);
  242. end else begin
  243. done:=true;
  244. if buf[idx] = '(' then begin
  245. token:=weOpenBrace;
  246. inc(idx);
  247. end else if buf[idx]=')' then begin
  248. token:=weCloseBrace;
  249. inc(idx);
  250. end else if buf[idx]='=' then begin
  251. token:=weEqual;
  252. inc(idx);
  253. end else if buf[idx]='"' then begin
  254. token:=weString;
  255. resText:=ScanString(buf, idx);
  256. end else if buf[idx] = IdStart then begin
  257. token:=weIdent;
  258. resText:=ScanWhile(buf, idx, IdBody);
  259. end else if buf[idx] in SignNumericChars then begin
  260. fmt := ScanNumberC(buf, idx, resText);
  261. if fmt = nfError then begin
  262. token := weError;
  263. Exit;
  264. end else
  265. token:=weNumber;
  266. case fmt of
  267. nfFloat: numformat := wnfFloat;
  268. nfFloatHex: numFormat := wnfFloatHex;
  269. nfHex: numformat := wnfHex;
  270. else
  271. numformat := wnfInteger;
  272. end;
  273. end else if buf[idx] in GrammarChars then begin
  274. si := idx;
  275. resText:=ScanWhile(buf, idx, GrammarChars);
  276. // second try for the number
  277. if (resText = 'nan') or (resText = 'inf') then begin
  278. idx := si;
  279. fmt := ScanNumberC(buf, idx, resText);
  280. if fmt = nfError then begin
  281. token := weError;
  282. Exit;
  283. end else
  284. token:=weNumber;
  285. case fmt of
  286. nfFloat: numformat := wnfFloat;
  287. nfHex: numformat := wnfHex;
  288. else
  289. numformat := wnfInteger;
  290. end;
  291. end else
  292. GetGrammar(resText, token, instrCode);
  293. done:=true;
  294. end else begin
  295. token:=weError;
  296. inc(idx);
  297. done:=true;
  298. end;
  299. end;
  300. end;
  301. if resText='' then
  302. resText := Copy(buf, ofs, idx-ofs);
  303. end;
  304. function TWatScanner.resInt32(const def: integer=-1): Integer;
  305. var
  306. err: integer;
  307. begin
  308. Val(resText, Result, err);
  309. if err<>0 then Result:=def;
  310. end;
  311. function TWatScanner.resWasmString: string;
  312. var
  313. i : integer;
  314. j : integer;
  315. begin
  316. if token<>weString then begin
  317. Result:='';
  318. Exit;
  319. end;
  320. Result:=Copy(resText, 2, length(resText)-2);
  321. if Result='' then Exit;
  322. i:=1;
  323. j:=1;
  324. while i<=length(Result) do begin
  325. if Result[i]='\' then begin
  326. inc(i);
  327. if i<=length(Result) then
  328. case Result[i] of
  329. 'r': Result[j]:=#13;
  330. 'n': Result[j]:=#10;
  331. '\': Result[j]:='\';
  332. '"': Result[j]:='"';
  333. end;
  334. end else
  335. if (j<i) then Result[j]:=Result[i];
  336. inc(j);
  337. inc(i);
  338. end;
  339. SetLength(Result, j-1);
  340. end;
  341. function HexFloatStrToHexStr(const t: string; out hexStr: THexStr): Boolean;
  342. var
  343. i : integer;
  344. j : integer;
  345. err : Integer;
  346. const
  347. HexChars = ['0'..'9','a'..'f','A'..'F'];
  348. begin
  349. hexStr.isNeg:=false;
  350. hexStr.num:=0;
  351. hexStr.frac:=0;
  352. hexStr.exp:=0;
  353. if (t='') then begin
  354. Result:=true;
  355. Exit;
  356. end;
  357. i:=1;
  358. hexStr.isNeg:=t[i]='-';
  359. if (hexStr.isNeg) then inc(i);
  360. inc(i,2); // skipping '0x'
  361. j:=i;
  362. while (i<=length(t)) and (t[i] in HexChars) do inc(i);
  363. Val('$'+Copy(t, j, i-j), hexStr.num, err);
  364. Result:=err=0;
  365. if not Result then Exit;
  366. if (t[i]='.') then begin
  367. inc(i);
  368. j:=i;
  369. while (i<=length(t)) and (t[i] in HexChars) do inc(i);
  370. Val('$'+Copy(t, j, i-j), hexStr.frac, err);
  371. Result:=err=0;
  372. if not Result then Exit;
  373. end;
  374. Result := (i<=length(t)) and (t[i] = 'p') or (t[i]='P');
  375. inc(i);
  376. Val(Copy(t, i, length(t)), hexStr.exp, err);
  377. Result:=err=0;
  378. end;
  379. function HexFracToSingle(const num, frac: QWord; exp: Integer; isNeg: Boolean): Single;
  380. var
  381. x : QWord;
  382. nm : QWord;
  383. adjexp : integer;
  384. sr : TSingleRec;
  385. begin
  386. nm := num;
  387. x := frac;
  388. adjexp := -1;
  389. while (nm > 0) do begin
  390. x:=(x shr 1) or ((nm and 1) shl 23);
  391. nm := nm shr 1;
  392. inc(adjexp);
  393. end;
  394. sr.Exp:=127 + exp + adjexp;
  395. sr.Frac:=x;
  396. sr.Sign:=isNeg;
  397. Result := sr.Value;
  398. end;
  399. function HexFloatStrToSingle(const hexstr: string): Single;
  400. var
  401. st : THexStr;
  402. begin
  403. HexFloatStrToHexStr(hexstr, st);
  404. Result:=HexFracToSingle(st.num, st.frac, st.exp, st.isNeg);
  405. end;
  406. function HexFracToDouble(const num, frac: QWord; exp: Integer; neg: Boolean): Double;
  407. var
  408. x : QWord;
  409. nm : QWord;
  410. adjexp : integer;
  411. sr : TDoubleRec;
  412. begin
  413. nm := num;
  414. x := frac;
  415. adjexp := 0;
  416. while (nm > 1) do begin
  417. x:=(x shr 1) or ((nm and 1) shl 52);
  418. nm := nm shr 1;
  419. inc(adjexp);
  420. end;
  421. sr.Exp:=1023 + exp + adjexp;
  422. sr.Frac:=x;
  423. sr.Sign:=neg;
  424. Result := sr.Value;
  425. end;
  426. function HexFloatStrToDouble(const hexstr: string): double;
  427. var
  428. st : THexStr;
  429. begin
  430. HexFloatStrToHexStr(hexstr, st);
  431. Result:=HexFracToDouble(st.num, st.frac, st.exp, st.isNeg);
  432. end;
  433. end.