{ watscanner.pas - scanner (tokenizer) unit; the unit source follows. }
  1. unit watscanner;
  2. {$mode delphi}{$H+}
  3. interface
  4. uses
  5. SysUtils, Classes, parseutils, wasmtext;
  6. type
  // Kinds of tokens reported through TWatScanner.token and GetGrammar.
  // The order of the members is significant (ordinal values) - do not reorder.
  TWatToken = (
    weNone,
    weError,       // unrecognized character, malformed number or unknown word
    weIdent,       // identifier starting with '$'
    weString,      // double-quoted string literal
    weNumber,      // numeric literal, see TWatNumberFormat
    weOpenBrace,   // '('
    weCloseBrace,  // ')'
    weAsmSymbol,   // line comment of the form ";; .name value"
    weInstr,       // word recognized by TextToInst
    weFunc,        // 'func'
    weParam,       // 'param'
    weResult,      // 'result'
    weModule,      // 'module'
    weMut,         // 'mut'
    weFuncRef,     // 'funcref' or 'anyfunc'
    wei32,         // 'i32'
    wei64,         // 'i64'
    wef32,         // 'f32'
    wef64,         // 'f64'
    weType,        // 'type'
    weImport,      // 'import'
    weGlobal,      // 'global'
    weTable,       // 'table'
    weMemory,      // 'memory'
    weLocal,       // 'local'
    weExport,      // 'export'
    weElem,        // 'elem'
    weData,        // 'data'
    weOffset,      // 'offset'
    weAlign,       // 'align'
    weEqual        // '='
  );
  // Classification of a numeric literal; meaningful only when the current
  // token is weNumber (TWatScanner.Next resets it to wnfNo for every token).
  TWatNumberFormat = (
    wnfNo,        // other than number
    wnfInteger,   // 00
    wnfHex,       // 0xABC
    wnfFloat,     // 0.000
    wnfFloatHex   // 0x000.bced
  );
  { TWatScanner }

  // Pull-style tokenizer: load the text with SetSource, then call Next
  // repeatedly; after each successful call the public fields describe the
  // token that was just read.
  TWatScanner = class(TObject)
  protected
    // Called by Next for every comment that is not reported as weAsmSymbol.
    procedure DoComment(ofs: Integer; const cmt: string); virtual;
    // Tests whether a comment has the ";; .name value" form; on success
    // fills asmCmd and resText.
    function CommentIsSymbol(const cmt: string): Boolean;
  public
    buf        : string;            // text being scanned (assigned by SetSource)
    idx        : integer;           // 1-based position of the next unread character
    instrCode  : byte;              // value filled in by GetGrammar for word tokens
    ofs        : integer;           // 1-based position where the current token starts
    token      : TWatToken;         // kind of the current token
    numformat  : TWatNumberFormat;  // number format of the current token, wnfNo if not a number
    resText    : string;            // text of the current token (for weAsmSymbol: the value part)
    asmCmd     : string;            // for weAsmSymbol: lower-cased name following the '.'
    skipAsmSym : Boolean;           // when true, ";; .name" comments are treated as plain comments

    // Assigns the text to scan and rewinds idx to the first character.
    procedure SetSource(const abuf: string);
    // Reads the next token; returns false once the end of buf is reached.
    function Next: Boolean;
    // resText converted to a 32-bit integer, or def when it does not convert.
    function resInt32(const def: integer=-1): Integer;
    // For weString tokens: resText without the enclosing quotes and with
    // escape sequences decoded; empty string for any other token.
    function resWasmString: string;
  end;
  const
    // see Identifiers of Textual format

    // First character of every identifier token.
    IdStart = '$';

    // Characters accepted inside an identifier (IdStart itself is included).
    IdBody = AlphaNumChars + [
      '!', '#', '$', '%', '&', '''', '*',
      '+', '-', '.', '/', ':', '<',  '=',
      '>', '?', '@', '\', '^', '_',  '`',
      '|', '~'
    ];

    // Characters accepted inside a keyword or instruction name.
    GrammarChars = AlphaNumChars + [
      '.',
      '_',
      '/'   // some old instructions are like that: "f32.reinterpret/i32"
    ];
  60. procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
  const
    // Keyword spellings compared (case-sensitively) against scanned words
    // by GetGrammar.
    KEY_MODULE  = 'module';
    KEY_FUNC    = 'func';
    KEY_FUNCREF = 'funcref';
    KEY_I32     = 'i32';
    KEY_I64     = 'i64';
    KEY_F32     = 'f32';
    KEY_F64     = 'f64';
    KEY_PARAM   = 'param';
    KEY_RESULT  = 'result';
    KEY_MUT     = 'mut';
    KEY_TYPE    = 'type';
    KEY_IMPORT  = 'import';
    KEY_GLOBAL  = 'global';
    KEY_TABLE   = 'table';
    KEY_MEMORY  = 'memory';
    KEY_LOCAL   = 'local';
    KEY_EXPORT  = 'export';
    KEY_ELEM    = 'elem';
    KEY_DATA    = 'data';
    KEY_OFFSET  = 'offset';
  82. function ScanString(const buf: string; var idx: integer): string;
  83. implementation
  { Classifies a scanned word.

    txt      - the word to classify (compared case-sensitively).
    entity   - receives the keyword token when txt is a known keyword,
               weInstr when TextToInst recognizes it, weError otherwise
               (including for an empty txt).
    instByte - reset to 0 first; afterwards holds whatever TextToInst stored
               in it, if TextToInst was consulted. }
  procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
  var
    keyword: TWatToken;
  begin
    instByte := 0;
    entity := weError;
    if Length(txt) = 0 then Exit;

    // Keyword lookup, bucketed by first character to limit string compares.
    // weNone marks "not a keyword".
    keyword := weNone;
    case txt[1] of
      'a':
        if txt = 'anyfunc' then keyword := weFuncRef
        else if txt = 'align' then keyword := weAlign;
      'd':
        if txt = KEY_DATA then keyword := weData;
      'e':
        if txt = KEY_EXPORT then keyword := weExport
        else if txt = KEY_ELEM then keyword := weElem;
      'i':
        if txt = KEY_I32 then keyword := wei32
        else if txt = KEY_I64 then keyword := wei64
        else if txt = KEY_IMPORT then keyword := weImport;
      'g':
        if txt = KEY_GLOBAL then keyword := weGlobal;
      'f':
        if txt = KEY_FUNC then keyword := weFunc
        else if txt = KEY_FUNCREF then keyword := weFuncRef
        else if txt = KEY_F32 then keyword := wef32
        else if txt = KEY_F64 then keyword := wef64;
      'l':
        if txt = KEY_LOCAL then keyword := weLocal;
      'm':
        if txt = KEY_MODULE then keyword := weModule
        else if txt = KEY_MUT then keyword := weMut
        else if txt = KEY_MEMORY then keyword := weMemory;
      'o':
        if txt = KEY_OFFSET then keyword := weOffset;
      'p':
        if txt = KEY_PARAM then keyword := weParam;
      'r':
        if txt = KEY_RESULT then keyword := weResult;
      't':
        if txt = KEY_TYPE then keyword := weType
        else if txt = KEY_TABLE then keyword := weTable;
    end;

    // Anything that is not a keyword may still be an instruction mnemonic.
    if keyword <> weNone then
      entity := keyword
    else if TextToInst(txt, instByte) then
      entity := weInstr;
  end;
  140. { TWatScanner }
  { Hook called by Next for each comment that is not turned into a
    weAsmSymbol token. ofs is the position in buf where the comment starts,
    cmt is the comment text as scanned. The base implementation does nothing;
    descendants may override it. }
  procedure TWatScanner.DoComment(ofs: Integer; const cmt: string);
  begin
    // intentionally empty
  end;
  { Checks whether cmt is a line comment of the form

        ;; .name value

    i.e. it starts with ';;', followed by optional spaces and a '.'.
    On a match, asmCmd receives the lower-cased name, resText receives the
    following space-delimited word, and the result is true. Otherwise the
    fields are left untouched and the result is false. }
  function TWatScanner.CommentIsSymbol(const cmt: string): Boolean;
  var
    p       : integer;
    cmdName : string;
    cmdArg  : string;
  begin
    Result := false;
    if Pos(';;', cmt) = 1 then begin
      // continue right after the ';;' marker
      p := 3;
      ScanWhile(cmt, p, SpaceChars);
      if (p <= length(cmt)) and (cmt[p] = '.') then begin
        inc(p);
        cmdName := AnsiLowerCase(ScanTo(cmt, p, SpaceChars));
        ScanWhile(cmt, p, SpaceChars);
        cmdArg := ScanTo(cmt, p, SpaceChars);
        asmCmd := cmdName;
        resText := cmdArg;
        Result := true;
      end;
    end;
  end;
  { Assigns the text to be scanned and moves the read position to its first
    character. No other field of the scanner is modified. }
  procedure TWatScanner.SetSource(const abuf: string);
  begin
    idx := 1;
    buf := abuf;
  end;
  { Scans a double-quoted string literal starting at buf[idx].

    buf - text being scanned.
    idx - on entry the position of the opening quote; on exit the position
          just past the closing quote, never beyond length(buf)+1.

    Returns the literal including both quotes. A backslash makes the scanner
    skip the following character, so an escaped quote does not end the
    literal. If the literal is not terminated, everything up to the end of
    buf is returned. When idx is outside buf or buf[idx] is not a quote, the
    result is '' and idx is left unchanged. }
  function ScanString(const buf: string; var idx: integer): string;
  var
    start : integer;
    last  : integer;
  begin
    Result := '';
    last := length(buf);
    // Check the index before touching buf[idx]: indexing an empty string or
    // a position outside the string is invalid.
    if (idx < 1) or (idx > last) or (buf[idx] <> '"') then Exit;

    start := idx;
    inc(idx);
    // Range test first, so buf[idx] is only read for valid positions.
    while (idx < last) and (buf[idx] <> '"') do begin
      if buf[idx] = '\' then inc(idx); // skip the escaped character
      inc(idx);
    end;
    inc(idx); // step over the closing quote (or the last character)

    // An escape at the very end of buf can push idx two past the end.
    if idx > last + 1 then idx := last + 1;
    Result := Copy(buf, start, idx - start);
  end;
  { Reads the next token from buf, starting at idx.

    Whitespace and comments are skipped. Line comments (";; ...") and block
    comments ("(; ... ;)") are passed to DoComment, except that a comment
    accepted by CommentIsSymbol is reported as a weAsmSymbol token unless
    skipAsmSym is set.

    On return:
      token     - kind of the token read (weError for an unrecognized
                  character, a malformed number or an unknown word);
      ofs       - position in buf where the token starts;
      resText   - text of the token;
      numformat - number format for weNumber, wnfNo otherwise;
      instrCode - value stored by GetGrammar for word tokens.

    Returns false when the end of buf was reached before any token was
    found, true otherwise (also for weError tokens). }
  function TWatScanner.Next: Boolean;
  var
    cmt  : string;
    done : boolean;
    si   : integer;

    // Scans a numeric literal at idx with ScanNumberC and stores the outcome
    // in token / numformat / resText. Returns false (token = weError) when
    // ScanNumberC reports nfError. Shared by both number paths so that every
    // number format is mapped the same way.
    function ScanNumberToken: Boolean;
    var
      fmt : TCNumberFormat;
    begin
      fmt := ScanNumberC(buf, idx, resText);
      Result := fmt <> nfError;
      if not Result then begin
        token := weError;
        Exit;
      end;
      token := weNumber;
      case fmt of
        nfFloat:    numformat := wnfFloat;
        nfFloatHex: numformat := wnfFloatHex;
        nfHex:      numformat := wnfHex;
      else
        numformat := wnfInteger;
      end;
    end;

  begin
    numformat := wnfNo;
    Result := idx<=length(buf);
    if not Result then Exit;

    done := false;
    resText := '';
    while not done do begin
      ScanWhile(buf, idx, SpaceEolnChars);
      Result := idx<=length(buf);
      if not Result then Exit;
      ofs := idx;

      if (idx<length(buf)) and (buf[idx] in [';','(']) and (buf[idx+1]=';') then begin
        if buf[idx]=';' then begin
          // comment until the end of the line
          cmt := ScanTo(buf, idx, EoLnChars);
          ScanWhile(buf, idx, EoLnChars);
        end else
          // comment until the ;)
          cmt := ScanToSubstr(buf, idx, ';)');

        if (not skipAsmSym) and CommentIsSymbol(cmt) then begin
          token := weAsmSymbol;
          done := true;
        end else
          DoComment(ofs, cmt); // plain comment: report it and keep scanning
      end else begin
        // every branch below produces exactly one token
        done := true;
        if buf[idx] = '(' then begin
          token := weOpenBrace;
          inc(idx);
        end else if buf[idx] = ')' then begin
          token := weCloseBrace;
          inc(idx);
        end else if buf[idx] = '=' then begin
          token := weEqual;
          inc(idx);
        end else if buf[idx] = '"' then begin
          token := weString;
          resText := ScanString(buf, idx);
        end else if buf[idx] = IdStart then begin
          token := weIdent;
          resText := ScanWhile(buf, idx, IdBody);
        end else if buf[idx] in SignNumericChars then begin
          if not ScanNumberToken then Exit;
        end else if buf[idx] in GrammarChars then begin
          si := idx;
          resText := ScanWhile(buf, idx, GrammarChars);
          // second try for the number: "nan" and "inf" start with a letter,
          // so they arrive here; rewind and let the number scanner take them
          if (resText = 'nan') or (resText = 'inf') then begin
            idx := si;
            if not ScanNumberToken then Exit;
          end else
            GetGrammar(resText, token, instrCode);
        end else begin
          // unknown character: report it and step over it
          token := weError;
          inc(idx);
        end;
      end;
    end;

    // tokens that did not set their own text get the raw source slice
    if resText='' then
      resText := Copy(buf, ofs, idx-ofs);
  end;
  { Converts resText to a 32-bit integer.
    Returns def (default -1) when resText is not a valid integer. }
  function TWatScanner.resInt32(const def: integer=-1): Integer;
  begin
    Result := StrToIntDef(resText, def);
  end;
  { Returns the value of the current string token: resText without its
    enclosing quotes and with escape sequences decoded.

    Decoded escapes (WebAssembly text format):
      \t \n \r      tab, line feed, carriage return
      \" \' \\      the character itself
      \hh           the byte with the given two-digit hexadecimal value
      \u(hexnum)    written with curly braces in the source text: the UTF-8
                    encoding of that Unicode scalar value

    An unrecognized or malformed escape, and a lone backslash at the end of
    the text, are copied to the result unchanged.

    Returns '' when the current token is not weString. }
  function TWatScanner.resWasmString: string;
  var
    src    : string;    // literal contents without the surrounding quotes
    dst    : string;    // decoded output; never longer than src
    n      : integer;   // length(src)
    i      : integer;   // read position in src
    j      : integer;   // number of characters written to dst
    k      : integer;   // look-ahead position while parsing \u
    d      : integer;
    digits : integer;
    hiNib  : integer;
    loNib  : integer;
    cp     : Cardinal;
    ok     : Boolean;

    // Value of a hexadecimal digit, or -1 if ch is not one.
    function HexVal(ch: Char): integer;
    begin
      case ch of
        '0'..'9': Result := Ord(ch) - Ord('0');
        'a'..'f': Result := Ord(ch) - Ord('a') + 10;
        'A'..'F': Result := Ord(ch) - Ord('A') + 10;
      else
        Result := -1;
      end;
    end;

    // Appends one byte to dst.
    procedure Emit(b: Cardinal);
    begin
      inc(j);
      dst[j] := Chr(Byte(b and $FF));
    end;

    // Appends the UTF-8 encoding of code point c to dst.
    procedure EmitUtf8(c: Cardinal);
    begin
      if c < $80 then
        Emit(c)
      else if c < $800 then begin
        Emit($C0 or (c shr 6));
        Emit($80 or (c and $3F));
      end else if c < $10000 then begin
        Emit($E0 or (c shr 12));
        Emit($80 or ((c shr 6) and $3F));
        Emit($80 or (c and $3F));
      end else begin
        Emit($F0 or (c shr 18));
        Emit($80 or ((c shr 12) and $3F));
        Emit($80 or ((c shr 6) and $3F));
        Emit($80 or (c and $3F));
      end;
    end;

  begin
    Result := '';
    if token <> weString then Exit;

    src := Copy(resText, 2, length(resText)-2);
    n := length(src);
    if n = 0 then Exit;

    // Every escape decodes to no more bytes than it occupies in the source,
    // so length(src) is a safe upper bound for the output.
    SetLength(dst, n);
    i := 1;
    j := 0;
    while i <= n do begin
      if (src[i] <> '\') or (i = n) then begin
        // ordinary character, or a lone backslash at the very end
        Emit(Ord(src[i]));
        inc(i);
      end else
        // src[i] is a backslash and src[i+1] exists
        case src[i+1] of
          't': begin Emit(9);  inc(i, 2); end;
          'n': begin Emit(10); inc(i, 2); end;
          'r': begin Emit(13); inc(i, 2); end;
          '"', '''', '\':
            begin
              Emit(Ord(src[i+1]));
              inc(i, 2);
            end;
          'u':
            begin
              // \u followed by '{', hex digits ('_' allowed between digits)
              // and '}'
              ok := false;
              cp := 0;
              digits := 0;
              k := i + 2;
              if (k <= n) and (src[k] = '{') then begin
                inc(k);
                // stop once cp leaves the Unicode range; this also keeps
                // cp from overflowing
                while (k <= n) and (cp <= $10FFFF) do begin
                  d := HexVal(src[k]);
                  if d >= 0 then begin
                    cp := cp * 16 + Cardinal(d);
                    inc(digits);
                  end else if (src[k] <> '_') or (digits = 0) then
                    Break;
                  inc(k);
                end;
                // a scalar value excludes the surrogate range D800..DFFF
                ok := (digits > 0) and (k <= n) and (src[k] = '}')
                  and (cp <= $10FFFF)
                  and not ((cp >= $D800) and (cp <= $DFFF));
              end;
              if ok then begin
                EmitUtf8(cp);
                i := k + 1;
              end else begin
                // malformed: keep the two characters as written
                Emit(Ord('\'));
                Emit(Ord('u'));
                inc(i, 2);
              end;
            end;
        else
          // \hh byte escape, or an unrecognized escape
          hiNib := HexVal(src[i+1]);
          if (hiNib >= 0) and (i + 2 <= n) then
            loNib := HexVal(src[i+2])
          else
            loNib := -1;
          if (hiNib >= 0) and (loNib >= 0) then begin
            Emit(Cardinal(hiNib * 16 + loNib));
            inc(i, 3);
          end else begin
            // unrecognized: keep the two characters as written
            Emit(Ord('\'));
            Emit(Ord(src[i+1]));
            inc(i, 2);
          end;
        end;
    end;

    SetLength(dst, j);
    Result := dst;
  end;
  314. end.