Browse Source

[PATCH 034/188] updating wat scanner

From 428c7173cf6a7d363ffa8e1ba748337a277189d3 Mon Sep 17 00:00:00 2001
From: Dmitry Boyarintsev <[email protected]>
Date: Tue, 19 Nov 2019 23:26:17 -0500

git-svn-id: branches/wasm@46030 -
nickysn 5 years ago
parent
commit
f7ae46356b
1 changed file with 159 additions and 31 deletions
  1. 159 31
      utils/wasmbin/watparser.pas

+ 159 - 31
utils/wasmbin/watparser.pas

@@ -5,59 +5,180 @@ unit watparser;
 interface
 
 uses
-  SysUtils, Classes, parseutils;
+  SysUtils, Classes, parseutils, wasmtext;
 
 type
-  TWatEntity = (weNone, weError,
-     weIdent, weString, weNumber, weSymbol, weOpenBrace, weCloseBrace);
-
-  { TWatParser }
-
-  TWatParser = class(TObject)
+  TWatToken = (weNone, weError, // weError: unrecognized character, or a word GetGrammar cannot classify
+     weIdent, // name starting with IdStart ('$')
+     weString, weNumber, weOpenBrace, weCloseBrace, // '"' literal, run of NumericChars, '(' and ')'
+     weAsmSymbol, // comment for which CommentIsSymbol returned true
+
+     weInstr, // word accepted by TextToInst; its byte result is stored in instrCode
+     weFunc, // from here on: keywords recognized by GetGrammar
+     weParam, weResult,
+     weModule, weMut, weFuncRef, // weFuncRef is produced for both 'funcref' and 'anyfunc'
+     wei32, wei64,
+     wef32, wef64,
+     weType,
+     weImport, weGlobal, weTable, weMemory, weLocal, weExport,
+     weElem, weData, weOffset
+   );
+
+  { TWatScanner }
+
+  TWatScanner = class(TObject)
   protected
     procedure DoComment(const cmt: string);
     function CommentIsSymbol(const cmt: string): Boolean;
   public
-    buf : string;
-    idx : integer;
+    buf       : string;
+    idx       : integer;
 
-    ofs : integer;
-    entity : TWatEntity;
-    resText : string;
+    instrCode : byte;
+    ofs       : integer;
+    token     : TWatToken;
+    resText   : string;
     procedure SetSource(const abuf: string);
     function Next: Boolean;
   end;
 
 const
-  SymbolChars : TCharSet = ['(',')'];
-  AlphaNumCharsUnd : TCharSet = AlphaNumChars + ['_','$'];
+  // see Identifiers of Textual format
+  IdStart  = '$';
+  IdBody   = AlphaNumChars
+             + [ '!' ,'#' ,'$' ,'%' ,'&' ,'''' ,'*'
+                ,'+' ,'-' ,'.' ,'/' ,':' ,'<' ,'='
+                ,'>' ,'?' ,'@' ,'\' ,'^' ,'_' ,'`'
+                ,'|' ,'~'];
+  GrammarChars  = AlphaNumChars+['.','_'];
+
+procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
+
+const
+  KEY_MODULE = 'module';
+  KEY_FUNC   = 'func';
+  KEY_FUNCREF = 'funcref';
+  KEY_I32    = 'i32';
+  KEY_I64    = 'i64';
+  KEY_F32    = 'f32';
+  KEY_F64    = 'f64';
+  KEY_PARAM  = 'param';
+  KEY_RESULT = 'result';
+  KEY_MUT    = 'mut';
+  KEY_TYPE   = 'type';
+
+  KEY_IMPORT = 'import';
+  KEY_GLOBAL = 'global';
+  KEY_TABLE  = 'table';
+  KEY_MEMORY = 'memory';
+  KEY_LOCAL  = 'local';
+  KEY_EXPORT = 'export';
+  KEY_ELEM   = 'elem';
+  KEY_DATA   = 'data';
+  KEY_OFFSET = 'offset';
+
+function ScanString(const buf: string; var idx: integer): string;
 
 implementation
 
-{ TWatParser }
+// Classifies a scanned word.
+//   txt      - the word to classify (compared case-sensitively)
+//   entity   - receives the keyword token when txt is one of the keywords
+//              listed below, weInstr when TextToInst accepts txt, and
+//              weError otherwise (including for an empty txt)
+//   instByte - reset to 0 on entry; afterwards only TextToInst writes to it
+procedure GetGrammar(const txt: string; out entity: TWatToken; out instByte: byte);
+type
+  TKeyword = record
+    key : string;
+    tok : TWatToken;
+  end;
+const
+  Keywords : array [0..20] of TKeyword = (
+    (key: 'anyfunc';   tok: weFuncRef),
+    (key: KEY_DATA;    tok: weData),
+    (key: KEY_EXPORT;  tok: weExport),
+    (key: KEY_ELEM;    tok: weElem),
+    (key: KEY_I32;     tok: wei32),
+    (key: KEY_I64;     tok: wei64),
+    (key: KEY_IMPORT;  tok: weImport),
+    (key: KEY_GLOBAL;  tok: weGlobal),
+    (key: KEY_FUNC;    tok: weFunc),
+    (key: KEY_FUNCREF; tok: weFuncRef),
+    (key: KEY_F32;     tok: wef32),
+    (key: KEY_F64;     tok: wef64),
+    (key: KEY_LOCAL;   tok: weLocal),
+    (key: KEY_MODULE;  tok: weModule),
+    (key: KEY_MUT;     tok: weMut),
+    (key: KEY_MEMORY;  tok: weMemory),
+    (key: KEY_OFFSET;  tok: weOffset),
+    (key: KEY_PARAM;   tok: weParam),
+    (key: KEY_RESULT;  tok: weResult),
+    (key: KEY_TYPE;    tok: weType),
+    (key: KEY_TABLE;   tok: weTable)
+  );
+var
+  i : integer;
+begin
+  instByte:=0;
+  entity:=weError;
+  if txt='' then Exit;
+
+  // exact keyword match wins; instByte stays 0 in that case
+  for i:=low(Keywords) to high(Keywords) do
+    if txt=Keywords[i].key then begin
+      entity:=Keywords[i].tok;
+      Exit;
+    end;
+
+  // not a keyword: the only remaining valid reading is an instruction name
+  if TextToInst(txt, instByte) then entity:=weInstr;
+end;
+
 
-procedure TWatParser.DoComment(const cmt: string);
+{ TWatScanner }
+
+procedure TWatScanner.DoComment(const cmt: string);
 begin
 
 end;
 
-function TWatParser.CommentIsSymbol(const cmt: string): Boolean;
+function TWatScanner.CommentIsSymbol(const cmt: string): Boolean;
 begin
   Result := false;
 end;
 
-procedure TWatParser.SetSource(const abuf: string);
+procedure TWatScanner.SetSource(const abuf: string);
 begin
   buf:=abuf;
   idx:=1;
 end;
 
-function TWatParser.Next: Boolean;
+// Scans a double-quoted string literal in buf, starting at idx.
+// Returns the literal including both quotes; backslash escapes are kept
+// verbatim (not decoded). idx is advanced to the first character after the
+// literal.
+// Returns '' and leaves idx unchanged when idx lies outside buf or buf[idx]
+// is not a double quote.
+// An unterminated literal extends to the end of buf; idx is then left at
+// length(buf)+1 and never beyond it.
+function ScanString(const buf: string; var idx: integer): string;
+var
+  start : integer;
+  len   : integer;
+begin
+  Result:='';
+  len:=length(buf);
+  // check the bounds before touching buf[idx]: buf may be empty or idx may
+  // already be past the end
+  if (idx<1) or (idx>len) or (buf[idx]<>'"') then Exit;
+  start:=idx;
+  inc(idx);
+  // the bounds test comes first so buf[idx] is never read past the end
+  // (a lone '"' at the end of buf, or a trailing '\', used to do that)
+  while (idx<=len) and (buf[idx]<>'"') do begin
+    if buf[idx]='\' then inc(idx); // skip the escaped character as well
+    inc(idx);
+  end;
+  if idx<=len then inc(idx) // step over the closing quote
+  else idx:=len+1;          // unterminated literal: stop right after the buffer
+  Result:=Copy(buf, start, idx-start);
+end;
+
+function TWatScanner.Next: Boolean;
 var
   has2chars: Boolean;
   cmt : string;
   done: boolean;
-  j: integer;
 begin
   Result := idx<=length(buf);
   if not Result then Exit;
@@ -65,10 +186,10 @@ begin
   done:=false;
   resText:='';
   while not done do begin
-    ScanWhile(buf, idx, WhiteSpaceChars);
+    ScanWhile(buf, idx, SpaceEolnChars);
     Result := idx<=length(buf);
     if not Result then Exit;
-    j:=idx;
+    ofs:=idx;
     has2chars := idx<length(buf);
     if has2chars then begin
       if (buf[idx]=';') and (buf[idx+1]=';') then begin
@@ -80,7 +201,7 @@ begin
         cmt := ScanToSubstr(buf, idx, ';)');
 
       if CommentIsSymbol(cmt) then begin
-        entity:=weSymbol;
+        token:=weAsmSymbol;
         done:=true;
       end else
         DoComment(cmt);
@@ -89,19 +210,26 @@ begin
     if not done then begin
       done:=true;
       if buf[idx] = '(' then begin
-        entity:=weOpenBrace;
+        token:=weOpenBrace;
         inc(idx);
       end else if buf[idx]=')' then begin
-        entity:=weCloseBrace;
+        token:=weCloseBrace;
         inc(idx);
-      end else if buf[idx] in AlphabetChars then begin
-        entity:=weIdent;
-        resText:=ScanWhile(buf, idx, AlphaNumCharsUnd);
+      end else if buf[idx]='"' then begin
+        token:=weString;
+        resText:=ScanString(buf, idx);
+      end else if buf[idx] = IdStart then begin
+        token:=weIdent;
+        resText:=ScanWhile(buf, idx, IdBody);
+      end else if buf[idx] in AlphaNumChars then begin
+        resText:=ScanWhile(buf, idx, GrammarChars);
+        GetGrammar(resText, token, instrCode);
+        done:=true;
       end else if buf[idx] in NumericChars then begin
-        entity:=weNumber;
+        token:=weNumber;
         resText:=ScanWhile(buf, idx, NumericChars);
       end else begin
-        entity:=weError;
+        token:=weError;
         inc(idx);
         done:=true;
       end;
@@ -109,7 +237,7 @@ begin
   end;
 
   if resText='' then
-    resText := Copy(buf, j, idx-j);
+    resText := Copy(buf, ofs, idx-ofs);
 end;
 
 end.