Browse Source

* Regexp support for wasm target

Michaël Van Canneyt 11 months ago
parent
commit
46a9fcc5f9

+ 5 - 0
packages/wasm-utils/demo/README.md

@@ -22,3 +22,8 @@ packages/fcl-web/examples/websocket/server
 ```
 ```
 is needed, since this is the websocket server that the demo program will
 is needed, since this is the websocket server that the demo program will
 connect to.
 connect to.
+
+For the regexp demo, you need the corresponding pas2js host program
+```
+demos/wasienv/regexp
+```

+ 66 - 0
packages/wasm-utils/demo/regexp/wasmregexpdemo.lpi

@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<CONFIG>
+  <ProjectOptions>
+    <Version Value="12"/>
+    <General>
+      <Flags>
+        <MainUnitHasCreateFormStatements Value="False"/>
+        <MainUnitHasTitleStatement Value="False"/>
+        <MainUnitHasScaledStatement Value="False"/>
+      </Flags>
+      <SessionStorage Value="InProjectDir"/>
+      <Title Value="wasmregexpdemo"/>
+      <UseAppBundle Value="False"/>
+      <ResourceType Value="res"/>
+    </General>
+    <BuildModes>
+      <Item Name="Default" Default="True"/>
+    </BuildModes>
+    <PublishOptions>
+      <Version Value="2"/>
+      <UseFileFilters Value="True"/>
+    </PublishOptions>
+    <RunParams>
+      <FormatVersion Value="2"/>
+    </RunParams>
+    <Units>
+      <Unit>
+        <Filename Value="wasmregexpdemo.pp"/>
+        <IsPartOfProject Value="True"/>
+      </Unit>
+    </Units>
+  </ProjectOptions>
+  <CompilerOptions>
+    <Version Value="11"/>
+    <Target>
+      <Filename Value="wasmregexpdemo"/>
+    </Target>
+    <SearchPaths>
+      <IncludeFiles Value="$(ProjOutDir)"/>
+      <OtherUnitFiles Value="../../src"/>
+      <UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
+    </SearchPaths>
+    <CodeGeneration>
+      <TargetCPU Value="wasm32"/>
+      <TargetOS Value="wasi"/>
+    </CodeGeneration>
+    <Linking>
+      <Debugging>
+        <GenerateDebugInfo Value="False"/>
+      </Debugging>
+    </Linking>
+  </CompilerOptions>
+  <Debugging>
+    <Exceptions>
+      <Item>
+        <Name Value="EAbort"/>
+      </Item>
+      <Item>
+        <Name Value="ECodetoolError"/>
+      </Item>
+      <Item>
+        <Name Value="EFOpenError"/>
+      </Item>
+    </Exceptions>
+  </Debugging>
+</CONFIG>

+ 50 - 0
packages/wasm-utils/demo/regexp/wasmregexpdemo.pp

@@ -0,0 +1,50 @@
+program wasmregexpdemo;
+
+uses sysutils, wasm.regexp.shared, wasm.regexp.api, wasm.regexp.objects;
+
+Const
+   SRegex = 'quick\s(?<color>brown).+?(jumps)';
+   STest  = 'The Quick Brown Fox Jumps Over The Lazy Dog';
+   SFlags  = 'dgi';
+
+Var
+  Regex : TWasmRegExp;
+  Res : TRegExpResult;
+  I : Integer;
+  M : TRegExpMatch;
+  G : TRegExpGroup;
+  S : String;
+
+begin
+  Writeln('Regular expression: ',SRegex);
+  Writeln('Flags: ',SFlags);
+  Regex:=TWasmRegExp.Create(SRegex,SFlags);
+  Writeln('Test string: ',STest);
+  Res:=Regex.Exec(STest);
+  if Res.Index=0 then
+    Writeln('No match')
+  else
+    With Res do
+      begin
+      Writeln('Match at : ',Index);
+      I:=0;
+      For M in Matches do
+        begin
+        S:=Format('(%d) : "%s"',[I,M.Value]);
+        if (rfIndices in Regex.Flags) then
+          S:=S+Format(' [From pos %d to %d]',[M.StartIndex,M.StopIndex]);
+        Writeln(S);
+        Inc(I);
+        end;
+      Writeln('Named groups : ',Length(Groups));
+      For G in Groups do
+        begin
+        S:=Format('(%d): "%s": "%s"',[I,G.Name,G.Value]);
+        if (rfIndices in Regex.Flags) then
+          S:=S+Format(' [From pos %d to %d]',[G.StartIndex,G.StopIndex]);
+        Writeln(S);
+        Inc(I);
+        end;
+      end;
+end.
+

+ 8 - 0
packages/wasm-utils/fpmake.pp

@@ -43,6 +43,14 @@ begin
       T.Dependencies.AddUnit('wasm.websocket.api');
       T.Dependencies.AddUnit('wasm.websocket.api');
       T.Dependencies.AddUnit('wasm.websocket.shared');
       T.Dependencies.AddUnit('wasm.websocket.shared');
       
       
+    T:=P.Targets.AddUnit('wasm.regexp.shared.pas');
+    T:=P.Targets.AddUnit('wasm.regexp.api.pas');
+      T.Dependencies.AddUnit('wasm.regexp.shared');
+      
+    T:=P.Targets.AddUnit('wasm.regexp.objects.pas');
+      T.Dependencies.AddUnit('wasm.regexp.api');
+      T.Dependencies.AddUnit('wasm.regexp.shared');
+      
 {$ifndef ALLPACKAGES}
 {$ifndef ALLPACKAGES}
     Run;
     Run;
     end;
     end;

+ 26 - 0
packages/wasm-utils/src/wasm.regexp.api.pas

@@ -0,0 +1,26 @@
+unit wasm.regexp.api;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses wasm.regexp.shared;
+
+function __wasm_regexp_allocate(aExpr : PByte; aExprLen : longint; aFlags : Longint; aID : PWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_Allocate;
+function __wasm_regexp_deallocate(aExprID : TWasmRegExpID) : TWasmRegexpResult; external regexpExportName name regexpFN_DeAllocate;
+function __wasm_regexp_exec(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aIndex : PLongint; aResultCount : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Exec;
+function __wasm_regexp_test(aExprID : TWasmRegExpID; aString : PByte; aStringLen :Longint; aResult : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_Test;
+function __wasm_regexp_get_flags(aExprID : TWasmRegExpID; aFlags : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetFlags;
+function __wasm_regexp_get_expression(aExprID : TWasmRegExpID; aExp : PByte; aExpLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetExpression;
+function __wasm_regexp_get_last_index(aExprID : TWasmRegExpID; aLastIndex : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetLastIndex;
+function __wasm_regexp_get_result_match(aExprID : TWasmRegExpID; aIndex : Longint; Res : PByte; ResLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetResultMatch;
+function __wasm_regexp_get_group_count(aExprID : TWasmRegExpID; aCount: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupCount;
+function __wasm_regexp_get_group_name(aExprID : TWasmRegExpID; aIndex : Longint; aName : PByte; aNameLen : PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetGroupName;
+function __wasm_regexp_get_named_group(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Longint; aValue : PByte; aValueLen: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroup;
+function __wasm_regexp_get_indexes(aExprID : TWasmRegExpID; aIndex : Longint; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetIndexes;
+function __wasm_regexp_get_named_group_indexes(aExprID : TWasmRegExpID; aName : PByte; aNameLen : Integer; aStartIndex : PLongint; aStopIndex: PLongint) : TWasmRegexpResult; external regexpExportName name regexpFN_GetNamedGroupIndexes;
+
+implementation
+
+end.
+

+ 306 - 0
packages/wasm-utils/src/wasm.regexp.objects.pas

@@ -0,0 +1,306 @@
+unit wasm.regexp.objects;
+
+{$mode ObjFPC}{$H+}
+{$modeswitch typehelpers}
+
+interface
+
+uses
+{$IFDEF FPC_DOTTEDUNITS}
+  System.SysUtils,
+{$ELSE}
+  SysUtils,
+{$ENDIF}
+  wasm.regexp.shared,
+  wasm.regexp.api;
+
+Type
+  EWasmRegExp = class(Exception);
+
+  TRegexpFlag = (rfUnknown,rfDotAll,rfGlobal,rfIndices,rfIgnoreCase,rfMultiLine,rfSticky,rfUnicode,rfUnicodeSets);
+  TRegexpFlags = Set of TRegExpFlag;
+
+  { TRegexpFlagHelper }
+
+  TRegexpFlagHelper = type helper for TRegexpFlag
+  Public
+    function ToString: String;
+    Function AsFlag : Longint;
+    Property AsString : String Read ToString;
+  end;
+
+  { TRegexpFlagsHelper }
+
+  TRegexpFlagsHelper = type helper for TRegexpFlags
+  private
+    procedure SetAsFlags(const aValue: Longint);
+  public
+    function ToString: String;
+    Function ToFlags : Longint;
+    class function FromFlags(aFlags : Longint) : TRegExpFlags; static;
+    Property AsString : String Read ToString;
+    Property AsFlags : Longint Read ToFlags Write SetAsFlags;
+  end;
+
+  TRegExpMatch = record
+    Value : UTF8String;
+    StartIndex, StopIndex : Integer;
+  end;
+  TRegExpMatchArray = array of TRegExpMatch;
+
+  TRegExpGroup = record
+    Name,Value : UTF8String;
+    StartIndex, StopIndex : Integer;
+  end;
+  TRegExpGroupArray = array of TRegExpGroup;
+
+  TRegExpResult = record
+    Matches : TRegExpMatchArray;
+    Input : UTF8String;
+    Index : Integer;
+    Groups : TRegExpGroupArray;
+  end;
+
+  { TWasmRegExp }
+
+  TWasmRegExp = Class(TObject)
+  private
+    FRegexpID: TWasmRegexpID;
+    FFlags : Longint;
+    function GetFlags: TRegexpFlags;
+    function GetGroups(aCount: Integer): TRegExpGroupArray;
+    function GetLastIndex: Longint;
+    function GetMatches(aCount: Integer): TRegExpMatchArray;
+  protected
+    function CheckRegExpResult(Res : TWasmRegexpResult; const aOperation : String; aRaise : Boolean = true) : Boolean;
+  Public
+    Constructor Create(const aExpression,aFlags : String); overload;
+    Constructor Create(const aExpression : String; aFlags : Longint); overload;
+    Constructor Create(const aExpression : String; aFlags : TRegexpFlags); overload;
+    destructor Destroy; override;
+    Function Exec(const aString : String) : TRegExpResult;
+    Function Test(const aString : String) : Boolean;
+    Property LastIndex : Longint Read GetLastIndex;
+    Property RegexpID : TWasmRegExpID Read FRegexpID;
+    Property FlagsAsInteger : Integer Read FFlags;
+    Property Flags : TRegexpFlags Read GetFlags;
+  end;
+
+implementation
+
+{ TRegexpFlagHelper }
+
+function TRegexpFlagHelper.ToString: String;
+begin
+
+end;
+
+function TRegexpFlagHelper.AsFlag: Longint;
+
+Const
+  FlagValues : Array[TRegexpFlag] of longint
+             = (0,
+                WASMRE_FLAG_DOTALL,
+                WASMRE_FLAG_GLOBAL,
+                WASMRE_FLAG_INDICES,
+                WASMRE_FLAG_IGNORECASE,
+                WASMRE_FLAG_MULTILINE,
+                WASMRE_FLAG_STICKY,
+                WASMRE_FLAG_UNICODE,
+                WASMRE_FLAG_UNICODESETS);
+
+
+
+begin
+  Result:=FlagValues[Self];
+end;
+
+{ TRegexpFlagsHelper }
+
+procedure TRegexpFlagsHelper.SetAsFlags(const aValue: Longint);
+
+var
+  F : TRegexpFlag;
+  Res : TRegexpFlags;
+
+begin
+  Res:=[];
+  for F in TRegexpFlag do
+    if (F.AsFlag and aValue)<>0 then
+      Include(Res,F);
+  Self:=Res;
+end;
+
+function TRegexpFlagsHelper.ToString: String;
+
+begin
+  Result:=FlagsToString(AsFlags);
+end;
+
+function TRegexpFlagsHelper.ToFlags: Longint;
+var
+  F : TRegexpFlag;
+begin
+  Result:=0;
+  For F in Self do
+    Result:=Result or F.AsFlag;
+end;
+
+class function TRegexpFlagsHelper.FromFlags(aFlags: Longint): TRegExpFlags;
+begin
+  Result.AsFlags:=aFlags;
+end;
+
+
+{ TWasmRegExp }
+
+
+function TWasmRegExp.GetLastIndex: Longint;
+begin
+  CheckRegExpResult(__wasm_regexp_get_last_index(FRegexpID,@Result),'get_last_index');
+end;
+
+function TWasmRegExp.GetFlags: TRegexpFlags;
+begin
+  Result:=TRegExpFlags.FromFlags(FFlags)
+end;
+
+function TWasmRegExp.CheckRegExpResult(Res: TWasmRegexpResult; const aOperation: String; aRaise: Boolean): Boolean;
+begin
+  Result:=Res=WASMRE_RESULT_SUCCESS;
+  if (not Result) and aRaise then
+    Raise EWasmRegExp.CreateFmt('Error %d occured during "%s"',[Res,aOperation]);
+end;
+
+constructor TWasmRegExp.Create(const aExpression, aFlags: String);
+begin
+  Create(aExpression,StringToFlags(aFlags,False));
+end;
+
+constructor TWasmRegExp.Create(const aExpression: String; aFlags: Longint);
+
+var
+  R : RawByteString;
+
+begin
+  R:=UTF8Encode(aExpression);
+  FFlags:=aFlags;
+  CheckRegexpResult(__wasm_regexp_allocate(PByte(R),Length(R),aFlags,@FRegexpID),regexpFN_Allocate);
+end;
+
+constructor TWasmRegExp.Create(const aExpression: String; aFlags: TRegexpFlags);
+begin
+  Create(aExpression,aFlags.AsFlags);
+end;
+
+destructor TWasmRegExp.Destroy;
+begin
+  CheckRegExpResult(__wasm_regexp_deallocate(FRegexpID),regexpFN_Allocate,false);
+  inherited Destroy;
+end;
+
+function TWasmRegExp.GetMatches(aCount: Integer): TRegExpMatchArray;
+
+var
+  I : Integer;
+  Len,lStart,lStop : Longint;
+  Res : TWasmRegexpResult;
+  S : RawByteString;
+  lGetIndexes : Boolean;
+
+begin
+  SetLength(Result,aCount);
+  lGetindexes:=rfIndices in Flags;
+  For I:=0 to aCount-1 do
+    begin
+    Len:=256;
+    Repeat
+      SetLength(S,Len);
+      Res:=__wasm_regexp_get_result_match(FRegexpID,I,Pbyte(S),@Len);
+    Until (Res<>WASMRE_RESULT_NO_MEM);
+    SetLength(S,Len);
+    CheckRegExpResult(Res,regexpFN_GetResultMatch);
+    Result[i].Value:=S;
+    S:='';
+    if lGetIndexes then
+      CheckRegExpResult(__wasm_regexp_get_Indexes(FRegexpID,I,@lStart,@lStop),regexpFN_GetIndexes);
+    Result[i].StartIndex:=lStart+1;
+    Result[i].StopIndex:=lStop+1;
+    end;
+end;
+
+function TWasmRegExp.GetGroups(aCount: Integer): TRegExpGroupArray;
+
+var
+  I : Integer;
+  Len,lStart,lStop : Longint;
+  Res : TWasmRegexpResult;
+  N,V : RawByteString;
+  lGetIndexes : Boolean;
+
+begin
+  N:='';
+  V:='';
+  SetLength(Result,aCount);
+  lGetindexes:=rfIndices in Flags;
+  For I:=0 to aCount-1 do
+    begin
+    Len:=256;
+    Repeat
+      SetLength(N,Len);
+      Res:=__wasm_regexp_get_group_name(FRegexpID,I,Pbyte(N),@Len);
+    Until (Res<>WASMRE_RESULT_NO_MEM);
+    CheckRegExpResult(Res,regexpFN_GetGroupName);
+    SetLength(N,Len);
+    Result[i].Name:=N;
+    Len:=256;
+    Repeat
+      SetLength(V,Len);
+      Res:=__wasm_regexp_get_named_group(FRegexpID,PByte(N),Length(N),Pbyte(V),@Len);
+    Until (Res<>WASMRE_RESULT_NO_MEM);
+    CheckRegExpResult(Res,regexpFN_GetNamedGroup);
+    SetLength(V,Len);
+    Result[I].Value:=V;
+    if lGetIndexes then
+      CheckRegExpResult(__wasm_regexp_get_named_group_indexes(FRegexpID,PByte(N),Length(N),@lStart,@lStop),regexpFN_GetNamedGroup);
+    Result[i].StartIndex:=lStart+1;
+    Result[i].StopIndex:=lStop+1;
+    end;
+end;
+
+
+function TWasmRegExp.Exec(const aString: String): TRegExpResult;
+
+var
+  lGroupCount, lIndex, lMatchCount : longint;
+  R : RawByteString;
+
+begin
+  Result:=Default(TRegExpResult);
+  R:=UTF8Encode(aString);
+  Result.Input:=R;
+  CheckRegexpResult(__wasm_regexp_exec(FRegexpID,Pbyte(R),Length(R),@lIndex,@lMatchCount),regexpFN_exec);
+  If lMatchCount=0 then
+    exit;
+  Result.Index:=lIndex+1;
+  Result.Matches:=GetMatches(lMatchCount);
+  CheckRegExpResult(__wasm_regexp_get_group_count(FRegexpID,@lGroupCount),regexpFN_GetGroupCount);
+  if lGroupCount>0 then
+    Result.Groups:=GetGroups(lGroupCount);
+end;
+
+
+function TWasmRegExp.Test(const aString: String): Boolean;
+
+var
+  R : RawByteString;
+  lRes : Longint;
+
+begin
+  R:=UTF8Encode(aString);
+  CheckRegexpResult(__wasm_regexp_test(FRegexpID,Pbyte(R),Length(R),@lRes),regexpFN_test);
+  Result:=(lRes<>0);
+end;
+
+end.
+

+ 133 - 0
packages/wasm-utils/src/wasm.regexp.shared.pas

@@ -0,0 +1,133 @@
+unit wasm.regexp.shared;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+Type
+  TWasmRegexpID = Longint;
+  TWasmRegexpResult = Longint;
+  {$IFNDEF PAS2JS}
+  TWasmPointer = Pointer;
+  PWasmRegexpID = ^TWasmRegexpID;
+  {$ELSE}
+  TWasmPointer = Longint;
+  PWasmRegexpID = TWasmPointer;
+  {$ENDIF}
+
+Const
+
+  WASMRE_RESULT_SUCCESS    = 0;
+  WASMRE_RESULT_ERROR      = -1;
+  WASMRE_RESULT_INVALIDID  = -2;
+  WASMRE_RESULT_NO_MEM     = -3;
+  WASMRE_RESULT_NO_REGEXP  = -4;
+  WASMRE_RESULT_INVALIDIDX = -5;
+  WASMRE_RESULT_NOINDEXES  = -6;
+
+  WASMRE_FLAG_DOTALL      = 1;
+  WASMRE_FLAG_GLOBAL      = 2;
+  WASMRE_FLAG_INDICES     = 4;
+  WASMRE_FLAG_IGNORECASE  = 8;
+  WASMRE_FLAG_MULTILINE   = 16;
+  WASMRE_FLAG_STICKY      = 32;
+  WASMRE_FLAG_UNICODE     = 64;
+  WASMRE_FLAG_UNICODESETS = 128;
+
+  // Aliases that correspond to the letters used when creating a regexp
+  WASMRE_FLAG_S = WASMRE_FLAG_DOTALL;
+  WASMRE_FLAG_G = WASMRE_FLAG_GLOBAL;
+  WASMRE_FLAG_D = WASMRE_FLAG_INDICES;
+  WASMRE_FLAG_I = WASMRE_FLAG_IGNORECASE;
+  WASMRE_FLAG_M = WASMRE_FLAG_MULTILINE;
+  WASMRE_FLAG_Y = WASMRE_FLAG_STICKY;
+  WASMRE_FLAG_U = WASMRE_FLAG_UNICODE;
+  WASMRE_FLAG_V = WASMRE_FLAG_UNICODESETS;
+
+  regexpExportName = 'regexp';
+  regexpFN_Allocate = 'allocate';
+  regexpFN_DeAllocate = 'deallocate';
+  regexpFN_Exec = 'exec';
+  regexpFN_Test = 'test';
+  regexpFN_GetFlags = 'get_flags';
+  regexpFN_GetExpression = 'get_expression';
+  regexpFN_GetLastIndex = 'get_last_index';
+  regexpFN_GetResultMatch = 'get_result_match';
+  regexpFN_GetGroupCount = 'get_group_count';
+  regexpFN_GetGroupName = 'get_group_name';
+  regexpFN_GetNamedGroup = 'get_named_group';
+  regexpFN_GetIndexes = 'get_indexes';
+  regexpFN_GetNamedGroupIndexes = 'get_named_group_indexes';
+
+Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
+Function FlagsToString(S : Longint) : String;
+
+implementation
+
+uses
+{$IFDEF FPC_DOTTEDUNITS}
+  System.SysUtils;
+{$ELSE}
+  SysUtils;
+{$ENDIF}
+
+
+Function StringToFlags(S : String; IgnoreUnknown : Boolean = True) : Longint;
+
+var
+  C : Char;
+  Flag : Longint;
+
+begin
+  Result:=0;
+  for C in S do
+   begin
+   case C of
+     's': Flag:=WASMRE_FLAG_S;
+     'g': Flag:=WASMRE_FLAG_G;
+     'd': Flag:=WASMRE_FLAG_D;
+     'i': Flag:=WASMRE_FLAG_I;
+     'm': Flag:=WASMRE_FLAG_M;
+     'y': Flag:=WASMRE_FLAG_Y;
+     'u': Flag:=WASMRE_FLAG_U;
+     'v': Flag:=WASMRE_FLAG_V;
+   else
+     if not IgnoreUnknown then
+       Raise EConvertError.CreateFmt('Unknown regexp flag: %s',[C]);
+     Flag:=0;
+   end;
+   Result:=Result or Flag;
+   end;
+end;
+
+Function FlagsToString(S : Longint) : String;
+
+var
+  C,I : Longint;
+  Flag : Char;
+
+begin
+  Result:='';
+  for I:=0 to 7 do
+    begin
+    C:=S and (1 shl i);
+    case C of
+      WASMRE_FLAG_S : Flag:='s';
+      WASMRE_FLAG_G : Flag:='g';
+      WASMRE_FLAG_D : Flag:='d';
+      WASMRE_FLAG_I : Flag:='i';
+      WASMRE_FLAG_M : Flag:='m';
+      WASMRE_FLAG_Y : Flag:='y';
+      WASMRE_FLAG_U : Flag:='u';
+      WASMRE_FLAG_V : Flag:='v';
+    else
+      Flag:='0';
+    end;
+    if Flag<>'0' then
+      Result:=Result + Flag;
+    end;
+end;
+
+
+end.
+