Browse Source

* Regexp for wasm (using browser API)

Michaël Van Canneyt 11 months ago
parent
commit
cc15c3c2de

+ 6 - 5
packages/vcl-compat/fpmake.pp

@@ -37,6 +37,7 @@ begin
     P.Dependencies.Add('fcl-hash');
     P.Dependencies.Add('fcl-hash');
     P.Dependencies.Add('hash');
     P.Dependencies.Add('hash');
     P.Dependencies.Add('libpcre',[Win64,Linux,darwin]);
     P.Dependencies.Add('libpcre',[Win64,Linux,darwin]);
+    P.Dependencies.Add('wasm-utils',[wasi]);
     P.SourcePath.Add('src');
     P.SourcePath.Add('src');
     P.IncludePath.Add('src');
     P.IncludePath.Add('src');
 
 
@@ -64,12 +65,12 @@ begin
     T.ResourceStrings := True;
     T.ResourceStrings := True;
     T:=P.Targets.AddUnit('system.credentials.pp');
     T:=P.Targets.AddUnit('system.credentials.pp');
     T.ResourceStrings := True;
     T.ResourceStrings := True;
-    T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp',[Win64,Linux,darwin]);
+    T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp',[Win64,Linux,darwin,wasi]);
     T.ResourceStrings := True;
     T.ResourceStrings := True;
-    T:=P.Targets.AddUnit('system.regularexpressionscore.pp',[Win64,Linux,darwin]);
-    T.Dependencies.AddUnit('system.regularexpressionsconsts',[Win64,Linux,darwin]);
-    T:=P.Targets.AddUnit('system.regularexpressions.pp',[Win64,Linux,darwin]);
-    T.Dependencies.AddUnit('system.regularexpressionscore',[Win64,Linux,darwin]);
+    T:=P.Targets.AddUnit('system.regularexpressionscore.pp',[Win64,Linux,darwin,wasi]);
+    T.Dependencies.AddUnit('system.regularexpressionsconsts',[Win64,Linux,darwin,wasi]);
+    T:=P.Targets.AddUnit('system.regularexpressions.pp',[Win64,Linux,darwin,wasi]);
+    T.Dependencies.AddUnit('system.regularexpressionscore',[Win64,Linux,darwin,wasi]);
     T:=P.Targets.AddUnit('system.threading.pp',AllOSes-[go32v2,nativent,atari]);
     T:=P.Targets.AddUnit('system.threading.pp',AllOSes-[go32v2,nativent,atari]);
     T.ResourceStrings := True;
     T.ResourceStrings := True;
 
 

+ 23 - 7
packages/vcl-compat/src/system.regularexpressionscore.pp

@@ -37,9 +37,29 @@ interface
 
 
 uses
 uses
 {$IFDEF FPC_DOTTEDUNITS}
 {$IFDEF FPC_DOTTEDUNITS}
-  System.SysUtils, System.Classes, System.Contnrs, {$IFNDEF USEWIDESTRING} Api.PCRE2_8 {$ELSE} Api.PCRE2_16 {$ENDIF}, System.CTypes, System.RegularExpressionsConsts;
+  System.SysUtils, System.Classes, System.Contnrs, System.CTypes,
+  {$IFNDEF CPUWASM}
+    {$IFNDEF USEWIDESTRING}
+      Api.PCRE2_8
+    {$ELSE}
+      Api.PCRE2_16
+    {$ENDIF},
+  {$ELSE}
+  wasm.pcrebridge,
+  {$ENDIF}
+  System.RegularExpressionsConsts;
 {$ELSE}
 {$ELSE}
-  SysUtils, Classes, Contnrs, {$IFNDEF USEWIDESTRING} libpcre2_8 {$ELSE} libpcre2_16 {$ENDIF}, ctypes, System.RegularExpressionsConsts;
+  SysUtils, Classes, Contnrs,ctypes,
+  {$IFNDEF CPUWASM}
+    {$IFNDEF USEWIDESTRING}
+      libpcre2_8
+    {$ELSE}
+      libpcre2_16
+    {$ENDIF},
+  {$ELSE}
+  wasm.pcrebridge,
+  {$ENDIF}
+ System.RegularExpressionsConsts;
 {$ENDIF}
 {$ENDIF}
 
 
 const
 const
@@ -642,7 +662,6 @@ begin
   if (FCode=nil) then
   if (FCode=nil) then
     raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]);
     raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]);
   FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
   FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
-
 end;
 end;
 
 
 procedure TPerlRegEx.Study;
 procedure TPerlRegEx.Study;
@@ -705,8 +724,7 @@ end;
 function TPerlRegEx.GetNames(aIndex : Integer): TREString;
 function TPerlRegEx.GetNames(aIndex : Integer): TREString;
 var
 var
   Ptr : PCRE2_SPTR;
   Ptr : PCRE2_SPTR;
-  N,I : Integer;
-  tblName : TREString;
+  I : Integer;
 
 
 begin
 begin
   Ptr:=FNameTable;
   Ptr:=FNameTable;
@@ -715,10 +733,8 @@ begin
   for i:=0 to aIndex-1 do
   for i:=0 to aIndex-1 do
     Inc(Ptr,FNameEntrySize);
     Inc(Ptr,FNameEntrySize);
 {$IFDEF USEWIDESTRING}
 {$IFDEF USEWIDESTRING}
-  n:=ord(ptr[0]);
   Result:=GetStrLen((Ptr+1),FNameEntrySize-2);
   Result:=GetStrLen((Ptr+1),FNameEntrySize-2);
 {$ELSE}
 {$ELSE}
-  n:=(ord(ptr[0]) shl 8) or ord(ptr[1]);
   Result:=GetStrLen((Ptr+2),FNameEntrySize-3);
   Result:=GetStrLen((Ptr+2),FNameEntrySize-3);
 {$ENDIF}
 {$ENDIF}
 end;
 end;

+ 4 - 0
packages/wasm-utils/fpmake.pp

@@ -50,6 +50,10 @@ begin
     T:=P.Targets.AddUnit('wasm.regexp.objects.pas');
     T:=P.Targets.AddUnit('wasm.regexp.objects.pas');
       T.Dependencies.AddUnit('wasm.regexp.api');
       T.Dependencies.AddUnit('wasm.regexp.api');
       T.Dependencies.AddUnit('wasm.regexp.shared');
       T.Dependencies.AddUnit('wasm.regexp.shared');
+    T:=P.Targets.AddUnit('wasm.pcrebridge.pas');
+      T.Dependencies.AddUnit('wasm.regexp.api');
+      T.Dependencies.AddUnit('wasm.regexp.shared');
+      T.Dependencies.AddUnit('wasm.regexp.objects');
       
       
 {$ifndef ALLPACKAGES}
 {$ifndef ALLPACKAGES}
     Run;
     Run;

+ 406 - 0
packages/wasm-utils/src/wasm.pcrebridge.pas

@@ -0,0 +1,406 @@
+unit wasm.pcrebridge;
+
+{$mode ObjFPC}{$H+}
+
+interface
+
+uses
+{$IFDEF FPC_DOTTEDUNITS}
+  System.CTypes, System.SysUtils,
+{$ELSE}
+  ctypes, sysutils, 
+{$ENDIF}
+  wasm.regexp.objects;
+
+{$IF SIZEOF(CHAR)=2}
+{$DEFINE STRING_IS_UNICODE}
+{$ENDIF}
+
+const
+  PCRE2_NEWLINE_ANY         = 1;
+  PCRE2_UTF                 = 1 shl 1;
+  PCRE2_CASELESS            = 1 shl 2;
+  PCRE2_MULTILINE           = 1 shl 3;
+  PCRE2_DOTALL              = 1 shl 4;
+  PCRE2_EXTENDED            = 1 shl 5;
+  PCRE2_ANCHORED            = 1 shl 6;
+  PCRE2_UNGREEDY            = 1 shl 7;
+  PCRE2_NO_AUTO_CAPTURE     = 1 shl 8;
+  PCRE2_ALLOW_EMPTY_CLASS   = 1 shl 9;
+  PCRE2_ALT_BSUX            = 1 shl 10;
+  PCRE2_ALT_CIRCUMFLEX      = 1 shl 11;
+  PCRE2_ALT_VERBNAMES       = 1 shl 12;
+  PCRE2_DOLLAR_ENDONLY      = 1 shl 13;
+  PCRE2_DUPNAMES            = 1 shl 14;
+  PCRE2_ENDANCHORED         = 1 shl 15;
+  PCRE2_FIRSTLINE           = 1 shl 16;
+  PCRE2_LITERAL             = 1 shl 17;
+  PCRE2_MATCH_INVALID_UTF   = 1 shl 18;
+  PCRE2_MATCH_UNSET_BACKREF = 1 shl 19;
+  PCRE2_NEVER_BACKSLASH_C   = 1 shl 20;
+  PCRE2_NO_AUTO_POSSESS     = 1 shl 21;
+  PCRE2_NO_DOTSTAR_ANCHOR   = 1 shl 22;
+  PCRE2_NO_START_OPTIMIZE   = 1 shl 23;
+  PCRE2_NO_UTF_CHECK        = 1 shl 24;
+  PCRE2_USE_OFFSET_LIMIT    = 1 shl 25;
+
+  PCRE2_ERROR_NOMATCH = -(1);
+  PCRE2_ERROR_PARTIAL = -(2);
+  PCRE2_ERROR_WASM    = -(200);
+
+  PCRE2_INFO_ALLOPTIONS    = 0;
+  PCRE2_INFO_NAMECOUNT     = 1;
+  PCRE2_INFO_NAMETABLE     = 2;
+  PCRE2_INFO_NEWLINE       = 3;
+  PCRE2_INFO_NAMEENTRYSIZE = 4;
+
+  PCRE2_NEWLINE_CR = 1;
+  PCRE2_NEWLINE_LF = 2;
+  PCRE2_NEWLINE_CRLF = 3;
+
+  PCRE2_NEWLINE_ANYCRLF = 5;
+  PCRE2_NEWLINE_NUL = 6;
+  PCRE2_BSR_UNICODE = 1;
+  PCRE2_BSR_ANYCRLF = 2;
+
+  PCRE2_NOTEMPTY_ATSTART = 1;
+
+
+Type
+  tsize_t = csize_t;
+  Psize_t = ^tsize_t;
+  PCRE2_SIZE = tsize_t;
+  PTsize_t = ^Tsize_t;
+  PPTsize_t = ^PTsize_t;
+  tuint32_t = cardinal;
+  Tint32_t = cuint32;
+  Tcint = cint;
+
+  { TPCREWasmRegExp }
+
+  TPCREWasmRegExp = Class(TWasmRegExp)
+    matchindex : Integer;
+    lastRes : TRegExpResult;
+    haveres : Boolean;
+    lasterror : String;
+    OVector : Array of Tsize_t;
+    FNamesTable : String;
+    FNamesTableEntrySize : Integer;
+  private
+    procedure CreateNamesTable;
+    function GetCaptureIndex(aStartIndex, aStopIndex: Integer): Integer;
+    function HandleExec(aOffset: TSize_t; const S: String): Tcint;
+  end;
+
+  TPCRE2_SPTR8 = PAnsichar;
+  TPCRE2_SPTR16 = PWideChar;
+  Ppcre2_code_8 = TPCREWasmRegExp;
+  Ppcre2_code_16 = TPCREWasmRegExp;
+  PTpcre2_code_8 = Ppcre2_code_8;
+  PTpcre2_code_16 = Ppcre2_code_16;
+  PTpcre2_compile_context_8 = Pointer;
+  PTpcre2_match_data_8 = TPCREWasmRegExp;
+  PTpcre2_general_context_8 = Pointer;
+  PTpcre2_match_context_8 = Pointer;
+  ppcre2_match_data = PTpcre2_match_data_8;
+  {$IFDEF STRING_IS_UNICODE}
+  PCRE2_SPTR = PWideChar;
+  {$ELSE}
+  PCRE2_SPTR = PAnsiChar;
+  {$ENDIF}
+  PTPCRE2_UCHAR8 = PAnsiChar;
+
+
+
+function pcre2_compile(RegExp: TPCRE2_SPTR8; RegexLen: Tsize_t; Opts: Tuint32_t; ErrorNr: Pcint; ErrorPos: Psize_t; Context: PTpcre2_compile_context_8):Ppcre2_code_8;
+procedure pcre2_code_free(Regexp:PTpcre2_code_8);
+
+function pcre2_match_w(RegExp:PTpcre2_code_8; aSubject: TPCRE2_SPTR16; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8):Tcint;
+function pcre2_match(RegExp:PTpcre2_code_8; aSubject: TPCRE2_SPTR8; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t; MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8):Tcint;
+function pcre2_match_data_create_from_pattern(RegExp: PTpcre2_code_8; aContext: PTpcre2_general_context_8):PTpcre2_match_data_8;
+procedure pcre2_match_data_free(aMatchData:PTpcre2_match_data_8);
+
+function pcre2_pattern_info(RegExp:PTpcre2_code_8; Info: Tuint32_t; Res: pointer):Tcint;
+
+function pcre2_get_ovector_pointer (match: PTpcre2_match_data_8): Psize_t;
+function pcre2_get_startchar(match: PTpcre2_match_data_8): Tsize_t;
+function pcre2_get_error_message(ErrNo:Tcint; aString :PTPCRE2_UCHAR8; aStrlen:Tsize_t):Tcint;
+
+function libpcre28loaded : Boolean;
+procedure Loadlibpcre28;
+
+
+implementation
+
+var
+  gLastError : String;
+
+function OptsToFlags(Opts : Tuint32_t) : TRegexpFlags;
+
+  Procedure Check(PerlFlag :Tuint32_t; RegexFlag :TRegexpFlag);
+  begin
+    if (Opts and PerlFlag)<>0 then
+      Include(Result,RegexFlag);
+  end;
+
+begin
+  Result:=[rfIndices,rfGlobal];
+  Check(PCRE2_MULTILINE,rfMultiLine);
+  Check(PCRE2_CASELESS,rfIgnoreCase);
+  Check(PCRE2_UTF,rfUnicode);
+  Check(PCRE2_DOTALL,rfDotAll);
+  (* The rest is not supported *)
+end;
+
+function pcre2_compile (RegExp:TPCRE2_SPTR8; RegexLen:Tsize_t; Opts :Tuint32_t; ErrorNr :Pcint; ErrorPos:Psize_t; Context :PTpcre2_compile_context_8):Ppcre2_code_8;
+
+var
+  F : TRegexpFlags ;
+  S : String;
+
+begin
+  Result:=Nil;
+  SetLength(S,RegexLen);
+  Move(Regexp^,S[1],SizeOf(Char)*RegexLen);
+  F:=OptsToFlags(Opts);
+  try
+    Result:=TPCREWasmRegExp.Create(S,F);
+  except
+    on E : Exception do
+      begin
+      ErrorNr^:=PCRE2_ERROR_WASM; // Does not exist (yet)
+      gLastError:=E.Message;
+      ErrorPos^:=0;
+      end;
+  end;
+end;
+
+procedure pcre2_code_free(Regexp:PTpcre2_code_8);
+
+var
+  RE : TPCREWasmRegExp absolute Regexp;
+
+begin
+  RE.Free;
+end;
+
+function pcre2_match_data_create_from_pattern(RegExp:PTpcre2_code_8; aContext :PTpcre2_general_context_8):PTpcre2_match_data_8;
+
+begin
+  Result:=Regexp;
+end;
+
+function pcre2_pattern_info(RegExp: PTpcre2_code_8; Info: Tuint32_t; Res: pointer): Tcint;
+
+var
+  RE : TPCREWasmRegExp absolute Regexp;
+
+begin
+  result:=0;
+  case info of
+    PCRE2_INFO_NAMECOUNT :
+      Pcuint32(Res)^:=Length(RE.lastRes.Groups);
+    PCRE2_INFO_NAMETABLE :
+      PPChar(Res)^:=PChar(RE.FNamesTable);
+    PCRE2_INFO_NAMEENTRYSIZE :
+      Pcuint32(Res)^:=RE.FNamesTableEntrySize;
+    PCRE2_INFO_ALLOPTIONS :
+      Pcuint32(Res)^:=PCRE2_UTF;
+    PCRE2_INFO_NEWLINE :
+      Pcuint32(Res)^:=PCRE2_NEWLINE_ANY;
+  else
+    Result:=PCRE2_ERROR_WASM;
+  end;
+end;
+
+
+procedure pcre2_match_data_free(aMatchData: PTpcre2_match_data_8);
+begin
+  // Do nothing
+end;
+
+function TPCREWasmRegExp.GetCaptureIndex(aStartIndex,aStopIndex : Integer) : Integer;
+
+begin
+  Result:=Length(lastRes.Matches)-1;
+  While (Result>=0) and Not lastRes.Matches[Result].MatchPos(aStartIndex,aStopIndex) do
+    Dec(Result);
+end;
+
+procedure TPCREWasmRegExp.CreateNamesTable;
+
+Const
+  ExtraSize = SizeOf(Word);
+  {$IFDEF STRING_IS_UNICODE}
+  CharOffset = 1;
+  {$ELSE}
+  CharOffset = 2;
+  {$ENDIF}
+
+var
+  i, tmp, entrylen,len : Integer;
+  N: UTF8String;
+  NS : String;
+  CaptureIdx : Word;
+  CaptureOffset,NameOffset : integer;
+
+begin
+  FNamesTableEntrySize:=0;
+  FNamesTable:='';
+  Len:=Length(lastRes.Groups);
+  if Len=0 then exit;
+  entryLen:=Length(lastRes.Groups[0].Name);
+  For I:=1 to Len-1 do
+    begin
+    tmp:=Length(lastRes.Groups[i].Name);
+    if tmp>Entrylen then
+      Entrylen:=Tmp;
+    end;
+  EntryLen:=EntryLen+ExtraSize*2;
+  FNamesTableEntrySize:=EntryLen;
+  SetLength(FNamesTable,Len*EntryLen);
+  FillChar(FNamesTable[1],Length(FNamesTable)*SizeOf(Char),0);
+  For I:=0 to Len-1 do
+    begin
+    N:=lastRes.Groups[i].Name;
+    {$IFDEF STRING_IS_UNICODE}
+    NS:=UTF8Decode(N);
+    {$ELSE}
+    NS:=N;
+    {$ENDIF}
+    CaptureIdx:=GetCaptureIndex(lastRes.Groups[i].StartIndex,lastRes.Groups[i].StopIndex);
+    CaptureIdx:=NtoBE(CaptureIdx);
+    CaptureOffset:=1+(I*EntryLen);
+    NameOffset:=1+CharOffset+(I*EntryLen);
+    Move(CaptureIdx,FNamesTable[CaptureOffset],SizeOf(Word));
+    Move(NS[1],FNamesTable[NameOffset],Length(NS)*SizeOf(Char));
+    end;
+end;
+
+function TPCREWasmRegExp.HandleExec(aOffset : TSize_t;Const S : String) : Tcint;
+
+var
+  i, len : Integer;
+
+begin
+  try
+    LastIndex:=aOffset;
+    lastRes:=Exec(S);
+    len:=Length(lastRes.Matches);
+    if len=0 then
+      Result:=PCRE2_ERROR_NOMATCH
+    else
+      Result:=len;
+    SetLength(OVector,Len*2);
+    For i:=0 to len-1 do
+      begin
+      OVector[I*2]:=LastRes.Matches[i].StartIndex-1;
+      OVector[(I*2)+1]:=LastRes.Matches[i].StopIndex-1;
+      end;
+    Len:=Length(lastRes.Groups);
+    if Len>0 then
+      CreateNamesTable;
+  except
+    Result:=PCRE2_ERROR_WASM;
+  end;
+end;
+
+function pcre2_match_w(RegExp: PTpcre2_code_8; aSubject: TPCRE2_SPTR16; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t;
+  MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8): Tcint;
+
+var
+  RE : TPCREWasmRegExp absolute Regexp;
+  S : String;
+  {$IFNDEF STRING_IS_UNICODE}
+  US : UnicodeString;
+  {$ENDIF}
+
+begin
+  {$IFDEF STRING_IS_UNICODE}
+  S:='';
+  SetLength(S,aSubjectLen);
+  if aSubjectLen>0 then
+    Move(aSubject^,S[1],aSubjectLen*SizeOf(Char));
+  {$ELSE}
+  US:='';
+  SetLength(US,aSubjectLen);
+  if aSubjectLen>0 then
+    Move(aSubject^,US[1],aSubjectLen* SizeOf(Char));
+  S:=UTF8Encode(US);
+  {$ENDIF}
+  Result:=Re.HandleExec(aOffset,S);
+end;
+
+function pcre2_match(RegExp: PTpcre2_code_8; aSubject: TPCRE2_SPTR8; aSubjectLen: Tsize_t; aOffset: Tsize_t; Opts: Tuint32_t;
+  MatchData: PTpcre2_match_data_8; aContext: PTpcre2_match_context_8): Tcint;
+
+var
+  RE : TPCREWasmRegExp absolute Regexp;
+  {$IFDEF STRING_IS_UNICODE}
+  RS : UnicodeString;
+  {$ENDIF}
+  S : String;
+
+begin
+  {$IFDEF STRING_IS_UNICODE}
+  Rs:='';
+  SetLength(RS,aSubjectLen);
+  if aSubjectLen>0 then
+    Move(aSubject^,RS[1],aSubjectLen* SizeOf(Char));
+  S:=UTF8Decode(RS);
+  {$ELSE}
+  S:='';
+  SetLength(S,aSubjectLen);
+  if aSubjectLen>0 then
+    Move(aSubject^,S[1],aSubjectLen*SizeOf(Char));
+  {$ENDIF}
+  Result:=Re.HandleExec(aOffset,S);
+end;
+
+
+function pcre2_get_ovector_pointer(match: PTpcre2_match_data_8): Psize_t;
+var
+  RE : TPCREWasmRegExp absolute match;
+begin
+  Result:=PSize_t(Re.OVector);
+end;
+
+function pcre2_get_startchar(match: PTpcre2_match_data_8): Tsize_t;
+var
+  RE : TPCREWasmRegExp absolute match;
+begin
+  if (Length(Re.lastRes.Matches)>0) then
+    Result:=Re.LastRes.Matches[0].StartIndex
+  else
+    Result:=Length(Re.lastRes.Input);
+end;
+
+function pcre2_get_error_message(ErrNo: Tcint; aString: PTPCRE2_UCHAR8; aStrlen: Tsize_t): Tcint;
+
+var
+  Err : RawByteString;
+
+begin
+  Err:=Format('Unknown error %d',[ErrNo]);
+  Result:=Length(Err);
+  if (Result>aStrLen) then
+    Result:=aStrLen;
+  if Result>0 then
+    Move(Err[1],aString^,Result);
+end;
+
+function libpcre28loaded : Boolean;
+
+begin
+  Result:=True;
+end;
+
+procedure Loadlibpcre28;
+begin
+  //
+end;
+
+
+
+end.
+