123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828 |
- {
- This file is part of the Free Component Library
- WEBIDL source lexical scanner
- Copyright (c) 2018 by Michael Van Canneyt [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$mode objfpc}
- {$h+}
- unit webidlscanner;
- interface
- uses SysUtils, Classes;
- type
- TWebIDLVersion = (v1,v2);
- TIDLToken = (
- tkEOF,
- tkUnknown ,
- tkComment,
- tkWhitespace,
- tkString,
- tkNumberInteger,
- tkNumberFloat,
- // Simple (one-character) tokens
- tkDot, // '.',
- tkSemiColon, // ';'
- tkComma, // ','
- tkColon, // ':'
- tkBracketOpen, // '('
- tkBracketClose, // ')'
- tkCurlyBraceOpen, // '{'
- tkCurlyBraceClose, // '}'
- tkSquaredBraceOpen, // '['
- tkSquaredBraceClose, // ']'
- tkLess, // '<'
- tkEqual, // '='
- tkLarger, // '>'
- tkQuestionmark, // '?'
- tkminus, // '-'
- tkIdentifier, // Any identifier
- tkTrue,
- tkFalse,
- tkNull,
- tkAny,
- tkAttribute,
- tkCallback,
- tkConst,
- tkDeleter,
- tkDictionary,
- tkEllipsis,
- tkEnum,
- tkGetter,
- tkImplements,
- tkInfinity,
- tkInherit,
- tkInterface,
- tkIterable,
- tkLegacyCaller,
- tkNan,
- tkNegInfinity,
- tkOptional,
- tkOr,
- tkPartial,
- tkReadOnly,
- tkRequired,
- tkSetter,
- tkStatic,
- tkStringifier,
- tkSerializer,
- tkTypedef,
- tkUnrestricted,
- tkPromise,
- tkFrozenArray,
- tkByteString,
- tkDOMString,
- tkUSVString,
- tkboolean,
- tkbyte,
- tkdouble,
- tkfloat,
- tklong,
- tkobject,
- tkoctet,
- tkunsigned,
- tkvoid,
- tkShort,
- tkSequence,
- tkStringToken,
- tkMixin,
- tkIncludes,
- tkMapLike,
- tkRecord,
- tkSetLike,
- tkOther,
- tkConstructor
- );
- TIDLTokens = Set of TIDLToken;
- EWebIDLScanner = class(EParserError);
- Const
- V2Tokens = [tkMixin,tkIncludes,tkMapLike,tkRecord,tkSetLike,tkFrozenArray,tkConstructor];
- V1Tokens = [tkImplements];
- VersionNonTokens : Array[TWebIDLVersion] of TIDLTokens = (V2Tokens,V1Tokens);
- Type
- TWebIDLScannerSkipMode = (wisSkipNone, wisSkipIfBranch, wisSkipElseBranch, wisSkipAll);
- { TWebIDLScanner }
- TWebIDLScanner = class
- private
- FSource : TStringList;
- FCurRow: Integer;
- FCurToken: TIDLToken;
- FCurTokenString: UTF8string;
- FCurLine: UTF8string;
- FVersion: TWebIDLVersion;
- TokenStr: PChar;
- // Preprocessor #IFxxx skipping data
- FSkipMode: TWebIDLScannerSkipMode;
- FIsSkipping: Boolean;
- FSkipStackIndex: Integer;
- FSkipModeStack: array[0..255] of TWebIDLScannerSkipMode;
- FIsSkippingStack: array[0..255] of Boolean;
- function DetermineToken: TIDLToken;
- function DetermineToken2: TIDLToken;
- function FetchLine: Boolean;
- function GetCurColumn: Integer;
- function ReadComment: UTF8String;
- function ReadIdent: UTF8String;
- function ReadNumber(var S: UTF8String): TIDLToken;
- protected
- Function GetErrorPos : String;
- procedure Error(const Msg: string);overload;
- procedure Error(const Msg: string; Const Args: array of Const);overload;
- function ReadString: UTF8String; virtual;
- function DoFetchToken: TIDLToken;
- procedure HandleDirective; virtual;
- procedure HandleIfDef; virtual;
- procedure HandleElse; virtual;
- procedure HandleEndIf; virtual;
- procedure PushSkipMode; virtual;
- function IsDefined(const aName: string): boolean; virtual;
- procedure SkipWhitespace;
- procedure SkipLineBreak;
- public
- constructor Create(Source: TStream); overload;
- constructor Create(const Source: UTF8String); overload;
- constructor CreateFile(const aFileName: UTF8String);
- destructor Destroy; override;
- function FetchToken: TIDLToken;
- property CurLine: UTF8String read FCurLine;
- property CurRow: Integer read FCurRow;
- property CurColumn: Integer read GetCurColumn;
- property CurToken: TIDLToken read FCurToken;
- property CurTokenString: UTF8String read FCurTokenString;
- Property Version : TWebIDLVersion Read FVersion Write FVersion;
- end;
- const
- TokenInfos: array[TIDLToken] of string = (
- '',
- '',
- '',
- '',
- '',
- '',
- '',
- // Simple (one-character) tokens
- '.',
- ';',
- ',', // ','
- ':', // ':'
- '(', // '('
- ')', // ')'
- '{', // '{'
- '}', // '}'
- '[', // '['
- ']', // ']'
- '<',
- '=',
- '>',
- '?',
- '-',
- '', // Any identifier
- 'true',
- 'false',
- 'null',
- 'any',
- 'attribute',
- 'callback',
- 'const',
- 'deleter',
- 'dictionary',
- 'ellipsis',
- 'enum',
- 'getter',
- 'implements',
- 'Infinity',
- 'inherit',
- 'interface',
- 'iterable',
- 'legacycaller',
- 'NaN',
- '-Infinity',
- 'optional',
- 'or',
- 'partial',
- 'readonly',
- 'required',
- 'setter',
- 'static',
- 'stringifier',
- 'serializer',
- 'typedef',
- 'unrestricted',
- 'Promise',
- 'FrozenArray',
- 'ByteString',
- 'DOMString',
- 'USVString',
- 'boolean',
- 'byte',
- 'double',
- 'float',
- 'long',
- 'object',
- 'octet',
- 'unsigned',
- 'void',
- 'short',
- 'sequence',
- 'string',
- 'mixin',
- 'includes',
- 'maplike',
- 'record',
- 'setlike',
- 'other',
- 'constructor'
- );
- Function GetTokenName(aToken : TIDLToken) : String;
- Function GetTokenNames(aTokenList : TIDLTokens) : String;
- implementation
- uses typinfo;
- Resourcestring
- SErrUnknownTerminator = 'Unknown terminator: "%s"';
- SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
- SUnterminatedComment = 'Unterminated comment at line %d, pos %d: ''%s''';
- SErrOpenString = 'string exceeds end of line';
- SErrInvalidEllipsis = 'Invalid ellipsis token';
- SErrUnknownToken = 'Unknown token, expected number or minus : "%s"';
- // SerrExpectedTokenButWasIdentifier = 'Invalid terminator: "%s"';
- Function GetTokenName(aToken : TIDLToken) : String;
- begin
- Result:=TokenInfos[aToken];
- if Result='' then
- begin
- Result:=GetEnumName(TypeInfo(TIDLToken),Ord(aToken));
- Delete(Result,1,2);
- end;
- end;
- Function GetTokenNames(aTokenList : TIDLTokens) : String;
- Var
- T : TIDLToken;
- begin
- Result:='';
- For T in aTokenList do
- begin
- if (Result<>'') then
- Result:=Result+',';
- Result:=Result+GetTokenName(T);
- end;
- end;
- constructor TWebIDLScanner.Create(Source: TStream);
- begin
- FSource:=TStringList.Create;
- FSource.LoadFromStream(Source);
- end;
- constructor TWebIDLScanner.Create(const Source: UTF8String);
- begin
- FSource:=TStringList.Create;
- FSource.Text:=Source;
- end;
- constructor TWebIDLScanner.CreateFile(const aFileName: UTF8String);
- begin
- FSource:=TStringList.Create;
- FSource.LoadFromFile(aFileName);
- end;
- destructor TWebIDLScanner.Destroy;
- begin
- FreeAndNil(FSource);
- Inherited;
- end;
- function TWebIDLScanner.FetchToken: TIDLToken;
- begin
- Result:=DoFetchToken;
- end;
- procedure TWebIDLScanner.Error(const Msg: string);
- begin
- raise EWebIDLScanner.Create(GetErrorPos+Msg);
- end;
- procedure TWebIDLScanner.Error(const Msg: string; const Args: array of const);
- begin
- raise EWebIDLScanner.Create(GetErrorPos+Format(Msg, Args));
- end;
- function TWebIDLScanner.ReadString : UTF8String;
- Var
- C : Char;
- I, OldLength, SectionLength: Integer;
- S : UTF8String;
- TokenStart: PChar;
- begin
- C:=TokenStr[0];
- Inc(TokenStr);
- TokenStart := TokenStr;
- OldLength := 0;
- Result := '';
- while not (TokenStr[0] in [#0,C]) do
- begin
- if (TokenStr[0]='\') then
- begin
- // Save length
- SectionLength := TokenStr - TokenStart;
- Inc(TokenStr);
- // Read escaped token
- Case TokenStr[0] of
- '"' : S:='"';
- '''' : S:='''';
- 't' : S:=#9;
- 'b' : S:=#8;
- 'n' : S:=#10;
- 'r' : S:=#13;
- 'f' : S:=#12;
- '\' : S:='\';
- '/' : S:='/';
- 'u' : begin
- S:='0000';
- For I:=1 to 4 do
- begin
- Inc(TokenStr);
- Case TokenStr[0] of
- '0'..'9','A'..'F','a'..'f' :
- S[i]:=Upcase(TokenStr[0]);
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
- end;
- end;
- // WideChar takes care of conversion...
- S:=Utf8Encode(WideString(WideChar(StrToInt('$'+S))))
- end;
- #0 : Error(SErrOpenString);
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
- end;
- SetLength(Result, OldLength + SectionLength+1+Length(S));
- if SectionLength > 0 then
- Move(TokenStart^, Result[OldLength + 1], SectionLength);
- Move(S[1],Result[OldLength + SectionLength+1],Length(S));
- Inc(OldLength, SectionLength+Length(S));
- // Next char
- // Inc(TokenStr);
- TokenStart := TokenStr+1;
- end;
- if TokenStr[0] = #0 then
- Error(SErrOpenString);
- Inc(TokenStr);
- end;
- if TokenStr[0] = #0 then
- Error(SErrOpenString);
- SectionLength := TokenStr - TokenStart;
- SetLength(Result, OldLength + SectionLength);
- if SectionLength > 0 then
- Move(TokenStart^, Result[OldLength + 1], SectionLength);
- Inc(TokenStr);
- end;
- function TWebIDLScanner.ReadIdent: UTF8String;
- Var
- TokenStart : PChar;
- SectionLength : Integer;
- begin
- Result:='';
- if TokenStr[0]='_' then
- Inc(TokenStr);
- if TokenStr[0]=#0 then
- Exit;
- TokenStart := TokenStr;
- repeat
- Inc(TokenStr);
- until not (TokenStr[0] in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
- SectionLength := TokenStr - TokenStart;
- SetString(Result, TokenStart, SectionLength);
- end;
- function TWebIDLScanner.FetchLine: Boolean;
- begin
- Result:=FCurRow<FSource.Count;
- if Result then
- begin
- FCurLine:=FSource[FCurRow];
- TokenStr:=PChar(FCurLine);
- Inc(FCurRow);
- end
- else
- begin
- FCurLine:='';
- TokenStr:=nil;
- end;
- end;
- function TWebIDLScanner.ReadNumber(var S : UTF8String) : TIDLToken;
- Var
- TokenStart : PChar;
- SectionLength : Integer;
- isHex : Boolean;
- begin
- isHex:=False;
- TokenStart := TokenStr;
- Result:=tkNumberInteger;
- while true do
- begin
- Inc(TokenStr);
- SectionLength := TokenStr - TokenStart;
- case TokenStr[0] of
- 'x':
- begin
- isHex:=True;
- end;
- 'I':
- begin
- repeat
- Inc(TokenStr);
- until not (TokenStr[0] in ['i','n','f','t','y']);
- Result:=tkNegInfinity; // We'll check at the end if the string is actually correct
- break;
- end;
- '.':
- begin
- Result:=tkNumberFloat;
- if TokenStr[1] in ['0'..'9', 'e', 'E'] then
- begin
- Inc(TokenStr);
- repeat
- Inc(TokenStr);
- until not (TokenStr[0] in ['0'..'9', 'e', 'E','-','+']);
- end;
- break;
- end;
- '0'..'9':
- begin
- end;
- 'a'..'d','f',
- 'A'..'D','F':
- begin
- if Not isHex then
- Error(SErrUnknownToken,[S]);
- end;
- 'e', 'E':
- begin
- if not IsHex then
- begin
- Inc(TokenStr);
- if TokenStr[0] in ['-','+'] then
- Inc(TokenStr);
- while TokenStr[0] in ['0'..'9'] do
- Inc(TokenStr);
- break;
- end;
- end;
- else
- if (SectionLength=1) and (TokenStart[0]='-') then
- result:=tkMinus;
- break;
- end;
- end;
- SectionLength := TokenStr - TokenStart;
- S:='';
- SetString(S, TokenStart, SectionLength);
- if (Result=tkNegInfinity) and (S<>'-Infinity') then
- Error(SErrUnknownToken,[S]);
- if (Result=tkMinus) and (S<>'-') then
- Error(SErrUnknownTerminator,[s]);
- end;
- function TWebIDLScanner.GetErrorPos: String;
- begin
- Result:=Format('Scanner error at line %d, pos %d: ',[CurRow,CurColumn]);
- end;
- function TWebIDLScanner.ReadComment : UTF8String;
- Var
- TokenStart : PChar;
- SectionLength : Integer;
- EOC,IsStar : Boolean;
- S : String;
- begin
- Result:='';
- TokenStart:=TokenStr;
- Inc(TokenStr);
- Case Tokenstr[0] of
- '/' : begin
- SectionLength := Length(FCurLine)- (TokenStr - PChar(FCurLine));
- Inc(TokenStr);
- SetString(Result, TokenStr, SectionLength);
- Fetchline;
- end;
- '*' :
- begin
- IsStar:=False;
- Inc(TokenStr);
- TokenStart:=TokenStr;
- Repeat
- if (TokenStr[0]=#0) then
- begin
- SectionLength := (TokenStr - TokenStart);
- S:='';
- SetString(S, TokenStart, SectionLength);
- Result:=Result+S;
- if not fetchLine then
- Error(SUnterminatedComment, [CurRow,CurCOlumn,TokenStr[0]]);
- TokenStart:=TokenStr;
- end;
- IsStar:=TokenStr[0]='*';
- Inc(TokenStr);
- EOC:=(isStar and (TokenStr[0]='/'));
- Until EOC;
- if EOC then
- begin
- SectionLength := (TokenStr - TokenStart-1);
- S:='';
- SetString(S, TokenStart, SectionLength);
- Result:=Result+S;
- Inc(TokenStr);
- end;
- end;
- else
- Error(SErrInvalidCharacter, [CurRow,CurCOlumn,TokenStr[0]]);
- end;
- end;
- function TWebIDLScanner.DetermineToken : TIDLToken;
- begin
- Result:=High(TIDLToken);
- While (Result<>tkIdentifier) and (TokenInfos[result]<>FCurTokenString) do
- Result:=Pred(Result);
- if Result in VersionNonTokens[Version] then
- Result:=tkIdentifier;
- // if Result=tkIdentifier then
- // Error(SErrExpectedTokenButWasIdentifier,[FCurTokenString]);
- end;
- function TWebIDLScanner.DetermineToken2 : TIDLToken;
- Const
- InfTokens = [tkNan,tkInfinity,tkNegInfinity,tkByteString,tkUSVString,tkDOMString,tkPromise,tkFrozenArray];
- begin
- For Result in InfTokens do
- if (TokenInfos[result]=FCurTokenString) then exit;
- Result:=tkIdentifier;
- end;
- function TWebIDLScanner.DoFetchToken: TIDLToken;
- Procedure SetSingleToken(tk : TIDLToken);
- begin
- FCurTokenString:=TokenStr[0];
- Inc(TokenStr);
- Result :=tk;
- end;
- begin
- repeat
- if TokenStr = nil then
- if not FetchLine then
- begin
- Result := tkEOF;
- FCurToken := Result;
- exit;
- end;
- FCurTokenString := '';
- case TokenStr[0] of
- #0: // Empty line
- begin
- if not FetchLine then
- Result:=tkEOF
- else
- Result := tkWhitespace;
- end;
- #9, ' ':
- begin
- Result := tkWhitespace;
- repeat
- Inc(TokenStr);
- if TokenStr[0] = #0 then
- if not FetchLine then
- begin
- FCurToken := Result;
- exit;
- end;
- until not (TokenStr[0] in [#9, ' ']);
- end;
- '"':
- begin
- FCurTokenString:=ReadString;
- Result := tkString;
- end;
- ',':
- begin
- Inc(TokenStr);
- Result := tkComma;
- end;
- '0'..'9','-':
- begin
- Result := ReadNumber(FCurTokenString);
- end;
- ':': SetSingleToken(tkColon);
- '(': SetSingleToken(tkBracketOpen);
- ')': SetSingleToken(tkBracketClose);
- '{': SetSingleToken(tkCurlyBraceOpen);
- '}': SetSingleToken(tkCurlyBraceClose);
- '[': SetSingleToken(tkSquaredBraceOpen);
- ']': SetSingleToken(tkSquaredBraceClose);
- '<': SetSingleToken(tkLess);
- '=': SetSingleToken(tkEqual);
- '>': SetSingleToken(tkLarger);
- '?' : SetSingleToken(tkQuestionmark);
- ';' : SetSingleToken(tkSemicolon);
- '.' :
- begin
- inc(TokenStr);
- if TokenStr[0]<>'.' then
- begin
- Dec(Tokenstr);// Setsingletoken advances
- SetSingleToken(tkDot);
- end
- else
- begin
- inc(TokenStr);
- if TokenStr[0]<>'.' then
- Error(SErrInvalidEllipsis);
- inc(TokenStr);
- FCurTokenString:='...';
- Result:=tkEllipsis;
- end;
- end;
- '/' :
- begin
- FCurTokenString:=ReadComment;
- Result:=tkComment;
- end;
- 'a'..'z':
- begin
- FCurTokenString:=ReadIdent;
- Result:=DetermineToken;
- end;
- 'A'..'Z','_':
- begin
- FCurTokenString:=ReadIdent;
- Result:=DetermineToken2;
- end;
- '#':
- begin
- Result:=tkComment;
- HandleDirective;
- end
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
- end;
- until FSkipMode=wisSkipNone;
- FCurToken := Result;
- end;
- procedure TWebIDLScanner.HandleDirective;
- var
- p: PChar;
- aDirective: string;
- begin
- inc(TokenStr);
- p:=TokenStr;
- while TokenStr^ in ['a'..'z','A'..'Z','_','0'..'9'] do inc(TokenStr);
- SetString(aDirective, p, TokenStr-p);
- SkipWhitespace;
- case lowercase(aDirective) of
- 'ifdef': HandleIfDef;
- 'else': HandleElse;
- 'endif': HandleEndIf;
- end;
- SkipWhitespace;
- SkipLineBreak;
- end;
- procedure TWebIDLScanner.HandleIfDef;
- var
- StartP: PChar;
- aName: string;
- begin
- PushSkipMode;
- if FIsSkipping then
- FSkipMode := wisSkipAll
- else
- begin
- StartP:=TokenStr;
- while TokenStr^ in ['a'..'z','A'..'Z','0'..'9','_'] do
- inc(TokenStr);
- SetString(aName,StartP,TokenStr-StartP);
- if IsDefined(aName) then
- FSkipMode := wisSkipElseBranch
- else
- begin
- FSkipMode := wisSkipIfBranch;
- FIsSkipping := true;
- end;
- //If LogEvent(sleConditionals) then
- // if FSkipMode=wisSkipElseBranch then
- // DoLog(mtInfo,nLogIFDefAccepted,sLogIFDefAccepted,[aName])
- // else
- // DoLog(mtInfo,nLogIFDefRejected,sLogIFDefRejected,[aName]);
- end;
- end;
- procedure TWebIDLScanner.HandleElse;
- begin
- if FSkipStackIndex = 0 then
- Error('Invalid #Else');
- if FSkipMode = wisSkipIfBranch then
- FIsSkipping := false
- else if FSkipMode = wisSkipElseBranch then
- FIsSkipping := true;
- end;
- procedure TWebIDLScanner.HandleEndIf;
- begin
- if FSkipStackIndex = 0 then
- Error('Invalid #EndIf');
- Dec(FSkipStackIndex);
- FSkipMode := FSkipModeStack[FSkipStackIndex];
- FIsSkipping := FIsSkippingStack[FSkipStackIndex];
- end;
- procedure TWebIDLScanner.PushSkipMode;
- begin
- if FSkipStackIndex = High(FSkipModeStack) then
- Error('Nesting of #IFxxx too deep');
- FSkipModeStack[FSkipStackIndex] := FSkipMode;
- FIsSkippingStack[FSkipStackIndex] := FIsSkipping;
- Inc(FSkipStackIndex);
- end;
- function TWebIDLScanner.IsDefined(const aName: string): boolean;
- begin
- Result:=false;
- if aName='' then ;
- end;
- procedure TWebIDLScanner.SkipWhitespace;
- begin
- while TokenStr^ in [' ',#9] do
- inc(TokenStr);
- end;
- procedure TWebIDLScanner.SkipLineBreak;
- begin
- case TokenStr^ of
- #10: inc(TokenStr);
- #13:
- begin
- inc(TokenStr);
- if TokenStr^=#10 then
- inc(TokenStr);
- end;
- end;
- end;
- function TWebIDLScanner.GetCurColumn: Integer;
- begin
- Result := TokenStr - PChar(CurLine);
- end;
- end.
|