123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569 |
- {
- This file is part of the Free Component Library
- JSON source lexical scanner
- Copyright (c) 2007 by Michael Van Canneyt [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$mode objfpc}
- {$h+}
- { $INLINE ON}
- unit jsonscanner;
- interface
- uses SysUtils, Classes;
- resourcestring
- SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
- SUnterminatedComment = 'Unterminated comment at line %d, pos %d: ''%s''';
- SErrOpenString = 'string exceeds end of line %d';
- type
- TJSONToken = (
- tkEOF,
- tkWhitespace,
- tkString,
- tkNumber,
- tkTrue,
- tkFalse,
- tkNull,
- // Simple (one-character) tokens
- tkComma, // ','
- tkColon, // ':'
- tkCurlyBraceOpen, // '{'
- tkCurlyBraceClose, // '}'
- tkSquaredBraceOpen, // '['
- tkSquaredBraceClose, // ']'
- tkIdentifier, // Any Javascript identifier
- tkComment,
- tkUnknown
- );
- EScannerError = class(EParserError);
- TJSONOption = (joUTF8,joStrict,joComments,joIgnoreTrailingComma);
- TJSONOptions = set of TJSONOption;
- Const
- DefaultOptions = [joUTF8];
- Type
- { TJSONScanner }
- TJSONScanner = class
- private
- FSource: RawByteString;
- FCurPos : PAnsiChar; // Position inside total string
- FCurRow: Integer;
- FCurToken: TJSONToken;
- FCurTokenString: string;
- FCurLine: PChar;
- FTokenStr: PAnsiChar; // position inside FCurLine
- FEOL : PAnsiChar; // EOL
- FOptions : TJSONOptions;
- function GetCurColumn: Integer; inline;
- function GetCurLine: string;
- function GetO(AIndex: TJSONOption): Boolean;
- procedure SetO(AIndex: TJSONOption; AValue: Boolean);
- protected
- procedure Error(const Msg: string);overload;
- procedure Error(const Msg: string; Const Args: array of const);overload;
- // function DoFetchToken: TJSONToken; inline;
- public
- constructor Create(Source : TStream; AUseUTF8 : Boolean = True); overload; deprecated 'use options form instead';
- constructor Create(Source: TStream; AOptions: TJSONOptions); overload;
- constructor Create(const aSource : RawByteString; AUseUTF8 : Boolean = True); overload; deprecated 'use options form instead';
- constructor Create(const aSource: RawByteString; AOptions: TJSONOptions); overload;
- function FetchToken: TJSONToken;
- property CurLine: string read GetCurLine;
- property CurRow: Integer read FCurRow;
- property CurColumn: Integer read GetCurColumn;
- property CurToken: TJSONToken read FCurToken;
- property CurTokenString: string read FCurTokenString;
- // Use strict JSON: " for strings, object members are strings, not identifiers
- Property Strict : Boolean Index joStrict Read GetO Write SetO ; deprecated 'use options instead';
- // if set to TRUE, then strings will be converted to UTF8 ansistrings, not system codepage ansistrings.
- Property UseUTF8 : Boolean index joUTF8 Read GetO Write SetO; deprecated 'Use options instead';
- // Parsing options
- Property Options : TJSONOptions Read FOptions Write FOptions;
- end;
- const
- TokenInfos: array[TJSONToken] of string = (
- 'EOF',
- 'Whitespace',
- 'String',
- 'Number',
- 'True',
- 'False',
- 'Null',
- ',',
- ':',
- '{',
- '}',
- '[',
- ']',
- 'identifier',
- 'comment',
- ''
- );
- implementation
- constructor TJSONScanner.Create(Source : TStream; AUseUTF8 : Boolean = True);
- Var
- O : TJSONOptions;
- begin
- O:=DefaultOptions;
- if AUseUTF8 then
- Include(O,joUTF8)
- else
- Exclude(O,joUTF8);
- Create(Source,O);
- end;
- constructor TJSONScanner.Create(Source: TStream; AOptions: TJSONOptions);
- Var
- S : RawByteString;
- begin
- S:='';
- SetLength(S,Source.Size);
- if Length(S)>0 then
- Source.ReadBuffer(S[1],Length(S));
- Create(S,AOptions)
- end;
- constructor TJSONScanner.Create(const aSource : RawByteString; AUseUTF8 : Boolean = True);
- Var
- O : TJSONOptions;
- begin
- O:=DefaultOptions;
- if AUseUTF8 then
- Include(O,joUTF8)
- else
- Exclude(O,joUTF8);
- Create(aSource,O);
- end;
- constructor TJSONScanner.Create(const aSource: RawByteString; AOptions: TJSONOptions);
- begin
- FSource:=aSource;
- FCurPos:=PAnsiChar(FSource);
- FOptions:=AOptions;
- end;
- function TJSONScanner.GetCurColumn: Integer;
- begin
- Result := FTokenStr - FCurLine;
- end;
- procedure TJSONScanner.Error(const Msg: string);
- begin
- raise EScannerError.Create(Msg);
- end;
- procedure TJSONScanner.Error(const Msg: string; const Args: array of const);
- begin
- raise EScannerError.CreateFmt(Msg, Args);
- end;
- function TJSONScanner.FetchToken: TJSONToken;
- (*
- procedure dumpcurrent;
- begin
- Writeln('Start of line : ',FCurLine);
- Writeln('Cur pos : ',FCurPos);
- Writeln('Start of token : ',FTokenstr);
- Writeln('End of line : ',FTokenstr);
- end;
- *)
- function FetchLine: Boolean;
- begin
- Result:=(FCurPos<>Nil) and (FCurPos^<>#0);
- if Result then
- begin
- FCurLine:=FCurPos;
- FTokenStr:=FCurPos;
- While Not (FCurPos^ in [#0,#10,#13]) do
- Inc(FCurPos);
- FEOL:=FCurPos;
- While (FCurPos^<>#0) and (FCurPos^ in [#10,#13]) do
- begin
- if (FCurPos^=#13) and (FCurPos[1]=#10) then
- Inc(FCurPos); // Skip CR-LF
- Inc(FCurPos); // To start of next line
- Inc(FCurRow); // Increase line index
- end;
- // Len:=FEOL-FTokenStr;
- // FTokenStr:=FCurPos;
- end
- else
- begin
- FCurLine:=Nil;
- FTokenStr:=nil;
- end;
- end;
- var
- TokenStart: PChar;
- it : TJSONToken;
- I : Integer;
- OldLength, SectionLength, tstart,tcol, u1,u2: Integer;
- C , c2: char;
- S : String[4];
- IsStar,EOC: Boolean;
- Procedure MaybeAppendUnicode;
- Var
- u : UTF8String;
- begin
- // if there is a leftover \u, append
- if (u1<>0) then
- begin
- if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
- U:=Utf8Encode(WideString(WideChar(u1))) // ToDo: use faster function
- else
- U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss?
- FCurTokenString:=FCurTokenString+U;
- OldLength:=Length(FCurTokenString);
- u1:=0;
- end;
- end;
- begin
- if (FTokenStr = nil) or (FTokenStr=FEOL) then
- begin
- if not FetchLine then
- begin
- Result := tkEOF;
- FCurToken := Result;
- exit;
- end;
- end;
- FCurTokenString := '';
- case FTokenStr^ of
- #0: // Empty line
- begin
- FetchLine;
- Result := tkWhitespace;
- end;
- #9, ' ', #10, #13:
- begin
- Result := tkWhitespace;
- repeat
- if FTokenStr = FEOL then
- begin
- if not FetchLine then
- begin
- FCurToken := Result;
- exit;
- end
- end
- else
- Inc(FTokenStr);
- until not (FTokenStr[0] in [#9, ' ']);
- end;
- '"','''':
- begin
- C:=FTokenStr^;
- If (C='''') and (joStrict in Options) then
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
- Inc(FTokenStr);
- TokenStart := FTokenStr;
- OldLength := 0;
- FCurTokenString := '';
- u1:=0;
- while not (FTokenStr^ in [#0,C]) do
- begin
- if (FTokenStr^='\') then
- begin
- // Save length
- SectionLength := FTokenStr - TokenStart;
- Inc(FTokenStr);
- // Read escaped token
- Case FTokenStr^ of
- '"' : S:='"';
- '''' : S:='''';
- 't' : S:=#9;
- 'b' : S:=#8;
- 'n' : S:=#10;
- 'r' : S:=#13;
- 'f' : S:=#12;
- '\' : S:='\';
- '/' : S:='/';
- 'u' : begin
- S:='0000';
- u2:=0;
- For I:=1 to 4 do
- begin
- Inc(FTokenStr);
- c2:=FTokenStr^;
- Case c2 of
- '0'..'9': u2:=u2*16+ord(c2)-ord('0');
- 'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10;
- 'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10;
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
- end;
- end;
- // ToDo: 4-bytes UTF16
- if u1<>0 then
- begin
- if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
- S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
- else
- S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
- u1:=0;
- end
- else
- begin
- S:='';
- u1:=u2;
- end
- end;
- #0 : Error(SErrOpenString,[FCurRow]);
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
- end;
- I:=Length(S);
- if (SectionLength+I>0) then
- begin
- // If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it.
- // example: \u00f8\"
- if (I=1) and (u1<>0) then
- MaybeAppendUnicode;
- SetLength(FCurTokenString, OldLength + SectionLength+i);
- if SectionLength > 0 then
- Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
- if I>0 then
- Move(S[1],FCurTokenString[OldLength + SectionLength+1],i);
- Inc(OldLength, SectionLength+I);
- end;
- // Next char
- TokenStart := FTokenStr+1;
- end
- else if u1<>0 then
- MaybeAppendUnicode;
- if FTokenStr^ = #0 then
- Error(SErrOpenString,[FCurRow]);
- Inc(FTokenStr);
- end;
- if FTokenStr^ = #0 then
- Error(SErrOpenString,[FCurRow]);
- if u1<>0 then
- MaybeAppendUnicode;
- SectionLength := FTokenStr - TokenStart;
- SetLength(FCurTokenString, OldLength + SectionLength);
- if SectionLength > 0 then
- Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
- Inc(FTokenStr);
- Result := tkString;
- end;
- ',':
- begin
- Inc(FTokenStr);
- Result := tkComma;
- end;
- '0'..'9','.','-':
- begin
- TokenStart := FTokenStr;
- while true do
- begin
- Inc(FTokenStr);
- case FTokenStr^ of
- '.':
- begin
- if FTokenStr[1] in ['0'..'9', 'e', 'E'] then
- begin
- Inc(FTokenStr);
- repeat
- Inc(FTokenStr);
- until not (FTokenStr^ in ['0'..'9', 'e', 'E','-','+']);
- end;
- break;
- end;
- '0'..'9': ;
- 'e', 'E':
- begin
- Inc(FTokenStr);
- if FTokenStr^ in ['-','+'] then
- Inc(FTokenStr);
- while FTokenStr^ in ['0'..'9'] do
- Inc(FTokenStr);
- break;
- end;
- else
- if {(FTokenStr<>FEOL) and }not (FTokenStr^ in [#13,#10,#0,'}',']',',',#9,' ']) then
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
- break;
- end;
- end;
- SectionLength := FTokenStr - TokenStart;
- FCurTokenString:='';
- SetString(FCurTokenString, TokenStart, SectionLength);
- If (FCurTokenString[1]='.') then
- FCurTokenString:='0'+FCurTokenString;
- Result := tkNumber;
- end;
- ':':
- begin
- Inc(FTokenStr);
- Result := tkColon;
- end;
- '{':
- begin
- Inc(FTokenStr);
- Result := tkCurlyBraceOpen;
- end;
- '}':
- begin
- Inc(FTokenStr);
- Result := tkCurlyBraceClose;
- end;
- '[':
- begin
- Inc(FTokenStr);
- Result := tkSquaredBraceOpen;
- end;
- ']':
- begin
- Inc(FTokenStr);
- Result := tkSquaredBraceClose;
- end;
- '/' :
- begin
- if Not (joComments in Options) then
- Error(SErrInvalidCharacter, [CurRow,CurCOlumn,FTokenStr[0]]);
- TokenStart:=FTokenStr;
- Inc(FTokenStr);
- Case FTokenStr^ of
- '/' : begin
- FCurTokenString:='';
- Inc(FTokenStr);
- TokenStart:=FTokenStr;
- SectionLength := PChar(FEOL)-TokenStart;
- SetString(FCurTokenString, TokenStart, SectionLength);
- FTokenStr:=FCurPos;
- end;
- '*' :
- begin
- IsStar:=False;
- Inc(FTokenStr);
- TokenStart:=FTokenStr;
- Repeat
- if (FTokenStr^=#0) then
- begin
- SectionLength := (FTokenStr - TokenStart);
- S:='';
- SetString(S, TokenStart, SectionLength);
- FCurtokenString:=FCurtokenString+S;
- if not fetchLine then
- Error(SUnterminatedComment, [CurRow,CurCOlumn,FTokenStr[0]]);
- TokenStart:=FTokenStr;
- end;
- IsStar:=FTokenStr^='*';
- Inc(FTokenStr);
- EOC:=(isStar and (FTokenStr^='/'));
- Until EOC;
- if EOC then
- begin
- SectionLength := (FTokenStr - TokenStart-1);
- S:='';
- SetString(S, TokenStart, SectionLength);
- FCurtokenString:=FCurtokenString+S;
- Inc(FTokenStr);
- end;
- end;
- else
- Error(SErrInvalidCharacter, [CurRow,CurCOlumn,FTokenStr[0]]);
- end;
- Result:=tkComment;
- end;
- 'a'..'z','A'..'Z','_':
- begin
- tstart:=CurRow;
- Tcol:=CurColumn;
- TokenStart := FTokenStr;
- repeat
- Inc(FTokenStr);
- until not (FTokenStr^ in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
- SectionLength := FTokenStr - TokenStart;
- FCurTokenString:='';
- SetString(FCurTokenString, TokenStart, SectionLength);
- for it := tkTrue to tkNull do
- if CompareText(CurTokenString, TokenInfos[it]) = 0 then
- begin
- Result := it;
- FCurToken := Result;
- exit;
- end;
- if (joStrict in Options) then
- Error(SErrInvalidCharacter, [tStart,tcol,TokenStart[0]])
- else
- Result:=tkIdentifier;
- end;
- else
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
- end;
- FCurToken := Result;
- end;
- {function TJSONScanner.FetchToken: TJSONToken;
- begin
- Result:=DoFetchToken;
- end;}
- function TJSONScanner.GetCurLine: string;
- begin
- Result:='';
- if FCurLine<>Nil then
- begin
- SetLength(Result,FEOL-FCurLine);
- if Length(Result)>0 then
- Move(FCurLine^,Result[1],Length(Result));
- end;
- end;
- function TJSONScanner.GetO(AIndex: TJSONOption): Boolean;
- begin
- Result:=AIndex in FOptions;
- end;
- procedure TJSONScanner.SetO(AIndex: TJSONOption; AValue: Boolean);
- begin
- If AValue then
- Include(Foptions,AIndex)
- else
- Exclude(Foptions,AIndex)
- end;
- end.
|