|
@@ -30,7 +30,7 @@ uses SysUtils, Classes;
|
|
|
|
|
|
resourcestring
|
|
resourcestring
|
|
SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
|
|
SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
|
|
- SUnterminatedComment = 'Unterminated comment at line %d, pos %d: ''%s''';
|
|
|
|
|
|
+ SUnterminatedComment = 'Unterminated comment at line %d, pos %d';
|
|
SErrOpenString = 'string exceeds end of line %d';
|
|
SErrOpenString = 'string exceeds end of line %d';
|
|
|
|
|
|
type
|
|
type
|
|
@@ -69,24 +69,49 @@ Type
|
|
|
|
|
|
TJSONScanner = class
|
|
TJSONScanner = class
|
|
private
|
|
private
|
|
- FSource: RawByteString;
|
|
|
|
- FCurPos : PAnsiChar; // Position inside total string
|
|
|
|
|
|
+ FCurPos, FSourceStart: PAnsiChar;
|
|
FCurRow: Integer;
|
|
FCurRow: Integer;
|
|
FCurToken: TJSONToken;
|
|
FCurToken: TJSONToken;
|
|
- FCurTokenString: AnsiString;
|
|
|
|
- FCurLine: PAnsiChar;
|
|
|
|
- FTokenStr: PAnsiChar; // position inside FCurLine
|
|
|
|
- FEOL : PAnsiChar; // EOL
|
|
|
|
|
|
+ FCurTokenString: AnsiString; // Calculated lazily from FParts. FNParts = -1 if ready.
|
|
|
|
+
|
|
|
|
+ // Describes how to build FCurTokenString if asked.
|
|
|
|
+ // FParts[i] >= 0: piece with start = FSourceStart + FParts[i] and length = FParts[i + 1].
|
|
|
|
+ // FParts[i] = -1 - N: Unicode codepoint N.
|
|
|
|
+ FParts: array of SizeInt;
|
|
|
|
+ FNParts: SizeInt; // -1 if FCurTokenString is ready.
|
|
|
|
+
|
|
FOptions : TJSONOptions;
|
|
FOptions : TJSONOptions;
|
|
|
|
+ FCurLine, FCurLineEnd: PAnsiChar; // FCurLineEnd = nil if needs to be calculated.
|
|
|
|
+ FSource: RawByteString;
|
|
|
|
+
|
|
|
|
+ class function ToOptions(AUseUTF8: boolean): TJSONOptions; static;
|
|
function GetCurColumn: Integer; inline;
|
|
function GetCurColumn: Integer; inline;
|
|
function GetCurLine: AnsiString;
|
|
function GetCurLine: AnsiString;
|
|
function GetO(AIndex: TJSONOption): Boolean;
|
|
function GetO(AIndex: TJSONOption): Boolean;
|
|
function GetAbsolutePos: Integer;
|
|
function GetAbsolutePos: Integer;
|
|
procedure SetO(AIndex: TJSONOption; AValue: Boolean);
|
|
procedure SetO(AIndex: TJSONOption; AValue: Boolean);
|
|
|
|
+ function CountChars(start, ed: PAnsiChar): SizeInt;
|
|
|
|
+
|
|
|
|
+ function GrowParts(by: SizeInt): PSizeInt;
|
|
|
|
+ procedure AddPiece(start, ed: PAnsiChar);
|
|
|
|
+ procedure AddCodepoint(cp: uint32);
|
|
|
|
+ function GetCurTokenString: string;
|
|
|
|
+ procedure BuildCurTokenString;
|
|
|
|
+ class function CodepointToASCII(cp: uint32; Rp: PAnsiChar): SizeInt; static;
|
|
|
|
+
|
|
|
|
+ function ScanNewline(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+ function ScanString(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+ function ScanHex(Sp: PAnsiChar; out v: uint32): PAnsiChar;
|
|
|
|
+ function ScanNumber(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+ function ScanLineComment(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+ function ScanSlashStarComment(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+ function RecognizeKeyword(Sp: PAnsiChar; N: SizeInt): TJSONToken;
|
|
|
|
+
|
|
protected
|
|
protected
|
|
procedure Error(const Msg: string);overload;
|
|
procedure Error(const Msg: string);overload;
|
|
procedure Error(const Msg: string; Const Args: array of const);overload;
|
|
procedure Error(const Msg: string; Const Args: array of const);overload;
|
|
-// function DoFetchToken: TJSONToken; inline;
|
|
|
|
|
|
+ procedure InvalidCharacter(Sp: PAnsiChar);
|
|
|
|
+
|
|
public
|
|
public
|
|
constructor Create(Source : TStream; AUseUTF8 : Boolean = True); overload; deprecated 'use options form instead';
|
|
constructor Create(Source : TStream; AUseUTF8 : Boolean = True); overload; deprecated 'use options form instead';
|
|
constructor Create(Source: TStream; AOptions: TJSONOptions); overload;
|
|
constructor Create(Source: TStream; AOptions: TJSONOptions); overload;
|
|
@@ -100,7 +125,7 @@ Type
|
|
property CurColumn: Integer read GetCurColumn;
|
|
property CurColumn: Integer read GetCurColumn;
|
|
Property AbsolutePos : Integer Read GetAbsolutePos;
|
|
Property AbsolutePos : Integer Read GetAbsolutePos;
|
|
property CurToken: TJSONToken read FCurToken;
|
|
property CurToken: TJSONToken read FCurToken;
|
|
- property CurTokenString: Ansistring read FCurTokenString;
|
|
|
|
|
|
+ property CurTokenString: Ansistring read GetCurTokenString;
|
|
// Use strict JSON: " for strings, object members are strings, not identifiers
|
|
// Use strict JSON: " for strings, object members are strings, not identifiers
|
|
Property Strict : Boolean Index joStrict Read GetO Write SetO ; deprecated 'use options instead';
|
|
Property Strict : Boolean Index joStrict Read GetO Write SetO ; deprecated 'use options instead';
|
|
// if set to TRUE, then strings will be converted to UTF8 ansistrings, not system codepage ansistrings.
|
|
// if set to TRUE, then strings will be converted to UTF8 ansistrings, not system codepage ansistrings.
|
|
@@ -132,495 +157,161 @@ const
|
|
|
|
|
|
implementation
|
|
implementation
|
|
|
|
|
|
-constructor TJSONScanner.Create(Source : TStream; AUseUTF8 : Boolean = True);
|
|
|
|
-
|
|
|
|
-Var
|
|
|
|
- O : TJSONOptions;
|
|
|
|
|
|
|
|
|
|
+class function TJSONScanner.ToOptions(AUseUTF8: boolean): TJSONOptions;
|
|
begin
|
|
begin
|
|
- O:=DefaultOptions;
|
|
|
|
if AUseUTF8 then
|
|
if AUseUTF8 then
|
|
- Include(O,joUTF8)
|
|
|
|
|
|
+ Result := DefaultOptions + [joUTF8]
|
|
else
|
|
else
|
|
- Exclude(O,joUTF8);
|
|
|
|
- Create(Source,O);
|
|
|
|
|
|
+ Result := DefaultOptions - [joUTF8];
|
|
end;
|
|
end;
|
|
|
|
|
|
-constructor TJSONScanner.Create(Source: TStream; AOptions: TJSONOptions);
|
|
|
|
-
|
|
|
|
- procedure SkipStreamBOM;
|
|
|
|
- Var
|
|
|
|
- OldPos : integer;
|
|
|
|
- Header : array[0..3] of byte;
|
|
|
|
- begin
|
|
|
|
- OldPos := Source.Position;
|
|
|
|
- FillChar(Header{%H-}, SizeOf(Header), 0);
|
|
|
|
- if Source.Read(Header, 3) = 3 then
|
|
|
|
- if (Header[0]=$EF) and (Header[1]=$BB) and (Header[2]=$BF) then
|
|
|
|
- exit;
|
|
|
|
- Source.Position := OldPos;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-Var
|
|
|
|
- S : RawByteString;
|
|
|
|
|
|
+constructor TJSONScanner.Create(Source : TStream; AUseUTF8 : Boolean = True);
|
|
|
|
+begin
|
|
|
|
+ Create(Source, ToOptions(AUseUTF8));
|
|
|
|
+end;
|
|
|
|
|
|
|
|
+constructor TJSONScanner.Create(Source: TStream; AOptions: TJSONOptions);
|
|
begin
|
|
begin
|
|
- if (joBOMCheck in aOptions) then
|
|
|
|
- SkipStreamBom;
|
|
|
|
- S:='';
|
|
|
|
- SetLength(S,Source.Size-Source.Position);
|
|
|
|
- if Length(S)>0 then
|
|
|
|
- Source.ReadBuffer(S[1],Length(S));
|
|
|
|
- Create(S,AOptions)
|
|
|
|
|
|
+ SetLength(FSource,Source.Size-Source.Position);
|
|
|
|
+ Source.ReadBuffer(Pointer(FSource)^,Length(FSource));
|
|
|
|
+ Create(FSource,AOptions);
|
|
end;
|
|
end;
|
|
|
|
|
|
constructor TJSONScanner.Create(const aSource : RawByteString; AUseUTF8 : Boolean = True);
|
|
constructor TJSONScanner.Create(const aSource : RawByteString; AUseUTF8 : Boolean = True);
|
|
-Var
|
|
|
|
- O : TJSONOptions;
|
|
|
|
-
|
|
|
|
begin
|
|
begin
|
|
- O:=DefaultOptions;
|
|
|
|
- if AUseUTF8 then
|
|
|
|
- Include(O,joUTF8)
|
|
|
|
- else
|
|
|
|
- Exclude(O,joUTF8);
|
|
|
|
- Create(aSource,O);
|
|
|
|
|
|
+ Create(aSource,ToOptions(AUseUTF8));
|
|
end;
|
|
end;
|
|
|
|
|
|
constructor TJSONScanner.Create(const aSource: RawByteString; AOptions: TJSONOptions);
|
|
constructor TJSONScanner.Create(const aSource: RawByteString; AOptions: TJSONOptions);
|
|
|
|
+var
|
|
|
|
+ Sp: PAnsiChar;
|
|
begin
|
|
begin
|
|
FSource:=aSource;
|
|
FSource:=aSource;
|
|
- FCurPos:=PAnsiChar(FSource);
|
|
|
|
- if FCurPos<>Nil then
|
|
|
|
- FCurRow:=1;
|
|
|
|
|
|
+ Sp:=PAnsiChar(FSource);
|
|
|
|
+ if (joBOMCheck in aOptions) and (ord(Sp[0]) = $EF) and (ord(Sp[1]) = $BB) and (ord(Sp[2]) = $BF) then
|
|
|
|
+ inc(Sp,3);
|
|
|
|
+ FSourceStart:=Sp;
|
|
|
|
+ FCurPos:=Sp;
|
|
|
|
+ FCurLine:=Sp;
|
|
|
|
+ FCurRow:=1;
|
|
FOptions:=AOptions;
|
|
FOptions:=AOptions;
|
|
end;
|
|
end;
|
|
|
|
|
|
function TJSONScanner.GetCurColumn: Integer;
|
|
function TJSONScanner.GetCurColumn: Integer;
|
|
begin
|
|
begin
|
|
- Result := FTokenStr - FCurLine;
|
|
|
|
|
|
+ Result := FCurPos - FCurLine;
|
|
end;
|
|
end;
|
|
|
|
|
|
-
|
|
|
|
procedure TJSONScanner.Error(const Msg: string);
|
|
procedure TJSONScanner.Error(const Msg: string);
|
|
begin
|
|
begin
|
|
raise EScannerError.Create(Msg);
|
|
raise EScannerError.Create(Msg);
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+procedure TJSONScanner.InvalidCharacter(Sp: PAnsiChar);
|
|
|
|
+begin
|
|
|
|
+ Error(SErrInvalidCharacter, [CurRow, 1 + CountChars(FCurLine, Sp), Sp^]);
|
|
|
|
+end;
|
|
|
|
+
|
|
procedure TJSONScanner.Error(const Msg: string; const Args: array of const);
|
|
procedure TJSONScanner.Error(const Msg: string; const Args: array of const);
|
|
begin
|
|
begin
|
|
raise EScannerError.CreateFmt(Msg, Args);
|
|
raise EScannerError.CreateFmt(Msg, Args);
|
|
end;
|
|
end;
|
|
|
|
|
|
function TJSONScanner.FetchToken: TJSONToken;
|
|
function TJSONScanner.FetchToken: TJSONToken;
|
|
-
|
|
|
|
-(*
|
|
|
|
- procedure dumpcurrent;
|
|
|
|
-
|
|
|
|
- begin
|
|
|
|
- Writeln('Start of line : ',FCurLine);
|
|
|
|
- Writeln('Cur pos : ',FCurPos);
|
|
|
|
- Writeln('Start of token : ',FTokenstr);
|
|
|
|
- Writeln('End of line : ',FTokenstr);
|
|
|
|
- end;
|
|
|
|
-*)
|
|
|
|
- function FetchLine: Boolean;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- begin
|
|
|
|
- Result:=(FCurPos<>Nil) and (FCurPos^<>#0);
|
|
|
|
- if Result then
|
|
|
|
- begin
|
|
|
|
- FCurLine:=FCurPos;
|
|
|
|
- FTokenStr:=FCurPos;
|
|
|
|
- While Not (FCurPos^ in [#0,#10,#13]) do
|
|
|
|
- Inc(FCurPos);
|
|
|
|
- FEOL:=FCurPos;
|
|
|
|
- If (FCurPos^<>#0) then
|
|
|
|
-// While (FCurPos^<>#0) and (FCurPos^ in [#10,#13]) do
|
|
|
|
- begin
|
|
|
|
- if (FCurPos^=#13) and (FCurPos[1]=#10) then
|
|
|
|
- Inc(FCurPos); // Skip CR-LF
|
|
|
|
- Inc(FCurPos); // To start of next line
|
|
|
|
- Inc(FCurRow); // Increase line index
|
|
|
|
- end;
|
|
|
|
-// Len:=FEOL-FTokenStr;
|
|
|
|
-// FTokenStr:=FCurPos;
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- FCurLine:=Nil;
|
|
|
|
- FTokenStr:=nil;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
var
|
|
var
|
|
- TokenStart: PAnsiChar;
|
|
|
|
- it : TJSONToken;
|
|
|
|
- I : Integer;
|
|
|
|
- OldLength, SectionLength, tstart,tcol, u1,u2: Integer;
|
|
|
|
- C , c2: AnsiChar;
|
|
|
|
- S : String[8];
|
|
|
|
- Line : String;
|
|
|
|
- IsStar,EOC: Boolean;
|
|
|
|
-
|
|
|
|
- Procedure MaybeAppendUnicode;
|
|
|
|
-
|
|
|
|
- Var
|
|
|
|
- u : UTF8String;
|
|
|
|
-
|
|
|
|
- begin
|
|
|
|
- // if there is a leftover \u, append
|
|
|
|
- if (u1<>0) then
|
|
|
|
- begin
|
|
|
|
- if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
|
|
|
|
- U:=Utf8Encode(UnicodeString(WideChar(u1))) // ToDo: use faster function
|
|
|
|
- else
|
|
|
|
- U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss?
|
|
|
|
- FCurTokenString:=FCurTokenString+U;
|
|
|
|
- OldLength:=Length(FCurTokenString);
|
|
|
|
- u1:=0;
|
|
|
|
- u2:=0;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
|
|
+ Sp, Start: PAnsiChar;
|
|
begin
|
|
begin
|
|
- if (FTokenStr = nil) or (FTokenStr=FEOL) then
|
|
|
|
- begin
|
|
|
|
- if not FetchLine then
|
|
|
|
- begin
|
|
|
|
- Result := tkEOF;
|
|
|
|
- FCurToken := Result;
|
|
|
|
- exit;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- FCurTokenString := '';
|
|
|
|
- case FTokenStr^ of
|
|
|
|
- #0: // Empty line
|
|
|
|
- begin
|
|
|
|
- FetchLine;
|
|
|
|
- Result := tkWhitespace;
|
|
|
|
- end;
|
|
|
|
- #9, ' ', #10, #13:
|
|
|
|
|
|
+ FNParts := 0;
|
|
|
|
+ Sp := FCurPos;
|
|
|
|
+ // Don't consider such newline a tkWhitespace.
|
|
|
|
+ if Sp^ in [#13, #10] then
|
|
|
|
+ Sp := ScanNewline(Sp);
|
|
|
|
+ case Sp^ of
|
|
|
|
+ #0: Result := tkEOF;
|
|
|
|
+ #9, ' ', #13, #10:
|
|
begin
|
|
begin
|
|
- Result := tkWhitespace;
|
|
|
|
- repeat
|
|
|
|
- if FTokenStr = FEOL then
|
|
|
|
- begin
|
|
|
|
- if not FetchLine then
|
|
|
|
- begin
|
|
|
|
- FCurToken := Result;
|
|
|
|
- exit;
|
|
|
|
- end
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- until not (FTokenStr[0] in [#9, ' ']);
|
|
|
|
|
|
+ while Sp^ in [#9, ' '] do
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ Result := tkWhitespace;
|
|
end;
|
|
end;
|
|
'"','''':
|
|
'"','''':
|
|
begin
|
|
begin
|
|
- C:=FTokenStr^;
|
|
|
|
- If (C='''') and (joStrict in Options) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- TokenStart := FTokenStr;
|
|
|
|
- OldLength := 0;
|
|
|
|
- FCurTokenString := '';
|
|
|
|
- u1:=0;
|
|
|
|
- while not (FTokenStr^ in [#0,C]) do
|
|
|
|
- begin
|
|
|
|
- if (FTokenStr^='\') then
|
|
|
|
- begin
|
|
|
|
- // Save length
|
|
|
|
- SectionLength := FTokenStr - TokenStart;
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- // Read escaped token
|
|
|
|
- Case FTokenStr^ of
|
|
|
|
- '"' : S:='"';
|
|
|
|
- '''' : S:='''';
|
|
|
|
- 't' : S:=#9;
|
|
|
|
- 'b' : S:=#8;
|
|
|
|
- 'n' : S:=#10;
|
|
|
|
- 'r' : S:=#13;
|
|
|
|
- 'f' : S:=#12;
|
|
|
|
- '\' : S:='\';
|
|
|
|
- '/' : S:='/';
|
|
|
|
- 'u' : begin
|
|
|
|
- u2:=0;
|
|
|
|
- For I:=1 to 4 do
|
|
|
|
- begin
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- c2:=FTokenStr^;
|
|
|
|
- Case c2 of
|
|
|
|
- '0'..'9': u2:=u2*16+ord(c2)-ord('0');
|
|
|
|
- 'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10;
|
|
|
|
- 'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10;
|
|
|
|
- else
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- if u1<>0 then
|
|
|
|
- begin
|
|
|
|
- // 4bytes, compose.
|
|
|
|
- if not ((u2>=$DC00) and (u2<=$DFFF)) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,IntToStr(u2)]);
|
|
|
|
- if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
|
|
|
|
- S:=Utf8Encode(UnicodeString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
|
|
|
|
- else
|
|
|
|
- S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
|
|
|
|
- u1:=0;
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- // Surrogate start
|
|
|
|
- if (u2>=$D800) and (U2<=$DBFF) then
|
|
|
|
- begin
|
|
|
|
- u1:=u2;
|
|
|
|
- S:='';
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
|
|
|
|
- S:=Utf8Encode(UnicodeString(WideChar(u2))) // ToDo: use faster function
|
|
|
|
- else
|
|
|
|
- S:=String(WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
|
|
|
|
- U1:=0;
|
|
|
|
- U2:=0;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- #0 : Error(SErrOpenString,[FCurRow]);
|
|
|
|
- else
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- I:=Length(S);
|
|
|
|
- if (SectionLength+I>0) then
|
|
|
|
- begin
|
|
|
|
- // If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it.
|
|
|
|
- // example: \u00f8\"
|
|
|
|
- if (I=1) and (u1<>0) then
|
|
|
|
- MaybeAppendUnicode;
|
|
|
|
- SetLength(FCurTokenString, OldLength + SectionLength+i);
|
|
|
|
- if SectionLength > 0 then
|
|
|
|
- Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
|
|
|
|
- if I>0 then
|
|
|
|
- Move(S[1],FCurTokenString[OldLength + SectionLength+1],i);
|
|
|
|
- Inc(OldLength, SectionLength+I);
|
|
|
|
- end;
|
|
|
|
- // Next AnsiChar
|
|
|
|
- TokenStart := FTokenStr+1;
|
|
|
|
- end
|
|
|
|
- else if u1<>0 then
|
|
|
|
- MaybeAppendUnicode;
|
|
|
|
- if FTokenStr^ < #$20 then
|
|
|
|
- if FTokenStr^ = #0 then Error(SErrOpenString,[FCurRow])
|
|
|
|
- else if joStrict in Options then Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- end;
|
|
|
|
- if FTokenStr^ = #0 then
|
|
|
|
- Error(SErrOpenString,[FCurRow]);
|
|
|
|
- if u1<>0 then
|
|
|
|
- MaybeAppendUnicode;
|
|
|
|
- SectionLength := FTokenStr - TokenStart;
|
|
|
|
- SetLength(FCurTokenString, OldLength + SectionLength);
|
|
|
|
- if SectionLength > 0 then
|
|
|
|
- Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Sp := ScanString(Sp);
|
|
Result := tkString;
|
|
Result := tkString;
|
|
end;
|
|
end;
|
|
',':
|
|
',':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkComma;
|
|
Result := tkComma;
|
|
end;
|
|
end;
|
|
'0'..'9','.','-':
|
|
'0'..'9','.','-':
|
|
begin
|
|
begin
|
|
- TokenStart := FTokenStr;
|
|
|
|
- if FTokenStr^ = '-' then inc(FTokenStr);
|
|
|
|
- case FTokenStr^ of
|
|
|
|
- '1'..'9': Inc(FTokenStr);
|
|
|
|
- '0': begin
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- if (joStrict in Options) and (FTokenStr^ in ['0'..'9']) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- '.': if joStrict in Options then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- else
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- while true do
|
|
|
|
- begin
|
|
|
|
- case FTokenStr^ of
|
|
|
|
- '0'..'9': inc(FTokenStr);
|
|
|
|
- '.':
|
|
|
|
- begin
|
|
|
|
- case FTokenStr[1] of
|
|
|
|
- '0'..'9': Inc(FTokenStr, 2);
|
|
|
|
- 'e', 'E': begin
|
|
|
|
- if joStrict in Options then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- end;
|
|
|
|
- else Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- while FTokenStr^ in ['0'..'9'] do
|
|
|
|
- inc(FTokenStr);
|
|
|
|
- break;
|
|
|
|
- end;
|
|
|
|
- else
|
|
|
|
- break;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- if FTokenStr^ in ['e', 'E'] then begin
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- if FTokenStr^ in ['-','+'] then
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- if not (FTokenStr^ in ['0'..'9']) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- repeat
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- until not (FTokenStr^ in ['0'..'9']);
|
|
|
|
- end;
|
|
|
|
- if {(FTokenStr<>FEOL) and }not (FTokenStr^ in [#13,#10,#0,'}',']',',',#9,' ']) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
- SectionLength := FTokenStr - TokenStart;
|
|
|
|
- FCurTokenString:='';
|
|
|
|
- SetString(FCurTokenString, TokenStart, SectionLength);
|
|
|
|
- If (FCurTokenString[1]='.') then
|
|
|
|
- FCurTokenString:='0'+FCurTokenString;
|
|
|
|
|
|
+ Sp := ScanNumber(Sp);
|
|
Result := tkNumber;
|
|
Result := tkNumber;
|
|
end;
|
|
end;
|
|
':':
|
|
':':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkColon;
|
|
Result := tkColon;
|
|
end;
|
|
end;
|
|
'{':
|
|
'{':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkCurlyBraceOpen;
|
|
Result := tkCurlyBraceOpen;
|
|
end;
|
|
end;
|
|
'}':
|
|
'}':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkCurlyBraceClose;
|
|
Result := tkCurlyBraceClose;
|
|
- end;
|
|
|
|
|
|
+ end;
|
|
'[':
|
|
'[':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkSquaredBraceOpen;
|
|
Result := tkSquaredBraceOpen;
|
|
end;
|
|
end;
|
|
']':
|
|
']':
|
|
begin
|
|
begin
|
|
- Inc(FTokenStr);
|
|
|
|
|
|
+ Inc(Sp);
|
|
Result := tkSquaredBraceClose;
|
|
Result := tkSquaredBraceClose;
|
|
end;
|
|
end;
|
|
- '/' :
|
|
|
|
|
|
+ '/':
|
|
begin
|
|
begin
|
|
- if Not (joComments in Options) then
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurCOlumn,FTokenStr[0]]);
|
|
|
|
- TokenStart:=FTokenStr;
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- Case FTokenStr^ of
|
|
|
|
- '/' : begin
|
|
|
|
- FCurTokenString:='';
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- TokenStart:=FTokenStr;
|
|
|
|
- SectionLength := PAnsiChar(FEOL)-TokenStart;
|
|
|
|
- SetString(FCurTokenString, TokenStart, SectionLength);
|
|
|
|
- FetchLine;
|
|
|
|
- end;
|
|
|
|
- '*' :
|
|
|
|
- begin
|
|
|
|
- IsStar:=False;
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- TokenStart:=FTokenStr;
|
|
|
|
- Repeat
|
|
|
|
- While (FTokenStr=FEOL) do
|
|
|
|
- begin
|
|
|
|
- SectionLength := (FTokenStr - TokenStart);
|
|
|
|
- Line:='';
|
|
|
|
- SetString(Line, TokenStart, SectionLength);
|
|
|
|
- FCurtokenString:=FCurtokenString+Line+sLineBreak;
|
|
|
|
- if not fetchLine then
|
|
|
|
- Error(SUnterminatedComment, [CurRow,CurCOlumn,FTokenStr[0]]);
|
|
|
|
- TokenStart:=FTokenStr;
|
|
|
|
- end;
|
|
|
|
- IsStar:=FTokenStr^='*';
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- EOC:=(isStar and (FTokenStr^='/'));
|
|
|
|
- Until EOC;
|
|
|
|
- if EOC then
|
|
|
|
- begin
|
|
|
|
- SectionLength := (FTokenStr - TokenStart-1);
|
|
|
|
- Line:='';
|
|
|
|
- SetString(Line, TokenStart, SectionLength);
|
|
|
|
- FCurtokenString:=FCurtokenString+Line;
|
|
|
|
- Inc(FTokenStr);
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
- else
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurCOlumn,FTokenStr[0]]);
|
|
|
|
- end;
|
|
|
|
- Result:=tkComment;
|
|
|
|
|
|
+ if Not (joComments in Options) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ if Sp[1] = '/' then
|
|
|
|
+ Sp := ScanLineComment(Sp + 2)
|
|
|
|
+ else if Sp[1] = '*' then
|
|
|
|
+ Sp := ScanSlashStarComment(Sp + 2)
|
|
|
|
+ else
|
|
|
|
+ InvalidCharacter(Sp + 1);
|
|
|
|
+ Result := tkComment;
|
|
end;
|
|
end;
|
|
'a'..'z','A'..'Z','_':
|
|
'a'..'z','A'..'Z','_':
|
|
begin
|
|
begin
|
|
- tstart:=CurRow;
|
|
|
|
- Tcol:=CurColumn;
|
|
|
|
- TokenStart := FTokenStr;
|
|
|
|
- Result:=tkIdentifier;
|
|
|
|
- case TokenStart^ of
|
|
|
|
- 't': if (TokenStart[1] = 'r') and (TokenStart[2] = 'u') and (TokenStart[3] = 'e') then
|
|
|
|
- Result:=tkTrue;
|
|
|
|
- 'f': if (TokenStart[1] = 'a') and (TokenStart[2] = 'l') and (TokenStart[3] = 's') and (TokenStart[4] = 'e') then
|
|
|
|
- Result:=tkFalse;
|
|
|
|
- 'n': if (TokenStart[1] = 'u') and (TokenStart[2] = 'l') and (TokenStart[3] = 'l') then
|
|
|
|
- Result:=tkNull;
|
|
|
|
- end;
|
|
|
|
- if result <> tkIdentifier then inc(FTokenStr, length(TokenInfos[result]) - 1);
|
|
|
|
|
|
+ Start := Sp;
|
|
repeat
|
|
repeat
|
|
- Inc(FTokenStr);
|
|
|
|
- until not (FTokenStr^ in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
|
|
|
|
- SectionLength := FTokenStr - TokenStart;
|
|
|
|
- FCurTokenString:='';
|
|
|
|
- SetString(FCurTokenString, TokenStart, SectionLength);
|
|
|
|
- if (result = tkIdentifier) or (SectionLength <> length(TokenInfos[result])) then begin
|
|
|
|
- if (joStrict in Options) then
|
|
|
|
- Error(SErrInvalidCharacter, [tStart,tcol,TokenStart[0]]);
|
|
|
|
- for it := tkTrue to tkNull do
|
|
|
|
- if CompareText(CurTokenString, TokenInfos[it]) = 0 then
|
|
|
|
- begin
|
|
|
|
- Result := it;
|
|
|
|
- FCurToken := Result;
|
|
|
|
- exit;
|
|
|
|
- end;
|
|
|
|
- end;
|
|
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ until not (Sp^ in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
|
|
|
|
+ AddPiece(Start, Sp);
|
|
|
|
+ Result := RecognizeKeyword(Start, Sp - Start);
|
|
end;
|
|
end;
|
|
- else
|
|
|
|
- Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
|
|
|
|
|
|
+ else
|
|
|
|
+ InvalidCharacter(Sp);
|
|
end;
|
|
end;
|
|
|
|
+ FCurPos := Sp;
|
|
FCurToken := Result;
|
|
FCurToken := Result;
|
|
end;
|
|
end;
|
|
|
|
|
|
-{function TJSONScanner.FetchToken: TJSONToken;
|
|
|
|
-
|
|
|
|
-begin
|
|
|
|
- Result:=DoFetchToken;
|
|
|
|
-end;}
|
|
|
|
-
|
|
|
|
function TJSONScanner.GetCurLine: Ansistring;
|
|
function TJSONScanner.GetCurLine: Ansistring;
|
|
begin
|
|
begin
|
|
- Result:='';
|
|
|
|
- if FCurLine<>Nil then
|
|
|
|
- begin
|
|
|
|
- SetLength(Result,FEOL-FCurLine);
|
|
|
|
- if Length(Result)>0 then
|
|
|
|
- Move(FCurLine^,Result[1],Length(Result));
|
|
|
|
- end;
|
|
|
|
|
|
+ if not Assigned(FCurLineEnd) then
|
|
|
|
+ begin
|
|
|
|
+ FCurLineEnd := FCurLine;
|
|
|
|
+ while not (FCurLineEnd^ in [#13, #10, #0]) do
|
|
|
|
+ inc(FCurLineEnd);
|
|
|
|
+ end;
|
|
|
|
+ SetString(Result, FCurLine, FCurLineEnd - FCurLine);
|
|
end;
|
|
end;
|
|
|
|
|
|
function TJSONScanner.GetO(AIndex: TJSONOption): Boolean;
|
|
function TJSONScanner.GetO(AIndex: TJSONOption): Boolean;
|
|
@@ -629,9 +320,8 @@ begin
|
|
end;
|
|
end;
|
|
|
|
|
|
function TJSONScanner.GetAbsolutePos: Integer;
|
|
function TJSONScanner.GetAbsolutePos: Integer;
|
|
-
|
|
|
|
begin
|
|
begin
|
|
- Result:=FCurPos-PAnsiChar(FSource);
|
|
|
|
|
|
+ Result:=FCurPos-FSourceStart;
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure TJSONScanner.SetO(AIndex: TJSONOption; AValue: Boolean);
|
|
procedure TJSONScanner.SetO(AIndex: TJSONOption; AValue: Boolean);
|
|
@@ -642,4 +332,352 @@ begin
|
|
Exclude(Foptions,AIndex)
|
|
Exclude(Foptions,AIndex)
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+function TJSONScanner.CountChars(start, ed: PAnsiChar): SizeInt;
|
|
|
|
+begin
|
|
|
|
+ if joUTF8 in Options then
|
|
|
|
+ begin
|
|
|
|
+ // Count UTF-8 start bytes.
|
|
|
|
+ Result := 0;
|
|
|
|
+ while start < ed do
|
|
|
|
+ begin
|
|
|
|
+ if ord(start^) and %11000000 <> %10000000 then
|
|
|
|
+ inc(Result);
|
|
|
|
+ inc(start);
|
|
|
|
+ end;
|
|
|
|
+ end else
|
|
|
|
+ Result := ed - start;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.GrowParts(by: SizeInt): PSizeInt;
|
|
|
|
+var
|
|
|
|
+ newNParts: SizeInt;
|
|
|
|
+begin
|
|
|
|
+ newNParts := FNParts + by;
|
|
|
|
+ if newNParts > length(FParts) then
|
|
|
|
+ SetLength(FParts, 4 + newNParts + SizeUint(newNParts) div 4);
|
|
|
|
+ Result := @FParts[FNParts];
|
|
|
|
+ FNParts := newNParts;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TJSONScanner.AddPiece(start, ed: PAnsiChar);
|
|
|
|
+var
|
|
|
|
+ pp: PSizeInt;
|
|
|
|
+begin
|
|
|
|
+ if start = ed then
|
|
|
|
+ exit;
|
|
|
|
+ pp := GrowParts(2);
|
|
|
|
+ pp[0] := start - FSourceStart;
|
|
|
|
+ pp[1] := ed - start;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TJSONScanner.AddCodepoint(cp: uint32);
|
|
|
|
+begin
|
|
|
|
+ GrowParts(1)^ := -1 - SizeInt(cp);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.GetCurTokenString: string;
|
|
|
|
+begin
|
|
|
|
+ if FNParts >= 0 then
|
|
|
|
+ BuildCurTokenString;
|
|
|
|
+ result := FCurTokenString;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure TJSONScanner.BuildCurTokenString;
|
|
|
|
+var
|
|
|
|
+ utf8: boolean;
|
|
|
|
+ iPart, len: SizeInt;
|
|
|
|
+ cp: uint32;
|
|
|
|
+ Rp: PAnsiChar;
|
|
|
|
+begin
|
|
|
|
+ utf8 := (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8);
|
|
|
|
+ len := 0;
|
|
|
|
+ // Prepass for length. Exact if utf8, otherwise ceiling.
|
|
|
|
+ iPart := 0;
|
|
|
|
+ while iPart < FNParts do
|
|
|
|
+ begin
|
|
|
|
+ if FParts[iPart] >= 0 then
|
|
|
|
+ begin
|
|
|
|
+ inc(len, FParts[iPart + 1]);
|
|
|
|
+ inc(iPart, 2);
|
|
|
|
+ end else
|
|
|
|
+ begin
|
|
|
|
+ cp := -(FParts[iPart] + 1);
|
|
|
|
+ if cp <= $7F then inc(len) // First 128 characters use 1 byte both in UTF-8 or ANSI encodings.
|
|
|
|
+ // Use 2 in non-utf8 mode as ceiling value, assuming ANSI encodings use at most 2 bytes per codepoint. (Eg: Shift JIS uses 1 or 2.)
|
|
|
|
+ else if (cp <= $7FF) or not utf8 then inc(len, 2)
|
|
|
|
+ else if cp <= $FFFF then inc(len, 3)
|
|
|
|
+ else inc(len, 4);
|
|
|
|
+ inc(iPart);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ SetLength(FCurTokenString, len);
|
|
|
|
+ Rp := PAnsiChar(Pointer(FCurTokenString));
|
|
|
|
+ iPart := 0;
|
|
|
|
+ while iPart < FNParts do
|
|
|
|
+ begin
|
|
|
|
+ if FParts[iPart] >= 0 then
|
|
|
|
+ begin
|
|
|
|
+ Move(FSourceStart[FParts[iPart]], Rp^, FParts[iPart + 1]);
|
|
|
|
+ inc(Rp, FParts[iPart + 1]);
|
|
|
|
+ inc(iPart, 2);
|
|
|
|
+ end else
|
|
|
|
+ begin
|
|
|
|
+ cp := -(FParts[iPart] + 1);
|
|
|
|
+ if cp <= $7F then
|
|
|
|
+ begin
|
|
|
|
+ byte(Rp^) := cp;
|
|
|
|
+ inc(Rp);
|
|
|
|
+ end else
|
|
|
|
+ if utf8 then
|
|
|
|
+ if cp <= $7FF then
|
|
|
|
+ begin
|
|
|
|
+ byte(Rp^) := %11000000 or cp shr 6;
|
|
|
|
+ byte(Rp[1]) := %10000000 or cp and %111111;
|
|
|
|
+ Inc(Rp, 2);
|
|
|
|
+ end
|
|
|
|
+ else if cp <= $FFFF then
|
|
|
|
+ begin
|
|
|
|
+ byte(Rp^) := %11100000 or (cp shr 12);
|
|
|
|
+ byte(Rp[1]) := %10000000 or cp shr 6 and %111111;
|
|
|
|
+ byte(Rp[2]) := %10000000 or cp and %111111;
|
|
|
|
+ Inc(Rp, 3);
|
|
|
|
+ end else
|
|
|
|
+ begin
|
|
|
|
+ byte(Rp^) := %11110000 or cp shr 18;
|
|
|
|
+ byte(Rp[1]) := %10000000 or cp shr 12 and %111111;
|
|
|
|
+ byte(Rp[2]) := %10000000 or cp shr 6 and %111111;
|
|
|
|
+ byte(Rp[3]) := %10000000 or cp and %111111;
|
|
|
|
+ Inc(Rp, 4);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ Inc(Rp, CodepointToASCII(cp, Rp));
|
|
|
|
+ inc(iPart);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ SetLength(FCurTokenString, Rp - PAnsiChar(Pointer(FCurTokenString)));
|
|
|
|
+ FNParts := -1;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+class function TJSONScanner.CodepointToASCII(cp: uint32; Rp: PAnsiChar): SizeInt;
|
|
|
|
+var
|
|
|
|
+ s: ansistring;
|
|
|
|
+begin
|
|
|
|
+ if (cp <= $D7FF) or ((cp >= $E000) and (cp <= $FFFF)) then
|
|
|
|
+ s := ansistring(unicodechar(cp))
|
|
|
|
+ else
|
|
|
|
+ s := ansistring(unicodechar($D800 + (cp - $10000) shr 10) + unicodechar($DC00 + (cp - $10000) and (1 shl 10 - 1)));
|
|
|
|
+ result := length(s);
|
|
|
|
+ Move(pointer(s)^, Rp^, result);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanNewline(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+begin
|
|
|
|
+ Result := Sp + 1 + ord((Sp[0] = #13) and (Sp[1] = #10));
|
|
|
|
+ Inc(FCurRow);
|
|
|
|
+ FCurLine := Result;
|
|
|
|
+ FCurLineEnd := nil;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanString(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+const
|
|
|
|
+ SimpleEscapes_Spell: array[0 .. 8] of ansichar = 'tbnrf"''\/';
|
|
|
|
+ SimpleEscapes_Meant: array[0 .. High(SimpleEscapes_Spell)] of ansichar = #9#8#10#13#12'"''\/';
|
|
|
|
+var
|
|
|
|
+ StartChar: AnsiChar;
|
|
|
|
+ LiteralStart: PAnsiChar;
|
|
|
|
+ iEsc: SizeInt;
|
|
|
|
+ u, u1: uint32;
|
|
|
|
+begin
|
|
|
|
+ StartChar := Sp^;
|
|
|
|
+ if (StartChar = '''') and (joStrict in Options) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ LiteralStart := Sp + 1;
|
|
|
|
+ repeat
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ // Fast test for irregularities instead of jumping through several 'if's each time.
|
|
|
|
+ // Loop starts with an increment to improve this common case further at the cost of a bit of comprehensibility in other cases.
|
|
|
|
+ if not (Sp^ in [#0 .. #31, '\', '''', '"']) then
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ if Sp^ = '\' then
|
|
|
|
+ begin
|
|
|
|
+ AddPiece(LiteralStart, Sp);
|
|
|
|
+ if Sp[1] = 'u' then
|
|
|
|
+ begin
|
|
|
|
+ Sp := ScanHex(Sp + 2, u);
|
|
|
|
+ if (u >= $D800) and (u <= $DBFF) then
|
|
|
|
+ // High surrogate. Expect low surrogate.
|
|
|
|
+ if (Sp[0] = '\') and (Sp[1] = 'u') then
|
|
|
|
+ begin
|
|
|
|
+ Sp := ScanHex(Sp + 2, u1);
|
|
|
|
+ if (u1 >= $DC00) and (u1 <= $DFFF) then
|
|
|
|
+ AddCodepoint($10000 + (u - $D800) shl 10 + (u1 - $DC00))
|
|
|
|
+ else
|
|
|
|
+ Error(SErrInvalidCharacter, [CurRow, 1 + CountChars(FCurLine, Sp), IntToStr(u1)]);
|
|
|
|
+ end else
|
|
|
|
+ Error(SErrInvalidCharacter, [CurRow, 1 + CountChars(FCurLine, Sp), IntToStr(u) + ' + ???'])
|
|
|
|
+ else
|
|
|
|
+ AddCodepoint(u);
|
|
|
|
+ LiteralStart := Sp;
|
|
|
|
+ dec(Sp);
|
|
|
|
+ continue;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ iEsc := IndexByte(SimpleEscapes_Spell[0], length(SimpleEscapes_Spell), ord(Sp[1]));
|
|
|
|
+ if iEsc >= 0 then
|
|
|
|
+ begin
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ LiteralStart := Sp + 1;
|
|
|
|
+ if SimpleEscapes_Meant[iEsc] = SimpleEscapes_Spell[iEsc] then
|
|
|
|
+ dec(LiteralStart) // Just start next literal from this very character instead of handling it explicitly somehow.
|
|
|
|
+ else
|
|
|
|
+ GrowParts(1)^ := -1 - ord(SimpleEscapes_Meant[iEsc]);
|
|
|
|
+ continue;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if Sp[1] = #0 then
|
|
|
|
+ Error(SErrOpenString, [CurRow])
|
|
|
|
+ else
|
|
|
|
+ InvalidCharacter(Sp + 1);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if Sp^ = StartChar then
|
|
|
|
+ break;
|
|
|
|
+
|
|
|
|
+ if Sp^ < #20 then
|
|
|
|
+ if Sp^ = #0 then
|
|
|
|
+ Error(SErrOpenString, [FCurRow])
|
|
|
|
+ else if joStrict in Options then
|
|
|
|
+ InvalidCharacter(Sp)
|
|
|
|
+ else if Sp^ in [#13, #10] then
|
|
|
|
+ Sp := ScanNewline(Sp) - 1; // Account for newlines when not joStrict.
|
|
|
|
+ until false;
|
|
|
|
+ AddPiece(LiteralStart, Sp);
|
|
|
|
+ Result := Sp + 1;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanHex(Sp: PAnsiChar; out v: uint32): PAnsiChar;
|
|
|
|
+var
|
|
|
|
+ n: SizeInt;
|
|
|
|
+begin
|
|
|
|
+ v := 0;
|
|
|
|
+ for n := 0 to 3 do
|
|
|
|
+ begin
|
|
|
|
+ if not (Sp^ in ['0' .. '9', 'a' .. 'f', 'A' .. 'F']) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ v := v * 16;
|
|
|
|
+ if Sp^ >= 'A' then // "ord('0' ~ '9') and 15" gives the corresponding number; "ord('A' ~ 'F', 'a' ~ 'f') and 15" gives the 1-based letter number.
|
|
|
|
+ v := v + 9;
|
|
|
|
+ v := v + ord(Sp^) and 15;
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ end;
|
|
|
|
+ Result := Sp;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanNumber(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+var
|
|
|
|
+ Start: PAnsiChar;
|
|
|
|
+begin
|
|
|
|
+ Start := Sp;
|
|
|
|
+ if Sp^ = '-' then
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if Sp^ in ['0' .. '9'] then
|
|
|
|
+ begin
|
|
|
|
+ if (Sp^ = '0') and (Sp[1] in ['0' .. '9']) and (joStrict in FOptions) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ repeat
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ until not (Sp^ in ['0' .. '9']);
|
|
|
|
+ end
|
|
|
|
+ else if not ((Sp^ = '.') and not (joStrict in Options)) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ if Sp^ = '.' then
|
|
|
|
+ begin
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if Sp^ in ['0' .. '9'] then
|
|
|
|
+ repeat
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ until not (Sp^ in ['0' .. '9'])
|
|
|
|
+ else if joStrict in FOptions then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ end;
|
|
|
|
+ if Sp^ in ['e', 'E'] then
|
|
|
|
+ begin
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if Sp^ in ['+', '-'] then
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if not (Sp^ in ['0' .. '9']) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ repeat
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ until not (Sp^ in ['0' .. '9']);
|
|
|
|
+ end;
|
|
|
|
+ if not (Sp^ in [#13, #10, #0, '}', ']', ',', #9, ' ']) then
|
|
|
|
+ InvalidCharacter(Sp);
|
|
|
|
+ if Start^ = '.' then
|
|
|
|
+ GrowParts(1)^ := -1 - ord('0');
|
|
|
|
+ AddPiece(Start, Sp);
|
|
|
|
+ Result := Sp;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanLineComment(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+var
|
|
|
|
+ Start: PAnsiChar;
|
|
|
|
+begin
|
|
|
|
+ Start := Sp;
|
|
|
|
+ while not (Sp^ in [#0, #13, #10]) do
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ AddPiece(Start, Sp);
|
|
|
|
+ if Sp^ in [#13, #10] then
|
|
|
|
+ Sp := ScanNewline(Sp);
|
|
|
|
+ Result := Sp;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.ScanSlashStarComment(Sp: PAnsiChar): PAnsiChar;
|
|
|
|
+var
|
|
|
|
+ Start: PAnsiChar;
|
|
|
|
+begin
|
|
|
|
+ Start := Sp;
|
|
|
|
+ repeat
|
|
|
|
+ while not (Sp^ in [#0, '*', #13, #10]) do
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if Sp^ = '*' then
|
|
|
|
+ begin
|
|
|
|
+ Inc(Sp);
|
|
|
|
+ if Sp^ = '/' then
|
|
|
|
+ break;
|
|
|
|
+ end
|
|
|
|
+ else if Sp^ in [#13, #10] then
|
|
|
|
+ Sp := ScanNewline(Sp)
|
|
|
|
+ else
|
|
|
|
+ Error(SUnterminatedComment, [CurRow, 1 + CountChars(FCurLine, Sp)]);
|
|
|
|
+ until false;
|
|
|
|
+ AddPiece(Start, Sp - 1); // Loop breaks with Sp pointing at / in final */.
|
|
|
|
+ Result := Sp + 1;
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+function TJSONScanner.RecognizeKeyword(Sp: PAnsiChar; N: SizeInt): TJSONToken;
|
|
|
|
+const
|
|
|
|
+ U32Byte0Shift = {$ifdef ENDIAN_BIG} 24 {$else} 0 {$endif};
|
|
|
|
+ U32Byte1Shift = {$ifdef ENDIAN_BIG} 16 {$else} 8 {$endif};
|
|
|
|
+ U32Byte2Shift = {$ifdef ENDIAN_BIG} 8 {$else} 16 {$endif};
|
|
|
|
+ U32Byte3Shift = {$ifdef ENDIAN_BIG} 0 {$else} 24 {$endif};
|
|
|
|
+var
|
|
|
|
+ sample: uint32;
|
|
|
|
+begin
|
|
|
|
+ Result := tkIdentifier;
|
|
|
|
+ if N = 4 then
|
|
|
|
+ begin
|
|
|
|
+ sample := unaligned(PUint32(Sp)^);
|
|
|
|
+ if sample = ord('t') shl U32Byte0Shift or ord('r') shl U32Byte1Shift or ord('u') shl U32Byte2Shift or ord('e') shl U32Byte3Shift then
|
|
|
|
+ Result := tkTrue
|
|
|
|
+ else if sample = ord('n') shl U32Byte0Shift or ord('u') shl U32Byte1Shift or ord('l') shl U32Byte2Shift or ord('l') shl U32Byte3Shift then
|
|
|
|
+ Result := tkNull;
|
|
|
|
+ end
|
|
|
|
+ else if (N = 5) and
|
|
|
|
+ (unaligned(PUint32(Sp)^) = ord('f') shl U32Byte0Shift or ord('a') shl U32Byte1Shift or ord('l') shl U32Byte2Shift or ord('s') shl U32Byte3Shift) and
|
|
|
|
+ (Sp[4] = 'e') then
|
|
|
|
+ Result := tkFalse;
|
|
|
|
+end;
|
|
|
|
+
|
|
end.
|
|
end.
|