|
|
@@ -78,6 +78,7 @@ Type
|
|
|
FCurRow: Integer;
|
|
|
FCurToken: TJSONToken;
|
|
|
FCurTokenString: AnsiString; // Calculated lazily from FParts. FPartsBytes = -1 if ready.
|
|
|
+ FCurTokenStringLen: SizeInt;
|
|
|
|
|
|
// Sequence of varints that describes how to build FCurTokenString if asked.
|
|
|
// Number X with lowest bit 0 means piece with start = FPartsSrcPos + X shr 1, length = next number; FPartsSrcPos advances to start + length.
|
|
|
@@ -234,6 +235,7 @@ function TJSONScanner.FetchToken: TJSONToken;
|
|
|
var
|
|
|
Sp, Start: PAnsiChar;
|
|
|
begin
|
|
|
+ FCurTokenStringLen := 0;
|
|
|
FPartsBytes := 0;
|
|
|
Sp := FCurPos;
|
|
|
// Don't consider such newline a tkWhitespace.
|
|
|
@@ -425,6 +427,12 @@ end;
|
|
|
procedure TJSONScanner.AddCodepoint(cp: uint32);
|
|
|
begin
|
|
|
inc(FPartsBytes, ViWrite(EnsurePartsSpace(MaxViLen), cp shl 1 or 1));
|
|
|
+
|
|
|
+ if cp <= $7F then inc(FCurTokenStringLen) // First 128 characters use 1 byte both in UTF-8 or ANSI encodings.
|
|
|
+ // Use 2 in non-utf8 mode as ceiling value, assuming ANSI encodings use at most 2 bytes per codepoint. (Eg: Shift JIS uses 1 or 2.)
|
|
|
+ else if (cp <= $7FF) {or not utf8} then inc(FCurTokenStringLen, 2)
|
|
|
+ else if cp <= $FFFF then inc(FCurTokenStringLen, 3)
|
|
|
+ else inc(FCurTokenStringLen, 4);
|
|
|
end;
|
|
|
|
|
|
procedure TJSONScanner.AddPiece(start, ed: PAnsiChar);
|
|
|
@@ -439,6 +447,7 @@ begin
|
|
|
n := ViWrite(pp, (start - FPartsSrcPos) shl 1);
|
|
|
inc(FPartsBytes, n + ViWrite(pp + n, ed - start));
|
|
|
FPartsSrcPos := ed;
|
|
|
+ FCurTokenStringLen := FCurTokenStringLen + ed - start;
|
|
|
end;
|
|
|
|
|
|
function TJSONScanner.GetCurTokenString: ansistring;
|
|
|
@@ -452,37 +461,16 @@ procedure TJSONScanner.BuildCurTokenString;
|
|
|
var
|
|
|
utf8: boolean;
|
|
|
partp, parte: PUint8;
|
|
|
- len: SizeInt;
|
|
|
v, v2: SizeUint;
|
|
|
cp: uint32;
|
|
|
Srcp, Rp: PAnsiChar;
|
|
|
begin
|
|
|
utf8 := (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8);
|
|
|
- len := 0;
|
|
|
- // Prepass for length. Exact if utf8, otherwise ceiling.
|
|
|
- partp := PUint8(FParts);
|
|
|
- parte := partp + FPartsBytes;
|
|
|
- while partp < parte do
|
|
|
- begin
|
|
|
- inc(partp, ViRead(partp, v));
|
|
|
- if v and 1 = 0 then
|
|
|
- begin
|
|
|
- inc(partp, ViRead(partp, v));
|
|
|
- inc(len, v);
|
|
|
- end else
|
|
|
- begin
|
|
|
- cp := v shr 1;
|
|
|
- if cp <= $7F then inc(len) // First 128 characters use 1 byte both in UTF-8 or ANSI encodings.
|
|
|
- // Use 2 in non-utf8 mode as ceiling value, assuming ANSI encodings use at most 2 bytes per codepoint. (Eg: Shift JIS uses 1 or 2.)
|
|
|
- else if (cp <= $7FF) or not utf8 then inc(len, 2)
|
|
|
- else if cp <= $FFFF then inc(len, 3)
|
|
|
- else inc(len, 4);
|
|
|
- end;
|
|
|
- end;
|
|
|
- SetLength(FCurTokenString, len);
|
|
|
+ SetLength(FCurTokenString, FCurTokenStringLen);
|
|
|
Srcp := FPartsSrcPos;
|
|
|
Rp := PAnsiChar(Pointer(FCurTokenString));
|
|
|
partp := PUint8(FParts);
|
|
|
+ parte := partp + FPartsBytes;
|
|
|
while partp < parte do
|
|
|
begin
|
|
|
inc(partp, ViRead(partp, v));
|