ソースを参照

Calculating string length immediately, speedup 2.5%

Alligator-1 3 週間 前
コミット
67ec135896
1 ファイル変更11 行追加23 行削除
  1. 11 23
      packages/fcl-json/src/jsonscanner.pp

+ 11 - 23
packages/fcl-json/src/jsonscanner.pp

@@ -78,6 +78,7 @@ Type
     FCurRow: Integer;
     FCurToken: TJSONToken;
     FCurTokenString: AnsiString; // Calculated lazily from FParts. FPartsBytes = -1 if ready.
+    FCurTokenStringLen: SizeInt;
 
     // Sequence of varints that describes how to build FCurTokenString if asked.
     // Number X with lowest bit 0 means piece with start = FPartsSrcPos + X shr 1, length = next number; FPartsSrcPos advances to start + length.
@@ -234,6 +235,7 @@ function TJSONScanner.FetchToken: TJSONToken;
 var
   Sp, Start: PAnsiChar;
 begin
+  FCurTokenStringLen := 0;
   FPartsBytes := 0;
   Sp := FCurPos;
   // Don't consider such newline a tkWhitespace.
@@ -425,6 +427,12 @@ end;
 procedure TJSONScanner.AddCodepoint(cp: uint32);
 begin
   inc(FPartsBytes, ViWrite(EnsurePartsSpace(MaxViLen), cp shl 1 or 1));
+
+  if cp <= $7F then inc(FCurTokenStringLen) // First 128 characters use 1 byte both in UTF-8 or ANSI encodings.
+  // Use 2 in non-utf8 mode as ceiling value, assuming ANSI encodings use at most 2 bytes per codepoint. (Eg: Shift JIS uses 1 or 2.)
+  else if (cp <= $7FF) {or not utf8} then inc(FCurTokenStringLen, 2)
+  else if cp <= $FFFF then inc(FCurTokenStringLen, 3)
+  else inc(FCurTokenStringLen, 4);
 end;
 
 procedure TJSONScanner.AddPiece(start, ed: PAnsiChar);
@@ -439,6 +447,7 @@ begin
   n := ViWrite(pp, (start - FPartsSrcPos) shl 1);
   inc(FPartsBytes, n + ViWrite(pp + n, ed - start));
   FPartsSrcPos := ed;
+  FCurTokenStringLen := FCurTokenStringLen + ed - start;
 end;
 
 function TJSONScanner.GetCurTokenString: ansistring;
@@ -452,37 +461,16 @@ procedure TJSONScanner.BuildCurTokenString;
 var
   utf8: boolean;
   partp, parte: PUint8;
-  len: SizeInt;
   v, v2: SizeUint;
   cp: uint32;
   Srcp, Rp: PAnsiChar;
 begin
   utf8 := (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8);
-  len := 0;
-  // Prepass for length. Exact if utf8, otherwise ceiling.
-  partp := PUint8(FParts);
-  parte := partp + FPartsBytes;
-  while partp < parte do
-  begin
-    inc(partp, ViRead(partp, v));
-    if v and 1 = 0 then
-    begin
-      inc(partp, ViRead(partp, v));
-      inc(len, v);
-    end else
-    begin
-      cp := v shr 1;
-      if cp <= $7F then inc(len) // First 128 characters use 1 byte both in UTF-8 or ANSI encodings.
-      // Use 2 in non-utf8 mode as ceiling value, assuming ANSI encodings use at most 2 bytes per codepoint. (Eg: Shift JIS uses 1 or 2.)
-      else if (cp <= $7FF) or not utf8 then inc(len, 2)
-      else if cp <= $FFFF then inc(len, 3)
-      else inc(len, 4);
-    end;
-  end;
-  SetLength(FCurTokenString, len);
+  SetLength(FCurTokenString, FCurTokenStringLen);
   Srcp := FPartsSrcPos;
   Rp := PAnsiChar(Pointer(FCurTokenString));
   partp := PUint8(FParts);
+  parte := partp + FPartsBytes;
   while partp < parte do
   begin
     inc(partp, ViRead(partp, v));