2
0
Эх сурвалжийг харах

* converted widestr.charlength() to use system.Utf8CodePointLen()

git-svn-id: trunk@30050 -
Jonas Maebe 10 жил өмнө
parent
commit
6dd2fe0320
1 өөрчлөгдсөн 35 нэмэгдсэн, 56 устгасан
  1. 35 56
      compiler/widestr.pas

+ 35 - 56
compiler/widestr.pas

@@ -63,8 +63,8 @@ unit widestr;
       d : pchar; dcp : tstringencoding
     );
     function codepagebyname(const s : string) : tstringencoding;
-    function CharLength (P: PChar; L: SizeInt): SizeInt;
-    function CharLength (const S: string): SizeInt;
+    function charlength(p: pchar; len: sizeint): sizeint;
+    function charlength(const s: string): sizeint;
 
   implementation
 
@@ -344,73 +344,52 @@ unit widestr;
           Result:=p^.cp;
       end;
 
-    function CharLength (P: PChar; L: SizeInt): SizeInt;
 
-      function UTF8CodePointLength(firstbyte: byte): SizeInt;
+    function charlength(p: pchar; len: sizeint): sizeint;
       var
-        firstzerobit: SizeInt;
-      begin
-        result:=1;
-        { bsr searches for the leftmost 1 bit. We are interested in the
-          leftmost 0 bit, so first invert the value
-        }
-        firstzerobit:=BsrByte(not(firstbyte));
-        { if there is no zero bit or the first zero bit is the rightmost bit
-          (bit 0), this is an invalid UTF-8 byte ($ff cannot appear in an
-          UTF-8-encoded string, and in the worst case bit 1 has to be zero)
-        }
-        if (firstzerobit=0) or (firstzerobit=7)  then
-          exit;
-        { the number of bytes belonging to this code point is
-          7-(pos first 0-bit).
-        }
-        result:=7-firstzerobit;
-      end;
-
-      var
-        P2: PChar;
-        I, J, K: SizeInt;
+        p2: pchar;
+        i, chars, codepointlen: sizeint;
       begin
 {$IFDEF FPC_HAS_CPSTRING}
-        if L = 0 then
-         begin
-          Result := 0;
-          Exit;
-         end;
+        if len=0 then
+          begin
+            result:=0;
+            exit;
+          end;
 { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1)
   should be equal to the amount of characters in the source string. }
-        if DefaultSystemCodepage = CP_UTF8 then
+        if defaultsystemcodepage=cp_utf8 then
 { ChangeCodePage does not work for UTF-8 apparently... :-( }
-         begin
-          I := 1;
-          J := 0;
-          while I <= L do
-           begin
-            K := Utf8CodePointLength (byte (P^));
-            Inc (I, K);
-            Inc (P, K);
-            Inc (J);
-           end;
-          Result := J;
-         end
-        else if CPAvailable (DefaultSystemCodepage) then
-         begin
-          GetMem (P2, Succ (L));
-          FillChar (P2^, Succ (L), 0);
-          ChangeCodePage (P, L, DefaultSystemCodepage, P2, 28591);
-          Result := StrLen (P2);
-          FreeMem (P2, Succ (L));
-         end
+          begin
+            i:=1;
+            chars:=0;
+            while i<=len do
+              begin
+                codepointlen:=utf8codepointlen(p,len-i+1,true);
+                inc(i,codepointlen);
+                inc(p,codepointlen);
+                inc(chars);
+              end;
+            result:=chars;
+          end
+        else if cpavailable(defaultsystemcodepage) then
+          begin
+            getmem(p2,succ(len));
+            fillchar(p2^,succ(len),0);
+            changecodepage(p,len,defaultsystemcodepage,p2,28591);
+            result:=strlen(p2);
+            freemem(p2,succ(len));
+          end
         else
-         Result := L;
+          result:=len;
 {$ELSE FPC_HAS_CPSTRING}
-        Result := L;
+        result:=len;
 {$ENDIF FPC_HAS_CPSTRING}
       end;
 
-    function CharLength (const S: string): SizeInt;
+    function charlength(const s: string): sizeint;
       begin
-        Result := CharLength (@S [1], Length (S));
+        result:=charlength(@s[1],length(s));
       end;
 
 end.