Browse Source

* Test from Inoussa to test NormalizeNFD

Michaël Van Canneyt 3 years ago
parent
commit
1619bef4a0
1 changed files with 329 additions and 0 deletions
  1. 329 0
      packages/rtl-unicode/tests/testnfd.lpr

+ 329 - 0
packages/rtl-unicode/tests/testnfd.lpr

@@ -0,0 +1,329 @@
+program testnfd;
+{       
+  This program tests the "NormalizeNFD" with the Unicode provided test file.
+  The test file "NormalizationTest.txt" is to find in the Unicode Character
+  Database.
+}
+
+{$mode objfpc}{$H+}
+
+uses
+  SysUtils, Classes, Math, unicodedata;
+
+type
+  TDataPartLine = record
+    c1, c2, c3, c4, c5 : UCS4String;
+  end;
+  PDataPartLine = ^TDataPartLine;
+
+  TDataPart = record
+    Part : AnsiString;
+    Lines : array of TDataPartLine;
+    ActualLength : Integer;
+  end;
+  PDataPart = ^TDataPart;
+   
+const
+  LINE_LENGTH         = 1024;
+  DEFAULT_DATA_LINE_LENGTH = 25000;
+
+var
+  p : PAnsiChar;
+  bufferLength, bufferPos, lineLength, linePos : Integer;
+  line : ansistring;          
+  totalErrorCount : Integer = 0;
+  lineCount, errorCount : Integer;
+  stream : TMemoryStream;
+  part : ansistring;
+  c1, c2, c3, c4, c5 : UCS4String;
+  s1, s2, s3, s4, s5 : UnicodeString;
+  dataList : array of TDataPart;
+  dataListActualLength : Integer;
+  pp, part1 : PDataPart;
+
+function NextLine() : Boolean;
+var
+  locOldPos : Integer;
+  locOldPointer : PAnsiChar;
+begin
+  Result := False;
+  locOldPointer := p;
+  locOldPos := bufferPos;
+  while (bufferPos < bufferLength) and (p^ <> #10) do begin
+    Inc(p);
+    Inc(bufferPos);
+  end;
+  if (locOldPos = bufferPos) and (p^ = #10) then begin
+    lineLength := 0;
+    Inc(p);
+    Inc(bufferPos);
+    linePos := 1;
+    Result := True;
+  end else  if (locOldPos < bufferPos) then begin
+    lineLength := (bufferPos - locOldPos) + 1;
+    Move(locOldPointer^,line[1],lineLength);
+    if (p^ = #10) then begin
+      Dec(lineLength);
+      Inc(p);
+      Inc(bufferPos);
+    end;
+    linePos := 1;
+    Result := True;
+  end;
+  if Result then
+    Inc(lineCount);
+end;    
+
+  procedure SkipSpace();
+  begin
+    while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
+      Inc(linePos);
+  end;
+
+  function NextToken() : ansistring;
+  const C_SEPARATORS  = [';','#','.','[',']','*','@'];
+  var
+    k : Integer;
+  begin
+    SkipSpace();
+    k := linePos;
+    if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
+      Result := line[linePos];
+      Inc(linePos);
+      exit;
+    end;
+    while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
+      Inc(linePos);
+    if (linePos > k) then begin
+      if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
+        Result := Copy(line,k,(linePos-k))
+      else
+        Result := Copy(line,k,(linePos-k+1));
+      Result := Trim(Result);
+    end else begin
+      Result := '';
+    end;
+  end;
+
+  function ParseLine() : Integer;
+  var
+    t : UCS4String;
+    r : array[0..23] of UCS4String;
+    rc, k : Integer;
+    s : ansistring;
+  begin
+    rc := 0;
+    SetLength(c1,0);
+    SetLength(c2,0);
+    SetLength(c3,0);
+    SetLength(c4,0);
+    SetLength(c5,0);
+    SetLength(t,0);
+    while (rc < Length(r)) do begin
+      s := NextToken();
+      if (s = '#') then
+        break;
+      if (s = '@') then begin
+        part := NextToken();
+        rc := 0;
+        continue;
+      end;
+      if (s = '') or (s[1] = '#') then
+        Break;
+      if (s <> ';') then begin
+        k := Length(t);
+        SetLength(t,(k+1));
+        t[k] := StrToInt('$' + s);
+      end else if (s = ';') then begin
+        k := Length(t);
+        SetLength(t,(k+1));
+        t[k] := 0;
+        r[rc] := Copy(t);
+        SetLength(t,0);
+        Inc(rc);
+      end;
+    end;
+    c1 := r[0]; s1 := UCS4StringToUnicodeString(c1);
+    c2 := r[1]; s2 := UCS4StringToUnicodeString(c2);
+    c3 := r[2]; s3 := UCS4StringToUnicodeString(c3);
+    c4 := r[3]; s4 := UCS4StringToUnicodeString(c4);
+    c5 := r[4]; s5 := UCS4StringToUnicodeString(c5);
+    Result := rc;
+  end;
+
+  procedure AddDataLine();
+  var
+    k : Integer;
+    p : PDataPart;
+    pline : PDataPartLine;
+  begin
+    p := nil;
+    for k := Low(dataList) to High(dataList) do begin
+      if (dataList[k].Part = part) then begin
+        p := @dataList[k];
+        break;
+      end;
+    end;
+    if (p = nil) then begin
+      k := dataListActualLength;
+      if (k >= Length(dataList)) then
+        SetLength(dataList,(k+5)); 
+      dataListActualLength := k+1;
+      p := @dataList[k];
+      p^.Part := part;
+    end;
+
+    k := p^.ActualLength;
+    if (k >= Length(p^.Lines)) then
+      SetLength(p^.Lines,(k+DEFAULT_DATA_LINE_LENGTH));
+    pline := @p^.Lines[k];
+    pline^.c1 := c1;
+    pline^.c2 := c2;
+    pline^.c3 := c3;
+    pline^.c4 := c4;
+    pline^.c5 := c5; 
+    p^.ActualLength := k+1;
+    c1 := nil;
+    c2 := nil;
+    c3 := nil;
+    c4 := nil;
+    c5 := nil;
+  end;
+
+  function IsInPart(ACodePoint : UCS4Char; APart : PDataPart) : boolean;
+  var
+    k : Integer;
+    pline : PDataPartLine;
+  begin
+    pline := @APart^.Lines[0];
+    for k := 0 to APart^.ActualLength-1 do begin
+      if (Length(pline^.c1) = 2) and (pline^.c1[0] = ACodePoint) then
+        exit(True);
+      Inc(pline);
+    end;
+    Result := False;
+  end;
+
+  procedure Prepare();
+  begin
+    bufferLength := stream.Size;
+    bufferPos := 0;
+    p := stream.Memory;
+    lineLength := 0;
+    SetLength(line,LINE_LENGTH);
+    SetLength(dataList,10);
+    dataListActualLength := 0;
+  end;
+
+  procedure TestLines();
+  var
+    lineErrors : Integer;
+  begin    
+    while NextLine() do begin
+      if (ParseLine() < 5) then
+        continue;
+      AddDataLine();
+      lineErrors := 0;
+      //c3 ==  toNFD(c1) ==  toNFD(c2) ==  toNFD(c3)
+      if (NormalizeNFD(s1) <> s3) then
+        lineErrors := lineErrors+1;
+      if (NormalizeNFD(s2) <> s3) then
+        lineErrors := lineErrors+1;
+      if (NormalizeNFD(s3) <> s3) then
+        Inc(errorCount);
+      //c5 ==  toNFD(c4) ==  toNFD(c5)
+      if (NormalizeNFD(s4) <> s5) then
+        lineErrors := lineErrors+1;
+      if (NormalizeNFD(s5) <> s5) then
+        lineErrors := lineErrors+1;
+      if (lineErrors <> 0) then
+        errorCount := errorCount+lineErrors;
+    end;
+  end;
+       
+{$IFDEF ALL_CODE_POINTS}
+  procedure TestBmpCodePoints();
+  var
+    cp : Word;
+    s : UnicodeString;
+    pu : PUC_Prop;
+  begin
+    SetLength(s,1);
+    for cp := Low(Word) to High(Word) do begin
+      pu := GetProps(cp);
+      if (pu^.Category <> UGC_Unassigned) and (pu^.Category <> UGC_Surrogate) and
+         not(IsInPart(cp,part1))
+      then begin
+        //X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)  
+        PWord(@s[1])^ := cp;
+        if (NormalizeNFD(s) <> s) then
+          errorCount := errorCount+1;
+      end;
+    end;
+  end;       
+
+  procedure TestOBmpCodePoints();
+  var
+    cp : UCS4Char;
+    s : UnicodeString;
+    pu : PUC_Prop;
+  begin
+    SetLength(s,2);
+    s[1] := 'a'; s[2] := 'a';
+    for cp := High(Word)+1 to MAX_LEGAL_UTF32 do begin
+      pu := GetProps(cp);
+      if (pu^.Category <> UGC_Unassigned) and (pu^.Category <> UGC_Surrogate) and
+         not(IsInPart(cp,part1))
+      then begin
+        //X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
+        FromUCS4(cp,s[1],s[2]);
+        if (NormalizeNFD(s) <> s) then
+          errorCount := errorCount+1;
+      end;
+    end;
+  end;
+{$ENDIF ALL_CODE_POINTS}
+
+var
+  i, c : Integer;
+begin
+  errorCount := 0;
+  lineCount := 0;
+  stream := TMemoryStream.Create();
+  try
+    stream.LoadFromFile('NormalizationTest.txt');
+    Prepare();
+    // Direct tests specified in NormalizationTest.txt
+    TestLines();
+    part1 := nil;
+    c := 0;
+    for i := 0 to dataListActualLength-1 do begin
+      pp := @dataList[i];
+      if (Length(pp^.Lines) <> pp^.ActualLength) then
+        SetLength(pp^.Lines,pp^.ActualLength);
+      c := c+pp^.ActualLength;
+      if SameText(pp^.Part,'Part1') then
+        part1 := pp;
+    end;
+    if (part1 = nil) then
+      raise Exception.Create('"Part1" not found !');  
+{ $DEFINE ALL_CODE_POINTS}
+{$IFDEF ALL_CODE_POINTS}
+    // Tests for BMP Codepoints not is PART1
+    TestBmpCodePoints();     
+    // Tests for BMP Codepoints not is PART1
+    TestOBmpCodePoints();   
+{$ENDIF ALL_CODE_POINTS}
+    WriteLn('Line Count = ',lineCount);
+    WriteLn('Actual Test Line Count = ',c);
+    WriteLn('Error Count = ',errorCount);
+    Inc(totalErrorCount,errorCount);
+  finally
+    stream.Free();
+  end; 
+  if (totalErrorCount > 0) then begin
+    WriteLn('Failed.');
+    Halt(1);
+  end;
+end.
+