123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- program testnfd;
- {
- This program tests the "NormalizeNFD" with the Unicode provided test file.
- The test file "NormalizationTest.txt" is to find in the Unicode Character
- Database.
- }
- {$mode objfpc}{$H+}
- uses
- SysUtils, Classes, Math, unicodedata;
- type
- TDataPartLine = record
- c1, c2, c3, c4, c5 : UCS4String;
- end;
- PDataPartLine = ^TDataPartLine;
- TDataPart = record
- Part : AnsiString;
- Lines : array of TDataPartLine;
- ActualLength : Integer;
- end;
- PDataPart = ^TDataPart;
-
- const
- LINE_LENGTH = 1024;
- DEFAULT_DATA_LINE_LENGTH = 25000;
- var
- p : PAnsiChar;
- bufferLength, bufferPos, lineLength, linePos : Integer;
- line : ansistring;
- totalErrorCount : Integer = 0;
- lineCount, errorCount : Integer;
- stream : TMemoryStream;
- part : ansistring;
- c1, c2, c3, c4, c5 : UCS4String;
- s1, s2, s3, s4, s5 : UnicodeString;
- dataList : array of TDataPart;
- dataListActualLength : Integer;
- pp, part1 : PDataPart;
- function NextLine() : Boolean;
- var
- locOldPos : Integer;
- locOldPointer : PAnsiChar;
- begin
- Result := False;
- locOldPointer := p;
- locOldPos := bufferPos;
- while (bufferPos < bufferLength) and (p^ <> #10) do begin
- Inc(p);
- Inc(bufferPos);
- end;
- if (locOldPos = bufferPos) and (p^ = #10) then begin
- lineLength := 0;
- Inc(p);
- Inc(bufferPos);
- linePos := 1;
- Result := True;
- end else if (locOldPos < bufferPos) then begin
- lineLength := (bufferPos - locOldPos) + 1;
- Move(locOldPointer^,line[1],lineLength);
- if (p^ = #10) then begin
- Dec(lineLength);
- Inc(p);
- Inc(bufferPos);
- end;
- linePos := 1;
- Result := True;
- end;
- if Result then
- Inc(lineCount);
- end;
- procedure SkipSpace();
- begin
- while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
- Inc(linePos);
- end;
- function NextToken() : ansistring;
- const C_SEPARATORS = [';','#','.','[',']','*','@'];
- var
- k : Integer;
- begin
- SkipSpace();
- k := linePos;
- if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
- Result := line[linePos];
- Inc(linePos);
- exit;
- end;
- while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
- Inc(linePos);
- if (linePos > k) then begin
- if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
- Result := Copy(line,k,(linePos-k))
- else
- Result := Copy(line,k,(linePos-k+1));
- Result := Trim(Result);
- end else begin
- Result := '';
- end;
- end;
- function ParseLine() : Integer;
- var
- t : UCS4String;
- r : array[0..23] of UCS4String;
- rc, k : Integer;
- s : ansistring;
- begin
- rc := 0;
- SetLength(c1,0);
- SetLength(c2,0);
- SetLength(c3,0);
- SetLength(c4,0);
- SetLength(c5,0);
- SetLength(t,0);
- while (rc < Length(r)) do begin
- s := NextToken();
- if (s = '#') then
- break;
- if (s = '@') then begin
- part := NextToken();
- rc := 0;
- continue;
- end;
- if (s = '') or (s[1] = '#') then
- Break;
- if (s <> ';') then begin
- k := Length(t);
- SetLength(t,(k+1));
- t[k] := StrToInt('$' + s);
- end else if (s = ';') then begin
- k := Length(t);
- SetLength(t,(k+1));
- t[k] := 0;
- r[rc] := Copy(t);
- SetLength(t,0);
- Inc(rc);
- end;
- end;
- c1 := r[0]; s1 := UCS4StringToUnicodeString(c1);
- c2 := r[1]; s2 := UCS4StringToUnicodeString(c2);
- c3 := r[2]; s3 := UCS4StringToUnicodeString(c3);
- c4 := r[3]; s4 := UCS4StringToUnicodeString(c4);
- c5 := r[4]; s5 := UCS4StringToUnicodeString(c5);
- Result := rc;
- end;
- procedure AddDataLine();
- var
- k : Integer;
- p : PDataPart;
- pline : PDataPartLine;
- begin
- p := nil;
- for k := Low(dataList) to High(dataList) do begin
- if (dataList[k].Part = part) then begin
- p := @dataList[k];
- break;
- end;
- end;
- if (p = nil) then begin
- k := dataListActualLength;
- if (k >= Length(dataList)) then
- SetLength(dataList,(k+5));
- dataListActualLength := k+1;
- p := @dataList[k];
- p^.Part := part;
- end;
- k := p^.ActualLength;
- if (k >= Length(p^.Lines)) then
- SetLength(p^.Lines,(k+DEFAULT_DATA_LINE_LENGTH));
- pline := @p^.Lines[k];
- pline^.c1 := c1;
- pline^.c2 := c2;
- pline^.c3 := c3;
- pline^.c4 := c4;
- pline^.c5 := c5;
- p^.ActualLength := k+1;
- c1 := nil;
- c2 := nil;
- c3 := nil;
- c4 := nil;
- c5 := nil;
- end;
- function IsInPart(ACodePoint : UCS4Char; APart : PDataPart) : boolean;
- var
- k : Integer;
- pline : PDataPartLine;
- begin
- pline := @APart^.Lines[0];
- for k := 0 to APart^.ActualLength-1 do begin
- if (Length(pline^.c1) = 2) and (pline^.c1[0] = ACodePoint) then
- exit(True);
- Inc(pline);
- end;
- Result := False;
- end;
- procedure Prepare();
- begin
- bufferLength := stream.Size;
- bufferPos := 0;
- p := stream.Memory;
- lineLength := 0;
- SetLength(line,LINE_LENGTH);
- SetLength(dataList,10);
- dataListActualLength := 0;
- end;
- procedure TestLines();
- var
- lineErrors : Integer;
- begin
- while NextLine() do begin
- if (ParseLine() < 5) then
- continue;
- AddDataLine();
- lineErrors := 0;
- //c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
- if (NormalizeNFD(s1) <> s3) then
- lineErrors := lineErrors+1;
- if (NormalizeNFD(s2) <> s3) then
- lineErrors := lineErrors+1;
- if (NormalizeNFD(s3) <> s3) then
- Inc(errorCount);
- //c5 == toNFD(c4) == toNFD(c5)
- if (NormalizeNFD(s4) <> s5) then
- lineErrors := lineErrors+1;
- if (NormalizeNFD(s5) <> s5) then
- lineErrors := lineErrors+1;
- if (lineErrors <> 0) then
- errorCount := errorCount+lineErrors;
- end;
- end;
-
- {$IFDEF ALL_CODE_POINTS}
- procedure TestBmpCodePoints();
- var
- cp : Word;
- s : UnicodeString;
- pu : PUC_Prop;
- begin
- SetLength(s,1);
- for cp := Low(Word) to High(Word) do begin
- pu := GetProps(cp);
- if (pu^.Category <> UGC_Unassigned) and (pu^.Category <> UGC_Surrogate) and
- not(IsInPart(cp,part1))
- then begin
- //X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
- PWord(@s[1])^ := cp;
- if (NormalizeNFD(s) <> s) then
- errorCount := errorCount+1;
- end;
- end;
- end;
- procedure TestOBmpCodePoints();
- var
- cp : UCS4Char;
- s : UnicodeString;
- pu : PUC_Prop;
- begin
- SetLength(s,2);
- s[1] := 'a'; s[2] := 'a';
- for cp := High(Word)+1 to MAX_LEGAL_UTF32 do begin
- pu := GetProps(cp);
- if (pu^.Category <> UGC_Unassigned) and (pu^.Category <> UGC_Surrogate) and
- not(IsInPart(cp,part1))
- then begin
- //X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
- FromUCS4(cp,s[1],s[2]);
- if (NormalizeNFD(s) <> s) then
- errorCount := errorCount+1;
- end;
- end;
- end;
- {$ENDIF ALL_CODE_POINTS}
- var
- i, c : Integer;
- begin
- errorCount := 0;
- lineCount := 0;
- stream := TMemoryStream.Create();
- try
- stream.LoadFromFile('NormalizationTest.txt');
- Prepare();
- // Direct tests specified in NormalizationTest.txt
- TestLines();
- part1 := nil;
- c := 0;
- for i := 0 to dataListActualLength-1 do begin
- pp := @dataList[i];
- if (Length(pp^.Lines) <> pp^.ActualLength) then
- SetLength(pp^.Lines,pp^.ActualLength);
- c := c+pp^.ActualLength;
- if SameText(pp^.Part,'Part1') then
- part1 := pp;
- end;
- if (part1 = nil) then
- raise Exception.Create('"Part1" not found !');
- { $DEFINE ALL_CODE_POINTS}
- {$IFDEF ALL_CODE_POINTS}
- // Tests for BMP Codepoints not is PART1
- TestBmpCodePoints();
- // Tests for BMP Codepoints not is PART1
- TestOBmpCodePoints();
- {$ENDIF ALL_CODE_POINTS}
- WriteLn('Line Count = ',lineCount);
- WriteLn('Actual Test Line Count = ',c);
- WriteLn('Error Count = ',errorCount);
- Inc(totalErrorCount,errorCount);
- finally
- stream.Free();
- end;
- if (totalErrorCount > 0) then begin
- WriteLn('Failed.');
- Halt(1);
- end;
- end.
|