| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497 | {   UnicodeSet implementation.    Copyright (c) 2013-2015 by Inoussa OUEDRAOGO    The source code is distributed under the Library GNU    General Public License with the following modification:        - object files and libraries linked into an application may be          distributed without source code.    If you didn't receive a copy of the file COPYING, contact:          Free Software Foundation          675 Mass Ave          Cambridge, MA  02139          USA    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.}unit unicodeset;{$mode delphi}{$H+}{$scopedenums on}interfaceuses  SysUtils,  grbtree, helper;type  EUnicodeSetException = class(Exception)  end;  TUnicodeSet = class;  { TPatternParser }  TPatternParser = class  private    FBufferStr : UnicodeString;    FBuffer : PUnicodeChar;    FBufferLength : Integer;    FSet : TUnicodeSet;    FPosition : Integer;    FSpecialChar: Boolean;  private    procedure Error(const AMsg : string; const AArgs : array of const);overload;inline;    procedure Error(const AMsg : string);overload;inline;    procedure SetBuffer(const APattern : PUnicodeChar; const ALength : Integer);    procedure CheckEOF();inline;overload;    procedure CheckEOF(ALength : Integer);overload;inline;    procedure UnexpectedEOF();inline;    function IsThis(AItem : UnicodeString; const APosition : Integer) : Boolean;overload;    function IsThis(AItem : UnicodeString) : Boolean;overload;inline;    procedure Expect(AItem : UnicodeString; const APosition : Integer);overload;inline;    procedure Expect(AItem : UnicodeString);overload;inline;    procedure SkipSpaces();inline;    function NextChar() : TUnicodeCodePoint;    procedure ParseItem();    procedure DoParse();    property SpecialChar : Boolean read FSpecialChar;  public    procedure Parse(const APattern : PUnicodeChar; const ALength : Integer);overload;    procedure Parse(const APattern : UnicodeString);overload;inline;    property CurrentSet : TUnicodeSet read FSet write FSet;  end;  TUnicodeCodePointArrayComparator = class  public    // Return    //    * if A>B then  1    //    * if A=B then  0    //    * if A<B then -1    class function Compare(const A, B : TUnicodeCodePointArray) : Integer;static;inline;  end;  { TUnicodeSet }  TUnicodeSet = class  private type      TItem = TUnicodeCodePointArray;      TTree = TRBTree<TItem,TUnicodeCodePointArrayComparator>;  public type    TIterator = TTree.TIterator;  private    FTree : TTree;    FParser : TPatternParser;  private    procedure CreateParser();inline;    function InternalContains(const AString : UnicodeString) : Boolean;overload;  public    constructor Create();    destructor Destroy;override;    procedure Add(AChar : TUnicodeCodePoint);inline;overload;    procedure Add(AString : TUnicodeCodePointArray);inline;overload;    procedure AddRange(const AStart, AEnd : TUnicodeCodePoint);inline;    procedure AddPattern(const APattern : UnicodeString);inline;overload;    procedure AddPattern(const APattern : RawByteString);inline;overload;    function CreateIterator() : TIterator;    function Contains(const AString : array of TUnicodeCodePoint) : Boolean;overload;    function Contains(const AChar : TUnicodeCodePoint) : Boolean;inline;overload;    function Contains(const AChar : UnicodeChar) : Boolean;inline;overload;    function Contains(const AChar : AnsiChar) : Boolean;inline;overload;    function Contains(const AString : UnicodeString) : Boolean;overload;    function Contains(const AString : RawByteString) : Boolean;overload;  end;resourcestring  SInvalidLength = 'Invalid length value : "%d".';  SInvalidPosition = 'Invalid position : "%d".';  SInvalidRangeLimits = 'Invalid range limits : ["%x" , "%x"].';  SExpectedBut = 'Expects "%s" but got "%s..." .';  SUnexpectedEOF = 'Unexpected end of file.';implementationuses  unicodedata;function ToArray(const AItem : TUnicodeCodePoint) : TUnicodeCodePointArray;inline;begin  SetLength(Result,1);  Result[Low(Result)] := AItem;end;function CompareItem(const Item1, Item2 : TUnicodeCodePointArray): Integer;var  a, b : ^TUnicodeCodePoint;  i, ha, hb : Integer;begin  if (Pointer(Item1) = Pointer(Item2)) then    exit(0);  if (Item1 = nil) then    exit(-1);  if (Item2 = nil) then    exit(1);  a := @Item1[0];  b := @Item2[0];  Result := 1;  ha := Length(Item1) - 1;  hb := Length(Item2) - 1;  for i := 0 to ha do begin    if (i > hb) then      exit;    if (a^ < b^) then      exit(-1);    if (a^ > b^) then      exit(1);    Inc(a);    Inc(b);  end;  if (ha = hb) then    exit(0);  exit(-1);end;{ TUnicodeCodePointArrayComparator }class function TUnicodeCodePointArrayComparator.Compare(const A, B : TUnicodeCodePointArray): Integer;begin  Result := CompareItem(A,B);end;{ TPatternParser }procedure TPatternParser.Error(const AMsg: string; const AArgs: array of const);begin  raise EUnicodeSetException.CreateFmt(AMsg,AArgs);end;procedure TPatternParser.Error(const AMsg: string);begin  raise EUnicodeSetException.Create(AMsg);end;procedure TPatternParser.SetBuffer(  const APattern : PUnicodeChar;  const ALength  : Integer);begin  FPosition := 0;  if (ALength <= 1) then begin    FBufferStr := '';    FBuffer := nil;    FBufferLength := 0;    exit;  end;  FBufferLength := ALength;  SetLength(FBufferStr,FBufferLength);  FBuffer := @FBufferStr[1];  Move(APattern^,FBuffer^,(FBufferLength*SizeOf(FBuffer^)));end;procedure TPatternParser.CheckEOF();begin  CheckEOF(0);end;procedure TPatternParser.CheckEOF(ALength : Integer);begin  if (ALength < 0) then    Error(SInvalidLength,[ALength]);  if ((FPosition+ALength) >= FBufferLength) then    UnexpectedEOF();end;procedure TPatternParser.UnexpectedEOF();begin  Error(SUnexpectedEOF);end;function TPatternParser.IsThis(AItem: UnicodeString; const APosition: Integer): Boolean;var  i, k, c : Integer;begin  if (APosition < 0) then    Error(SInvalidPosition,[APosition]);  Result := False;  c := Length(AItem);  if (c = 0) then    exit;  i := APosition;  k := i + c;  if (k >= FBufferLength) then    exit;  if CompareMem(@AItem[1], @FBuffer[APosition],c) then    Result := True;end;function TPatternParser.IsThis(AItem : UnicodeString) : Boolean;begin  Result := IsThis(AItem,FPosition);end;procedure TPatternParser.Expect(AItem: UnicodeString; const APosition: Integer);begin  if not IsThis(AItem,APosition) then    Error(SExpectedBut,[AItem,Copy(FBuffer,APosition,Length(AItem))]);end;procedure TPatternParser.Expect(AItem: UnicodeString);begin  Expect(AItem,FPosition);end;procedure TPatternParser.SkipSpaces();begin  while (FPosition < FBufferLength) do begin    if (FBuffer[FPosition] <> ' ') then      Break;    Inc(FPosition);  end;end;function TPatternParser.NextChar(): TUnicodeCodePoint;var  i : Integer;  c : UnicodeChar;  cp : TUnicodeCodePoint;  s : UnicodeString;begin  SkipSpaces();  CheckEOF();  c := FBuffer[FPosition];  cp := Ord(c);  Inc(FPosition);  if (c = '\') and (FPosition < FBufferLength) then begin    if IsThis('\') then begin      Inc(FPosition);      CheckEOF();      cp := Ord(FBuffer[FPosition]);      Inc(FPosition);    end else if IsThis('u') then begin      Inc(FPosition);      CheckEOF(4);      s := Copy(FBufferStr,(FPosition+1),4);      Inc(FPosition,4);      if not TryStrToInt(string('$'+s),i) then        Error(SExpectedBut,['\uXXXX',s]);      cp := i;    end;  end;  if (cp <= MAX_WORD) and UnicodeIsLowSurrogate(UnicodeChar(Word(cp))) then begin    SkipSpaces();    CheckEOF();    c := UnicodeChar(Word(cp));    if UnicodeIsSurrogatePair(c,FBuffer[FPosition]) then begin      cp := ToUCS4(c,FBuffer[FPosition]);      Inc(FPosition);    end;  end;  FSpecialChar := (cp = Ord('{')) or (cp = Ord('}'));  Result := cp;end;function CompareTo(const A : TUnicodeCodePoint; const B : UnicodeChar) : Boolean;inline;begin  Result := (A = Ord(B));end;procedure TPatternParser.ParseItem();var  cp, lastCp : TUnicodeCodePoint;  charCount, k : Integer;  cpa : TUnicodeCodePointArray;begin  SkipSpaces();  Expect('[');  charCount := 0;  Inc(FPosition);  cp:=0;  while (FPosition < FBufferLength) do begin    lastCp := cp;    cp := NextChar();    if CompareTo(cp,']') then      Break;    if SpecialChar and (cp = Ord('{')) then begin      SetLength(cpa,12);      k := 0;      while True do begin        cp := NextChar();        if SpecialChar and (cp = Ord('}')) then          break;        if (k >= Length(cpa)) then          SetLength(cpa,(2*k));        cpa[k] := cp;        k := k+1;      end;      if (k > 0) then begin        SetLength(cpa,k);        FSet.Add(cpa);      end;    end else begin      if CompareTo(cp,'-') then begin        if (charCount = 0) then          Error(SExpectedBut,['<char>','-']);        cp := NextChar();        FSet.AddRange(lastCp,cp);      end else begin        FSet.Add(cp);      end;    end;    Inc(charCount);  end;end;procedure TPatternParser.DoParse();begin  SkipSpaces();  while (FPosition < FBufferLength) do begin    ParseItem();    SkipSpaces();  end;end;procedure TPatternParser.Parse(const APattern: PUnicodeChar; const ALength: Integer);begin  if (ALength < 2) then    exit;  SetBuffer(APattern,ALength);  DoParse();end;procedure TPatternParser.Parse(const APattern : UnicodeString);begin  Parse(@APattern[1],Length(APattern));end;{ TUnicodeSet }procedure TUnicodeSet.CreateParser();begin  if (FParser = nil) then begin    FParser := TPatternParser.Create();    FParser.CurrentSet := Self;  end;end;function TUnicodeSet.InternalContains(const AString: UnicodeString): Boolean;var  u4 : UCS4String;  c, i : Integer;  cpa : TUnicodeCodePointArray;begin  u4 := UnicodeStringToUCS4String(AString);  c := Length(u4)-1;  if (c = 1) then    exit(Contains(u4[0]));  SetLength(cpa,c);  for i := 0 to c-1 do    cpa[i] := u4[i];  Result := Contains(cpa);end;constructor TUnicodeSet.Create;begin  FTree := TTree.Create();end;destructor TUnicodeSet.Destroy;begin  FParser.Free();  FTree.Free();  inherited Destroy;end;procedure TUnicodeSet.Add(AChar: TUnicodeCodePoint);begin  FTree.Insert(ToArray(AChar));end;procedure TUnicodeSet.Add(AString: TUnicodeCodePointArray);begin  if (AString <> nil) then    FTree.Insert(AString);end;procedure TUnicodeSet.AddRange(const AStart, AEnd : TUnicodeCodePoint);var  i : Integer;begin  if (AStart > AEnd) then    raise EUnicodeSetException.CreateFmt(SInvalidRangeLimits,[AStart,AEnd]);  for i := AStart to AEnd do    Add(i);end;procedure TUnicodeSet.AddPattern(const APattern : UnicodeString);begin  CreateParser();  FParser.Parse(APattern);end;procedure TUnicodeSet.AddPattern(const APattern: RawByteString);var  us : UnicodeString;begin  us := UnicodeString(APattern);  AddPattern(us);end;function TUnicodeSet.CreateIterator() : TIterator;begin  Result := FTree.CreateForwardIterator();end;function TUnicodeSet.Contains(const AString : array of TUnicodeCodePoint) : Boolean;var  c : Integer;  x : TUnicodeCodePointArray;begin  Result := False;  c := Length(AString);  if (c = 0) then    exit;  SetLength(x,c);  Move(AString[Low(AString)],x[Low(x)],(c*SizeOf(x[0])));  if (FTree.FindNode(x) <> nil) then    Result := True;end;function TUnicodeSet.Contains(const AChar : TUnicodeCodePoint) : Boolean;begin  Result := Contains([AChar]);end;function TUnicodeSet.Contains(const AChar : UnicodeChar) : Boolean;begin  Result := Contains(TUnicodeCodePoint(Ord(AChar)));end;function TUnicodeSet.Contains(const AChar : AnsiChar) : Boolean;begin  Result := Contains(TUnicodeCodePoint(Ord(AChar)));end;function TUnicodeSet.Contains(const AString: UnicodeString): Boolean;begin  if (AString = '') then    exit(Contains([]));  if (Length(AString) = 1) then    exit(Contains(AString[1]));  Result := InternalContains(AString);end;function TUnicodeSet.Contains(const AString: RawByteString): Boolean;var  us : UnicodeString;begin  us := UnicodeString(AString);  Result := Contains(us);end;end.
 |