123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498 |
- {
- This file is part of the Free Component Library
- XML utility routines.
- Copyright (c) 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit xmlutils;
- {$mode objfpc}
- {$H+}
- interface
- uses
- SysUtils;
- function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean; overload;
- function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
- function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean;
- function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean;
- function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean;
- function IsValidXmlEncoding(const Value: WideString): Boolean;
- function Xml11NamePages: PByteArray;
- procedure NormalizeSpaces(var Value: WideString);
- function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
- { beware, works in ASCII range only }
- function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
- { a simple hash table with WideString keys }
- type
- PPHashItem = ^PHashItem;
- PHashItem = ^THashItem;
- THashItem = record
- Key: WideString;
- HashValue: LongWord;
- Next: PHashItem;
- Data: TObject;
- end;
- THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
- THashTable = class(TObject)
- private
- FCount: LongWord;
- FBucketCount: LongWord;
- FBucket: PPHashItem;
- FOwnsObjects: Boolean;
- function Lookup(Key: PWideChar; KeyLength: Integer; var Found: Boolean; CanCreate: Boolean): PHashItem;
- procedure Resize(NewCapacity: LongWord);
- public
- constructor Create(InitSize: Integer; OwnObjects: Boolean);
- destructor Destroy; override;
- procedure Clear;
- function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
- function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
- function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
- function Get(Key: PWideChar; KeyLen: Integer): TObject;
- function Remove(Entry: PHashItem): Boolean;
- procedure ForEach(proc: THashForEach; arg: Pointer);
- property Count: LongWord read FCount;
- end;
- {$i names.inc}
- implementation
- var
- Xml11Pg: PByteArray = nil;
- function Xml11NamePages: PByteArray;
- var
- I: Integer;
- p: PByteArray;
- begin
- if Xml11Pg = nil then
- begin
- GetMem(p, 512);
- for I := 0 to 255 do
- p^[I] := ord(Byte(I) in Xml11HighPages);
- p^[0] := 2;
- p^[3] := $2c;
- p^[$20] := $2a;
- p^[$21] := $2b;
- p^[$2f] := $29;
- p^[$30] := $2d;
- p^[$fd] := $28;
- p^[$ff] := $30;
- Move(p^, p^[256], 256);
- p^[$100] := $19;
- p^[$103] := $2E;
- p^[$120] := $2F;
- Xml11Pg := p;
- end;
- Result := Xml11Pg;
- end;
- function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
- begin
- if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
- begin
- Inc(Index);
- Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
- end
- else
- Result := False;
- end;
- function IsXml11Char(const Value: WideString; var Index: Integer): Boolean; overload;
- begin
- if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
- begin
- Inc(Index);
- Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
- end
- else
- Result := False;
- end;
- function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean;
- begin
- Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
- end;
- function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
- var
- Pages: PByteArray;
- I: Integer;
- begin
- Result := False;
- if Xml11 then
- Pages := Xml11NamePages
- else
- Pages := @NamePages;
- I := 0;
- if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
- (Value[I] = ':') or
- (Xml11 and IsXml11Char(Value, I))) then
- Exit;
- Inc(I);
- while I < Len do
- begin
- if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
- (Value[I] = ':') or
- (Xml11 and IsXml11Char(Value, I))) then
- Exit;
- Inc(I);
- end;
- Result := True;
- end;
- function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean;
- var
- Pages: PByteArray;
- I: Integer;
- Offset: Integer;
- begin
- if Xml11 then
- Pages := Xml11NamePages
- else
- Pages := @NamePages;
- Result := False;
- if Value = '' then
- Exit;
- I := 1;
- Offset := 0;
- while I <= Length(Value) do
- begin
- if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
- (Value[I] = ':') or
- (Xml11 and IsXml11Char(Value, I))) then
- begin
- if (I = Length(Value)) or (Value[I] <> #32) then
- Exit;
- Offset := 0;
- Inc(I);
- Continue;
- end;
- Offset := $100;
- Inc(I);
- end;
- Result := True;
- end;
- function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean;
- var
- I: Integer;
- Pages: PByteArray;
- begin
- if Xml11 then
- Pages := Xml11NamePages
- else
- Pages := @NamePages;
- Result := False;
- if Value = '' then
- Exit;
- I := 1;
- while I <= Length(Value) do
- begin
- if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
- (Value[I] = ':') or
- (Xml11 and IsXml11Char(Value, I))) then
- Exit;
- Inc(I);
- end;
- Result := True;
- end;
- function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean;
- var
- I: Integer;
- Pages: PByteArray;
- begin
- if Xml11 then
- Pages := Xml11NamePages
- else
- Pages := @NamePages;
- I := 1;
- Result := False;
- if Value = '' then
- Exit;
- while I <= Length(Value) do
- begin
- if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
- (Value[I] = ':') or
- (Xml11 and IsXml11Char(Value, I))) then
- begin
- if (I = Length(Value)) or (Value[I] <> #32) then
- Exit;
- end;
- Inc(I);
- end;
- Result := True;
- end;
- function IsValidXmlEncoding(const Value: WideString): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
- Exit;
- for I := 2 to Length(Value) do
- if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
- Exit;
- Result := True;
- end;
- procedure NormalizeSpaces(var Value: WideString);
- var
- I, J: Integer;
- begin
- I := Length(Value);
- // speed: trim only whed needed
- if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
- Value := Trim(Value);
- I := 1;
- while I < Length(Value) do
- begin
- if Value[I] = #32 then
- begin
- J := I+1;
- while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
- if J-I > 1 then Delete(Value, I+1, J-I-1);
- end;
- Inc(I);
- end;
- end;
- function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
- var
- counter: Integer;
- c1, c2: Word;
- begin
- counter := 0;
- result := 0;
- if Len = 0 then
- exit;
- repeat
- c1 := ord(S1[counter]);
- c2 := ord(S2[counter]);
- if (c1 = 0) or (c2 = 0) then break;
- if c1 <> c2 then
- begin
- if c1 in [97..122] then
- Dec(c1, 32);
- if c2 in [97..122] then
- Dec(c2, 32);
- if c1 <> c2 then
- Break;
- end;
- Inc(counter);
- until counter >= Len;
- result := c1 - c2;
- end;
- function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
- begin
- Result := InitValue;
- while KeyLen <> 0 do
- begin
- Result := Result * $F4243 xor ord(Key^);
- Inc(Key);
- Dec(KeyLen);
- end;
- end;
- function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
- begin
- Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
- end;
- { THashTable }
- constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
- var
- I: Integer;
- begin
- inherited Create;
- FOwnsObjects := OwnObjects;
- I := 256;
- while I < InitSize do I := I shl 1;
- FBucketCount := I;
- FBucket := AllocMem(I * sizeof(PHashItem));
- end;
- destructor THashTable.Destroy;
- begin
- Clear;
- FreeMem(FBucket);
- inherited Destroy;
- end;
- procedure THashTable.Clear;
- var
- I: Integer;
- item, next: PHashItem;
- begin
- for I := 0 to FBucketCount-1 do
- begin
- item := FBucket[I];
- while Assigned(item) do
- begin
- next := item^.Next;
- if FOwnsObjects then
- item^.Data.Free;
- Dispose(item);
- item := next;
- end;
- end;
- FillChar(FBucket^, FBucketCount * sizeof(PHashItem), 0);
- end;
- function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
- var
- Dummy: Boolean;
- begin
- Result := Lookup(Key, KeyLen, Dummy, False);
- end;
- function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
- var Found: Boolean): PHashItem;
- begin
- Result := Lookup(Key, KeyLen, Found, True);
- end;
- function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
- var
- Dummy: Boolean;
- begin
- Result := Lookup(Key, KeyLen, Dummy, True);
- end;
- function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
- var
- e: PHashItem;
- Dummy: Boolean;
- begin
- e := Lookup(Key, KeyLen, Dummy, False);
- if Assigned(e) then
- Result := e^.Data
- else
- Result := nil;
- end;
- function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
- var Found: Boolean; CanCreate: Boolean): PHashItem;
- var
- Entry: PPHashItem;
- h: LongWord;
- begin
- h := Hash(0, Key, KeyLength);
- Entry := @FBucket[h mod FBucketCount];
- while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
- Entry := @Entry^^.Next;
- Found := Assigned(Entry^);
- if Found or (not CanCreate) then
- begin
- Result := Entry^;
- Exit;
- end;
- if FCount > FBucketCount then { arbitrary limit, probably too high }
- begin
- Resize(FBucketCount * 2);
- Result := Lookup(Key, KeyLength, Found, CanCreate);
- end
- else
- begin
- New(Result);
- SetString(Result^.Key, Key, KeyLength);
- Result^.HashValue := h;
- Result^.Data := nil;
- Result^.Next := nil;
- Inc(FCount);
- Entry^ := Result;
- end;
- end;
- procedure THashTable.Resize(NewCapacity: LongWord);
- var
- p, chain: PPHashItem;
- i: Integer;
- e, n: PHashItem;
- begin
- p := AllocMem(NewCapacity * sizeof(PHashItem));
- for i := 0 to FBucketCount-1 do
- begin
- e := FBucket[i];
- while Assigned(e) do
- begin
- chain := @p[e^.HashValue mod NewCapacity];
- n := e^.Next;
- e^.Next := chain^;
- chain^ := e;
- e := n;
- end;
- end;
- FBucketCount := NewCapacity;
- FreeMem(FBucket);
- FBucket := p;
- end;
- function THashTable.Remove(Entry: PHashItem): Boolean;
- var
- chain: PPHashItem;
- begin
- chain := @FBucket[Entry^.HashValue mod FBucketCount];
- while Assigned(chain^) do
- begin
- if chain^ = Entry then
- begin
- chain^ := Entry^.Next;
- if FOwnsObjects then
- Entry^.Data.Free;
- Dispose(Entry);
- Dec(FCount);
- Result := True;
- Exit;
- end;
- chain := @chain^^.Next;
- end;
- Result := False;
- end;
- procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
- var
- i: Integer;
- e: PHashItem;
- begin
- for i := 0 to FBucketCount-1 do
- begin
- e := FBucket[i];
- while Assigned(e) do
- begin
- if not proc(e, arg) then
- Exit;
- e := e^.Next;
- end;
- end;
- end;
- initialization
- finalization
- if Assigned(Xml11Pg) then
- FreeMem(Xml11Pg);
- end.
|