xmlutils.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$mode objfpc}
  13. {$H+}
  14. interface
  15. uses
  16. SysUtils;
  17. function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean; overload;
  18. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  19. function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean;
  20. function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean;
  21. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean;
  22. function IsValidXmlEncoding(const Value: WideString): Boolean;
  23. function Xml11NamePages: PByteArray;
  24. procedure NormalizeSpaces(var Value: WideString);
  25. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  26. { beware, works in ASCII range only }
  27. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  28. { a simple hash table with WideString keys }
  29. type
  30. PPHashItem = ^PHashItem;
  31. PHashItem = ^THashItem;
  32. THashItem = record
  33. Key: WideString;
  34. HashValue: LongWord;
  35. Next: PHashItem;
  36. Data: TObject;
  37. end;
  38. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  39. THashTable = class(TObject)
  40. private
  41. FCount: LongWord;
  42. FBucketCount: LongWord;
  43. FBucket: PPHashItem;
  44. FOwnsObjects: Boolean;
  45. function Lookup(Key: PWideChar; KeyLength: Integer; var Found: Boolean; CanCreate: Boolean): PHashItem;
  46. procedure Resize(NewCapacity: LongWord);
  47. public
  48. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  49. destructor Destroy; override;
  50. procedure Clear;
  51. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  52. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  53. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  54. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  55. function Remove(Entry: PHashItem): Boolean;
  56. procedure ForEach(proc: THashForEach; arg: Pointer);
  57. property Count: LongWord read FCount;
  58. end;
  59. {$i names.inc}
  60. implementation
  61. var
  62. Xml11Pg: PByteArray = nil;
  63. function Xml11NamePages: PByteArray;
  64. var
  65. I: Integer;
  66. p: PByteArray;
  67. begin
  68. if Xml11Pg = nil then
  69. begin
  70. GetMem(p, 512);
  71. for I := 0 to 255 do
  72. p^[I] := ord(Byte(I) in Xml11HighPages);
  73. p^[0] := 2;
  74. p^[3] := $2c;
  75. p^[$20] := $2a;
  76. p^[$21] := $2b;
  77. p^[$2f] := $29;
  78. p^[$30] := $2d;
  79. p^[$fd] := $28;
  80. p^[$ff] := $30;
  81. Move(p^, p^[256], 256);
  82. p^[$100] := $19;
  83. p^[$103] := $2E;
  84. p^[$120] := $2F;
  85. Xml11Pg := p;
  86. end;
  87. Result := Xml11Pg;
  88. end;
  89. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  90. begin
  91. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  92. begin
  93. Inc(Index);
  94. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  95. end
  96. else
  97. Result := False;
  98. end;
  99. function IsXml11Char(const Value: WideString; var Index: Integer): Boolean; overload;
  100. begin
  101. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  102. begin
  103. Inc(Index);
  104. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  105. end
  106. else
  107. Result := False;
  108. end;
  109. function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean;
  110. begin
  111. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  112. end;
  113. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  114. var
  115. Pages: PByteArray;
  116. I: Integer;
  117. begin
  118. Result := False;
  119. if Xml11 then
  120. Pages := Xml11NamePages
  121. else
  122. Pages := @NamePages;
  123. I := 0;
  124. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
  125. (Value[I] = ':') or
  126. (Xml11 and IsXml11Char(Value, I))) then
  127. Exit;
  128. Inc(I);
  129. while I < Len do
  130. begin
  131. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  132. (Value[I] = ':') or
  133. (Xml11 and IsXml11Char(Value, I))) then
  134. Exit;
  135. Inc(I);
  136. end;
  137. Result := True;
  138. end;
  139. function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean;
  140. var
  141. Pages: PByteArray;
  142. I: Integer;
  143. Offset: Integer;
  144. begin
  145. if Xml11 then
  146. Pages := Xml11NamePages
  147. else
  148. Pages := @NamePages;
  149. Result := False;
  150. if Value = '' then
  151. Exit;
  152. I := 1;
  153. Offset := 0;
  154. while I <= Length(Value) do
  155. begin
  156. if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
  157. (Value[I] = ':') or
  158. (Xml11 and IsXml11Char(Value, I))) then
  159. begin
  160. if (I = Length(Value)) or (Value[I] <> #32) then
  161. Exit;
  162. Offset := 0;
  163. Inc(I);
  164. Continue;
  165. end;
  166. Offset := $100;
  167. Inc(I);
  168. end;
  169. Result := True;
  170. end;
  171. function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean;
  172. var
  173. I: Integer;
  174. Pages: PByteArray;
  175. begin
  176. if Xml11 then
  177. Pages := Xml11NamePages
  178. else
  179. Pages := @NamePages;
  180. Result := False;
  181. if Value = '' then
  182. Exit;
  183. I := 1;
  184. while I <= Length(Value) do
  185. begin
  186. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  187. (Value[I] = ':') or
  188. (Xml11 and IsXml11Char(Value, I))) then
  189. Exit;
  190. Inc(I);
  191. end;
  192. Result := True;
  193. end;
  194. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean;
  195. var
  196. I: Integer;
  197. Pages: PByteArray;
  198. begin
  199. if Xml11 then
  200. Pages := Xml11NamePages
  201. else
  202. Pages := @NamePages;
  203. I := 1;
  204. Result := False;
  205. if Value = '' then
  206. Exit;
  207. while I <= Length(Value) do
  208. begin
  209. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  210. (Value[I] = ':') or
  211. (Xml11 and IsXml11Char(Value, I))) then
  212. begin
  213. if (I = Length(Value)) or (Value[I] <> #32) then
  214. Exit;
  215. end;
  216. Inc(I);
  217. end;
  218. Result := True;
  219. end;
  220. function IsValidXmlEncoding(const Value: WideString): Boolean;
  221. var
  222. I: Integer;
  223. begin
  224. Result := False;
  225. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  226. Exit;
  227. for I := 2 to Length(Value) do
  228. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  229. Exit;
  230. Result := True;
  231. end;
  232. procedure NormalizeSpaces(var Value: WideString);
  233. var
  234. I, J: Integer;
  235. begin
  236. I := Length(Value);
  237. // speed: trim only whed needed
  238. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  239. Value := Trim(Value);
  240. I := 1;
  241. while I < Length(Value) do
  242. begin
  243. if Value[I] = #32 then
  244. begin
  245. J := I+1;
  246. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  247. if J-I > 1 then Delete(Value, I+1, J-I-1);
  248. end;
  249. Inc(I);
  250. end;
  251. end;
  252. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  253. var
  254. counter: Integer;
  255. c1, c2: Word;
  256. begin
  257. counter := 0;
  258. result := 0;
  259. if Len = 0 then
  260. exit;
  261. repeat
  262. c1 := ord(S1[counter]);
  263. c2 := ord(S2[counter]);
  264. if (c1 = 0) or (c2 = 0) then break;
  265. if c1 <> c2 then
  266. begin
  267. if c1 in [97..122] then
  268. Dec(c1, 32);
  269. if c2 in [97..122] then
  270. Dec(c2, 32);
  271. if c1 <> c2 then
  272. Break;
  273. end;
  274. Inc(counter);
  275. until counter >= Len;
  276. result := c1 - c2;
  277. end;
  278. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  279. begin
  280. Result := InitValue;
  281. while KeyLen <> 0 do
  282. begin
  283. Result := Result * $F4243 xor ord(Key^);
  284. Inc(Key);
  285. Dec(KeyLen);
  286. end;
  287. end;
  288. function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
  289. begin
  290. Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
  291. end;
  292. { THashTable }
  293. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  294. var
  295. I: Integer;
  296. begin
  297. inherited Create;
  298. FOwnsObjects := OwnObjects;
  299. I := 256;
  300. while I < InitSize do I := I shl 1;
  301. FBucketCount := I;
  302. FBucket := AllocMem(I * sizeof(PHashItem));
  303. end;
  304. destructor THashTable.Destroy;
  305. begin
  306. Clear;
  307. FreeMem(FBucket);
  308. inherited Destroy;
  309. end;
  310. procedure THashTable.Clear;
  311. var
  312. I: Integer;
  313. item, next: PHashItem;
  314. begin
  315. for I := 0 to FBucketCount-1 do
  316. begin
  317. item := FBucket[I];
  318. while Assigned(item) do
  319. begin
  320. next := item^.Next;
  321. if FOwnsObjects then
  322. item^.Data.Free;
  323. Dispose(item);
  324. item := next;
  325. end;
  326. end;
  327. FillChar(FBucket^, FBucketCount * sizeof(PHashItem), 0);
  328. end;
  329. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  330. var
  331. Dummy: Boolean;
  332. begin
  333. Result := Lookup(Key, KeyLen, Dummy, False);
  334. end;
  335. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  336. var Found: Boolean): PHashItem;
  337. begin
  338. Result := Lookup(Key, KeyLen, Found, True);
  339. end;
  340. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  341. var
  342. Dummy: Boolean;
  343. begin
  344. Result := Lookup(Key, KeyLen, Dummy, True);
  345. end;
  346. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  347. var
  348. e: PHashItem;
  349. Dummy: Boolean;
  350. begin
  351. e := Lookup(Key, KeyLen, Dummy, False);
  352. if Assigned(e) then
  353. Result := e^.Data
  354. else
  355. Result := nil;
  356. end;
  357. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  358. var Found: Boolean; CanCreate: Boolean): PHashItem;
  359. var
  360. Entry: PPHashItem;
  361. h: LongWord;
  362. begin
  363. h := Hash(0, Key, KeyLength);
  364. Entry := @FBucket[h mod FBucketCount];
  365. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  366. Entry := @Entry^^.Next;
  367. Found := Assigned(Entry^);
  368. if Found or (not CanCreate) then
  369. begin
  370. Result := Entry^;
  371. Exit;
  372. end;
  373. if FCount > FBucketCount then { arbitrary limit, probably too high }
  374. begin
  375. Resize(FBucketCount * 2);
  376. Result := Lookup(Key, KeyLength, Found, CanCreate);
  377. end
  378. else
  379. begin
  380. New(Result);
  381. SetString(Result^.Key, Key, KeyLength);
  382. Result^.HashValue := h;
  383. Result^.Data := nil;
  384. Result^.Next := nil;
  385. Inc(FCount);
  386. Entry^ := Result;
  387. end;
  388. end;
  389. procedure THashTable.Resize(NewCapacity: LongWord);
  390. var
  391. p, chain: PPHashItem;
  392. i: Integer;
  393. e, n: PHashItem;
  394. begin
  395. p := AllocMem(NewCapacity * sizeof(PHashItem));
  396. for i := 0 to FBucketCount-1 do
  397. begin
  398. e := FBucket[i];
  399. while Assigned(e) do
  400. begin
  401. chain := @p[e^.HashValue mod NewCapacity];
  402. n := e^.Next;
  403. e^.Next := chain^;
  404. chain^ := e;
  405. e := n;
  406. end;
  407. end;
  408. FBucketCount := NewCapacity;
  409. FreeMem(FBucket);
  410. FBucket := p;
  411. end;
  412. function THashTable.Remove(Entry: PHashItem): Boolean;
  413. var
  414. chain: PPHashItem;
  415. begin
  416. chain := @FBucket[Entry^.HashValue mod FBucketCount];
  417. while Assigned(chain^) do
  418. begin
  419. if chain^ = Entry then
  420. begin
  421. chain^ := Entry^.Next;
  422. if FOwnsObjects then
  423. Entry^.Data.Free;
  424. Dispose(Entry);
  425. Dec(FCount);
  426. Result := True;
  427. Exit;
  428. end;
  429. chain := @chain^^.Next;
  430. end;
  431. Result := False;
  432. end;
  433. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  434. var
  435. i: Integer;
  436. e: PHashItem;
  437. begin
  438. for i := 0 to FBucketCount-1 do
  439. begin
  440. e := FBucket[i];
  441. while Assigned(e) do
  442. begin
  443. if not proc(e, arg) then
  444. Exit;
  445. e := e^.Next;
  446. end;
  447. end;
  448. end;
  449. initialization
  450. finalization
  451. if Assigned(Xml11Pg) then
  452. FreeMem(Xml11Pg);
  453. end.