xmlutils.pp 28 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$ifdef fpc}{$mode objfpc}{$endif}
  13. {$H+}
  14. interface
  15. uses
  16. SysUtils, Classes;
  17. type
  18. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  19. TSetOfChar = set of Char;
  20. XMLString = WideString;
  21. PXMLString = ^XMLString;
  22. PXMLChar = PWideChar;
  23. function IsXmlName(const Value: XMLString; Xml11: Boolean = False): Boolean; overload;
  24. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  25. function IsXmlNames(const Value: XMLString; Xml11: Boolean = False): Boolean;
  26. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean = False): Boolean;
  27. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean = False): Boolean;
  28. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  29. procedure NormalizeSpaces(var Value: XMLString);
  30. function IsXmlWhiteSpace(c: WideChar): Boolean;
  31. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  32. { beware, works in ASCII range only }
  33. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  34. procedure WStrLower(var S: XMLString);
  35. const
  36. xmlVersionStr: array[TXMLVersion] of XMLString = ('', '1.0', '1.1');
  37. // URIs of predefined namespaces
  38. stduri_xml: XMLString = 'http://www.w3.org/XML/1998/namespace';
  39. stduri_xmlns: XMLString = 'http://www.w3.org/2000/xmlns/';
  40. type
  41. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  42. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  43. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  44. ntNotation,
  45. ntWhitespace,
  46. ntSignificantWhitespace,
  47. ntEndElement,
  48. ntEndEntity,
  49. ntXmlDeclaration
  50. );
  51. TAttrDataType = (
  52. dtCdata,
  53. dtId,
  54. dtIdRef,
  55. dtIdRefs,
  56. dtEntity,
  57. dtEntities,
  58. dtNmToken,
  59. dtNmTokens,
  60. dtNotation
  61. );
  62. { a simple hash table with WideString keys }
  63. type
  64. {$ifndef fpc}
  65. PtrInt = LongInt;
  66. TFPList = TList;
  67. {$endif}
  68. PPHashItem = ^PHashItem;
  69. PHashItem = ^THashItem;
  70. THashItem = record
  71. Key: XMLString;
  72. HashValue: LongWord;
  73. Next: PHashItem;
  74. Data: TObject;
  75. end;
  76. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  77. PHashItemArray = ^THashItemArray;
  78. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  79. THashTable = class(TObject)
  80. private
  81. FCount: LongWord;
  82. FBucketCount: LongWord;
  83. FBucket: PHashItemArray;
  84. FOwnsObjects: Boolean;
  85. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  86. procedure Resize(NewCapacity: LongWord);
  87. public
  88. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  89. destructor Destroy; override;
  90. procedure Clear;
  91. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  92. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  93. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  94. function FindOrAdd(const Key: XMLString): PHashItem; overload;
  95. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  96. function Remove(Entry: PHashItem): Boolean;
  97. function RemoveData(aData: TObject): Boolean;
  98. procedure ForEach(proc: THashForEach; arg: Pointer);
  99. property Count: LongWord read FCount;
  100. end;
  101. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  102. TExpHashEntry = record
  103. rev: LongWord;
  104. hash: LongWord;
  105. uriPtr: Pointer;
  106. lname: PWideChar;
  107. lnameLen: Integer;
  108. end;
  109. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  110. PExpHashEntryArray = ^TExpHashEntryArray;
  111. TDblHashArray = class(TObject)
  112. private
  113. FSizeLog: Integer;
  114. FRevision: LongWord;
  115. FData: PExpHashEntryArray;
  116. public
  117. procedure Init(NumSlots: Integer);
  118. function Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  119. destructor Destroy; override;
  120. end;
  121. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  122. TLocation = record
  123. Line: Integer;
  124. LinePos: Integer;
  125. end;
  126. IXmlLineInfo = interface(IInterface)['{FD0A892B-B26C-4954-9995-103B2A9D178A}']
  127. function GetHasLineInfo: Boolean;
  128. function GetLineNumber: Integer;
  129. function GetLinePosition: Integer;
  130. property HasLineInfo: Boolean read GetHasLineInfo;
  131. property LineNumber: Integer read GetLineNumber;
  132. property LinePosition: Integer read GetLinePosition;
  133. end;
  134. { generic node info record, shared between DOM and reader }
  135. PNodeData = ^TNodeData;
  136. TNodeData = record
  137. FNext: PNodeData;
  138. FQName: PHashItem;
  139. FPrefix: PHashItem;
  140. FNsUri: PHashItem;
  141. FColonPos: Integer;
  142. FTypeInfo: TObject;
  143. FLoc: TLocation;
  144. FLoc2: TLocation; // for attributes: start of value
  145. FIDEntry: PHashItem; // ID attributes: entry in ID map
  146. FNodeType: TXMLNodeType;
  147. FValueStr: XMLString;
  148. FValueStart: PWideChar;
  149. FValueLength: Integer;
  150. FIsDefault: Boolean;
  151. FDenormalized: Boolean; // Whether attribute value changes by normalization
  152. end;
  153. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  154. TBinding = class
  155. public
  156. uri: PHashItem;
  157. next: TBinding;
  158. prevPrefixBinding: TObject;
  159. Prefix: PHashItem;
  160. end;
  161. TAttributeAction = (
  162. aaUnchanged,
  163. aaPrefix, // only override the prefix
  164. aaBoth // override prefix and emit namespace definition
  165. );
  166. TNSSupport = class(TObject)
  167. private
  168. FNameTable: THashTable;
  169. FNesting: Integer;
  170. FPrefixSeqNo: Integer;
  171. FFreeBindings: TBinding;
  172. FBindings: TFPList;
  173. FBindingStack: array of TBinding;
  174. FPrefixes: THashTable;
  175. FDefaultPrefix: THashItem;
  176. public
  177. constructor Create(aNameTable: THashTable);
  178. destructor Destroy; override;
  179. procedure DefineBinding(const Prefix, nsURI: XMLString; out Binding: TBinding);
  180. function CheckAttribute(const Prefix, nsURI: XMLString;
  181. out Binding: TBinding): TAttributeAction;
  182. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  183. function BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  184. function DefaultNSBinding: TBinding;
  185. function LookupNamespace(const APrefix: XMLString): XMLString;
  186. procedure PushScope;
  187. function PopScope: Boolean;
  188. end;
  189. { Buffer builder, used to compose long strings without too much memory allocations }
  190. PWideCharBuf = ^TWideCharBuf;
  191. TWideCharBuf = record
  192. Buffer: PWideChar;
  193. Length: Integer;
  194. MaxLength: Integer;
  195. end;
  196. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  197. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  198. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  199. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  200. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  201. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  202. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  203. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  204. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  205. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  206. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  207. {$i names.inc}
  208. implementation
  209. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  210. begin
  211. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  212. begin
  213. Inc(Index);
  214. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  215. end
  216. else
  217. Result := False;
  218. end;
  219. function IsXml11Char(const Value: XMLString; var Index: Integer): Boolean; overload;
  220. begin
  221. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  222. begin
  223. Inc(Index);
  224. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  225. end
  226. else
  227. Result := False;
  228. end;
  229. function IsXmlName(const Value: XMLString; Xml11: Boolean): Boolean;
  230. begin
  231. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  232. end;
  233. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  234. var
  235. I: Integer;
  236. begin
  237. Result := False;
  238. I := 0;
  239. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[NamePages[hi(Word(Value[I]))]]) or
  240. (Value[I] = ':') or IsXml11Char(Value, I)) then
  241. Exit;
  242. Inc(I);
  243. while I < Len do
  244. begin
  245. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  246. (Value[I] = ':') or IsXml11Char(Value, I)) then
  247. Exit;
  248. Inc(I);
  249. end;
  250. Result := True;
  251. end;
  252. function IsXmlNames(const Value: XMLString; Xml11: Boolean): Boolean;
  253. var
  254. I: Integer;
  255. Offset: Integer;
  256. begin
  257. Result := False;
  258. if Value = '' then
  259. Exit;
  260. I := 1;
  261. Offset := 0;
  262. while I <= Length(Value) do
  263. begin
  264. if not ((Byte(Value[I]) in NamingBitmap[NamePages[Offset+hi(Word(Value[I]))]]) or
  265. (Value[I] = ':') or IsXml11Char(Value, I)) then
  266. begin
  267. if (I = Length(Value)) or (Value[I] <> #32) then
  268. Exit;
  269. Offset := 0;
  270. Inc(I);
  271. Continue;
  272. end;
  273. Offset := $100;
  274. Inc(I);
  275. end;
  276. Result := True;
  277. end;
  278. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean): Boolean;
  279. var
  280. I: Integer;
  281. begin
  282. Result := False;
  283. if Value = '' then
  284. Exit;
  285. I := 1;
  286. while I <= Length(Value) do
  287. begin
  288. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  289. (Value[I] = ':') or IsXml11Char(Value, I)) then
  290. Exit;
  291. Inc(I);
  292. end;
  293. Result := True;
  294. end;
  295. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean): Boolean;
  296. var
  297. I: Integer;
  298. begin
  299. I := 1;
  300. Result := False;
  301. if Value = '' then
  302. Exit;
  303. while I <= Length(Value) do
  304. begin
  305. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  306. (Value[I] = ':') or IsXml11Char(Value, I)) then
  307. begin
  308. if (I = Length(Value)) or (Value[I] <> #32) then
  309. Exit;
  310. end;
  311. Inc(I);
  312. end;
  313. Result := True;
  314. end;
  315. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  316. var
  317. I: Integer;
  318. begin
  319. Result := False;
  320. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  321. Exit;
  322. for I := 2 to Length(Value) do
  323. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  324. Exit;
  325. Result := True;
  326. end;
  327. procedure NormalizeSpaces(var Value: XMLString);
  328. var
  329. I, J: Integer;
  330. begin
  331. I := Length(Value);
  332. // speed: trim only whed needed
  333. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  334. Value := Trim(Value);
  335. I := 1;
  336. while I < Length(Value) do
  337. begin
  338. if Value[I] = #32 then
  339. begin
  340. J := I+1;
  341. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  342. if J-I > 1 then Delete(Value, I+1, J-I-1);
  343. end;
  344. Inc(I);
  345. end;
  346. end;
  347. function IsXmlWhiteSpace(c: WideChar): Boolean;
  348. begin
  349. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  350. end;
  351. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  352. var
  353. counter: Integer;
  354. c1, c2: Word;
  355. begin
  356. counter := 0;
  357. result := 0;
  358. if Len = 0 then
  359. exit;
  360. repeat
  361. c1 := ord(S1[counter]);
  362. c2 := ord(S2[counter]);
  363. if (c1 = 0) or (c2 = 0) then break;
  364. if c1 <> c2 then
  365. begin
  366. if c1 in [97..122] then
  367. Dec(c1, 32);
  368. if c2 in [97..122] then
  369. Dec(c2, 32);
  370. if c1 <> c2 then
  371. Break;
  372. end;
  373. Inc(counter);
  374. until counter >= Len;
  375. result := c1 - c2;
  376. end;
  377. procedure WStrLower(var S: XMLString);
  378. var
  379. i: Integer;
  380. begin
  381. for i := 1 to Length(S) do
  382. if (S[i] >= 'A') and (S[i] <= 'Z') then
  383. Inc(word(S[i]), 32);
  384. end;
  385. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  386. begin
  387. Result := InitValue;
  388. while KeyLen <> 0 do
  389. begin
  390. {$push}{$r-}{$q-}
  391. Result := Result * $F4243 xor ord(Key^);
  392. {$pop}
  393. Inc(Key);
  394. Dec(KeyLen);
  395. end;
  396. end;
  397. function KeyCompare(const Key1: XMLString; Key2: Pointer; Key2Len: Integer): Boolean;
  398. begin
  399. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  400. end;
  401. { THashTable }
  402. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  403. var
  404. I: Integer;
  405. begin
  406. inherited Create;
  407. FOwnsObjects := OwnObjects;
  408. I := 256;
  409. while I < InitSize do I := I shl 1;
  410. FBucketCount := I;
  411. FBucket := AllocMem(I * sizeof(PHashItem));
  412. end;
  413. destructor THashTable.Destroy;
  414. begin
  415. Clear;
  416. FreeMem(FBucket);
  417. inherited Destroy;
  418. end;
  419. procedure THashTable.Clear;
  420. var
  421. I: Integer;
  422. item, next: PHashItem;
  423. begin
  424. for I := 0 to FBucketCount-1 do
  425. begin
  426. item := FBucket^[I];
  427. while Assigned(item) do
  428. begin
  429. next := item^.Next;
  430. if FOwnsObjects then
  431. item^.Data.Free;
  432. Dispose(item);
  433. item := next;
  434. end;
  435. FBucket^[I] := nil;
  436. end;
  437. end;
  438. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  439. var
  440. Dummy: Boolean;
  441. begin
  442. Result := Lookup(Key, KeyLen, Dummy, False);
  443. end;
  444. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  445. var Found: Boolean): PHashItem;
  446. begin
  447. Result := Lookup(Key, KeyLen, Found, True);
  448. end;
  449. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  450. var
  451. Dummy: Boolean;
  452. begin
  453. Result := Lookup(Key, KeyLen, Dummy, True);
  454. end;
  455. function THashTable.FindOrAdd(const Key: XMLString): PHashItem;
  456. var
  457. Dummy: Boolean;
  458. begin
  459. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  460. end;
  461. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  462. var
  463. e: PHashItem;
  464. Dummy: Boolean;
  465. begin
  466. e := Lookup(Key, KeyLen, Dummy, False);
  467. if Assigned(e) then
  468. Result := e^.Data
  469. else
  470. Result := nil;
  471. end;
  472. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  473. out Found: Boolean; CanCreate: Boolean): PHashItem;
  474. var
  475. Entry: PPHashItem;
  476. h: LongWord;
  477. begin
  478. h := Hash(0, Key, KeyLength);
  479. Entry := @FBucket^[h mod FBucketCount];
  480. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  481. Entry := @Entry^^.Next;
  482. Found := Assigned(Entry^);
  483. if Found or (not CanCreate) then
  484. begin
  485. Result := Entry^;
  486. Exit;
  487. end;
  488. if FCount > FBucketCount then { arbitrary limit, probably too high }
  489. begin
  490. Resize(FBucketCount * 2);
  491. Result := Lookup(Key, KeyLength, Found, CanCreate);
  492. end
  493. else
  494. begin
  495. New(Result);
  496. // SetString for WideStrings trims on zero chars [fixed, #14740]
  497. SetLength(Result^.Key, KeyLength);
  498. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  499. Result^.HashValue := h;
  500. Result^.Data := nil;
  501. Result^.Next := nil;
  502. Inc(FCount);
  503. Entry^ := Result;
  504. end;
  505. end;
  506. procedure THashTable.Resize(NewCapacity: LongWord);
  507. var
  508. p: PHashItemArray;
  509. chain: PPHashItem;
  510. i: Integer;
  511. e, n: PHashItem;
  512. begin
  513. p := AllocMem(NewCapacity * sizeof(PHashItem));
  514. for i := 0 to FBucketCount-1 do
  515. begin
  516. e := FBucket^[i];
  517. while Assigned(e) do
  518. begin
  519. chain := @p^[e^.HashValue mod NewCapacity];
  520. n := e^.Next;
  521. e^.Next := chain^;
  522. chain^ := e;
  523. e := n;
  524. end;
  525. end;
  526. FBucketCount := NewCapacity;
  527. FreeMem(FBucket);
  528. FBucket := p;
  529. end;
  530. function THashTable.Remove(Entry: PHashItem): Boolean;
  531. var
  532. chain: PPHashItem;
  533. begin
  534. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  535. while Assigned(chain^) do
  536. begin
  537. if chain^ = Entry then
  538. begin
  539. chain^ := Entry^.Next;
  540. if FOwnsObjects then
  541. Entry^.Data.Free;
  542. Dispose(Entry);
  543. Dec(FCount);
  544. Result := True;
  545. Exit;
  546. end;
  547. chain := @chain^^.Next;
  548. end;
  549. Result := False;
  550. end;
  551. // this does not free the aData object
  552. function THashTable.RemoveData(aData: TObject): Boolean;
  553. var
  554. i: Integer;
  555. chain: PPHashItem;
  556. e: PHashItem;
  557. begin
  558. for i := 0 to FBucketCount-1 do
  559. begin
  560. chain := @FBucket^[i];
  561. while Assigned(chain^) do
  562. begin
  563. if chain^^.Data = aData then
  564. begin
  565. e := chain^;
  566. chain^ := e^.Next;
  567. Dispose(e);
  568. Dec(FCount);
  569. Result := True;
  570. Exit;
  571. end;
  572. chain := @chain^^.Next;
  573. end;
  574. end;
  575. Result := False;
  576. end;
  577. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  578. var
  579. i: Integer;
  580. e: PHashItem;
  581. begin
  582. for i := 0 to FBucketCount-1 do
  583. begin
  584. e := FBucket^[i];
  585. while Assigned(e) do
  586. begin
  587. if not proc(e, arg) then
  588. Exit;
  589. e := e^.Next;
  590. end;
  591. end;
  592. end;
  593. { TDblHashArray }
  594. destructor TDblHashArray.Destroy;
  595. begin
  596. FreeMem(FData);
  597. inherited Destroy;
  598. end;
  599. procedure TDblHashArray.Init(NumSlots: Integer);
  600. var
  601. i: Integer;
  602. begin
  603. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  604. begin
  605. FSizeLog := 3;
  606. while (NumSlots shr FSizeLog) <> 0 do
  607. Inc(FSizeLog);
  608. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  609. FRevision := 0;
  610. end;
  611. if FRevision = 0 then
  612. begin
  613. FRevision := $FFFFFFFF;
  614. for i := (1 shl FSizeLog)-1 downto 0 do
  615. FData^[i].rev := FRevision;
  616. end;
  617. Dec(FRevision);
  618. end;
  619. function TDblHashArray.Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  620. var
  621. step: Byte;
  622. mask: LongWord;
  623. idx: Integer;
  624. HashValue: LongWord;
  625. begin
  626. HashValue := Hash(LongWord(PtrUInt(uri)), localName, localLength);
  627. mask := (1 shl FSizeLog) - 1;
  628. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  629. idx := HashValue and mask;
  630. result := True;
  631. while FData^[idx].rev = FRevision do
  632. begin
  633. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr = uri) and
  634. (FData^[idx].lnameLen = localLength) and
  635. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  636. Exit;
  637. if idx < step then
  638. Inc(idx, (1 shl FSizeLog) - step)
  639. else
  640. Dec(idx, step);
  641. end;
  642. with FData^[idx] do
  643. begin
  644. rev := FRevision;
  645. hash := HashValue;
  646. uriPtr := uri;
  647. lname := localName;
  648. lnameLen := localLength;
  649. end;
  650. result := False;
  651. end;
  652. { TNSSupport }
  653. constructor TNSSupport.Create(aNameTable: THashTable);
  654. var
  655. b: TBinding;
  656. begin
  657. inherited Create;
  658. FNameTable := aNameTable;
  659. FPrefixes := THashTable.Create(16, False);
  660. FBindings := TFPList.Create;
  661. SetLength(FBindingStack, 16);
  662. { provide implicit binding for the 'xml' prefix }
  663. DefineBinding('xml', stduri_xml, b);
  664. end;
  665. destructor TNSSupport.Destroy;
  666. var
  667. I: Integer;
  668. begin
  669. for I := FBindings.Count-1 downto 0 do
  670. TObject(FBindings.List^[I]).Free;
  671. FBindings.Free;
  672. FPrefixes.Free;
  673. inherited Destroy;
  674. end;
  675. function TNSSupport.BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  676. begin
  677. { try to reuse an existing binding }
  678. result := FFreeBindings;
  679. if Assigned(result) then
  680. FFreeBindings := result.Next
  681. else { no free bindings, create a new one }
  682. begin
  683. result := TBinding.Create;
  684. FBindings.Add(result);
  685. end;
  686. { link it into chain of bindings at the current element level }
  687. result.Next := FBindingStack[FNesting];
  688. FBindingStack[FNesting] := result;
  689. { bind }
  690. result.uri := nsURI;
  691. result.Prefix := aPrefix;
  692. result.PrevPrefixBinding := aPrefix^.Data;
  693. aPrefix^.Data := result;
  694. end;
  695. function TNSSupport.DefaultNSBinding: TBinding;
  696. begin
  697. result := TBinding(FDefaultPrefix.Data);
  698. end;
  699. procedure TNSSupport.DefineBinding(const Prefix, nsURI: XMLString;
  700. out Binding: TBinding);
  701. var
  702. Pfx, uri: PHashItem;
  703. begin
  704. Pfx := @FDefaultPrefix;
  705. if (nsURI <> '') and (Prefix <> '') then
  706. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  707. uri := FNameTable.FindOrAdd(PWideChar(nsURI),Length(nsURI));
  708. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> uri) then
  709. Binding := BindPrefix(uri, Pfx)
  710. else
  711. Binding := nil;
  712. end;
  713. function TNSSupport.CheckAttribute(const Prefix, nsURI: XMLString;
  714. out Binding: TBinding): TAttributeAction;
  715. var
  716. Pfx: PHashItem;
  717. I: Integer;
  718. b: TBinding;
  719. buf: array[0..31] of WideChar;
  720. p: PWideChar;
  721. uri: PHashItem;
  722. begin
  723. Binding := nil;
  724. Pfx := nil;
  725. Result := aaUnchanged;
  726. if Prefix <> '' then
  727. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  728. else if nsURI = '' then
  729. Exit;
  730. uri := FNameTable.FindOrAdd(PWideChar(nsURI), Length(nsURI));
  731. { if the prefix is already bound to correct URI, we're done }
  732. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = uri) then
  733. Exit;
  734. { see if there's another prefix bound to the target URI }
  735. // TODO: should use something faster than linear search
  736. for i := FNesting downto 0 do
  737. begin
  738. b := FBindingStack[i];
  739. while Assigned(b) do
  740. begin
  741. if (b.uri = uri) and (b.Prefix <> @FDefaultPrefix) then
  742. begin
  743. Binding := b; // found one -> override the attribute's prefix
  744. Result := aaPrefix;
  745. Exit;
  746. end;
  747. b := b.Next;
  748. end;
  749. end;
  750. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  751. if (Pfx = nil) or Assigned(Pfx^.Data) then
  752. repeat
  753. Inc(FPrefixSeqNo);
  754. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  755. p := @Buf[high(Buf)]; // done without using strings
  756. while i <> 0 do
  757. begin
  758. p^ := WideChar(i mod 10+ord('0'));
  759. dec(p);
  760. i := i div 10;
  761. end;
  762. p^ := 'S'; dec(p);
  763. p^ := 'N';
  764. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  765. until Pfx^.Data = nil;
  766. Binding := BindPrefix(uri, Pfx);
  767. Result := aaBoth;
  768. end;
  769. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  770. begin
  771. if Assigned(P) and (Len > 0) then
  772. Result := FPrefixes.FindOrAdd(P, Len)
  773. else
  774. Result := @FDefaultPrefix;
  775. end;
  776. function TNSSupport.LookupNamespace(const APrefix: XMLString): XMLString;
  777. var
  778. prefixatom: PHashItem;
  779. b: TBinding;
  780. begin
  781. prefixatom := GetPrefix(PWideChar(APrefix),Length(APrefix));
  782. b := TBinding(prefixatom^.Data);
  783. if Assigned(b) and Assigned(b.Uri) then
  784. result := b.Uri^.Key
  785. else
  786. result := '';
  787. end;
  788. procedure TNSSupport.PushScope;
  789. begin
  790. Inc(FNesting);
  791. if FNesting >= Length(FBindingStack) then
  792. SetLength(FBindingStack, FNesting * 2);
  793. end;
  794. function TNSSupport.PopScope: Boolean;
  795. var
  796. b, temp: TBinding;
  797. begin
  798. { don't unbind prefixes declared before the first call to PushScope }
  799. Result := FNesting > 0;
  800. if not Result then
  801. Exit;
  802. temp := FBindingStack[FNesting];
  803. while Assigned(temp) do
  804. begin
  805. b := temp;
  806. temp := b.next;
  807. b.next := FFreeBindings;
  808. FFreeBindings := b;
  809. b.Prefix^.Data := b.prevPrefixBinding;
  810. end;
  811. FBindingStack[FNesting] := nil;
  812. Dec(FNesting);
  813. end;
  814. { Buffer builder utils }
  815. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  816. begin
  817. ABuffer.MaxLength := ALength;
  818. ABuffer.Length := 0;
  819. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  820. end;
  821. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  822. begin
  823. if ABuffer.Length >= ABuffer.MaxLength then
  824. begin
  825. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  826. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  827. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  828. end;
  829. ABuffer.Buffer[ABuffer.Length] := wc;
  830. Inc(ABuffer.Length);
  831. end;
  832. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  833. var
  834. Len: Integer;
  835. begin
  836. Len := PEnd - PStart;
  837. if Len <= 0 then
  838. Exit;
  839. if Len >= ABuf.MaxLength - ABuf.Length then
  840. begin
  841. ABuf.MaxLength := (Len + ABuf.Length)*2;
  842. // note: memory clean isn't necessary here.
  843. // To avoid garbage, control Length field.
  844. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  845. end;
  846. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  847. Inc(ABuf.Length, Len);
  848. end;
  849. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  850. var
  851. Len: Integer;
  852. begin
  853. Len := Length(AValue);
  854. if Len <= 0 then
  855. Exit;
  856. if Len >= ABuf.MaxLength - ABuf.Length then
  857. begin
  858. ABuf.MaxLength := (Len + ABuf.Length)*2;
  859. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  860. end;
  861. Move(PWideChar(AValue)^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  862. Inc(ABuf.Length, Len);
  863. end;
  864. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  865. begin
  866. Result := (ABuf.Length = Length(Arg)) and
  867. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  868. end;
  869. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  870. var
  871. Dst, Src: Integer;
  872. begin
  873. Dst := 0;
  874. Src := 0;
  875. // skip leading space if any
  876. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  877. Inc(Src);
  878. while Src < Buf.Length do
  879. begin
  880. if Buf.Buffer[Src] = ' ' then
  881. begin
  882. // Dst cannot be 0 here, because leading space is already skipped
  883. if Buf.Buffer[Dst-1] <> ' ' then
  884. begin
  885. Buf.Buffer[Dst] := ' ';
  886. Inc(Dst);
  887. end;
  888. end
  889. else
  890. begin
  891. Buf.Buffer[Dst] := Buf.Buffer[Src];
  892. Inc(Dst);
  893. end;
  894. Inc(Src);
  895. end;
  896. // trailing space (only one possible due to compression)
  897. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  898. Dec(Dst);
  899. Modified := Dst <> Buf.Length;
  900. Buf.Length := Dst;
  901. end;
  902. { standard decoders }
  903. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  904. var
  905. cnt: Cardinal;
  906. begin
  907. cnt := OutCnt; // num of widechars
  908. if cnt > InCnt div sizeof(WideChar) then
  909. cnt := InCnt div sizeof(WideChar);
  910. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  911. Dec(InCnt, cnt*sizeof(WideChar));
  912. Dec(OutCnt, cnt);
  913. Result := cnt;
  914. end;
  915. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  916. var
  917. I: Integer;
  918. cnt: Cardinal;
  919. InPtr: PChar;
  920. begin
  921. cnt := OutCnt; // num of widechars
  922. if cnt > InCnt div sizeof(WideChar) then
  923. cnt := InCnt div sizeof(WideChar);
  924. InPtr := InBuf;
  925. for I := 0 to cnt-1 do
  926. begin
  927. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  928. Inc(InPtr, 2);
  929. end;
  930. Dec(InCnt, cnt*sizeof(WideChar));
  931. Dec(OutCnt, cnt);
  932. Result := cnt;
  933. end;
  934. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  935. var
  936. I: Integer;
  937. cnt: Cardinal;
  938. begin
  939. cnt := OutCnt; // num of widechars
  940. if cnt > InCnt then
  941. cnt := InCnt;
  942. for I := 0 to cnt-1 do
  943. OutBuf[I] := WideChar(ord(InBuf[I]));
  944. Dec(InCnt, cnt);
  945. Dec(OutCnt, cnt);
  946. Result := cnt;
  947. end;
  948. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  949. const
  950. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  951. var
  952. i, j, bc: Cardinal;
  953. Value: Cardinal;
  954. begin
  955. result := 0;
  956. i := OutCnt;
  957. while (i > 0) and (InCnt > 0) do
  958. begin
  959. bc := 1;
  960. Value := ord(InBuf^);
  961. if Value < $80 then
  962. OutBuf^ := WideChar(Value)
  963. else
  964. begin
  965. if Value < $C2 then
  966. begin
  967. Result := -1;
  968. Break;
  969. end;
  970. Inc(bc);
  971. if Value > $DF then
  972. begin
  973. Inc(bc);
  974. if Value > $EF then
  975. begin
  976. Inc(bc);
  977. if Value > $F7 then // never encountered in the tests.
  978. begin
  979. Result := -1;
  980. Break;
  981. end;
  982. end;
  983. end;
  984. if InCnt < bc then
  985. Break;
  986. j := 1;
  987. while j < bc do
  988. begin
  989. if InBuf[j] in [#$80..#$BF] then
  990. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  991. else
  992. begin
  993. Result := -1;
  994. Break;
  995. end;
  996. Inc(j);
  997. end;
  998. Value := Value and MaxCode[bc];
  999. // RFC2279 check
  1000. if Value <= MaxCode[bc-1] then
  1001. begin
  1002. Result := -1;
  1003. Break;
  1004. end;
  1005. case Value of
  1006. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1007. $10000..$10FFFF:
  1008. begin
  1009. if i < 2 then Break;
  1010. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1011. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1012. Inc(OutBuf); // once here
  1013. Dec(i);
  1014. end
  1015. else
  1016. begin
  1017. Result := -1;
  1018. Break;
  1019. end;
  1020. end;
  1021. end;
  1022. Inc(OutBuf);
  1023. Inc(InBuf, bc);
  1024. Dec(InCnt, bc);
  1025. Dec(i);
  1026. end;
  1027. if Result >= 0 then
  1028. Result := OutCnt-i;
  1029. OutCnt := i;
  1030. end;
  1031. end.