xmlutils.pp 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$ifdef fpc}{$mode objfpc}{$endif}
  13. {$H+}
  14. interface
  15. uses
  16. SysUtils, Classes;
  17. type
  18. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  19. TSetOfChar = set of Char;
  20. XMLString = UnicodeString;
  21. PXMLString = ^XMLString;
  22. PXMLChar = PWideChar;
  23. function IsXmlName(const Value: XMLString; Xml11: Boolean = False): Boolean; overload;
  24. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  25. function IsXmlNames(const Value: XMLString; Xml11: Boolean = False): Boolean;
  26. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean = False): Boolean;
  27. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean = False): Boolean;
  28. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  29. procedure NormalizeSpaces(var Value: XMLString);
  30. function IsXmlWhiteSpace(c: WideChar): Boolean;
  31. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  32. { beware, works in ASCII range only }
  33. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  34. procedure WStrLower(var S: XMLString);
  35. const
  36. xmlVersionStr: array[TXMLVersion] of XMLString = ('', '1.0', '1.1');
  37. // URIs of predefined namespaces
  38. stduri_xml: XMLString = 'http://www.w3.org/XML/1998/namespace';
  39. stduri_xmlns: XMLString = 'http://www.w3.org/2000/xmlns/';
  40. type
  41. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  42. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  43. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  44. ntNotation,
  45. ntWhitespace,
  46. ntSignificantWhitespace,
  47. ntEndElement,
  48. ntEndEntity,
  49. ntXmlDeclaration
  50. );
  51. TAttrDataType = (
  52. dtCdata,
  53. dtId,
  54. dtIdRef,
  55. dtIdRefs,
  56. dtEntity,
  57. dtEntities,
  58. dtNmToken,
  59. dtNmTokens,
  60. dtNotation
  61. );
  62. { a simple hash table with WideString keys }
  63. type
  64. {$ifndef fpc}
  65. PtrInt = LongInt;
  66. TFPList = TList;
  67. {$endif}
  68. PPHashItem = ^PHashItem;
  69. PHashItem = ^THashItem;
  70. THashItem = record
  71. Key: XMLString;
  72. HashValue: LongWord;
  73. Next: PHashItem;
  74. Data: TObject;
  75. end;
  76. {$ifdef CPU16}
  77. THashItemArray = array[0..MaxSmallInt div sizeof(Pointer)-1] of PHashItem;
  78. {$else CPU16}
  79. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  80. {$endif CPU16}
  81. PHashItemArray = ^THashItemArray;
  82. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  83. THashTable = class(TObject)
  84. private
  85. FCount: LongWord;
  86. FBucketCount: LongWord;
  87. FBucket: PHashItemArray;
  88. FOwnsObjects: Boolean;
  89. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  90. procedure Resize(NewCapacity: LongWord);
  91. public
  92. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  93. destructor Destroy; override;
  94. procedure Clear;
  95. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  96. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  97. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  98. function FindOrAdd(const Key: XMLString): PHashItem; overload;
  99. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  100. function Remove(Entry: PHashItem): Boolean;
  101. function RemoveData(aData: TObject): Boolean;
  102. procedure ForEach(proc: THashForEach; arg: Pointer);
  103. property Count: LongWord read FCount;
  104. end;
  105. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  106. TExpHashEntry = record
  107. rev: LongWord;
  108. hash: LongWord;
  109. uriPtr: Pointer;
  110. lname: PWideChar;
  111. lnameLen: Integer;
  112. end;
  113. {$ifdef CPU16}
  114. TExpHashEntryArray = array[0..MaxSmallInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  115. {$else CPU16}
  116. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  117. {$endif CPU16}
  118. PExpHashEntryArray = ^TExpHashEntryArray;
  119. TDblHashArray = class(TObject)
  120. private
  121. FSizeLog: Integer;
  122. FRevision: LongWord;
  123. FData: PExpHashEntryArray;
  124. public
  125. procedure Init(NumSlots: Integer);
  126. function Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  127. destructor Destroy; override;
  128. end;
  129. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  130. TLocation = record
  131. Line: Integer;
  132. LinePos: Integer;
  133. end;
  134. IXmlLineInfo = interface(IInterface)['{FD0A892B-B26C-4954-9995-103B2A9D178A}']
  135. function GetHasLineInfo: Boolean;
  136. function GetLineNumber: Integer;
  137. function GetLinePosition: Integer;
  138. property HasLineInfo: Boolean read GetHasLineInfo;
  139. property LineNumber: Integer read GetLineNumber;
  140. property LinePosition: Integer read GetLinePosition;
  141. end;
  142. { generic node info record, shared between DOM and reader }
  143. PPNodeData = ^PNodeData;
  144. PNodeData = ^TNodeData;
  145. TNodeData = record
  146. FNext: PNodeData;
  147. FQName: PHashItem;
  148. FPrefix: PHashItem;
  149. FNsUri: PHashItem;
  150. FColonPos: Integer;
  151. FTypeInfo: TObject;
  152. FLoc: TLocation;
  153. FLoc2: TLocation; // for attributes: start of value
  154. FIDEntry: PHashItem; // ID attributes: entry in ID map
  155. FNodeType: TXMLNodeType;
  156. FValueStr: XMLString;
  157. FValueStart: PWideChar;
  158. FValueLength: Integer;
  159. FIsDefault: Boolean;
  160. FDenormalized: Boolean; // Whether attribute value changes by normalization
  161. end;
  162. IGetNodeDataPtr = interface(IInterface)['{81F6ADA2-8F5E-41D7-872D-226163FF4E45}']
  163. function CurrentNodePtr: PPNodeData;
  164. end;
  165. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  166. TBinding = class
  167. public
  168. uri: PHashItem;
  169. next: TBinding;
  170. prevPrefixBinding: TObject;
  171. Prefix: PHashItem;
  172. end;
  173. TAttributeAction = (
  174. aaUnchanged,
  175. aaPrefix, // only override the prefix
  176. aaBoth // override prefix and emit namespace definition
  177. );
  178. TNSSupport = class(TObject)
  179. private
  180. FNameTable: THashTable;
  181. FNesting: Integer;
  182. FPrefixSeqNo: Integer;
  183. FFreeBindings: TBinding;
  184. FBindings: TFPList;
  185. FBindingStack: array of TBinding;
  186. FPrefixes: THashTable;
  187. FDefaultPrefix: THashItem;
  188. public
  189. constructor Create(aNameTable: THashTable);
  190. destructor Destroy; override;
  191. procedure DefineBinding(const Prefix, nsURI: XMLString; out Binding: TBinding);
  192. function CheckAttribute(const Prefix, nsURI: XMLString;
  193. out Binding: TBinding): TAttributeAction;
  194. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  195. function BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  196. function DefaultNSBinding: TBinding;
  197. function LookupNamespace(const APrefix: XMLString): XMLString;
  198. procedure PushScope;
  199. function PopScope: Boolean;
  200. end;
  201. { Buffer builder, used to compose long strings without too much memory allocations }
  202. PWideCharBuf = ^TWideCharBuf;
  203. TWideCharBuf = record
  204. Buffer: PWideChar;
  205. Length: Integer;
  206. MaxLength: Integer;
  207. end;
  208. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  209. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  210. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  211. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  212. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  213. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  214. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  215. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  216. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  217. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  218. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  219. {$i names.inc}
  220. implementation
  221. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  222. begin
  223. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  224. begin
  225. Inc(Index);
  226. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  227. end
  228. else
  229. Result := False;
  230. end;
  231. function IsXml11Char(const Value: XMLString; var Index: Integer): Boolean; overload;
  232. begin
  233. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  234. begin
  235. Inc(Index);
  236. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  237. end
  238. else
  239. Result := False;
  240. end;
  241. function IsXmlName(const Value: XMLString; Xml11: Boolean): Boolean;
  242. begin
  243. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  244. end;
  245. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  246. var
  247. I: Integer;
  248. begin
  249. Result := False;
  250. I := 0;
  251. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[NamePages[hi(Word(Value[I]))]]) or
  252. (Value[I] = ':') or IsXml11Char(Value, I)) then
  253. Exit;
  254. Inc(I);
  255. while I < Len do
  256. begin
  257. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  258. (Value[I] = ':') or IsXml11Char(Value, I)) then
  259. Exit;
  260. Inc(I);
  261. end;
  262. Result := True;
  263. end;
  264. function IsXmlNames(const Value: XMLString; Xml11: Boolean): Boolean;
  265. var
  266. I: Integer;
  267. Offset: Integer;
  268. begin
  269. Result := False;
  270. if Value = '' then
  271. Exit;
  272. I := 1;
  273. Offset := 0;
  274. while I <= Length(Value) do
  275. begin
  276. if not ((Byte(Value[I]) in NamingBitmap[NamePages[Offset+hi(Word(Value[I]))]]) or
  277. (Value[I] = ':') or IsXml11Char(Value, I)) then
  278. begin
  279. if (I = Length(Value)) or (Value[I] <> #32) then
  280. Exit;
  281. Offset := 0;
  282. Inc(I);
  283. Continue;
  284. end;
  285. Offset := $100;
  286. Inc(I);
  287. end;
  288. Result := True;
  289. end;
  290. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean): Boolean;
  291. var
  292. I: Integer;
  293. begin
  294. Result := False;
  295. if Value = '' then
  296. Exit;
  297. I := 1;
  298. while I <= Length(Value) do
  299. begin
  300. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  301. (Value[I] = ':') or IsXml11Char(Value, I)) then
  302. Exit;
  303. Inc(I);
  304. end;
  305. Result := True;
  306. end;
  307. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean): Boolean;
  308. var
  309. I: Integer;
  310. begin
  311. I := 1;
  312. Result := False;
  313. if Value = '' then
  314. Exit;
  315. while I <= Length(Value) do
  316. begin
  317. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  318. (Value[I] = ':') or IsXml11Char(Value, I)) then
  319. begin
  320. if (I = Length(Value)) or (Value[I] <> #32) then
  321. Exit;
  322. end;
  323. Inc(I);
  324. end;
  325. Result := True;
  326. end;
  327. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  328. var
  329. I: Integer;
  330. begin
  331. Result := False;
  332. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  333. Exit;
  334. for I := 2 to Length(Value) do
  335. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  336. Exit;
  337. Result := True;
  338. end;
  339. procedure NormalizeSpaces(var Value: XMLString);
  340. var
  341. I, J: Integer;
  342. begin
  343. I := Length(Value);
  344. // speed: trim only whed needed
  345. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  346. Value := Trim(Value);
  347. I := 1;
  348. while I < Length(Value) do
  349. begin
  350. if Value[I] = #32 then
  351. begin
  352. J := I+1;
  353. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  354. if J-I > 1 then Delete(Value, I+1, J-I-1);
  355. end;
  356. Inc(I);
  357. end;
  358. end;
  359. function IsXmlWhiteSpace(c: WideChar): Boolean;
  360. begin
  361. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  362. end;
  363. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  364. var
  365. counter: Integer;
  366. c1, c2: Word;
  367. begin
  368. counter := 0;
  369. result := 0;
  370. if Len = 0 then
  371. exit;
  372. repeat
  373. c1 := ord(S1[counter]);
  374. c2 := ord(S2[counter]);
  375. if (c1 = 0) or (c2 = 0) then break;
  376. if c1 <> c2 then
  377. begin
  378. if c1 in [97..122] then
  379. Dec(c1, 32);
  380. if c2 in [97..122] then
  381. Dec(c2, 32);
  382. if c1 <> c2 then
  383. Break;
  384. end;
  385. Inc(counter);
  386. until counter >= Len;
  387. result := c1 - c2;
  388. end;
  389. procedure WStrLower(var S: XMLString);
  390. var
  391. i: Integer;
  392. begin
  393. for i := 1 to Length(S) do
  394. if (S[i] >= 'A') and (S[i] <= 'Z') then
  395. Inc(word(S[i]), 32);
  396. end;
  397. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  398. begin
  399. Result := InitValue;
  400. while KeyLen <> 0 do
  401. begin
  402. {$push}{$r-}{$q-}
  403. Result := Result * $F4243 xor ord(Key^);
  404. {$pop}
  405. Inc(Key);
  406. Dec(KeyLen);
  407. end;
  408. end;
  409. function KeyCompare(const Key1: XMLString; Key2: Pointer; Key2Len: Integer): Boolean;
  410. begin
  411. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  412. end;
  413. { THashTable }
  414. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  415. var
  416. I: Integer;
  417. begin
  418. inherited Create;
  419. FOwnsObjects := OwnObjects;
  420. I := 256;
  421. while I < InitSize do I := I shl 1;
  422. FBucketCount := I;
  423. FBucket := AllocMem(I * sizeof(PHashItem));
  424. end;
  425. destructor THashTable.Destroy;
  426. begin
  427. Clear;
  428. FreeMem(FBucket);
  429. inherited Destroy;
  430. end;
  431. procedure THashTable.Clear;
  432. var
  433. I: Integer;
  434. item, next: PHashItem;
  435. begin
  436. for I := 0 to FBucketCount-1 do
  437. begin
  438. item := FBucket^[I];
  439. while Assigned(item) do
  440. begin
  441. next := item^.Next;
  442. if FOwnsObjects then
  443. item^.Data.Free;
  444. Dispose(item);
  445. item := next;
  446. end;
  447. FBucket^[I] := nil;
  448. end;
  449. end;
  450. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  451. var
  452. Dummy: Boolean;
  453. begin
  454. Result := Lookup(Key, KeyLen, Dummy, False);
  455. end;
  456. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  457. var Found: Boolean): PHashItem;
  458. begin
  459. Result := Lookup(Key, KeyLen, Found, True);
  460. end;
  461. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  462. var
  463. Dummy: Boolean;
  464. begin
  465. Result := Lookup(Key, KeyLen, Dummy, True);
  466. end;
  467. function THashTable.FindOrAdd(const Key: XMLString): PHashItem;
  468. var
  469. Dummy: Boolean;
  470. begin
  471. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  472. end;
  473. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  474. var
  475. e: PHashItem;
  476. Dummy: Boolean;
  477. begin
  478. e := Lookup(Key, KeyLen, Dummy, False);
  479. if Assigned(e) then
  480. Result := e^.Data
  481. else
  482. Result := nil;
  483. end;
  484. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  485. out Found: Boolean; CanCreate: Boolean): PHashItem;
  486. var
  487. Entry: PPHashItem;
  488. h: LongWord;
  489. begin
  490. h := Hash(0, Key, KeyLength);
  491. Entry := @FBucket^[h mod FBucketCount];
  492. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  493. Entry := @Entry^^.Next;
  494. Found := Assigned(Entry^);
  495. if Found or (not CanCreate) then
  496. begin
  497. Result := Entry^;
  498. Exit;
  499. end;
  500. if FCount > FBucketCount then { arbitrary limit, probably too high }
  501. begin
  502. Resize(FBucketCount * 2);
  503. Result := Lookup(Key, KeyLength, Found, CanCreate);
  504. end
  505. else
  506. begin
  507. New(Result);
  508. // SetString for WideStrings trims on zero chars [fixed, #14740]
  509. SetLength(Result^.Key, KeyLength);
  510. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  511. Result^.HashValue := h;
  512. Result^.Data := nil;
  513. Result^.Next := nil;
  514. Inc(FCount);
  515. Entry^ := Result;
  516. end;
  517. end;
  518. procedure THashTable.Resize(NewCapacity: LongWord);
  519. var
  520. p: PHashItemArray;
  521. chain: PPHashItem;
  522. i: Integer;
  523. e, n: PHashItem;
  524. begin
  525. p := AllocMem(NewCapacity * sizeof(PHashItem));
  526. for i := 0 to FBucketCount-1 do
  527. begin
  528. e := FBucket^[i];
  529. while Assigned(e) do
  530. begin
  531. chain := @p^[e^.HashValue mod NewCapacity];
  532. n := e^.Next;
  533. e^.Next := chain^;
  534. chain^ := e;
  535. e := n;
  536. end;
  537. end;
  538. FBucketCount := NewCapacity;
  539. FreeMem(FBucket);
  540. FBucket := p;
  541. end;
  542. function THashTable.Remove(Entry: PHashItem): Boolean;
  543. var
  544. chain: PPHashItem;
  545. begin
  546. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  547. while Assigned(chain^) do
  548. begin
  549. if chain^ = Entry then
  550. begin
  551. chain^ := Entry^.Next;
  552. if FOwnsObjects then
  553. Entry^.Data.Free;
  554. Dispose(Entry);
  555. Dec(FCount);
  556. Result := True;
  557. Exit;
  558. end;
  559. chain := @chain^^.Next;
  560. end;
  561. Result := False;
  562. end;
  563. // this does not free the aData object
  564. function THashTable.RemoveData(aData: TObject): Boolean;
  565. var
  566. i: Integer;
  567. chain: PPHashItem;
  568. e: PHashItem;
  569. begin
  570. for i := 0 to FBucketCount-1 do
  571. begin
  572. chain := @FBucket^[i];
  573. while Assigned(chain^) do
  574. begin
  575. if chain^^.Data = aData then
  576. begin
  577. e := chain^;
  578. chain^ := e^.Next;
  579. Dispose(e);
  580. Dec(FCount);
  581. Result := True;
  582. Exit;
  583. end;
  584. chain := @chain^^.Next;
  585. end;
  586. end;
  587. Result := False;
  588. end;
  589. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  590. var
  591. i: Integer;
  592. e: PHashItem;
  593. begin
  594. for i := 0 to FBucketCount-1 do
  595. begin
  596. e := FBucket^[i];
  597. while Assigned(e) do
  598. begin
  599. if not proc(e, arg) then
  600. Exit;
  601. e := e^.Next;
  602. end;
  603. end;
  604. end;
  605. { TDblHashArray }
  606. destructor TDblHashArray.Destroy;
  607. begin
  608. FreeMem(FData);
  609. inherited Destroy;
  610. end;
  611. procedure TDblHashArray.Init(NumSlots: Integer);
  612. var
  613. i: Integer;
  614. begin
  615. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  616. begin
  617. FSizeLog := 3;
  618. while (NumSlots shr FSizeLog) <> 0 do
  619. Inc(FSizeLog);
  620. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  621. FRevision := 0;
  622. end;
  623. if FRevision = 0 then
  624. begin
  625. FRevision := $FFFFFFFF;
  626. for i := (1 shl FSizeLog)-1 downto 0 do
  627. FData^[i].rev := FRevision;
  628. end;
  629. Dec(FRevision);
  630. end;
  631. function TDblHashArray.Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  632. var
  633. step: Byte;
  634. mask: LongWord;
  635. idx: Integer;
  636. HashValue: LongWord;
  637. begin
  638. HashValue := Hash(LongWord(PtrUInt(uri)), localName, localLength);
  639. mask := (1 shl FSizeLog) - 1;
  640. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  641. idx := HashValue and mask;
  642. result := True;
  643. while FData^[idx].rev = FRevision do
  644. begin
  645. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr = uri) and
  646. (FData^[idx].lnameLen = localLength) and
  647. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  648. Exit;
  649. if idx < step then
  650. Inc(idx, (1 shl FSizeLog) - step)
  651. else
  652. Dec(idx, step);
  653. end;
  654. with FData^[idx] do
  655. begin
  656. rev := FRevision;
  657. hash := HashValue;
  658. uriPtr := uri;
  659. lname := localName;
  660. lnameLen := localLength;
  661. end;
  662. result := False;
  663. end;
  664. { TNSSupport }
  665. constructor TNSSupport.Create(aNameTable: THashTable);
  666. var
  667. b: TBinding;
  668. begin
  669. inherited Create;
  670. FNameTable := aNameTable;
  671. FPrefixes := THashTable.Create(16, False);
  672. FBindings := TFPList.Create;
  673. SetLength(FBindingStack, 16);
  674. { provide implicit binding for the 'xml' prefix }
  675. DefineBinding('xml', stduri_xml, b);
  676. { bind default namespace to empty string }
  677. DefineBinding('', '', b);
  678. end;
  679. destructor TNSSupport.Destroy;
  680. var
  681. I: Integer;
  682. begin
  683. for I := FBindings.Count-1 downto 0 do
  684. TObject(FBindings.List^[I]).Free;
  685. FBindings.Free;
  686. FPrefixes.Free;
  687. inherited Destroy;
  688. end;
  689. function TNSSupport.BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  690. begin
  691. { try to reuse an existing binding }
  692. result := FFreeBindings;
  693. if Assigned(result) then
  694. FFreeBindings := result.Next
  695. else { no free bindings, create a new one }
  696. begin
  697. result := TBinding.Create;
  698. FBindings.Add(result);
  699. end;
  700. { link it into chain of bindings at the current element level }
  701. result.Next := FBindingStack[FNesting];
  702. FBindingStack[FNesting] := result;
  703. { bind }
  704. result.uri := nsURI;
  705. result.Prefix := aPrefix;
  706. result.PrevPrefixBinding := aPrefix^.Data;
  707. aPrefix^.Data := result;
  708. end;
  709. function TNSSupport.DefaultNSBinding: TBinding;
  710. begin
  711. result := TBinding(FDefaultPrefix.Data);
  712. end;
  713. procedure TNSSupport.DefineBinding(const Prefix, nsURI: XMLString;
  714. out Binding: TBinding);
  715. var
  716. Pfx, uri: PHashItem;
  717. begin
  718. Pfx := @FDefaultPrefix;
  719. if (nsURI <> '') and (Prefix <> '') then
  720. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  721. uri := FNameTable.FindOrAdd(PWideChar(nsURI),Length(nsURI));
  722. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> uri) then
  723. Binding := BindPrefix(uri, Pfx)
  724. else
  725. Binding := nil;
  726. end;
  727. function TNSSupport.CheckAttribute(const Prefix, nsURI: XMLString;
  728. out Binding: TBinding): TAttributeAction;
  729. var
  730. Pfx: PHashItem;
  731. I: Integer;
  732. b: TBinding;
  733. buf: array[0..31] of WideChar;
  734. p: PWideChar;
  735. uri: PHashItem;
  736. begin
  737. Binding := nil;
  738. Pfx := nil;
  739. Result := aaUnchanged;
  740. if Prefix <> '' then
  741. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  742. else if nsURI = '' then
  743. Exit;
  744. uri := FNameTable.FindOrAdd(PWideChar(nsURI), Length(nsURI));
  745. { if the prefix is already bound to correct URI, we're done }
  746. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = uri) then
  747. Exit;
  748. { see if there's another prefix bound to the target URI }
  749. // TODO: should use something faster than linear search
  750. for i := FNesting downto 0 do
  751. begin
  752. b := FBindingStack[i];
  753. while Assigned(b) do
  754. begin
  755. if (b.uri = uri) and (b.Prefix <> @FDefaultPrefix) then
  756. begin
  757. Binding := b; // found one -> override the attribute's prefix
  758. Result := aaPrefix;
  759. Exit;
  760. end;
  761. b := b.Next;
  762. end;
  763. end;
  764. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  765. if (Pfx = nil) or Assigned(Pfx^.Data) then
  766. repeat
  767. Inc(FPrefixSeqNo);
  768. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  769. p := @Buf[high(Buf)]; // done without using strings
  770. while i <> 0 do
  771. begin
  772. p^ := WideChar(i mod 10+ord('0'));
  773. dec(p);
  774. i := i div 10;
  775. end;
  776. p^ := 'S'; dec(p);
  777. p^ := 'N';
  778. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  779. until Pfx^.Data = nil;
  780. Binding := BindPrefix(uri, Pfx);
  781. Result := aaBoth;
  782. end;
  783. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  784. begin
  785. if Assigned(P) and (Len > 0) then
  786. Result := FPrefixes.FindOrAdd(P, Len)
  787. else
  788. Result := @FDefaultPrefix;
  789. end;
  790. function TNSSupport.LookupNamespace(const APrefix: XMLString): XMLString;
  791. var
  792. prefixatom: PHashItem;
  793. b: TBinding;
  794. begin
  795. prefixatom := GetPrefix(PWideChar(APrefix),Length(APrefix));
  796. b := TBinding(prefixatom^.Data);
  797. if Assigned(b) and Assigned(b.Uri) then
  798. result := b.Uri^.Key
  799. else
  800. result := '';
  801. end;
  802. procedure TNSSupport.PushScope;
  803. begin
  804. Inc(FNesting);
  805. if FNesting >= Length(FBindingStack) then
  806. SetLength(FBindingStack, FNesting * 2);
  807. end;
  808. function TNSSupport.PopScope: Boolean;
  809. var
  810. b, temp: TBinding;
  811. begin
  812. { don't unbind prefixes declared before the first call to PushScope }
  813. Result := FNesting > 0;
  814. if not Result then
  815. Exit;
  816. temp := FBindingStack[FNesting];
  817. while Assigned(temp) do
  818. begin
  819. b := temp;
  820. temp := b.next;
  821. b.next := FFreeBindings;
  822. FFreeBindings := b;
  823. b.Prefix^.Data := b.prevPrefixBinding;
  824. end;
  825. FBindingStack[FNesting] := nil;
  826. Dec(FNesting);
  827. end;
  828. { Buffer builder utils }
  829. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  830. begin
  831. ABuffer.MaxLength := ALength;
  832. ABuffer.Length := 0;
  833. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  834. end;
  835. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  836. begin
  837. if ABuffer.Length >= ABuffer.MaxLength then
  838. begin
  839. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  840. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  841. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  842. end;
  843. ABuffer.Buffer[ABuffer.Length] := wc;
  844. Inc(ABuffer.Length);
  845. end;
  846. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  847. var
  848. Len: Integer;
  849. begin
  850. Len := PEnd - PStart;
  851. if Len <= 0 then
  852. Exit;
  853. if Len >= ABuf.MaxLength - ABuf.Length then
  854. begin
  855. ABuf.MaxLength := (Len + ABuf.Length)*2;
  856. // note: memory clean isn't necessary here.
  857. // To avoid garbage, control Length field.
  858. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  859. end;
  860. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  861. Inc(ABuf.Length, Len);
  862. end;
  863. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  864. var
  865. Len: Integer;
  866. begin
  867. Len := Length(AValue);
  868. if Len <= 0 then
  869. Exit;
  870. if Len >= ABuf.MaxLength - ABuf.Length then
  871. begin
  872. ABuf.MaxLength := (Len + ABuf.Length)*2;
  873. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  874. end;
  875. Move(PWideChar(AValue)^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  876. Inc(ABuf.Length, Len);
  877. end;
  878. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  879. begin
  880. Result := (ABuf.Length = Length(Arg)) and
  881. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  882. end;
  883. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  884. var
  885. Dst, Src: Integer;
  886. begin
  887. Dst := 0;
  888. Src := 0;
  889. // skip leading space if any
  890. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  891. Inc(Src);
  892. while Src < Buf.Length do
  893. begin
  894. if Buf.Buffer[Src] = ' ' then
  895. begin
  896. // Dst cannot be 0 here, because leading space is already skipped
  897. if Buf.Buffer[Dst-1] <> ' ' then
  898. begin
  899. Buf.Buffer[Dst] := ' ';
  900. Inc(Dst);
  901. end;
  902. end
  903. else
  904. begin
  905. Buf.Buffer[Dst] := Buf.Buffer[Src];
  906. Inc(Dst);
  907. end;
  908. Inc(Src);
  909. end;
  910. // trailing space (only one possible due to compression)
  911. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  912. Dec(Dst);
  913. Modified := Dst <> Buf.Length;
  914. Buf.Length := Dst;
  915. end;
  916. { standard decoders }
  917. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  918. var
  919. cnt: Cardinal;
  920. begin
  921. cnt := OutCnt; // num of widechars
  922. if cnt > InCnt div sizeof(WideChar) then
  923. cnt := InCnt div sizeof(WideChar);
  924. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  925. Dec(InCnt, cnt*sizeof(WideChar));
  926. Dec(OutCnt, cnt);
  927. Result := cnt;
  928. end;
  929. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  930. var
  931. I: Integer;
  932. cnt: Cardinal;
  933. InPtr: PChar;
  934. begin
  935. cnt := OutCnt; // num of widechars
  936. if cnt > InCnt div sizeof(WideChar) then
  937. cnt := InCnt div sizeof(WideChar);
  938. InPtr := InBuf;
  939. for I := 0 to cnt-1 do
  940. begin
  941. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  942. Inc(InPtr, 2);
  943. end;
  944. Dec(InCnt, cnt*sizeof(WideChar));
  945. Dec(OutCnt, cnt);
  946. Result := cnt;
  947. end;
  948. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  949. var
  950. I: Integer;
  951. cnt: Cardinal;
  952. begin
  953. cnt := OutCnt; // num of widechars
  954. if cnt > InCnt then
  955. cnt := InCnt;
  956. for I := 0 to cnt-1 do
  957. OutBuf[I] := WideChar(ord(InBuf[I]));
  958. Dec(InCnt, cnt);
  959. Dec(OutCnt, cnt);
  960. Result := cnt;
  961. end;
  962. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  963. const
  964. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  965. var
  966. i, j, bc: Cardinal;
  967. Value: Cardinal;
  968. begin
  969. result := 0;
  970. i := OutCnt;
  971. while (i > 0) and (InCnt > 0) do
  972. begin
  973. bc := 1;
  974. Value := ord(InBuf^);
  975. if Value < $80 then
  976. OutBuf^ := WideChar(Value)
  977. else
  978. begin
  979. if Value < $C2 then
  980. begin
  981. Result := -1;
  982. Break;
  983. end;
  984. Inc(bc);
  985. if Value > $DF then
  986. begin
  987. Inc(bc);
  988. if Value > $EF then
  989. begin
  990. Inc(bc);
  991. if Value > $F7 then // never encountered in the tests.
  992. begin
  993. Result := -1;
  994. Break;
  995. end;
  996. end;
  997. end;
  998. if InCnt < bc then
  999. Break;
  1000. j := 1;
  1001. while j < bc do
  1002. begin
  1003. if InBuf[j] in [#$80..#$BF] then
  1004. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  1005. else
  1006. begin
  1007. Result := -1;
  1008. Break;
  1009. end;
  1010. Inc(j);
  1011. end;
  1012. Value := Value and MaxCode[bc];
  1013. // RFC2279 check
  1014. if Value <= MaxCode[bc-1] then
  1015. begin
  1016. Result := -1;
  1017. Break;
  1018. end;
  1019. case Value of
  1020. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1021. $10000..$10FFFF:
  1022. begin
  1023. if i < 2 then Break;
  1024. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1025. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1026. Inc(OutBuf); // once here
  1027. Dec(i);
  1028. end
  1029. else
  1030. begin
  1031. Result := -1;
  1032. Break;
  1033. end;
  1034. end;
  1035. end;
  1036. Inc(OutBuf);
  1037. Inc(InBuf, bc);
  1038. Dec(InCnt, bc);
  1039. Dec(i);
  1040. end;
  1041. if Result >= 0 then
  1042. Result := OutCnt-i;
  1043. OutCnt := i;
  1044. end;
  1045. end.