xmlutils.pp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$ifdef fpc}{$mode objfpc}{$endif}
  13. {$H+}
  14. interface
  15. uses
  16. SysUtils, Classes;
  17. type
  18. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  19. TSetOfChar = set of Char;
  20. XMLString = WideString;
  21. PXMLChar = PWideChar;
  22. function IsXmlName(const Value: XMLString; Xml11: Boolean = False): Boolean; overload;
  23. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  24. function IsXmlNames(const Value: XMLString; Xml11: Boolean = False): Boolean;
  25. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean = False): Boolean;
  26. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean = False): Boolean;
  27. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  28. function Xml11NamePages: PByteArray;
  29. procedure NormalizeSpaces(var Value: XMLString);
  30. function IsXmlWhiteSpace(c: WideChar): Boolean;
  31. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  32. { beware, works in ASCII range only }
  33. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  34. procedure WStrLower(var S: XMLString);
  35. const
  36. xmlVersionStr: array[TXMLVersion] of XMLString = ('', '1.0', '1.1');
  37. type
  38. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  39. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  40. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  41. ntNotation,
  42. ntWhitespace,
  43. ntSignificantWhitespace,
  44. ntEndElement,
  45. ntEndEntity,
  46. ntXmlDeclaration
  47. );
  48. TAttrDataType = (
  49. dtCdata,
  50. dtId,
  51. dtIdRef,
  52. dtIdRefs,
  53. dtEntity,
  54. dtEntities,
  55. dtNmToken,
  56. dtNmTokens,
  57. dtNotation
  58. );
  59. { a simple hash table with WideString keys }
  60. type
  61. {$ifndef fpc}
  62. PtrInt = LongInt;
  63. TFPList = TList;
  64. {$endif}
  65. PPHashItem = ^PHashItem;
  66. PHashItem = ^THashItem;
  67. THashItem = record
  68. Key: XMLString;
  69. HashValue: LongWord;
  70. Next: PHashItem;
  71. Data: TObject;
  72. end;
  73. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  74. PHashItemArray = ^THashItemArray;
  75. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  76. THashTable = class(TObject)
  77. private
  78. FCount: LongWord;
  79. FBucketCount: LongWord;
  80. FBucket: PHashItemArray;
  81. FOwnsObjects: Boolean;
  82. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  83. procedure Resize(NewCapacity: LongWord);
  84. public
  85. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  86. destructor Destroy; override;
  87. procedure Clear;
  88. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  89. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  90. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  91. function FindOrAdd(const Key: XMLString): PHashItem; overload;
  92. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  93. function Remove(Entry: PHashItem): Boolean;
  94. function RemoveData(aData: TObject): Boolean;
  95. procedure ForEach(proc: THashForEach; arg: Pointer);
  96. property Count: LongWord read FCount;
  97. end;
  98. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  99. TExpHashEntry = record
  100. rev: LongWord;
  101. hash: LongWord;
  102. uriPtr: PWideString;
  103. lname: PWideChar;
  104. lnameLen: Integer;
  105. end;
  106. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  107. PExpHashEntryArray = ^TExpHashEntryArray;
  108. TDblHashArray = class(TObject)
  109. private
  110. FSizeLog: Integer;
  111. FRevision: LongWord;
  112. FData: PExpHashEntryArray;
  113. public
  114. procedure Init(NumSlots: Integer);
  115. function Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  116. destructor Destroy; override;
  117. end;
  118. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  119. TLocation = record
  120. Line: Integer;
  121. LinePos: Integer;
  122. end;
  123. { generic node info record, shared between DOM and reader }
  124. PNodeData = ^TNodeData;
  125. TNodeData = record
  126. FNext: PNodeData;
  127. FQName: PHashItem;
  128. FPrefix: PHashItem;
  129. FNsUri: PHashItem;
  130. FColonPos: Integer;
  131. FTypeInfo: TObject;
  132. FLoc: TLocation;
  133. FLoc2: TLocation; // for attributes: start of value
  134. FIDEntry: PHashItem; // ID attributes: entry in ID map
  135. FNodeType: TXMLNodeType;
  136. FValueStr: XMLString;
  137. FValueStart: PWideChar;
  138. FValueLength: Integer;
  139. FIsDefault: Boolean;
  140. FDenormalized: Boolean; // Whether attribute value changes by normalization
  141. end;
  142. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  143. TBinding = class
  144. public
  145. uri: XMLString;
  146. next: TBinding;
  147. prevPrefixBinding: TObject;
  148. Prefix: PHashItem;
  149. end;
  150. TAttributeAction = (
  151. aaUnchanged,
  152. aaPrefix, // only override the prefix
  153. aaBoth // override prefix and emit namespace definition
  154. );
  155. TNSSupport = class(TObject)
  156. private
  157. FNesting: Integer;
  158. FPrefixSeqNo: Integer;
  159. FFreeBindings: TBinding;
  160. FBindings: TFPList;
  161. FBindingStack: array of TBinding;
  162. FPrefixes: THashTable;
  163. FDefaultPrefix: THashItem;
  164. public
  165. constructor Create;
  166. destructor Destroy; override;
  167. procedure DefineBinding(const Prefix, nsURI: XMLString; out Binding: TBinding);
  168. function CheckAttribute(const Prefix, nsURI: XMLString;
  169. out Binding: TBinding): TAttributeAction;
  170. function IsPrefixBound(P: PWideChar; Len: Integer; out Prefix: PHashItem): Boolean;
  171. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  172. function BindPrefix(const nsURI: XMLString; aPrefix: PHashItem): TBinding;
  173. function DefaultNSBinding: TBinding;
  174. procedure StartElement;
  175. procedure EndElement;
  176. end;
  177. { Buffer builder, used to compose long strings without too much memory allocations }
  178. PWideCharBuf = ^TWideCharBuf;
  179. TWideCharBuf = record
  180. Buffer: PWideChar;
  181. Length: Integer;
  182. MaxLength: Integer;
  183. end;
  184. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  185. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  186. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  187. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  188. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  189. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  190. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  191. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  192. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  193. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  194. {$i names.inc}
  195. implementation
  196. var
  197. Xml11Pg: PByteArray = nil;
  198. function Xml11NamePages: PByteArray;
  199. var
  200. I: Integer;
  201. p: PByteArray;
  202. begin
  203. if Xml11Pg = nil then
  204. begin
  205. GetMem(p, 512);
  206. for I := 0 to 255 do
  207. p^[I] := ord(Byte(I) in Xml11HighPages);
  208. p^[0] := 2;
  209. p^[3] := $2c;
  210. p^[$20] := $2a;
  211. p^[$21] := $2b;
  212. p^[$2f] := $29;
  213. p^[$30] := $2d;
  214. p^[$fd] := $28;
  215. p^[$ff] := $30;
  216. Move(p^, p^[256], 256);
  217. p^[$100] := $19;
  218. p^[$103] := $2E;
  219. p^[$120] := $2F;
  220. Xml11Pg := p;
  221. end;
  222. Result := Xml11Pg;
  223. end;
  224. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  225. begin
  226. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  227. begin
  228. Inc(Index);
  229. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  230. end
  231. else
  232. Result := False;
  233. end;
  234. function IsXml11Char(const Value: XMLString; var Index: Integer): Boolean; overload;
  235. begin
  236. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  237. begin
  238. Inc(Index);
  239. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  240. end
  241. else
  242. Result := False;
  243. end;
  244. function IsXmlName(const Value: XMLString; Xml11: Boolean): Boolean;
  245. begin
  246. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  247. end;
  248. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  249. var
  250. Pages: PByteArray;
  251. I: Integer;
  252. begin
  253. Result := False;
  254. if Xml11 then
  255. Pages := Xml11NamePages
  256. else
  257. Pages := @NamePages;
  258. I := 0;
  259. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
  260. (Value[I] = ':') or
  261. (Xml11 and IsXml11Char(Value, I))) then
  262. Exit;
  263. Inc(I);
  264. while I < Len do
  265. begin
  266. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  267. (Value[I] = ':') or
  268. (Xml11 and IsXml11Char(Value, I))) then
  269. Exit;
  270. Inc(I);
  271. end;
  272. Result := True;
  273. end;
  274. function IsXmlNames(const Value: XMLString; Xml11: Boolean): Boolean;
  275. var
  276. Pages: PByteArray;
  277. I: Integer;
  278. Offset: Integer;
  279. begin
  280. if Xml11 then
  281. Pages := Xml11NamePages
  282. else
  283. Pages := @NamePages;
  284. Result := False;
  285. if Value = '' then
  286. Exit;
  287. I := 1;
  288. Offset := 0;
  289. while I <= Length(Value) do
  290. begin
  291. if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
  292. (Value[I] = ':') or
  293. (Xml11 and IsXml11Char(Value, I))) then
  294. begin
  295. if (I = Length(Value)) or (Value[I] <> #32) then
  296. Exit;
  297. Offset := 0;
  298. Inc(I);
  299. Continue;
  300. end;
  301. Offset := $100;
  302. Inc(I);
  303. end;
  304. Result := True;
  305. end;
  306. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean): Boolean;
  307. var
  308. I: Integer;
  309. Pages: PByteArray;
  310. begin
  311. if Xml11 then
  312. Pages := Xml11NamePages
  313. else
  314. Pages := @NamePages;
  315. Result := False;
  316. if Value = '' then
  317. Exit;
  318. I := 1;
  319. while I <= Length(Value) do
  320. begin
  321. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  322. (Value[I] = ':') or
  323. (Xml11 and IsXml11Char(Value, I))) then
  324. Exit;
  325. Inc(I);
  326. end;
  327. Result := True;
  328. end;
  329. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean): Boolean;
  330. var
  331. I: Integer;
  332. Pages: PByteArray;
  333. begin
  334. if Xml11 then
  335. Pages := Xml11NamePages
  336. else
  337. Pages := @NamePages;
  338. I := 1;
  339. Result := False;
  340. if Value = '' then
  341. Exit;
  342. while I <= Length(Value) do
  343. begin
  344. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  345. (Value[I] = ':') or
  346. (Xml11 and IsXml11Char(Value, I))) then
  347. begin
  348. if (I = Length(Value)) or (Value[I] <> #32) then
  349. Exit;
  350. end;
  351. Inc(I);
  352. end;
  353. Result := True;
  354. end;
  355. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  356. var
  357. I: Integer;
  358. begin
  359. Result := False;
  360. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  361. Exit;
  362. for I := 2 to Length(Value) do
  363. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  364. Exit;
  365. Result := True;
  366. end;
  367. procedure NormalizeSpaces(var Value: XMLString);
  368. var
  369. I, J: Integer;
  370. begin
  371. I := Length(Value);
  372. // speed: trim only whed needed
  373. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  374. Value := Trim(Value);
  375. I := 1;
  376. while I < Length(Value) do
  377. begin
  378. if Value[I] = #32 then
  379. begin
  380. J := I+1;
  381. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  382. if J-I > 1 then Delete(Value, I+1, J-I-1);
  383. end;
  384. Inc(I);
  385. end;
  386. end;
  387. function IsXmlWhiteSpace(c: WideChar): Boolean;
  388. begin
  389. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  390. end;
  391. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  392. var
  393. counter: Integer;
  394. c1, c2: Word;
  395. begin
  396. counter := 0;
  397. result := 0;
  398. if Len = 0 then
  399. exit;
  400. repeat
  401. c1 := ord(S1[counter]);
  402. c2 := ord(S2[counter]);
  403. if (c1 = 0) or (c2 = 0) then break;
  404. if c1 <> c2 then
  405. begin
  406. if c1 in [97..122] then
  407. Dec(c1, 32);
  408. if c2 in [97..122] then
  409. Dec(c2, 32);
  410. if c1 <> c2 then
  411. Break;
  412. end;
  413. Inc(counter);
  414. until counter >= Len;
  415. result := c1 - c2;
  416. end;
  417. procedure WStrLower(var S: XMLString);
  418. var
  419. i: Integer;
  420. begin
  421. for i := 1 to Length(S) do
  422. if (S[i] >= 'A') and (S[i] <= 'Z') then
  423. Inc(word(S[i]), 32);
  424. end;
  425. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  426. begin
  427. Result := InitValue;
  428. while KeyLen <> 0 do
  429. begin
  430. {$push}{$r-}{$q-}
  431. Result := Result * $F4243 xor ord(Key^);
  432. {$pop}
  433. Inc(Key);
  434. Dec(KeyLen);
  435. end;
  436. end;
  437. function KeyCompare(const Key1: XMLString; Key2: Pointer; Key2Len: Integer): Boolean;
  438. begin
  439. {$IFDEF FPC}
  440. Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
  441. {$ELSE}
  442. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  443. {$ENDIF}
  444. end;
  445. { THashTable }
  446. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  447. var
  448. I: Integer;
  449. begin
  450. inherited Create;
  451. FOwnsObjects := OwnObjects;
  452. I := 256;
  453. while I < InitSize do I := I shl 1;
  454. FBucketCount := I;
  455. FBucket := AllocMem(I * sizeof(PHashItem));
  456. end;
  457. destructor THashTable.Destroy;
  458. begin
  459. Clear;
  460. FreeMem(FBucket);
  461. inherited Destroy;
  462. end;
  463. procedure THashTable.Clear;
  464. var
  465. I: Integer;
  466. item, next: PHashItem;
  467. begin
  468. for I := 0 to FBucketCount-1 do
  469. begin
  470. item := FBucket^[I];
  471. while Assigned(item) do
  472. begin
  473. next := item^.Next;
  474. if FOwnsObjects then
  475. item^.Data.Free;
  476. Dispose(item);
  477. item := next;
  478. end;
  479. FBucket^[I] := nil;
  480. end;
  481. end;
  482. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  483. var
  484. Dummy: Boolean;
  485. begin
  486. Result := Lookup(Key, KeyLen, Dummy, False);
  487. end;
  488. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  489. var Found: Boolean): PHashItem;
  490. begin
  491. Result := Lookup(Key, KeyLen, Found, True);
  492. end;
  493. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  494. var
  495. Dummy: Boolean;
  496. begin
  497. Result := Lookup(Key, KeyLen, Dummy, True);
  498. end;
  499. function THashTable.FindOrAdd(const Key: XMLString): PHashItem;
  500. var
  501. Dummy: Boolean;
  502. begin
  503. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  504. end;
  505. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  506. var
  507. e: PHashItem;
  508. Dummy: Boolean;
  509. begin
  510. e := Lookup(Key, KeyLen, Dummy, False);
  511. if Assigned(e) then
  512. Result := e^.Data
  513. else
  514. Result := nil;
  515. end;
  516. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  517. out Found: Boolean; CanCreate: Boolean): PHashItem;
  518. var
  519. Entry: PPHashItem;
  520. h: LongWord;
  521. begin
  522. h := Hash(0, Key, KeyLength);
  523. Entry := @FBucket^[h mod FBucketCount];
  524. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  525. Entry := @Entry^^.Next;
  526. Found := Assigned(Entry^);
  527. if Found or (not CanCreate) then
  528. begin
  529. Result := Entry^;
  530. Exit;
  531. end;
  532. if FCount > FBucketCount then { arbitrary limit, probably too high }
  533. begin
  534. Resize(FBucketCount * 2);
  535. Result := Lookup(Key, KeyLength, Found, CanCreate);
  536. end
  537. else
  538. begin
  539. New(Result);
  540. // SetString for WideStrings trims on zero chars [fixed, #14740]
  541. SetLength(Result^.Key, KeyLength);
  542. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  543. Result^.HashValue := h;
  544. Result^.Data := nil;
  545. Result^.Next := nil;
  546. Inc(FCount);
  547. Entry^ := Result;
  548. end;
  549. end;
  550. procedure THashTable.Resize(NewCapacity: LongWord);
  551. var
  552. p: PHashItemArray;
  553. chain: PPHashItem;
  554. i: Integer;
  555. e, n: PHashItem;
  556. begin
  557. p := AllocMem(NewCapacity * sizeof(PHashItem));
  558. for i := 0 to FBucketCount-1 do
  559. begin
  560. e := FBucket^[i];
  561. while Assigned(e) do
  562. begin
  563. chain := @p^[e^.HashValue mod NewCapacity];
  564. n := e^.Next;
  565. e^.Next := chain^;
  566. chain^ := e;
  567. e := n;
  568. end;
  569. end;
  570. FBucketCount := NewCapacity;
  571. FreeMem(FBucket);
  572. FBucket := p;
  573. end;
  574. function THashTable.Remove(Entry: PHashItem): Boolean;
  575. var
  576. chain: PPHashItem;
  577. begin
  578. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  579. while Assigned(chain^) do
  580. begin
  581. if chain^ = Entry then
  582. begin
  583. chain^ := Entry^.Next;
  584. if FOwnsObjects then
  585. Entry^.Data.Free;
  586. Dispose(Entry);
  587. Dec(FCount);
  588. Result := True;
  589. Exit;
  590. end;
  591. chain := @chain^^.Next;
  592. end;
  593. Result := False;
  594. end;
  595. // this does not free the aData object
  596. function THashTable.RemoveData(aData: TObject): Boolean;
  597. var
  598. i: Integer;
  599. chain: PPHashItem;
  600. e: PHashItem;
  601. begin
  602. for i := 0 to FBucketCount-1 do
  603. begin
  604. chain := @FBucket^[i];
  605. while Assigned(chain^) do
  606. begin
  607. if chain^^.Data = aData then
  608. begin
  609. e := chain^;
  610. chain^ := e^.Next;
  611. Dispose(e);
  612. Dec(FCount);
  613. Result := True;
  614. Exit;
  615. end;
  616. chain := @chain^^.Next;
  617. end;
  618. end;
  619. Result := False;
  620. end;
  621. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  622. var
  623. i: Integer;
  624. e: PHashItem;
  625. begin
  626. for i := 0 to FBucketCount-1 do
  627. begin
  628. e := FBucket^[i];
  629. while Assigned(e) do
  630. begin
  631. if not proc(e, arg) then
  632. Exit;
  633. e := e^.Next;
  634. end;
  635. end;
  636. end;
  637. { TDblHashArray }
  638. destructor TDblHashArray.Destroy;
  639. begin
  640. FreeMem(FData);
  641. inherited Destroy;
  642. end;
  643. procedure TDblHashArray.Init(NumSlots: Integer);
  644. var
  645. i: Integer;
  646. begin
  647. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  648. begin
  649. FSizeLog := 3;
  650. while (NumSlots shr FSizeLog) <> 0 do
  651. Inc(FSizeLog);
  652. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  653. FRevision := 0;
  654. end;
  655. if FRevision = 0 then
  656. begin
  657. FRevision := $FFFFFFFF;
  658. for i := (1 shl FSizeLog)-1 downto 0 do
  659. FData^[i].rev := FRevision;
  660. end;
  661. Dec(FRevision);
  662. end;
  663. function TDblHashArray.Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  664. var
  665. step: Byte;
  666. mask: LongWord;
  667. idx: Integer;
  668. HashValue: LongWord;
  669. begin
  670. HashValue := Hash(0, PWideChar(uri^), Length(uri^));
  671. HashValue := Hash(HashValue, localName, localLength);
  672. mask := (1 shl FSizeLog) - 1;
  673. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  674. idx := HashValue and mask;
  675. result := True;
  676. while FData^[idx].rev = FRevision do
  677. begin
  678. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
  679. (FData^[idx].lnameLen = localLength) and
  680. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  681. Exit;
  682. if idx < step then
  683. Inc(idx, (1 shl FSizeLog) - step)
  684. else
  685. Dec(idx, step);
  686. end;
  687. with FData^[idx] do
  688. begin
  689. rev := FRevision;
  690. hash := HashValue;
  691. uriPtr := uri;
  692. lname := localName;
  693. lnameLen := localLength;
  694. end;
  695. result := False;
  696. end;
  697. { TNSSupport }
  698. constructor TNSSupport.Create;
  699. var
  700. b: TBinding;
  701. begin
  702. inherited Create;
  703. FPrefixes := THashTable.Create(16, False);
  704. FBindings := TFPList.Create;
  705. SetLength(FBindingStack, 16);
  706. { provide implicit binding for the 'xml' prefix }
  707. // TODO: move stduri_xml, etc. to this unit, so they are reused.
  708. DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
  709. end;
  710. destructor TNSSupport.Destroy;
  711. var
  712. I: Integer;
  713. begin
  714. for I := FBindings.Count-1 downto 0 do
  715. TObject(FBindings.List^[I]).Free;
  716. FBindings.Free;
  717. FPrefixes.Free;
  718. inherited Destroy;
  719. end;
  720. function TNSSupport.BindPrefix(const nsURI: XMLString; aPrefix: PHashItem): TBinding;
  721. begin
  722. { try to reuse an existing binding }
  723. result := FFreeBindings;
  724. if Assigned(result) then
  725. FFreeBindings := result.Next
  726. else { no free bindings, create a new one }
  727. begin
  728. result := TBinding.Create;
  729. FBindings.Add(result);
  730. end;
  731. { link it into chain of bindings at the current element level }
  732. result.Next := FBindingStack[FNesting];
  733. FBindingStack[FNesting] := result;
  734. { bind }
  735. result.uri := nsURI;
  736. result.Prefix := aPrefix;
  737. result.PrevPrefixBinding := aPrefix^.Data;
  738. aPrefix^.Data := result;
  739. end;
  740. function TNSSupport.DefaultNSBinding: TBinding;
  741. begin
  742. result := TBinding(FDefaultPrefix.Data);
  743. end;
  744. procedure TNSSupport.DefineBinding(const Prefix, nsURI: XMLString;
  745. out Binding: TBinding);
  746. var
  747. Pfx: PHashItem;
  748. begin
  749. Pfx := @FDefaultPrefix;
  750. if (nsURI <> '') and (Prefix <> '') then
  751. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  752. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
  753. Binding := BindPrefix(nsURI, Pfx)
  754. else
  755. Binding := nil;
  756. end;
  757. function TNSSupport.CheckAttribute(const Prefix, nsURI: XMLString;
  758. out Binding: TBinding): TAttributeAction;
  759. var
  760. Pfx: PHashItem;
  761. I: Integer;
  762. b: TBinding;
  763. buf: array[0..31] of WideChar;
  764. p: PWideChar;
  765. begin
  766. Binding := nil;
  767. Pfx := nil;
  768. Result := aaUnchanged;
  769. if Prefix <> '' then
  770. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  771. else if nsURI = '' then
  772. Exit;
  773. { if the prefix is already bound to correct URI, we're done }
  774. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
  775. Exit;
  776. { see if there's another prefix bound to the target URI }
  777. // TODO: should use something faster than linear search
  778. for i := FNesting downto 0 do
  779. begin
  780. b := FBindingStack[i];
  781. while Assigned(b) do
  782. begin
  783. if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
  784. begin
  785. Binding := b; // found one -> override the attribute's prefix
  786. Result := aaPrefix;
  787. Exit;
  788. end;
  789. b := b.Next;
  790. end;
  791. end;
  792. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  793. if (Pfx = nil) or Assigned(Pfx^.Data) then
  794. repeat
  795. Inc(FPrefixSeqNo);
  796. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  797. p := @Buf[high(Buf)]; // done without using strings
  798. while i <> 0 do
  799. begin
  800. p^ := WideChar(i mod 10+ord('0'));
  801. dec(p);
  802. i := i div 10;
  803. end;
  804. p^ := 'S'; dec(p);
  805. p^ := 'N';
  806. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  807. until Pfx^.Data = nil;
  808. Binding := BindPrefix(nsURI, Pfx);
  809. Result := aaBoth;
  810. end;
  811. function TNSSupport.IsPrefixBound(P: PWideChar; Len: Integer; out
  812. Prefix: PHashItem): Boolean;
  813. begin
  814. Prefix := FPrefixes.FindOrAdd(P, Len);
  815. Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
  816. end;
  817. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  818. begin
  819. if Assigned(P) and (Len > 0) then
  820. Result := FPrefixes.FindOrAdd(P, Len)
  821. else
  822. Result := @FDefaultPrefix;
  823. end;
  824. procedure TNSSupport.StartElement;
  825. begin
  826. Inc(FNesting);
  827. if FNesting >= Length(FBindingStack) then
  828. SetLength(FBindingStack, FNesting * 2);
  829. end;
  830. procedure TNSSupport.EndElement;
  831. var
  832. b, temp: TBinding;
  833. begin
  834. temp := FBindingStack[FNesting];
  835. while Assigned(temp) do
  836. begin
  837. b := temp;
  838. temp := b.next;
  839. b.next := FFreeBindings;
  840. FFreeBindings := b;
  841. b.Prefix^.Data := b.prevPrefixBinding;
  842. end;
  843. FBindingStack[FNesting] := nil;
  844. if FNesting > 0 then
  845. Dec(FNesting);
  846. end;
  847. { Buffer builder utils }
  848. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  849. begin
  850. ABuffer.MaxLength := ALength;
  851. ABuffer.Length := 0;
  852. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  853. end;
  854. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  855. begin
  856. if ABuffer.Length >= ABuffer.MaxLength then
  857. begin
  858. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  859. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  860. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  861. end;
  862. ABuffer.Buffer[ABuffer.Length] := wc;
  863. Inc(ABuffer.Length);
  864. end;
  865. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  866. var
  867. Len: Integer;
  868. begin
  869. Len := PEnd - PStart;
  870. if Len <= 0 then
  871. Exit;
  872. if Len >= ABuf.MaxLength - ABuf.Length then
  873. begin
  874. ABuf.MaxLength := (Len + ABuf.Length)*2;
  875. // note: memory clean isn't necessary here.
  876. // To avoid garbage, control Length field.
  877. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  878. end;
  879. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  880. Inc(ABuf.Length, Len);
  881. end;
  882. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  883. begin
  884. Result := (ABuf.Length = Length(Arg)) and
  885. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  886. end;
  887. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  888. var
  889. Dst, Src: Integer;
  890. begin
  891. Dst := 0;
  892. Src := 0;
  893. // skip leading space if any
  894. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  895. Inc(Src);
  896. while Src < Buf.Length do
  897. begin
  898. if Buf.Buffer[Src] = ' ' then
  899. begin
  900. // Dst cannot be 0 here, because leading space is already skipped
  901. if Buf.Buffer[Dst-1] <> ' ' then
  902. begin
  903. Buf.Buffer[Dst] := ' ';
  904. Inc(Dst);
  905. end;
  906. end
  907. else
  908. begin
  909. Buf.Buffer[Dst] := Buf.Buffer[Src];
  910. Inc(Dst);
  911. end;
  912. Inc(Src);
  913. end;
  914. // trailing space (only one possible due to compression)
  915. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  916. Dec(Dst);
  917. Modified := Dst <> Buf.Length;
  918. Buf.Length := Dst;
  919. end;
  920. { standard decoders }
  921. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  922. var
  923. cnt: Cardinal;
  924. begin
  925. cnt := OutCnt; // num of widechars
  926. if cnt > InCnt div sizeof(WideChar) then
  927. cnt := InCnt div sizeof(WideChar);
  928. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  929. Dec(InCnt, cnt*sizeof(WideChar));
  930. Dec(OutCnt, cnt);
  931. Result := cnt;
  932. end;
  933. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  934. var
  935. I: Integer;
  936. cnt: Cardinal;
  937. InPtr: PChar;
  938. begin
  939. cnt := OutCnt; // num of widechars
  940. if cnt > InCnt div sizeof(WideChar) then
  941. cnt := InCnt div sizeof(WideChar);
  942. InPtr := InBuf;
  943. for I := 0 to cnt-1 do
  944. begin
  945. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  946. Inc(InPtr, 2);
  947. end;
  948. Dec(InCnt, cnt*sizeof(WideChar));
  949. Dec(OutCnt, cnt);
  950. Result := cnt;
  951. end;
  952. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  953. var
  954. I: Integer;
  955. cnt: Cardinal;
  956. begin
  957. cnt := OutCnt; // num of widechars
  958. if cnt > InCnt then
  959. cnt := InCnt;
  960. for I := 0 to cnt-1 do
  961. OutBuf[I] := WideChar(ord(InBuf[I]));
  962. Dec(InCnt, cnt);
  963. Dec(OutCnt, cnt);
  964. Result := cnt;
  965. end;
  966. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  967. const
  968. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  969. var
  970. i, j, bc: Cardinal;
  971. Value: Cardinal;
  972. begin
  973. result := 0;
  974. i := OutCnt;
  975. while (i > 0) and (InCnt > 0) do
  976. begin
  977. bc := 1;
  978. Value := ord(InBuf^);
  979. if Value < $80 then
  980. OutBuf^ := WideChar(Value)
  981. else
  982. begin
  983. if Value < $C2 then
  984. begin
  985. Result := -1;
  986. Break;
  987. end;
  988. Inc(bc);
  989. if Value > $DF then
  990. begin
  991. Inc(bc);
  992. if Value > $EF then
  993. begin
  994. Inc(bc);
  995. if Value > $F7 then // never encountered in the tests.
  996. begin
  997. Result := -1;
  998. Break;
  999. end;
  1000. end;
  1001. end;
  1002. if InCnt < bc then
  1003. Break;
  1004. j := 1;
  1005. while j < bc do
  1006. begin
  1007. if InBuf[j] in [#$80..#$BF] then
  1008. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  1009. else
  1010. begin
  1011. Result := -1;
  1012. Break;
  1013. end;
  1014. Inc(j);
  1015. end;
  1016. Value := Value and MaxCode[bc];
  1017. // RFC2279 check
  1018. if Value <= MaxCode[bc-1] then
  1019. begin
  1020. Result := -1;
  1021. Break;
  1022. end;
  1023. case Value of
  1024. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1025. $10000..$10FFFF:
  1026. begin
  1027. if i < 2 then Break;
  1028. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1029. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1030. Inc(OutBuf); // once here
  1031. Dec(i);
  1032. end
  1033. else
  1034. begin
  1035. Result := -1;
  1036. Break;
  1037. end;
  1038. end;
  1039. end;
  1040. Inc(OutBuf);
  1041. Inc(InBuf, bc);
  1042. Dec(InCnt, bc);
  1043. Dec(i);
  1044. end;
  1045. if Result >= 0 then
  1046. Result := OutCnt-i;
  1047. OutCnt := i;
  1048. end;
  1049. initialization
  1050. finalization
  1051. if Assigned(Xml11Pg) then
  1052. FreeMem(Xml11Pg);
  1053. end.