xmlutils.pp 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$ifdef fpc}{$mode objfpc}{$endif}
  13. {$H+}
  14. {$ifopt Q+}{$define overflow_check}{$endif}
  15. interface
  16. uses
  17. SysUtils, Classes;
  18. function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean; overload;
  19. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  20. function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean;
  21. function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean;
  22. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean;
  23. function IsValidXmlEncoding(const Value: WideString): Boolean;
  24. function Xml11NamePages: PByteArray;
  25. procedure NormalizeSpaces(var Value: WideString);
  26. function IsXmlWhiteSpace(c: WideChar): Boolean;
  27. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  28. { beware, works in ASCII range only }
  29. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  30. procedure WStrLower(var S: WideString);
  31. type
  32. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  33. const
  34. xmlVersionStr: array[TXMLVersion] of WideString = ('', '1.0', '1.1');
  35. type
  36. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  37. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  38. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  39. ntNotation,
  40. ntWhitespace,
  41. ntSignificantWhitespace,
  42. ntEndElement,
  43. ntEndEntity,
  44. ntXmlDeclaration
  45. );
  46. TAttrDataType = (
  47. dtCdata,
  48. dtId,
  49. dtIdRef,
  50. dtIdRefs,
  51. dtEntity,
  52. dtEntities,
  53. dtNmToken,
  54. dtNmTokens,
  55. dtNotation
  56. );
  57. { a simple hash table with WideString keys }
  58. type
  59. {$ifndef fpc}
  60. PtrInt = LongInt;
  61. TFPList = TList;
  62. {$endif}
  63. PPHashItem = ^PHashItem;
  64. PHashItem = ^THashItem;
  65. THashItem = record
  66. Key: WideString;
  67. HashValue: LongWord;
  68. Next: PHashItem;
  69. Data: TObject;
  70. end;
  71. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  72. PHashItemArray = ^THashItemArray;
  73. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  74. THashTable = class(TObject)
  75. private
  76. FCount: LongWord;
  77. FBucketCount: LongWord;
  78. FBucket: PHashItemArray;
  79. FOwnsObjects: Boolean;
  80. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  81. procedure Resize(NewCapacity: LongWord);
  82. public
  83. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  84. destructor Destroy; override;
  85. procedure Clear;
  86. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  87. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  88. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  89. function FindOrAdd(const Key: WideString): PHashItem; overload;
  90. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  91. function Remove(Entry: PHashItem): Boolean;
  92. function RemoveData(aData: TObject): Boolean;
  93. procedure ForEach(proc: THashForEach; arg: Pointer);
  94. property Count: LongWord read FCount;
  95. end;
  96. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  97. TExpHashEntry = record
  98. rev: LongWord;
  99. hash: LongWord;
  100. uriPtr: PWideString;
  101. lname: PWideChar;
  102. lnameLen: Integer;
  103. end;
  104. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  105. PExpHashEntryArray = ^TExpHashEntryArray;
  106. TDblHashArray = class(TObject)
  107. private
  108. FSizeLog: Integer;
  109. FRevision: LongWord;
  110. FData: PExpHashEntryArray;
  111. public
  112. procedure Init(NumSlots: Integer);
  113. function Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  114. destructor Destroy; override;
  115. end;
  116. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  117. TLocation = record
  118. Line: Integer;
  119. LinePos: Integer;
  120. end;
  121. { generic node info record, shared between DOM and reader }
  122. PNodeData = ^TNodeData;
  123. TNodeData = record
  124. FNext: PNodeData;
  125. FQName: PHashItem;
  126. FPrefix: PHashItem;
  127. FNsUri: PHashItem;
  128. FColonPos: Integer;
  129. FTypeInfo: TObject;
  130. FLoc: TLocation;
  131. FLoc2: TLocation; // for attributes: start of value
  132. FIDEntry: PHashItem; // ID attributes: entry in ID map
  133. FNodeType: TXMLNodeType;
  134. FValueStr: WideString;
  135. FValueStart: PWideChar;
  136. FValueLength: Integer;
  137. FIsDefault: Boolean;
  138. FDenormalized: Boolean; // Whether attribute value changes by normalization
  139. end;
  140. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  141. TBinding = class
  142. public
  143. uri: WideString;
  144. next: TBinding;
  145. prevPrefixBinding: TObject;
  146. Prefix: PHashItem;
  147. end;
  148. TAttributeAction = (
  149. aaUnchanged,
  150. aaPrefix, // only override the prefix
  151. aaBoth // override prefix and emit namespace definition
  152. );
  153. TNSSupport = class(TObject)
  154. private
  155. FNesting: Integer;
  156. FPrefixSeqNo: Integer;
  157. FFreeBindings: TBinding;
  158. FBindings: TFPList;
  159. FBindingStack: array of TBinding;
  160. FPrefixes: THashTable;
  161. FDefaultPrefix: THashItem;
  162. public
  163. constructor Create;
  164. destructor Destroy; override;
  165. procedure DefineBinding(const Prefix, nsURI: WideString; out Binding: TBinding);
  166. function CheckAttribute(const Prefix, nsURI: WideString;
  167. out Binding: TBinding): TAttributeAction;
  168. function IsPrefixBound(P: PWideChar; Len: Integer; out Prefix: PHashItem): Boolean;
  169. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  170. function BindPrefix(const nsURI: WideString; aPrefix: PHashItem): TBinding;
  171. function DefaultNSBinding: TBinding;
  172. procedure StartElement;
  173. procedure EndElement;
  174. end;
  175. { Buffer builder, used to compose long strings without too much memory allocations }
  176. PWideCharBuf = ^TWideCharBuf;
  177. TWideCharBuf = record
  178. Buffer: PWideChar;
  179. Length: Integer;
  180. MaxLength: Integer;
  181. end;
  182. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  183. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  184. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  185. function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
  186. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  187. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  188. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  189. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  190. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  191. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  192. {$i names.inc}
  193. implementation
  194. var
  195. Xml11Pg: PByteArray = nil;
  196. function Xml11NamePages: PByteArray;
  197. var
  198. I: Integer;
  199. p: PByteArray;
  200. begin
  201. if Xml11Pg = nil then
  202. begin
  203. GetMem(p, 512);
  204. for I := 0 to 255 do
  205. p^[I] := ord(Byte(I) in Xml11HighPages);
  206. p^[0] := 2;
  207. p^[3] := $2c;
  208. p^[$20] := $2a;
  209. p^[$21] := $2b;
  210. p^[$2f] := $29;
  211. p^[$30] := $2d;
  212. p^[$fd] := $28;
  213. p^[$ff] := $30;
  214. Move(p^, p^[256], 256);
  215. p^[$100] := $19;
  216. p^[$103] := $2E;
  217. p^[$120] := $2F;
  218. Xml11Pg := p;
  219. end;
  220. Result := Xml11Pg;
  221. end;
  222. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  223. begin
  224. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  225. begin
  226. Inc(Index);
  227. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  228. end
  229. else
  230. Result := False;
  231. end;
  232. function IsXml11Char(const Value: WideString; var Index: Integer): Boolean; overload;
  233. begin
  234. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  235. begin
  236. Inc(Index);
  237. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  238. end
  239. else
  240. Result := False;
  241. end;
  242. function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean;
  243. begin
  244. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  245. end;
  246. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  247. var
  248. Pages: PByteArray;
  249. I: Integer;
  250. begin
  251. Result := False;
  252. if Xml11 then
  253. Pages := Xml11NamePages
  254. else
  255. Pages := @NamePages;
  256. I := 0;
  257. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
  258. (Value[I] = ':') or
  259. (Xml11 and IsXml11Char(Value, I))) then
  260. Exit;
  261. Inc(I);
  262. while I < Len do
  263. begin
  264. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  265. (Value[I] = ':') or
  266. (Xml11 and IsXml11Char(Value, I))) then
  267. Exit;
  268. Inc(I);
  269. end;
  270. Result := True;
  271. end;
  272. function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean;
  273. var
  274. Pages: PByteArray;
  275. I: Integer;
  276. Offset: Integer;
  277. begin
  278. if Xml11 then
  279. Pages := Xml11NamePages
  280. else
  281. Pages := @NamePages;
  282. Result := False;
  283. if Value = '' then
  284. Exit;
  285. I := 1;
  286. Offset := 0;
  287. while I <= Length(Value) do
  288. begin
  289. if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
  290. (Value[I] = ':') or
  291. (Xml11 and IsXml11Char(Value, I))) then
  292. begin
  293. if (I = Length(Value)) or (Value[I] <> #32) then
  294. Exit;
  295. Offset := 0;
  296. Inc(I);
  297. Continue;
  298. end;
  299. Offset := $100;
  300. Inc(I);
  301. end;
  302. Result := True;
  303. end;
  304. function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean;
  305. var
  306. I: Integer;
  307. Pages: PByteArray;
  308. begin
  309. if Xml11 then
  310. Pages := Xml11NamePages
  311. else
  312. Pages := @NamePages;
  313. Result := False;
  314. if Value = '' then
  315. Exit;
  316. I := 1;
  317. while I <= Length(Value) do
  318. begin
  319. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  320. (Value[I] = ':') or
  321. (Xml11 and IsXml11Char(Value, I))) then
  322. Exit;
  323. Inc(I);
  324. end;
  325. Result := True;
  326. end;
  327. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean;
  328. var
  329. I: Integer;
  330. Pages: PByteArray;
  331. begin
  332. if Xml11 then
  333. Pages := Xml11NamePages
  334. else
  335. Pages := @NamePages;
  336. I := 1;
  337. Result := False;
  338. if Value = '' then
  339. Exit;
  340. while I <= Length(Value) do
  341. begin
  342. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  343. (Value[I] = ':') or
  344. (Xml11 and IsXml11Char(Value, I))) then
  345. begin
  346. if (I = Length(Value)) or (Value[I] <> #32) then
  347. Exit;
  348. end;
  349. Inc(I);
  350. end;
  351. Result := True;
  352. end;
  353. function IsValidXmlEncoding(const Value: WideString): Boolean;
  354. var
  355. I: Integer;
  356. begin
  357. Result := False;
  358. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  359. Exit;
  360. for I := 2 to Length(Value) do
  361. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  362. Exit;
  363. Result := True;
  364. end;
  365. procedure NormalizeSpaces(var Value: WideString);
  366. var
  367. I, J: Integer;
  368. begin
  369. I := Length(Value);
  370. // speed: trim only whed needed
  371. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  372. Value := Trim(Value);
  373. I := 1;
  374. while I < Length(Value) do
  375. begin
  376. if Value[I] = #32 then
  377. begin
  378. J := I+1;
  379. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  380. if J-I > 1 then Delete(Value, I+1, J-I-1);
  381. end;
  382. Inc(I);
  383. end;
  384. end;
  385. function IsXmlWhiteSpace(c: WideChar): Boolean;
  386. begin
  387. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  388. end;
  389. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  390. var
  391. counter: Integer;
  392. c1, c2: Word;
  393. begin
  394. counter := 0;
  395. result := 0;
  396. if Len = 0 then
  397. exit;
  398. repeat
  399. c1 := ord(S1[counter]);
  400. c2 := ord(S2[counter]);
  401. if (c1 = 0) or (c2 = 0) then break;
  402. if c1 <> c2 then
  403. begin
  404. if c1 in [97..122] then
  405. Dec(c1, 32);
  406. if c2 in [97..122] then
  407. Dec(c2, 32);
  408. if c1 <> c2 then
  409. Break;
  410. end;
  411. Inc(counter);
  412. until counter >= Len;
  413. result := c1 - c2;
  414. end;
  415. procedure WStrLower(var S: WideString);
  416. var
  417. i: Integer;
  418. begin
  419. for i := 1 to Length(S) do
  420. if (S[i] >= 'A') and (S[i] <= 'Z') then
  421. Inc(word(S[i]), 32);
  422. end;
  423. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  424. begin
  425. Result := InitValue;
  426. while KeyLen <> 0 do
  427. begin
  428. {$ifdef overflow_check}{$q-}{$endif}
  429. Result := Result * $F4243 xor ord(Key^);
  430. {$ifdef overflow_check}{$q+}{$endif}
  431. Inc(Key);
  432. Dec(KeyLen);
  433. end;
  434. end;
  435. function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
  436. begin
  437. {$IFDEF FPC}
  438. Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
  439. {$ELSE}
  440. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  441. {$ENDIF}
  442. end;
  443. { THashTable }
  444. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  445. var
  446. I: Integer;
  447. begin
  448. inherited Create;
  449. FOwnsObjects := OwnObjects;
  450. I := 256;
  451. while I < InitSize do I := I shl 1;
  452. FBucketCount := I;
  453. FBucket := AllocMem(I * sizeof(PHashItem));
  454. end;
  455. destructor THashTable.Destroy;
  456. begin
  457. Clear;
  458. FreeMem(FBucket);
  459. inherited Destroy;
  460. end;
  461. procedure THashTable.Clear;
  462. var
  463. I: Integer;
  464. item, next: PHashItem;
  465. begin
  466. for I := 0 to FBucketCount-1 do
  467. begin
  468. item := FBucket^[I];
  469. while Assigned(item) do
  470. begin
  471. next := item^.Next;
  472. if FOwnsObjects then
  473. item^.Data.Free;
  474. Dispose(item);
  475. item := next;
  476. end;
  477. FBucket^[I] := nil;
  478. end;
  479. end;
  480. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  481. var
  482. Dummy: Boolean;
  483. begin
  484. Result := Lookup(Key, KeyLen, Dummy, False);
  485. end;
  486. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  487. var Found: Boolean): PHashItem;
  488. begin
  489. Result := Lookup(Key, KeyLen, Found, True);
  490. end;
  491. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  492. var
  493. Dummy: Boolean;
  494. begin
  495. Result := Lookup(Key, KeyLen, Dummy, True);
  496. end;
  497. function THashTable.FindOrAdd(const Key: WideString): PHashItem;
  498. var
  499. Dummy: Boolean;
  500. begin
  501. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  502. end;
  503. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  504. var
  505. e: PHashItem;
  506. Dummy: Boolean;
  507. begin
  508. e := Lookup(Key, KeyLen, Dummy, False);
  509. if Assigned(e) then
  510. Result := e^.Data
  511. else
  512. Result := nil;
  513. end;
  514. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  515. out Found: Boolean; CanCreate: Boolean): PHashItem;
  516. var
  517. Entry: PPHashItem;
  518. h: LongWord;
  519. begin
  520. h := Hash(0, Key, KeyLength);
  521. Entry := @FBucket^[h mod FBucketCount];
  522. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  523. Entry := @Entry^^.Next;
  524. Found := Assigned(Entry^);
  525. if Found or (not CanCreate) then
  526. begin
  527. Result := Entry^;
  528. Exit;
  529. end;
  530. if FCount > FBucketCount then { arbitrary limit, probably too high }
  531. begin
  532. Resize(FBucketCount * 2);
  533. Result := Lookup(Key, KeyLength, Found, CanCreate);
  534. end
  535. else
  536. begin
  537. New(Result);
  538. // SetString for WideStrings trims on zero chars [fixed, #14740]
  539. SetLength(Result^.Key, KeyLength);
  540. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  541. Result^.HashValue := h;
  542. Result^.Data := nil;
  543. Result^.Next := nil;
  544. Inc(FCount);
  545. Entry^ := Result;
  546. end;
  547. end;
  548. procedure THashTable.Resize(NewCapacity: LongWord);
  549. var
  550. p: PHashItemArray;
  551. chain: PPHashItem;
  552. i: Integer;
  553. e, n: PHashItem;
  554. begin
  555. p := AllocMem(NewCapacity * sizeof(PHashItem));
  556. for i := 0 to FBucketCount-1 do
  557. begin
  558. e := FBucket^[i];
  559. while Assigned(e) do
  560. begin
  561. chain := @p^[e^.HashValue mod NewCapacity];
  562. n := e^.Next;
  563. e^.Next := chain^;
  564. chain^ := e;
  565. e := n;
  566. end;
  567. end;
  568. FBucketCount := NewCapacity;
  569. FreeMem(FBucket);
  570. FBucket := p;
  571. end;
  572. function THashTable.Remove(Entry: PHashItem): Boolean;
  573. var
  574. chain: PPHashItem;
  575. begin
  576. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  577. while Assigned(chain^) do
  578. begin
  579. if chain^ = Entry then
  580. begin
  581. chain^ := Entry^.Next;
  582. if FOwnsObjects then
  583. Entry^.Data.Free;
  584. Dispose(Entry);
  585. Dec(FCount);
  586. Result := True;
  587. Exit;
  588. end;
  589. chain := @chain^^.Next;
  590. end;
  591. Result := False;
  592. end;
  593. // this does not free the aData object
  594. function THashTable.RemoveData(aData: TObject): Boolean;
  595. var
  596. i: Integer;
  597. chain: PPHashItem;
  598. e: PHashItem;
  599. begin
  600. for i := 0 to FBucketCount-1 do
  601. begin
  602. chain := @FBucket^[i];
  603. while Assigned(chain^) do
  604. begin
  605. if chain^^.Data = aData then
  606. begin
  607. e := chain^;
  608. chain^ := e^.Next;
  609. Dispose(e);
  610. Dec(FCount);
  611. Result := True;
  612. Exit;
  613. end;
  614. chain := @chain^^.Next;
  615. end;
  616. end;
  617. Result := False;
  618. end;
  619. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  620. var
  621. i: Integer;
  622. e: PHashItem;
  623. begin
  624. for i := 0 to FBucketCount-1 do
  625. begin
  626. e := FBucket^[i];
  627. while Assigned(e) do
  628. begin
  629. if not proc(e, arg) then
  630. Exit;
  631. e := e^.Next;
  632. end;
  633. end;
  634. end;
  635. { TDblHashArray }
  636. destructor TDblHashArray.Destroy;
  637. begin
  638. FreeMem(FData);
  639. inherited Destroy;
  640. end;
  641. procedure TDblHashArray.Init(NumSlots: Integer);
  642. var
  643. i: Integer;
  644. begin
  645. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  646. begin
  647. FSizeLog := 3;
  648. while (NumSlots shr FSizeLog) <> 0 do
  649. Inc(FSizeLog);
  650. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  651. FRevision := 0;
  652. end;
  653. if FRevision = 0 then
  654. begin
  655. FRevision := $FFFFFFFF;
  656. for i := (1 shl FSizeLog)-1 downto 0 do
  657. FData^[i].rev := FRevision;
  658. end;
  659. Dec(FRevision);
  660. end;
  661. function TDblHashArray.Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  662. var
  663. step: Byte;
  664. mask: LongWord;
  665. idx: Integer;
  666. HashValue: LongWord;
  667. begin
  668. HashValue := Hash(0, PWideChar(uri^), Length(uri^));
  669. HashValue := Hash(HashValue, localName, localLength);
  670. mask := (1 shl FSizeLog) - 1;
  671. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  672. idx := HashValue and mask;
  673. result := True;
  674. while FData^[idx].rev = FRevision do
  675. begin
  676. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
  677. (FData^[idx].lnameLen = localLength) and
  678. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  679. Exit;
  680. if idx < step then
  681. Inc(idx, (1 shl FSizeLog) - step)
  682. else
  683. Dec(idx, step);
  684. end;
  685. with FData^[idx] do
  686. begin
  687. rev := FRevision;
  688. hash := HashValue;
  689. uriPtr := uri;
  690. lname := localName;
  691. lnameLen := localLength;
  692. end;
  693. result := False;
  694. end;
  695. { TNSSupport }
  696. constructor TNSSupport.Create;
  697. var
  698. b: TBinding;
  699. begin
  700. inherited Create;
  701. FPrefixes := THashTable.Create(16, False);
  702. FBindings := TFPList.Create;
  703. SetLength(FBindingStack, 16);
  704. { provide implicit binding for the 'xml' prefix }
  705. // TODO: move stduri_xml, etc. to this unit, so they are reused.
  706. DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
  707. end;
  708. destructor TNSSupport.Destroy;
  709. var
  710. I: Integer;
  711. begin
  712. for I := FBindings.Count-1 downto 0 do
  713. TObject(FBindings.List^[I]).Free;
  714. FBindings.Free;
  715. FPrefixes.Free;
  716. inherited Destroy;
  717. end;
  718. function TNSSupport.BindPrefix(const nsURI: WideString; aPrefix: PHashItem): TBinding;
  719. begin
  720. { try to reuse an existing binding }
  721. result := FFreeBindings;
  722. if Assigned(result) then
  723. FFreeBindings := result.Next
  724. else { no free bindings, create a new one }
  725. begin
  726. result := TBinding.Create;
  727. FBindings.Add(result);
  728. end;
  729. { link it into chain of bindings at the current element level }
  730. result.Next := FBindingStack[FNesting];
  731. FBindingStack[FNesting] := result;
  732. { bind }
  733. result.uri := nsURI;
  734. result.Prefix := aPrefix;
  735. result.PrevPrefixBinding := aPrefix^.Data;
  736. aPrefix^.Data := result;
  737. end;
  738. function TNSSupport.DefaultNSBinding: TBinding;
  739. begin
  740. result := TBinding(FDefaultPrefix.Data);
  741. end;
  742. procedure TNSSupport.DefineBinding(const Prefix, nsURI: WideString;
  743. out Binding: TBinding);
  744. var
  745. Pfx: PHashItem;
  746. begin
  747. Pfx := @FDefaultPrefix;
  748. if (nsURI <> '') and (Prefix <> '') then
  749. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  750. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
  751. Binding := BindPrefix(nsURI, Pfx)
  752. else
  753. Binding := nil;
  754. end;
  755. function TNSSupport.CheckAttribute(const Prefix, nsURI: WideString;
  756. out Binding: TBinding): TAttributeAction;
  757. var
  758. Pfx: PHashItem;
  759. I: Integer;
  760. b: TBinding;
  761. buf: array[0..31] of WideChar;
  762. p: PWideChar;
  763. begin
  764. Binding := nil;
  765. Pfx := nil;
  766. Result := aaUnchanged;
  767. if Prefix <> '' then
  768. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  769. else if nsURI = '' then
  770. Exit;
  771. { if the prefix is already bound to correct URI, we're done }
  772. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
  773. Exit;
  774. { see if there's another prefix bound to the target URI }
  775. // TODO: should use something faster than linear search
  776. for i := FNesting downto 0 do
  777. begin
  778. b := FBindingStack[i];
  779. while Assigned(b) do
  780. begin
  781. if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
  782. begin
  783. Binding := b; // found one -> override the attribute's prefix
  784. Result := aaPrefix;
  785. Exit;
  786. end;
  787. b := b.Next;
  788. end;
  789. end;
  790. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  791. if (Pfx = nil) or Assigned(Pfx^.Data) then
  792. repeat
  793. Inc(FPrefixSeqNo);
  794. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  795. p := @Buf[high(Buf)]; // done without using strings
  796. while i <> 0 do
  797. begin
  798. p^ := WideChar(i mod 10+ord('0'));
  799. dec(p);
  800. i := i div 10;
  801. end;
  802. p^ := 'S'; dec(p);
  803. p^ := 'N';
  804. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  805. until Pfx^.Data = nil;
  806. Binding := BindPrefix(nsURI, Pfx);
  807. Result := aaBoth;
  808. end;
  809. function TNSSupport.IsPrefixBound(P: PWideChar; Len: Integer; out
  810. Prefix: PHashItem): Boolean;
  811. begin
  812. Prefix := FPrefixes.FindOrAdd(P, Len);
  813. Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
  814. end;
  815. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  816. begin
  817. if Assigned(P) and (Len > 0) then
  818. Result := FPrefixes.FindOrAdd(P, Len)
  819. else
  820. Result := @FDefaultPrefix;
  821. end;
  822. procedure TNSSupport.StartElement;
  823. begin
  824. Inc(FNesting);
  825. if FNesting >= Length(FBindingStack) then
  826. SetLength(FBindingStack, FNesting * 2);
  827. end;
  828. procedure TNSSupport.EndElement;
  829. var
  830. b, temp: TBinding;
  831. begin
  832. temp := FBindingStack[FNesting];
  833. while Assigned(temp) do
  834. begin
  835. b := temp;
  836. temp := b.next;
  837. b.next := FFreeBindings;
  838. FFreeBindings := b;
  839. b.Prefix^.Data := b.prevPrefixBinding;
  840. end;
  841. FBindingStack[FNesting] := nil;
  842. if FNesting > 0 then
  843. Dec(FNesting);
  844. end;
  845. { Buffer builder utils }
  846. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  847. begin
  848. ABuffer.MaxLength := ALength;
  849. ABuffer.Length := 0;
  850. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  851. end;
  852. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  853. begin
  854. if ABuffer.Length >= ABuffer.MaxLength then
  855. begin
  856. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  857. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  858. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  859. end;
  860. ABuffer.Buffer[ABuffer.Length] := wc;
  861. Inc(ABuffer.Length);
  862. end;
  863. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  864. var
  865. Len: Integer;
  866. begin
  867. Len := PEnd - PStart;
  868. if Len <= 0 then
  869. Exit;
  870. if Len >= ABuf.MaxLength - ABuf.Length then
  871. begin
  872. ABuf.MaxLength := (Len + ABuf.Length)*2;
  873. // note: memory clean isn't necessary here.
  874. // To avoid garbage, control Length field.
  875. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  876. end;
  877. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  878. Inc(ABuf.Length, Len);
  879. end;
  880. function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
  881. begin
  882. Result := (ABuf.Length = Length(Arg)) and
  883. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  884. end;
  885. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  886. var
  887. Dst, Src: Integer;
  888. begin
  889. Dst := 0;
  890. Src := 0;
  891. // skip leading space if any
  892. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  893. Inc(Src);
  894. while Src < Buf.Length do
  895. begin
  896. if Buf.Buffer[Src] = ' ' then
  897. begin
  898. // Dst cannot be 0 here, because leading space is already skipped
  899. if Buf.Buffer[Dst-1] <> ' ' then
  900. begin
  901. Buf.Buffer[Dst] := ' ';
  902. Inc(Dst);
  903. end;
  904. end
  905. else
  906. begin
  907. Buf.Buffer[Dst] := Buf.Buffer[Src];
  908. Inc(Dst);
  909. end;
  910. Inc(Src);
  911. end;
  912. // trailing space (only one possible due to compression)
  913. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  914. Dec(Dst);
  915. Modified := Dst <> Buf.Length;
  916. Buf.Length := Dst;
  917. end;
  918. { standard decoders }
  919. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  920. var
  921. cnt: Cardinal;
  922. begin
  923. cnt := OutCnt; // num of widechars
  924. if cnt > InCnt div sizeof(WideChar) then
  925. cnt := InCnt div sizeof(WideChar);
  926. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  927. Dec(InCnt, cnt*sizeof(WideChar));
  928. Dec(OutCnt, cnt);
  929. Result := cnt;
  930. end;
  931. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  932. var
  933. I: Integer;
  934. cnt: Cardinal;
  935. InPtr: PChar;
  936. begin
  937. cnt := OutCnt; // num of widechars
  938. if cnt > InCnt div sizeof(WideChar) then
  939. cnt := InCnt div sizeof(WideChar);
  940. InPtr := InBuf;
  941. for I := 0 to cnt-1 do
  942. begin
  943. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  944. Inc(InPtr, 2);
  945. end;
  946. Dec(InCnt, cnt*sizeof(WideChar));
  947. Dec(OutCnt, cnt);
  948. Result := cnt;
  949. end;
  950. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  951. var
  952. I: Integer;
  953. cnt: Cardinal;
  954. begin
  955. cnt := OutCnt; // num of widechars
  956. if cnt > InCnt then
  957. cnt := InCnt;
  958. for I := 0 to cnt-1 do
  959. OutBuf[I] := WideChar(ord(InBuf[I]));
  960. Dec(InCnt, cnt);
  961. Dec(OutCnt, cnt);
  962. Result := cnt;
  963. end;
  964. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  965. const
  966. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  967. var
  968. i, j, bc: Cardinal;
  969. Value: Cardinal;
  970. begin
  971. result := 0;
  972. i := OutCnt;
  973. while (i > 0) and (InCnt > 0) do
  974. begin
  975. bc := 1;
  976. Value := ord(InBuf^);
  977. if Value < $80 then
  978. OutBuf^ := WideChar(Value)
  979. else
  980. begin
  981. if Value < $C2 then
  982. begin
  983. Result := -1;
  984. Break;
  985. end;
  986. Inc(bc);
  987. if Value > $DF then
  988. begin
  989. Inc(bc);
  990. if Value > $EF then
  991. begin
  992. Inc(bc);
  993. if Value > $F7 then // never encountered in the tests.
  994. begin
  995. Result := -1;
  996. Break;
  997. end;
  998. end;
  999. end;
  1000. if InCnt < bc then
  1001. Break;
  1002. j := 1;
  1003. while j < bc do
  1004. begin
  1005. if InBuf[j] in [#$80..#$BF] then
  1006. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  1007. else
  1008. begin
  1009. Result := -1;
  1010. Break;
  1011. end;
  1012. Inc(j);
  1013. end;
  1014. Value := Value and MaxCode[bc];
  1015. // RFC2279 check
  1016. if Value <= MaxCode[bc-1] then
  1017. begin
  1018. Result := -1;
  1019. Break;
  1020. end;
  1021. case Value of
  1022. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1023. $10000..$10FFFF:
  1024. begin
  1025. if i < 2 then Break;
  1026. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1027. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1028. Inc(OutBuf); // once here
  1029. Dec(i);
  1030. end
  1031. else
  1032. begin
  1033. Result := -1;
  1034. Break;
  1035. end;
  1036. end;
  1037. end;
  1038. Inc(OutBuf);
  1039. Inc(InBuf, bc);
  1040. Dec(InCnt, bc);
  1041. Dec(i);
  1042. end;
  1043. if Result >= 0 then
  1044. Result := OutCnt-i;
  1045. OutCnt := i;
  1046. end;
  1047. initialization
  1048. finalization
  1049. if Assigned(Xml11Pg) then
  1050. FreeMem(Xml11Pg);
  1051. end.