xmlutils.pp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit xmlutils;
  12. {$ifdef fpc}{$mode objfpc}{$endif}
  13. {$H+}
  14. interface
  15. uses
  16. SysUtils, Classes;
  17. function IsXmlName(const Value: WideString; Xml11: Boolean = False): Boolean; overload;
  18. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  19. function IsXmlNames(const Value: WideString; Xml11: Boolean = False): Boolean;
  20. function IsXmlNmToken(const Value: WideString; Xml11: Boolean = False): Boolean;
  21. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean;
  22. function IsValidXmlEncoding(const Value: WideString): Boolean;
  23. function Xml11NamePages: PByteArray;
  24. procedure NormalizeSpaces(var Value: WideString);
  25. function IsXmlWhiteSpace(c: WideChar): Boolean;
  26. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  27. { beware, works in ASCII range only }
  28. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  29. procedure WStrLower(var S: WideString);
  30. type
  31. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  32. const
  33. xmlVersionStr: array[TXMLVersion] of WideString = ('', '1.0', '1.1');
  34. type
  35. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  36. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  37. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  38. ntNotation,
  39. ntWhitespace,
  40. ntSignificantWhitespace,
  41. ntEndElement,
  42. ntEndEntity,
  43. ntXmlDeclaration
  44. );
  45. TAttrDataType = (
  46. dtCdata,
  47. dtId,
  48. dtIdRef,
  49. dtIdRefs,
  50. dtEntity,
  51. dtEntities,
  52. dtNmToken,
  53. dtNmTokens,
  54. dtNotation
  55. );
  56. { a simple hash table with WideString keys }
  57. type
  58. {$ifndef fpc}
  59. PtrInt = LongInt;
  60. TFPList = TList;
  61. {$endif}
  62. PPHashItem = ^PHashItem;
  63. PHashItem = ^THashItem;
  64. THashItem = record
  65. Key: WideString;
  66. HashValue: LongWord;
  67. Next: PHashItem;
  68. Data: TObject;
  69. end;
  70. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  71. PHashItemArray = ^THashItemArray;
  72. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  73. THashTable = class(TObject)
  74. private
  75. FCount: LongWord;
  76. FBucketCount: LongWord;
  77. FBucket: PHashItemArray;
  78. FOwnsObjects: Boolean;
  79. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  80. procedure Resize(NewCapacity: LongWord);
  81. public
  82. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  83. destructor Destroy; override;
  84. procedure Clear;
  85. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  86. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  87. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  88. function FindOrAdd(const Key: WideString): PHashItem; overload;
  89. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  90. function Remove(Entry: PHashItem): Boolean;
  91. function RemoveData(aData: TObject): Boolean;
  92. procedure ForEach(proc: THashForEach; arg: Pointer);
  93. property Count: LongWord read FCount;
  94. end;
  95. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  96. TExpHashEntry = record
  97. rev: LongWord;
  98. hash: LongWord;
  99. uriPtr: PWideString;
  100. lname: PWideChar;
  101. lnameLen: Integer;
  102. end;
  103. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  104. PExpHashEntryArray = ^TExpHashEntryArray;
  105. TDblHashArray = class(TObject)
  106. private
  107. FSizeLog: Integer;
  108. FRevision: LongWord;
  109. FData: PExpHashEntryArray;
  110. public
  111. procedure Init(NumSlots: Integer);
  112. function Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  113. destructor Destroy; override;
  114. end;
  115. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  116. TLocation = record
  117. Line: Integer;
  118. LinePos: Integer;
  119. end;
  120. { generic node info record, shared between DOM and reader }
  121. PNodeData = ^TNodeData;
  122. TNodeData = record
  123. FNext: PNodeData;
  124. FQName: PHashItem;
  125. FPrefix: PHashItem;
  126. FNsUri: PHashItem;
  127. FColonPos: Integer;
  128. FTypeInfo: TObject;
  129. FLoc: TLocation;
  130. FLoc2: TLocation; // for attributes: start of value
  131. FIDEntry: PHashItem; // ID attributes: entry in ID map
  132. FNodeType: TXMLNodeType;
  133. FValueStr: WideString;
  134. FValueStart: PWideChar;
  135. FValueLength: Integer;
  136. FIsDefault: Boolean;
  137. FDenormalized: Boolean; // Whether attribute value changes by normalization
  138. end;
  139. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  140. TBinding = class
  141. public
  142. uri: WideString;
  143. next: TBinding;
  144. prevPrefixBinding: TObject;
  145. Prefix: PHashItem;
  146. end;
  147. TAttributeAction = (
  148. aaUnchanged,
  149. aaPrefix, // only override the prefix
  150. aaBoth // override prefix and emit namespace definition
  151. );
  152. TNSSupport = class(TObject)
  153. private
  154. FNesting: Integer;
  155. FPrefixSeqNo: Integer;
  156. FFreeBindings: TBinding;
  157. FBindings: TFPList;
  158. FBindingStack: array of TBinding;
  159. FPrefixes: THashTable;
  160. FDefaultPrefix: THashItem;
  161. public
  162. constructor Create;
  163. destructor Destroy; override;
  164. procedure DefineBinding(const Prefix, nsURI: WideString; out Binding: TBinding);
  165. function CheckAttribute(const Prefix, nsURI: WideString;
  166. out Binding: TBinding): TAttributeAction;
  167. function IsPrefixBound(P: PWideChar; Len: Integer; out Prefix: PHashItem): Boolean;
  168. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  169. function BindPrefix(const nsURI: WideString; aPrefix: PHashItem): TBinding;
  170. function DefaultNSBinding: TBinding;
  171. procedure StartElement;
  172. procedure EndElement;
  173. end;
  174. { Buffer builder, used to compose long strings without too much memory allocations }
  175. PWideCharBuf = ^TWideCharBuf;
  176. TWideCharBuf = record
  177. Buffer: PWideChar;
  178. Length: Integer;
  179. MaxLength: Integer;
  180. end;
  181. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  182. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  183. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  184. function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
  185. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  186. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  187. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  188. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  189. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  190. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  191. {$i names.inc}
  192. implementation
  193. var
  194. Xml11Pg: PByteArray = nil;
  195. function Xml11NamePages: PByteArray;
  196. var
  197. I: Integer;
  198. p: PByteArray;
  199. begin
  200. if Xml11Pg = nil then
  201. begin
  202. GetMem(p, 512);
  203. for I := 0 to 255 do
  204. p^[I] := ord(Byte(I) in Xml11HighPages);
  205. p^[0] := 2;
  206. p^[3] := $2c;
  207. p^[$20] := $2a;
  208. p^[$21] := $2b;
  209. p^[$2f] := $29;
  210. p^[$30] := $2d;
  211. p^[$fd] := $28;
  212. p^[$ff] := $30;
  213. Move(p^, p^[256], 256);
  214. p^[$100] := $19;
  215. p^[$103] := $2E;
  216. p^[$120] := $2F;
  217. Xml11Pg := p;
  218. end;
  219. Result := Xml11Pg;
  220. end;
  221. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  222. begin
  223. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  224. begin
  225. Inc(Index);
  226. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  227. end
  228. else
  229. Result := False;
  230. end;
  231. function IsXml11Char(const Value: WideString; var Index: Integer): Boolean; overload;
  232. begin
  233. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  234. begin
  235. Inc(Index);
  236. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  237. end
  238. else
  239. Result := False;
  240. end;
  241. function IsXmlName(const Value: WideString; Xml11: Boolean): Boolean;
  242. begin
  243. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  244. end;
  245. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  246. var
  247. Pages: PByteArray;
  248. I: Integer;
  249. begin
  250. Result := False;
  251. if Xml11 then
  252. Pages := Xml11NamePages
  253. else
  254. Pages := @NamePages;
  255. I := 0;
  256. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
  257. (Value[I] = ':') or
  258. (Xml11 and IsXml11Char(Value, I))) then
  259. Exit;
  260. Inc(I);
  261. while I < Len do
  262. begin
  263. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  264. (Value[I] = ':') or
  265. (Xml11 and IsXml11Char(Value, I))) then
  266. Exit;
  267. Inc(I);
  268. end;
  269. Result := True;
  270. end;
  271. function IsXmlNames(const Value: WideString; Xml11: Boolean): Boolean;
  272. var
  273. Pages: PByteArray;
  274. I: Integer;
  275. Offset: Integer;
  276. begin
  277. if Xml11 then
  278. Pages := Xml11NamePages
  279. else
  280. Pages := @NamePages;
  281. Result := False;
  282. if Value = '' then
  283. Exit;
  284. I := 1;
  285. Offset := 0;
  286. while I <= Length(Value) do
  287. begin
  288. if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
  289. (Value[I] = ':') or
  290. (Xml11 and IsXml11Char(Value, I))) then
  291. begin
  292. if (I = Length(Value)) or (Value[I] <> #32) then
  293. Exit;
  294. Offset := 0;
  295. Inc(I);
  296. Continue;
  297. end;
  298. Offset := $100;
  299. Inc(I);
  300. end;
  301. Result := True;
  302. end;
  303. function IsXmlNmToken(const Value: WideString; Xml11: Boolean): Boolean;
  304. var
  305. I: Integer;
  306. Pages: PByteArray;
  307. begin
  308. if Xml11 then
  309. Pages := Xml11NamePages
  310. else
  311. Pages := @NamePages;
  312. Result := False;
  313. if Value = '' then
  314. Exit;
  315. I := 1;
  316. while I <= Length(Value) do
  317. begin
  318. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  319. (Value[I] = ':') or
  320. (Xml11 and IsXml11Char(Value, I))) then
  321. Exit;
  322. Inc(I);
  323. end;
  324. Result := True;
  325. end;
  326. function IsXmlNmTokens(const Value: WideString; Xml11: Boolean): Boolean;
  327. var
  328. I: Integer;
  329. Pages: PByteArray;
  330. begin
  331. if Xml11 then
  332. Pages := Xml11NamePages
  333. else
  334. Pages := @NamePages;
  335. I := 1;
  336. Result := False;
  337. if Value = '' then
  338. Exit;
  339. while I <= Length(Value) do
  340. begin
  341. if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
  342. (Value[I] = ':') or
  343. (Xml11 and IsXml11Char(Value, I))) then
  344. begin
  345. if (I = Length(Value)) or (Value[I] <> #32) then
  346. Exit;
  347. end;
  348. Inc(I);
  349. end;
  350. Result := True;
  351. end;
  352. function IsValidXmlEncoding(const Value: WideString): Boolean;
  353. var
  354. I: Integer;
  355. begin
  356. Result := False;
  357. if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  358. Exit;
  359. for I := 2 to Length(Value) do
  360. if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  361. Exit;
  362. Result := True;
  363. end;
  364. procedure NormalizeSpaces(var Value: WideString);
  365. var
  366. I, J: Integer;
  367. begin
  368. I := Length(Value);
  369. // speed: trim only whed needed
  370. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  371. Value := Trim(Value);
  372. I := 1;
  373. while I < Length(Value) do
  374. begin
  375. if Value[I] = #32 then
  376. begin
  377. J := I+1;
  378. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  379. if J-I > 1 then Delete(Value, I+1, J-I-1);
  380. end;
  381. Inc(I);
  382. end;
  383. end;
  384. function IsXmlWhiteSpace(c: WideChar): Boolean;
  385. begin
  386. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  387. end;
  388. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  389. var
  390. counter: Integer;
  391. c1, c2: Word;
  392. begin
  393. counter := 0;
  394. result := 0;
  395. if Len = 0 then
  396. exit;
  397. repeat
  398. c1 := ord(S1[counter]);
  399. c2 := ord(S2[counter]);
  400. if (c1 = 0) or (c2 = 0) then break;
  401. if c1 <> c2 then
  402. begin
  403. if c1 in [97..122] then
  404. Dec(c1, 32);
  405. if c2 in [97..122] then
  406. Dec(c2, 32);
  407. if c1 <> c2 then
  408. Break;
  409. end;
  410. Inc(counter);
  411. until counter >= Len;
  412. result := c1 - c2;
  413. end;
  414. procedure WStrLower(var S: WideString);
  415. var
  416. i: Integer;
  417. begin
  418. for i := 1 to Length(S) do
  419. if (S[i] >= 'A') and (S[i] <= 'Z') then
  420. Inc(word(S[i]), 32);
  421. end;
  422. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  423. begin
  424. Result := InitValue;
  425. while KeyLen <> 0 do
  426. begin
  427. {$push}{$q-}
  428. Result := Result * $F4243 xor ord(Key^);
  429. {$pop}
  430. Inc(Key);
  431. Dec(KeyLen);
  432. end;
  433. end;
  434. function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
  435. begin
  436. {$IFDEF FPC}
  437. Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
  438. {$ELSE}
  439. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  440. {$ENDIF}
  441. end;
  442. { THashTable }
  443. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  444. var
  445. I: Integer;
  446. begin
  447. inherited Create;
  448. FOwnsObjects := OwnObjects;
  449. I := 256;
  450. while I < InitSize do I := I shl 1;
  451. FBucketCount := I;
  452. FBucket := AllocMem(I * sizeof(PHashItem));
  453. end;
  454. destructor THashTable.Destroy;
  455. begin
  456. Clear;
  457. FreeMem(FBucket);
  458. inherited Destroy;
  459. end;
  460. procedure THashTable.Clear;
  461. var
  462. I: Integer;
  463. item, next: PHashItem;
  464. begin
  465. for I := 0 to FBucketCount-1 do
  466. begin
  467. item := FBucket^[I];
  468. while Assigned(item) do
  469. begin
  470. next := item^.Next;
  471. if FOwnsObjects then
  472. item^.Data.Free;
  473. Dispose(item);
  474. item := next;
  475. end;
  476. FBucket^[I] := nil;
  477. end;
  478. end;
  479. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  480. var
  481. Dummy: Boolean;
  482. begin
  483. Result := Lookup(Key, KeyLen, Dummy, False);
  484. end;
  485. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  486. var Found: Boolean): PHashItem;
  487. begin
  488. Result := Lookup(Key, KeyLen, Found, True);
  489. end;
  490. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  491. var
  492. Dummy: Boolean;
  493. begin
  494. Result := Lookup(Key, KeyLen, Dummy, True);
  495. end;
  496. function THashTable.FindOrAdd(const Key: WideString): PHashItem;
  497. var
  498. Dummy: Boolean;
  499. begin
  500. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  501. end;
  502. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  503. var
  504. e: PHashItem;
  505. Dummy: Boolean;
  506. begin
  507. e := Lookup(Key, KeyLen, Dummy, False);
  508. if Assigned(e) then
  509. Result := e^.Data
  510. else
  511. Result := nil;
  512. end;
  513. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  514. out Found: Boolean; CanCreate: Boolean): PHashItem;
  515. var
  516. Entry: PPHashItem;
  517. h: LongWord;
  518. begin
  519. h := Hash(0, Key, KeyLength);
  520. Entry := @FBucket^[h mod FBucketCount];
  521. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  522. Entry := @Entry^^.Next;
  523. Found := Assigned(Entry^);
  524. if Found or (not CanCreate) then
  525. begin
  526. Result := Entry^;
  527. Exit;
  528. end;
  529. if FCount > FBucketCount then { arbitrary limit, probably too high }
  530. begin
  531. Resize(FBucketCount * 2);
  532. Result := Lookup(Key, KeyLength, Found, CanCreate);
  533. end
  534. else
  535. begin
  536. New(Result);
  537. // SetString for WideStrings trims on zero chars [fixed, #14740]
  538. SetLength(Result^.Key, KeyLength);
  539. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  540. Result^.HashValue := h;
  541. Result^.Data := nil;
  542. Result^.Next := nil;
  543. Inc(FCount);
  544. Entry^ := Result;
  545. end;
  546. end;
  547. procedure THashTable.Resize(NewCapacity: LongWord);
  548. var
  549. p: PHashItemArray;
  550. chain: PPHashItem;
  551. i: Integer;
  552. e, n: PHashItem;
  553. begin
  554. p := AllocMem(NewCapacity * sizeof(PHashItem));
  555. for i := 0 to FBucketCount-1 do
  556. begin
  557. e := FBucket^[i];
  558. while Assigned(e) do
  559. begin
  560. chain := @p^[e^.HashValue mod NewCapacity];
  561. n := e^.Next;
  562. e^.Next := chain^;
  563. chain^ := e;
  564. e := n;
  565. end;
  566. end;
  567. FBucketCount := NewCapacity;
  568. FreeMem(FBucket);
  569. FBucket := p;
  570. end;
  571. function THashTable.Remove(Entry: PHashItem): Boolean;
  572. var
  573. chain: PPHashItem;
  574. begin
  575. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  576. while Assigned(chain^) do
  577. begin
  578. if chain^ = Entry then
  579. begin
  580. chain^ := Entry^.Next;
  581. if FOwnsObjects then
  582. Entry^.Data.Free;
  583. Dispose(Entry);
  584. Dec(FCount);
  585. Result := True;
  586. Exit;
  587. end;
  588. chain := @chain^^.Next;
  589. end;
  590. Result := False;
  591. end;
  592. // this does not free the aData object
  593. function THashTable.RemoveData(aData: TObject): Boolean;
  594. var
  595. i: Integer;
  596. chain: PPHashItem;
  597. e: PHashItem;
  598. begin
  599. for i := 0 to FBucketCount-1 do
  600. begin
  601. chain := @FBucket^[i];
  602. while Assigned(chain^) do
  603. begin
  604. if chain^^.Data = aData then
  605. begin
  606. e := chain^;
  607. chain^ := e^.Next;
  608. Dispose(e);
  609. Dec(FCount);
  610. Result := True;
  611. Exit;
  612. end;
  613. chain := @chain^^.Next;
  614. end;
  615. end;
  616. Result := False;
  617. end;
  618. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  619. var
  620. i: Integer;
  621. e: PHashItem;
  622. begin
  623. for i := 0 to FBucketCount-1 do
  624. begin
  625. e := FBucket^[i];
  626. while Assigned(e) do
  627. begin
  628. if not proc(e, arg) then
  629. Exit;
  630. e := e^.Next;
  631. end;
  632. end;
  633. end;
  634. { TDblHashArray }
  635. destructor TDblHashArray.Destroy;
  636. begin
  637. FreeMem(FData);
  638. inherited Destroy;
  639. end;
  640. procedure TDblHashArray.Init(NumSlots: Integer);
  641. var
  642. i: Integer;
  643. begin
  644. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  645. begin
  646. FSizeLog := 3;
  647. while (NumSlots shr FSizeLog) <> 0 do
  648. Inc(FSizeLog);
  649. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  650. FRevision := 0;
  651. end;
  652. if FRevision = 0 then
  653. begin
  654. FRevision := $FFFFFFFF;
  655. for i := (1 shl FSizeLog)-1 downto 0 do
  656. FData^[i].rev := FRevision;
  657. end;
  658. Dec(FRevision);
  659. end;
  660. function TDblHashArray.Locate(uri: PWideString; localName: PWideChar; localLength: Integer): Boolean;
  661. var
  662. step: Byte;
  663. mask: LongWord;
  664. idx: Integer;
  665. HashValue: LongWord;
  666. begin
  667. HashValue := Hash(0, PWideChar(uri^), Length(uri^));
  668. HashValue := Hash(HashValue, localName, localLength);
  669. mask := (1 shl FSizeLog) - 1;
  670. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  671. idx := HashValue and mask;
  672. result := True;
  673. while FData^[idx].rev = FRevision do
  674. begin
  675. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
  676. (FData^[idx].lnameLen = localLength) and
  677. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  678. Exit;
  679. if idx < step then
  680. Inc(idx, (1 shl FSizeLog) - step)
  681. else
  682. Dec(idx, step);
  683. end;
  684. with FData^[idx] do
  685. begin
  686. rev := FRevision;
  687. hash := HashValue;
  688. uriPtr := uri;
  689. lname := localName;
  690. lnameLen := localLength;
  691. end;
  692. result := False;
  693. end;
  694. { TNSSupport }
  695. constructor TNSSupport.Create;
  696. var
  697. b: TBinding;
  698. begin
  699. inherited Create;
  700. FPrefixes := THashTable.Create(16, False);
  701. FBindings := TFPList.Create;
  702. SetLength(FBindingStack, 16);
  703. { provide implicit binding for the 'xml' prefix }
  704. // TODO: move stduri_xml, etc. to this unit, so they are reused.
  705. DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
  706. end;
  707. destructor TNSSupport.Destroy;
  708. var
  709. I: Integer;
  710. begin
  711. for I := FBindings.Count-1 downto 0 do
  712. TObject(FBindings.List^[I]).Free;
  713. FBindings.Free;
  714. FPrefixes.Free;
  715. inherited Destroy;
  716. end;
  717. function TNSSupport.BindPrefix(const nsURI: WideString; aPrefix: PHashItem): TBinding;
  718. begin
  719. { try to reuse an existing binding }
  720. result := FFreeBindings;
  721. if Assigned(result) then
  722. FFreeBindings := result.Next
  723. else { no free bindings, create a new one }
  724. begin
  725. result := TBinding.Create;
  726. FBindings.Add(result);
  727. end;
  728. { link it into chain of bindings at the current element level }
  729. result.Next := FBindingStack[FNesting];
  730. FBindingStack[FNesting] := result;
  731. { bind }
  732. result.uri := nsURI;
  733. result.Prefix := aPrefix;
  734. result.PrevPrefixBinding := aPrefix^.Data;
  735. aPrefix^.Data := result;
  736. end;
  737. function TNSSupport.DefaultNSBinding: TBinding;
  738. begin
  739. result := TBinding(FDefaultPrefix.Data);
  740. end;
  741. procedure TNSSupport.DefineBinding(const Prefix, nsURI: WideString;
  742. out Binding: TBinding);
  743. var
  744. Pfx: PHashItem;
  745. begin
  746. Pfx := @FDefaultPrefix;
  747. if (nsURI <> '') and (Prefix <> '') then
  748. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  749. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
  750. Binding := BindPrefix(nsURI, Pfx)
  751. else
  752. Binding := nil;
  753. end;
  754. function TNSSupport.CheckAttribute(const Prefix, nsURI: WideString;
  755. out Binding: TBinding): TAttributeAction;
  756. var
  757. Pfx: PHashItem;
  758. I: Integer;
  759. b: TBinding;
  760. buf: array[0..31] of WideChar;
  761. p: PWideChar;
  762. begin
  763. Binding := nil;
  764. Pfx := nil;
  765. Result := aaUnchanged;
  766. if Prefix <> '' then
  767. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  768. else if nsURI = '' then
  769. Exit;
  770. { if the prefix is already bound to correct URI, we're done }
  771. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
  772. Exit;
  773. { see if there's another prefix bound to the target URI }
  774. // TODO: should use something faster than linear search
  775. for i := FNesting downto 0 do
  776. begin
  777. b := FBindingStack[i];
  778. while Assigned(b) do
  779. begin
  780. if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
  781. begin
  782. Binding := b; // found one -> override the attribute's prefix
  783. Result := aaPrefix;
  784. Exit;
  785. end;
  786. b := b.Next;
  787. end;
  788. end;
  789. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  790. if (Pfx = nil) or Assigned(Pfx^.Data) then
  791. repeat
  792. Inc(FPrefixSeqNo);
  793. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  794. p := @Buf[high(Buf)]; // done without using strings
  795. while i <> 0 do
  796. begin
  797. p^ := WideChar(i mod 10+ord('0'));
  798. dec(p);
  799. i := i div 10;
  800. end;
  801. p^ := 'S'; dec(p);
  802. p^ := 'N';
  803. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  804. until Pfx^.Data = nil;
  805. Binding := BindPrefix(nsURI, Pfx);
  806. Result := aaBoth;
  807. end;
  808. function TNSSupport.IsPrefixBound(P: PWideChar; Len: Integer; out
  809. Prefix: PHashItem): Boolean;
  810. begin
  811. Prefix := FPrefixes.FindOrAdd(P, Len);
  812. Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
  813. end;
  814. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  815. begin
  816. if Assigned(P) and (Len > 0) then
  817. Result := FPrefixes.FindOrAdd(P, Len)
  818. else
  819. Result := @FDefaultPrefix;
  820. end;
  821. procedure TNSSupport.StartElement;
  822. begin
  823. Inc(FNesting);
  824. if FNesting >= Length(FBindingStack) then
  825. SetLength(FBindingStack, FNesting * 2);
  826. end;
  827. procedure TNSSupport.EndElement;
  828. var
  829. b, temp: TBinding;
  830. begin
  831. temp := FBindingStack[FNesting];
  832. while Assigned(temp) do
  833. begin
  834. b := temp;
  835. temp := b.next;
  836. b.next := FFreeBindings;
  837. FFreeBindings := b;
  838. b.Prefix^.Data := b.prevPrefixBinding;
  839. end;
  840. FBindingStack[FNesting] := nil;
  841. if FNesting > 0 then
  842. Dec(FNesting);
  843. end;
  844. { Buffer builder utils }
  845. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  846. begin
  847. ABuffer.MaxLength := ALength;
  848. ABuffer.Length := 0;
  849. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  850. end;
  851. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  852. begin
  853. if ABuffer.Length >= ABuffer.MaxLength then
  854. begin
  855. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  856. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  857. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  858. end;
  859. ABuffer.Buffer[ABuffer.Length] := wc;
  860. Inc(ABuffer.Length);
  861. end;
  862. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  863. var
  864. Len: Integer;
  865. begin
  866. Len := PEnd - PStart;
  867. if Len <= 0 then
  868. Exit;
  869. if Len >= ABuf.MaxLength - ABuf.Length then
  870. begin
  871. ABuf.MaxLength := (Len + ABuf.Length)*2;
  872. // note: memory clean isn't necessary here.
  873. // To avoid garbage, control Length field.
  874. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  875. end;
  876. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  877. Inc(ABuf.Length, Len);
  878. end;
  879. function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
  880. begin
  881. Result := (ABuf.Length = Length(Arg)) and
  882. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  883. end;
  884. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  885. var
  886. Dst, Src: Integer;
  887. begin
  888. Dst := 0;
  889. Src := 0;
  890. // skip leading space if any
  891. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  892. Inc(Src);
  893. while Src < Buf.Length do
  894. begin
  895. if Buf.Buffer[Src] = ' ' then
  896. begin
  897. // Dst cannot be 0 here, because leading space is already skipped
  898. if Buf.Buffer[Dst-1] <> ' ' then
  899. begin
  900. Buf.Buffer[Dst] := ' ';
  901. Inc(Dst);
  902. end;
  903. end
  904. else
  905. begin
  906. Buf.Buffer[Dst] := Buf.Buffer[Src];
  907. Inc(Dst);
  908. end;
  909. Inc(Src);
  910. end;
  911. // trailing space (only one possible due to compression)
  912. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  913. Dec(Dst);
  914. Modified := Dst <> Buf.Length;
  915. Buf.Length := Dst;
  916. end;
  917. { standard decoders }
  918. function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  919. var
  920. cnt: Cardinal;
  921. begin
  922. cnt := OutCnt; // num of widechars
  923. if cnt > InCnt div sizeof(WideChar) then
  924. cnt := InCnt div sizeof(WideChar);
  925. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  926. Dec(InCnt, cnt*sizeof(WideChar));
  927. Dec(OutCnt, cnt);
  928. Result := cnt;
  929. end;
  930. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  931. var
  932. I: Integer;
  933. cnt: Cardinal;
  934. InPtr: PChar;
  935. begin
  936. cnt := OutCnt; // num of widechars
  937. if cnt > InCnt div sizeof(WideChar) then
  938. cnt := InCnt div sizeof(WideChar);
  939. InPtr := InBuf;
  940. for I := 0 to cnt-1 do
  941. begin
  942. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  943. Inc(InPtr, 2);
  944. end;
  945. Dec(InCnt, cnt*sizeof(WideChar));
  946. Dec(OutCnt, cnt);
  947. Result := cnt;
  948. end;
  949. function Decode_8859_1(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  950. var
  951. I: Integer;
  952. cnt: Cardinal;
  953. begin
  954. cnt := OutCnt; // num of widechars
  955. if cnt > InCnt then
  956. cnt := InCnt;
  957. for I := 0 to cnt-1 do
  958. OutBuf[I] := WideChar(ord(InBuf[I]));
  959. Dec(InCnt, cnt);
  960. Dec(OutCnt, cnt);
  961. Result := cnt;
  962. end;
  963. function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  964. const
  965. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  966. var
  967. i, j, bc: Cardinal;
  968. Value: Cardinal;
  969. begin
  970. result := 0;
  971. i := OutCnt;
  972. while (i > 0) and (InCnt > 0) do
  973. begin
  974. bc := 1;
  975. Value := ord(InBuf^);
  976. if Value < $80 then
  977. OutBuf^ := WideChar(Value)
  978. else
  979. begin
  980. if Value < $C2 then
  981. begin
  982. Result := -1;
  983. Break;
  984. end;
  985. Inc(bc);
  986. if Value > $DF then
  987. begin
  988. Inc(bc);
  989. if Value > $EF then
  990. begin
  991. Inc(bc);
  992. if Value > $F7 then // never encountered in the tests.
  993. begin
  994. Result := -1;
  995. Break;
  996. end;
  997. end;
  998. end;
  999. if InCnt < bc then
  1000. Break;
  1001. j := 1;
  1002. while j < bc do
  1003. begin
  1004. if InBuf[j] in [#$80..#$BF] then
  1005. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  1006. else
  1007. begin
  1008. Result := -1;
  1009. Break;
  1010. end;
  1011. Inc(j);
  1012. end;
  1013. Value := Value and MaxCode[bc];
  1014. // RFC2279 check
  1015. if Value <= MaxCode[bc-1] then
  1016. begin
  1017. Result := -1;
  1018. Break;
  1019. end;
  1020. case Value of
  1021. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1022. $10000..$10FFFF:
  1023. begin
  1024. if i < 2 then Break;
  1025. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1026. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1027. Inc(OutBuf); // once here
  1028. Dec(i);
  1029. end
  1030. else
  1031. begin
  1032. Result := -1;
  1033. Break;
  1034. end;
  1035. end;
  1036. end;
  1037. Inc(OutBuf);
  1038. Inc(InBuf, bc);
  1039. Dec(InCnt, bc);
  1040. Dec(i);
  1041. end;
  1042. if Result >= 0 then
  1043. Result := OutCnt-i;
  1044. OutCnt := i;
  1045. end;
  1046. initialization
  1047. finalization
  1048. if Assigned(Xml11Pg) then
  1049. FreeMem(Xml11Pg);
  1050. end.