xmlutils.pp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. {
  2. This file is part of the Free Component Library
  3. XML utility routines.
  4. Copyright (c) 2006 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$IFNDEF FPC_DOTTEDUNITS}
  12. unit xmlutils;
  13. {$ENDIF FPC_DOTTEDUNITS}
  14. {$ifdef fpc}{$mode objfpc}{$endif}
  15. {$H+}
  16. interface
  17. {$IFDEF FPC_DOTTEDUNITS}
  18. uses
  19. System.SysUtils, System.Classes;
  20. {$ELSE FPC_DOTTEDUNITS}
  21. uses
  22. SysUtils, Classes;
  23. {$ENDIF FPC_DOTTEDUNITS}
  24. type
  25. TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
  26. TSetOfChar = set of AnsiChar;
  27. XMLString = UnicodeString;
  28. PXMLString = ^XMLString;
  29. PXMLChar = PWideChar;
  30. function IsXmlName(const Value: XMLString; Xml11: Boolean = False): Boolean; overload;
  31. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
  32. function IsXmlNames(const Value: XMLString; Xml11: Boolean = False): Boolean;
  33. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean = False): Boolean;
  34. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean = False): Boolean;
  35. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  36. procedure NormalizeSpaces(var Value: XMLString);
  37. function IsXmlWhiteSpace(c: WideChar): Boolean;
  38. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  39. { beware, works in ASCII range only }
  40. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  41. procedure WStrLower(var S: XMLString);
  42. const
  43. xmlVersionStr: array[TXMLVersion] of XMLString = ('', '1.0', '1.1');
  44. // URIs of predefined namespaces
  45. stduri_xml: XMLString = 'http://www.w3.org/XML/1998/namespace';
  46. stduri_xmlns: XMLString = 'http://www.w3.org/2000/xmlns/';
  47. type
  48. TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
  49. ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
  50. ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
  51. ntNotation,
  52. ntWhitespace,
  53. ntSignificantWhitespace,
  54. ntEndElement,
  55. ntEndEntity,
  56. ntXmlDeclaration
  57. );
  58. TAttrDataType = (
  59. dtCdata,
  60. dtId,
  61. dtIdRef,
  62. dtIdRefs,
  63. dtEntity,
  64. dtEntities,
  65. dtNmToken,
  66. dtNmTokens,
  67. dtNotation
  68. );
  69. { a simple hash table with WideString keys }
  70. type
  71. {$ifndef fpc}
  72. PtrInt = LongInt;
  73. TFPList = TList;
  74. {$endif}
  75. PPHashItem = ^PHashItem;
  76. PHashItem = ^THashItem;
  77. THashItem = record
  78. Key: XMLString;
  79. HashValue: LongWord;
  80. Next: PHashItem;
  81. Data: TObject;
  82. end;
  83. {$ifdef CPU16}
  84. THashItemArray = array[0..MaxSmallInt div sizeof(Pointer)-1] of PHashItem;
  85. {$else CPU16}
  86. THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
  87. {$endif CPU16}
  88. PHashItemArray = ^THashItemArray;
  89. THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
  90. THashTable = class(TObject)
  91. private
  92. FCount: LongWord;
  93. FBucketCount: LongWord;
  94. FBucket: PHashItemArray;
  95. FOwnsObjects: Boolean;
  96. function Lookup(Key: PWideChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
  97. procedure Resize(NewCapacity: LongWord);
  98. public
  99. constructor Create(InitSize: Integer; OwnObjects: Boolean);
  100. destructor Destroy; override;
  101. procedure Clear;
  102. function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  103. function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
  104. function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
  105. function FindOrAdd(const Key: XMLString): PHashItem; overload;
  106. function Get(Key: PWideChar; KeyLen: Integer): TObject;
  107. function Remove(Entry: PHashItem): Boolean;
  108. function RemoveData(aData: TObject): Boolean;
  109. procedure ForEach(proc: THashForEach; arg: Pointer);
  110. property Count: LongWord read FCount;
  111. end;
  112. { another hash, for detecting duplicate namespaced attributes without memory allocations }
  113. TExpHashEntry = record
  114. rev: LongWord;
  115. hash: LongWord;
  116. uriPtr: Pointer;
  117. lname: PWideChar;
  118. lnameLen: Integer;
  119. end;
  120. {$ifdef CPU16}
  121. TExpHashEntryArray = array[0..MaxSmallInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  122. {$else CPU16}
  123. TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
  124. {$endif CPU16}
  125. PExpHashEntryArray = ^TExpHashEntryArray;
  126. TDblHashArray = class(TObject)
  127. private
  128. FSizeLog: Integer;
  129. FRevision: LongWord;
  130. FData: PExpHashEntryArray;
  131. public
  132. procedure Init(NumSlots: Integer);
  133. function Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  134. destructor Destroy; override;
  135. end;
  136. { Source location. This may be augmented with ByteOffset, UTF8Offset, etc. }
  137. TLocation = record
  138. Line: Integer;
  139. LinePos: Integer;
  140. end;
  141. IXmlLineInfo = interface(IInterface)['{FD0A892B-B26C-4954-9995-103B2A9D178A}']
  142. function GetHasLineInfo: Boolean;
  143. function GetLineNumber: Integer;
  144. function GetLinePosition: Integer;
  145. property HasLineInfo: Boolean read GetHasLineInfo;
  146. property LineNumber: Integer read GetLineNumber;
  147. property LinePosition: Integer read GetLinePosition;
  148. end;
  149. { generic node info record, shared between DOM and reader }
  150. PPNodeData = ^PNodeData;
  151. PNodeData = ^TNodeData;
  152. TNodeData = record
  153. FNext: PNodeData;
  154. FQName: PHashItem;
  155. FPrefix: PHashItem;
  156. FNsUri: PHashItem;
  157. FColonPos: Integer;
  158. FTypeInfo: TObject;
  159. FLoc: TLocation;
  160. FLoc2: TLocation; // for attributes: start of value
  161. FIDEntry: PHashItem; // ID attributes: entry in ID map
  162. FNodeType: TXMLNodeType;
  163. FValueStr: XMLString;
  164. FValueStart: PWideChar;
  165. FValueLength: Integer;
  166. FIsDefault: Boolean;
  167. FDenormalized: Boolean; // Whether attribute value changes by normalization
  168. end;
  169. IGetNodeDataPtr = interface(IInterface)['{81F6ADA2-8F5E-41D7-872D-226163FF4E45}']
  170. function CurrentNodePtr: PPNodeData;
  171. end;
  172. { TNSSupport provides tracking of prefix-uri pairs and namespace fixup for writer }
  173. TBinding = class
  174. public
  175. uri: PHashItem;
  176. next: TBinding;
  177. prevPrefixBinding: TObject;
  178. Prefix: PHashItem;
  179. end;
  180. TAttributeAction = (
  181. aaUnchanged,
  182. aaPrefix, // only override the prefix
  183. aaBoth // override prefix and emit namespace definition
  184. );
  185. TNSSupport = class(TObject)
  186. private
  187. FNameTable: THashTable;
  188. FNesting: Integer;
  189. FPrefixSeqNo: Integer;
  190. FFreeBindings: TBinding;
  191. FBindings: TFPList;
  192. FBindingStack: array of TBinding;
  193. FPrefixes: THashTable;
  194. FDefaultPrefix: THashItem;
  195. public
  196. constructor Create(aNameTable: THashTable);
  197. destructor Destroy; override;
  198. procedure DefineBinding(const Prefix, nsURI: XMLString; out Binding: TBinding);
  199. function CheckAttribute(const Prefix, nsURI: XMLString;
  200. out Binding: TBinding): TAttributeAction;
  201. function GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  202. function BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  203. function DefaultNSBinding: TBinding;
  204. function LookupNamespace(const APrefix: XMLString): XMLString;
  205. procedure PushScope;
  206. function PopScope: Boolean;
  207. end;
  208. { Buffer builder, used to compose long strings without too much memory allocations }
  209. PWideCharBuf = ^TWideCharBuf;
  210. TWideCharBuf = record
  211. Buffer: PWideChar;
  212. Length: Integer;
  213. MaxLength: Integer;
  214. end;
  215. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  216. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  217. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  218. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  219. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  220. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  221. { Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
  222. function Decode_UCS2(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  223. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  224. function Decode_UTF8(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  225. function Decode_8859_1(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  226. {$i names.inc}
  227. implementation
  228. function IsXml11Char(Value: PWideChar; var Index: Integer): Boolean; overload;
  229. begin
  230. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  231. begin
  232. Inc(Index);
  233. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  234. end
  235. else
  236. Result := False;
  237. end;
  238. function IsXml11Char(const Value: XMLString; var Index: Integer): Boolean; overload;
  239. begin
  240. if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
  241. begin
  242. Inc(Index);
  243. Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
  244. end
  245. else
  246. Result := False;
  247. end;
  248. function IsXmlName(const Value: XMLString; Xml11: Boolean): Boolean;
  249. begin
  250. Result := IsXmlName(PWideChar(Value), Length(Value), Xml11);
  251. end;
  252. function IsXmlName(Value: PWideChar; Len: Integer; Xml11: Boolean = False): Boolean;
  253. var
  254. I: Integer;
  255. begin
  256. Result := False;
  257. I := 0;
  258. if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[NamePages[hi(Word(Value[I]))]]) or
  259. (Value[I] = ':') or IsXml11Char(Value, I)) then
  260. Exit;
  261. Inc(I);
  262. while I < Len do
  263. begin
  264. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  265. (Value[I] = ':') or IsXml11Char(Value, I)) then
  266. Exit;
  267. Inc(I);
  268. end;
  269. Result := True;
  270. end;
  271. function IsXmlNames(const Value: XMLString; Xml11: Boolean): Boolean;
  272. var
  273. I: Integer;
  274. Offset: Integer;
  275. begin
  276. Result := False;
  277. if Value = '' then
  278. Exit;
  279. I := 1;
  280. Offset := 0;
  281. while I <= Length(Value) do
  282. begin
  283. if not ((Byte(Value[I]) in NamingBitmap[NamePages[Offset+hi(Word(Value[I]))]]) or
  284. (Value[I] = ':') or IsXml11Char(Value, I)) then
  285. begin
  286. if (I = Length(Value)) or (Value[I] <> #32) then
  287. Exit;
  288. Offset := 0;
  289. Inc(I);
  290. Continue;
  291. end;
  292. Offset := $100;
  293. Inc(I);
  294. end;
  295. Result := True;
  296. end;
  297. function IsXmlNmToken(const Value: XMLString; Xml11: Boolean): Boolean;
  298. var
  299. I: Integer;
  300. begin
  301. Result := False;
  302. if Value = '' then
  303. Exit;
  304. I := 1;
  305. while I <= Length(Value) do
  306. begin
  307. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  308. (Value[I] = ':') or IsXml11Char(Value, I)) then
  309. Exit;
  310. Inc(I);
  311. end;
  312. Result := True;
  313. end;
  314. function IsXmlNmTokens(const Value: XMLString; Xml11: Boolean): Boolean;
  315. var
  316. I: Integer;
  317. begin
  318. I := 1;
  319. Result := False;
  320. if Value = '' then
  321. Exit;
  322. while I <= Length(Value) do
  323. begin
  324. if not ((Byte(Value[I]) in NamingBitmap[NamePages[$100+hi(Word(Value[I]))]]) or
  325. (Value[I] = ':') or IsXml11Char(Value, I)) then
  326. begin
  327. if (I = Length(Value)) or (Value[I] <> #32) then
  328. Exit;
  329. end;
  330. Inc(I);
  331. end;
  332. Result := True;
  333. end;
  334. function IsValidXmlEncoding(const Value: XMLString): Boolean;
  335. var
  336. I: Integer;
  337. begin
  338. Result := False;
  339. if (Value = '') or (Value[1] > #255) or not (AnsiChar(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
  340. Exit;
  341. for I := 2 to Length(Value) do
  342. if (Value[I] > #255) or not (AnsiChar(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
  343. Exit;
  344. Result := True;
  345. end;
  346. procedure NormalizeSpaces(var Value: XMLString);
  347. var
  348. I, J: Integer;
  349. begin
  350. I := Length(Value);
  351. // speed: trim only whed needed
  352. if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
  353. Value := Trim(Value);
  354. I := 1;
  355. while I < Length(Value) do
  356. begin
  357. if Value[I] = #32 then
  358. begin
  359. J := I+1;
  360. while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
  361. if J-I > 1 then Delete(Value, I+1, J-I-1);
  362. end;
  363. Inc(I);
  364. end;
  365. end;
  366. function IsXmlWhiteSpace(c: WideChar): Boolean;
  367. begin
  368. Result := (c = #32) or (c = #9) or (c = #10) or (c = #13);
  369. end;
  370. function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
  371. var
  372. counter: Integer;
  373. c1, c2: Word;
  374. begin
  375. counter := 0;
  376. result := 0;
  377. if Len = 0 then
  378. exit;
  379. repeat
  380. c1 := ord(S1[counter]);
  381. c2 := ord(S2[counter]);
  382. if (c1 = 0) or (c2 = 0) then break;
  383. if c1 <> c2 then
  384. begin
  385. if c1 in [97..122] then
  386. Dec(c1, 32);
  387. if c2 in [97..122] then
  388. Dec(c2, 32);
  389. if c1 <> c2 then
  390. Break;
  391. end;
  392. Inc(counter);
  393. until counter >= Len;
  394. result := c1 - c2;
  395. end;
  396. procedure WStrLower(var S: XMLString);
  397. var
  398. i: Integer;
  399. begin
  400. for i := 1 to Length(S) do
  401. if (S[i] >= 'A') and (S[i] <= 'Z') then
  402. Inc(word(S[i]), 32);
  403. end;
  404. function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
  405. begin
  406. Result := InitValue;
  407. while KeyLen <> 0 do
  408. begin
  409. {$push}{$r-}{$q-}
  410. Result := Result * $F4243 xor ord(Key^);
  411. {$pop}
  412. Inc(Key);
  413. Dec(KeyLen);
  414. end;
  415. end;
  416. function KeyCompare(const Key1: XMLString; Key2: Pointer; Key2Len: Integer): Boolean;
  417. begin
  418. Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*2);
  419. end;
  420. { THashTable }
  421. constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
  422. var
  423. I: Integer;
  424. begin
  425. inherited Create;
  426. FOwnsObjects := OwnObjects;
  427. I := 256;
  428. while I < InitSize do I := I shl 1;
  429. FBucketCount := I;
  430. FBucket := AllocMem(I * sizeof(PHashItem));
  431. end;
  432. destructor THashTable.Destroy;
  433. begin
  434. Clear;
  435. FreeMem(FBucket);
  436. inherited Destroy;
  437. end;
  438. procedure THashTable.Clear;
  439. var
  440. I: Integer;
  441. item, next: PHashItem;
  442. begin
  443. for I := 0 to FBucketCount-1 do
  444. begin
  445. item := FBucket^[I];
  446. while Assigned(item) do
  447. begin
  448. next := item^.Next;
  449. if FOwnsObjects then
  450. item^.Data.Free;
  451. Dispose(item);
  452. item := next;
  453. end;
  454. FBucket^[I] := nil;
  455. end;
  456. end;
  457. function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
  458. var
  459. Dummy: Boolean;
  460. begin
  461. Result := Lookup(Key, KeyLen, Dummy, False);
  462. end;
  463. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
  464. var Found: Boolean): PHashItem;
  465. begin
  466. Result := Lookup(Key, KeyLen, Found, True);
  467. end;
  468. function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
  469. var
  470. Dummy: Boolean;
  471. begin
  472. Result := Lookup(Key, KeyLen, Dummy, True);
  473. end;
  474. function THashTable.FindOrAdd(const Key: XMLString): PHashItem;
  475. var
  476. Dummy: Boolean;
  477. begin
  478. Result := Lookup(PWideChar(Key), Length(Key), Dummy, True);
  479. end;
  480. function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
  481. var
  482. e: PHashItem;
  483. Dummy: Boolean;
  484. begin
  485. e := Lookup(Key, KeyLen, Dummy, False);
  486. if Assigned(e) then
  487. Result := e^.Data
  488. else
  489. Result := nil;
  490. end;
  491. function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
  492. out Found: Boolean; CanCreate: Boolean): PHashItem;
  493. var
  494. Entry: PPHashItem;
  495. h: LongWord;
  496. begin
  497. h := Hash(0, Key, KeyLength);
  498. Entry := @FBucket^[h mod FBucketCount];
  499. while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
  500. Entry := @Entry^^.Next;
  501. Found := Assigned(Entry^);
  502. if Found or (not CanCreate) then
  503. begin
  504. Result := Entry^;
  505. Exit;
  506. end;
  507. if FCount > FBucketCount then { arbitrary limit, probably too high }
  508. begin
  509. Resize(FBucketCount * 2);
  510. Result := Lookup(Key, KeyLength, Found, CanCreate);
  511. end
  512. else
  513. begin
  514. New(Result);
  515. // SetString for WideStrings trims on zero chars [fixed, #14740]
  516. SetLength(Result^.Key, KeyLength);
  517. Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(WideChar));
  518. Result^.HashValue := h;
  519. Result^.Data := nil;
  520. Result^.Next := nil;
  521. Inc(FCount);
  522. Entry^ := Result;
  523. end;
  524. end;
  525. procedure THashTable.Resize(NewCapacity: LongWord);
  526. var
  527. p: PHashItemArray;
  528. chain: PPHashItem;
  529. i: Integer;
  530. e, n: PHashItem;
  531. begin
  532. p := AllocMem(NewCapacity * sizeof(PHashItem));
  533. for i := 0 to FBucketCount-1 do
  534. begin
  535. e := FBucket^[i];
  536. while Assigned(e) do
  537. begin
  538. chain := @p^[e^.HashValue mod NewCapacity];
  539. n := e^.Next;
  540. e^.Next := chain^;
  541. chain^ := e;
  542. e := n;
  543. end;
  544. end;
  545. FBucketCount := NewCapacity;
  546. FreeMem(FBucket);
  547. FBucket := p;
  548. end;
  549. function THashTable.Remove(Entry: PHashItem): Boolean;
  550. var
  551. chain: PPHashItem;
  552. begin
  553. chain := @FBucket^[Entry^.HashValue mod FBucketCount];
  554. while Assigned(chain^) do
  555. begin
  556. if chain^ = Entry then
  557. begin
  558. chain^ := Entry^.Next;
  559. if FOwnsObjects then
  560. Entry^.Data.Free;
  561. Dispose(Entry);
  562. Dec(FCount);
  563. Result := True;
  564. Exit;
  565. end;
  566. chain := @chain^^.Next;
  567. end;
  568. Result := False;
  569. end;
  570. // this does not free the aData object
  571. function THashTable.RemoveData(aData: TObject): Boolean;
  572. var
  573. i: Integer;
  574. chain: PPHashItem;
  575. e: PHashItem;
  576. begin
  577. for i := 0 to FBucketCount-1 do
  578. begin
  579. chain := @FBucket^[i];
  580. while Assigned(chain^) do
  581. begin
  582. if chain^^.Data = aData then
  583. begin
  584. e := chain^;
  585. chain^ := e^.Next;
  586. Dispose(e);
  587. Dec(FCount);
  588. Result := True;
  589. Exit;
  590. end;
  591. chain := @chain^^.Next;
  592. end;
  593. end;
  594. Result := False;
  595. end;
  596. procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
  597. var
  598. i: Integer;
  599. e: PHashItem;
  600. begin
  601. for i := 0 to FBucketCount-1 do
  602. begin
  603. e := FBucket^[i];
  604. while Assigned(e) do
  605. begin
  606. if not proc(e, arg) then
  607. Exit;
  608. e := e^.Next;
  609. end;
  610. end;
  611. end;
  612. { TDblHashArray }
  613. destructor TDblHashArray.Destroy;
  614. begin
  615. FreeMem(FData);
  616. inherited Destroy;
  617. end;
  618. procedure TDblHashArray.Init(NumSlots: Integer);
  619. var
  620. i: Integer;
  621. begin
  622. if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
  623. begin
  624. FSizeLog := 3;
  625. while (NumSlots shr FSizeLog) <> 0 do
  626. Inc(FSizeLog);
  627. ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
  628. FRevision := 0;
  629. end;
  630. if FRevision = 0 then
  631. begin
  632. FRevision := $FFFFFFFF;
  633. for i := (1 shl FSizeLog)-1 downto 0 do
  634. FData^[i].rev := FRevision;
  635. end;
  636. Dec(FRevision);
  637. end;
  638. function TDblHashArray.Locate(uri: Pointer; localName: PWideChar; localLength: Integer): Boolean;
  639. var
  640. step: Byte;
  641. mask: LongWord;
  642. idx: Integer;
  643. HashValue: LongWord;
  644. begin
  645. HashValue := Hash(LongWord(PtrUInt(uri)), localName, localLength);
  646. mask := (1 shl FSizeLog) - 1;
  647. step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
  648. idx := HashValue and mask;
  649. result := True;
  650. while FData^[idx].rev = FRevision do
  651. begin
  652. if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr = uri) and
  653. (FData^[idx].lnameLen = localLength) and
  654. CompareMem(FData^[idx].lname, localName, localLength * sizeof(WideChar)) then
  655. Exit;
  656. if idx < step then
  657. Inc(idx, (1 shl FSizeLog) - step)
  658. else
  659. Dec(idx, step);
  660. end;
  661. with FData^[idx] do
  662. begin
  663. rev := FRevision;
  664. hash := HashValue;
  665. uriPtr := uri;
  666. lname := localName;
  667. lnameLen := localLength;
  668. end;
  669. result := False;
  670. end;
  671. { TNSSupport }
  672. constructor TNSSupport.Create(aNameTable: THashTable);
  673. var
  674. b: TBinding;
  675. begin
  676. inherited Create;
  677. FNameTable := aNameTable;
  678. FPrefixes := THashTable.Create(16, False);
  679. FBindings := TFPList.Create;
  680. SetLength(FBindingStack, 16);
  681. { provide implicit binding for the 'xml' prefix }
  682. DefineBinding('xml', stduri_xml, b);
  683. { bind default namespace to empty string }
  684. DefineBinding('', '', b);
  685. end;
  686. destructor TNSSupport.Destroy;
  687. var
  688. I: Integer;
  689. begin
  690. for I := FBindings.Count-1 downto 0 do
  691. TObject(FBindings.List^[I]).Free;
  692. FBindings.Free;
  693. FPrefixes.Free;
  694. inherited Destroy;
  695. end;
  696. function TNSSupport.BindPrefix(nsURI, aPrefix: PHashItem): TBinding;
  697. begin
  698. { try to reuse an existing binding }
  699. result := FFreeBindings;
  700. if Assigned(result) then
  701. FFreeBindings := result.Next
  702. else { no free bindings, create a new one }
  703. begin
  704. result := TBinding.Create;
  705. FBindings.Add(result);
  706. end;
  707. { link it into chain of bindings at the current element level }
  708. result.Next := FBindingStack[FNesting];
  709. FBindingStack[FNesting] := result;
  710. { bind }
  711. result.uri := nsURI;
  712. result.Prefix := aPrefix;
  713. result.PrevPrefixBinding := aPrefix^.Data;
  714. aPrefix^.Data := result;
  715. end;
  716. function TNSSupport.DefaultNSBinding: TBinding;
  717. begin
  718. result := TBinding(FDefaultPrefix.Data);
  719. end;
  720. procedure TNSSupport.DefineBinding(const Prefix, nsURI: XMLString;
  721. out Binding: TBinding);
  722. var
  723. Pfx, uri: PHashItem;
  724. begin
  725. Pfx := @FDefaultPrefix;
  726. if (nsURI <> '') and (Prefix <> '') then
  727. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix));
  728. uri := FNameTable.FindOrAdd(PWideChar(nsURI),Length(nsURI));
  729. if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> uri) then
  730. Binding := BindPrefix(uri, Pfx)
  731. else
  732. Binding := nil;
  733. end;
  734. function TNSSupport.CheckAttribute(const Prefix, nsURI: XMLString;
  735. out Binding: TBinding): TAttributeAction;
  736. var
  737. Pfx: PHashItem;
  738. I: Integer;
  739. b: TBinding;
  740. buf: array[0..31] of WideChar;
  741. p: PWideChar;
  742. uri: PHashItem;
  743. begin
  744. Binding := nil;
  745. Pfx := nil;
  746. Result := aaUnchanged;
  747. if Prefix <> '' then
  748. Pfx := FPrefixes.FindOrAdd(PWideChar(Prefix), Length(Prefix))
  749. else if nsURI = '' then
  750. Exit;
  751. uri := FNameTable.FindOrAdd(PWideChar(nsURI), Length(nsURI));
  752. { if the prefix is already bound to correct URI, we're done }
  753. if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = uri) then
  754. Exit;
  755. { see if there's another prefix bound to the target URI }
  756. // TODO: should use something faster than linear search
  757. for i := FNesting downto 0 do
  758. begin
  759. b := FBindingStack[i];
  760. while Assigned(b) do
  761. begin
  762. if (b.uri = uri) and (b.Prefix <> @FDefaultPrefix) then
  763. begin
  764. Binding := b; // found one -> override the attribute's prefix
  765. Result := aaPrefix;
  766. Exit;
  767. end;
  768. b := b.Next;
  769. end;
  770. end;
  771. { no prefix, or bound (to wrong URI) -> use generated prefix instead }
  772. if (Pfx = nil) or Assigned(Pfx^.Data) then
  773. repeat
  774. Inc(FPrefixSeqNo);
  775. i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
  776. p := @Buf[high(Buf)]; // done without using strings
  777. while i <> 0 do
  778. begin
  779. p^ := WideChar(i mod 10+ord('0'));
  780. dec(p);
  781. i := i div 10;
  782. end;
  783. p^ := 'S'; dec(p);
  784. p^ := 'N';
  785. Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
  786. until Pfx^.Data = nil;
  787. Binding := BindPrefix(uri, Pfx);
  788. Result := aaBoth;
  789. end;
  790. function TNSSupport.GetPrefix(P: PWideChar; Len: Integer): PHashItem;
  791. begin
  792. if Assigned(P) and (Len > 0) then
  793. Result := FPrefixes.FindOrAdd(P, Len)
  794. else
  795. Result := @FDefaultPrefix;
  796. end;
  797. function TNSSupport.LookupNamespace(const APrefix: XMLString): XMLString;
  798. var
  799. prefixatom: PHashItem;
  800. b: TBinding;
  801. begin
  802. prefixatom := GetPrefix(PWideChar(APrefix),Length(APrefix));
  803. b := TBinding(prefixatom^.Data);
  804. if Assigned(b) and Assigned(b.Uri) then
  805. result := b.Uri^.Key
  806. else
  807. result := '';
  808. end;
  809. procedure TNSSupport.PushScope;
  810. begin
  811. Inc(FNesting);
  812. if FNesting >= Length(FBindingStack) then
  813. SetLength(FBindingStack, FNesting * 2);
  814. end;
  815. function TNSSupport.PopScope: Boolean;
  816. var
  817. b, temp: TBinding;
  818. begin
  819. { don't unbind prefixes declared before the first call to PushScope }
  820. Result := FNesting > 0;
  821. if not Result then
  822. Exit;
  823. temp := FBindingStack[FNesting];
  824. while Assigned(temp) do
  825. begin
  826. b := temp;
  827. temp := b.next;
  828. b.next := FFreeBindings;
  829. FFreeBindings := b;
  830. b.Prefix^.Data := b.prevPrefixBinding;
  831. end;
  832. FBindingStack[FNesting] := nil;
  833. Dec(FNesting);
  834. end;
  835. { Buffer builder utils }
  836. procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
  837. begin
  838. ABuffer.MaxLength := ALength;
  839. ABuffer.Length := 0;
  840. ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
  841. end;
  842. procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
  843. begin
  844. if ABuffer.Length >= ABuffer.MaxLength then
  845. begin
  846. ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
  847. FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
  848. ABuffer.MaxLength := ABuffer.MaxLength * 2;
  849. end;
  850. ABuffer.Buffer[ABuffer.Length] := wc;
  851. Inc(ABuffer.Length);
  852. end;
  853. procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
  854. var
  855. Len: Integer;
  856. begin
  857. Len := PEnd - PStart;
  858. if Len <= 0 then
  859. Exit;
  860. if Len >= ABuf.MaxLength - ABuf.Length then
  861. begin
  862. ABuf.MaxLength := (Len + ABuf.Length)*2;
  863. // note: memory clean isn't necessary here.
  864. // To avoid garbage, control Length field.
  865. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  866. end;
  867. Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  868. Inc(ABuf.Length, Len);
  869. end;
  870. procedure BufAppendString(var ABuf: TWideCharBuf; const AValue: XMLString);
  871. var
  872. Len: Integer;
  873. begin
  874. Len := Length(AValue);
  875. if Len <= 0 then
  876. Exit;
  877. if Len >= ABuf.MaxLength - ABuf.Length then
  878. begin
  879. ABuf.MaxLength := (Len + ABuf.Length)*2;
  880. ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
  881. end;
  882. Move(PWideChar(AValue)^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
  883. Inc(ABuf.Length, Len);
  884. end;
  885. function BufEquals(const ABuf: TWideCharBuf; const Arg: XMLString): Boolean;
  886. begin
  887. Result := (ABuf.Length = Length(Arg)) and
  888. CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
  889. end;
  890. procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
  891. var
  892. Dst, Src: Integer;
  893. begin
  894. Dst := 0;
  895. Src := 0;
  896. // skip leading space if any
  897. while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
  898. Inc(Src);
  899. while Src < Buf.Length do
  900. begin
  901. if Buf.Buffer[Src] = ' ' then
  902. begin
  903. // Dst cannot be 0 here, because leading space is already skipped
  904. if Buf.Buffer[Dst-1] <> ' ' then
  905. begin
  906. Buf.Buffer[Dst] := ' ';
  907. Inc(Dst);
  908. end;
  909. end
  910. else
  911. begin
  912. Buf.Buffer[Dst] := Buf.Buffer[Src];
  913. Inc(Dst);
  914. end;
  915. Inc(Src);
  916. end;
  917. // trailing space (only one possible due to compression)
  918. if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
  919. Dec(Dst);
  920. Modified := Dst <> Buf.Length;
  921. Buf.Length := Dst;
  922. end;
  923. { standard decoders }
  924. function Decode_UCS2(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  925. var
  926. cnt: Cardinal;
  927. begin
  928. cnt := OutCnt; // num of widechars
  929. if cnt > InCnt div sizeof(WideChar) then
  930. cnt := InCnt div sizeof(WideChar);
  931. Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
  932. Dec(InCnt, cnt*sizeof(WideChar));
  933. Dec(OutCnt, cnt);
  934. Result := cnt;
  935. end;
  936. function Decode_UCS2_Swapped(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  937. var
  938. I: Integer;
  939. cnt: Cardinal;
  940. InPtr: PAnsiChar;
  941. begin
  942. cnt := OutCnt; // num of widechars
  943. if cnt > InCnt div sizeof(WideChar) then
  944. cnt := InCnt div sizeof(WideChar);
  945. InPtr := InBuf;
  946. for I := 0 to cnt-1 do
  947. begin
  948. OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
  949. Inc(InPtr, 2);
  950. end;
  951. Dec(InCnt, cnt*sizeof(WideChar));
  952. Dec(OutCnt, cnt);
  953. Result := cnt;
  954. end;
  955. function Decode_8859_1(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  956. var
  957. I: Integer;
  958. cnt: Cardinal;
  959. begin
  960. cnt := OutCnt; // num of widechars
  961. if cnt > InCnt then
  962. cnt := InCnt;
  963. for I := 0 to cnt-1 do
  964. OutBuf[I] := WideChar(ord(InBuf[I]));
  965. Dec(InCnt, cnt);
  966. Dec(OutCnt, cnt);
  967. Result := cnt;
  968. end;
  969. function Decode_UTF8(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  970. const
  971. MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
  972. var
  973. i, j, bc: Cardinal;
  974. Value: Cardinal;
  975. begin
  976. result := 0;
  977. i := OutCnt;
  978. while (i > 0) and (InCnt > 0) do
  979. begin
  980. bc := 1;
  981. Value := ord(InBuf^);
  982. if Value < $80 then
  983. OutBuf^ := WideChar(Value)
  984. else
  985. begin
  986. if Value < $C2 then
  987. begin
  988. Result := -1;
  989. Break;
  990. end;
  991. Inc(bc);
  992. if Value > $DF then
  993. begin
  994. Inc(bc);
  995. if Value > $EF then
  996. begin
  997. Inc(bc);
  998. if Value > $F7 then // never encountered in the tests.
  999. begin
  1000. Result := -1;
  1001. Break;
  1002. end;
  1003. end;
  1004. end;
  1005. if InCnt < bc then
  1006. Break;
  1007. j := 1;
  1008. while j < bc do
  1009. begin
  1010. if InBuf[j] in [#$80..#$BF] then
  1011. Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
  1012. else
  1013. begin
  1014. Result := -1;
  1015. Break;
  1016. end;
  1017. Inc(j);
  1018. end;
  1019. Value := Value and MaxCode[bc];
  1020. // RFC2279 check
  1021. if Value <= MaxCode[bc-1] then
  1022. begin
  1023. Result := -1;
  1024. Break;
  1025. end;
  1026. case Value of
  1027. 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
  1028. $10000..$10FFFF:
  1029. begin
  1030. if i < 2 then Break;
  1031. OutBuf^ := WideChar($D7C0 + (Value shr 10));
  1032. OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
  1033. Inc(OutBuf); // once here
  1034. Dec(i);
  1035. end
  1036. else
  1037. begin
  1038. Result := -1;
  1039. Break;
  1040. end;
  1041. end;
  1042. end;
  1043. Inc(OutBuf);
  1044. Inc(InBuf, bc);
  1045. Dec(InCnt, bc);
  1046. Dec(i);
  1047. end;
  1048. if Result >= 0 then
  1049. Result := OutCnt-i;
  1050. OutCnt := i;
  1051. end;
  1052. end.