xmlread.pp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLRead;
  13. {$ifdef fpc}
  14. {$MODE objfpc}{$H+}
  15. {$endif}
  16. interface
  17. {off $DEFINE MEM_CHECK}
  18. uses
  19. {$IFDEF MEM_CHECK}MemCheck,{$ENDIF}
  20. SysUtils, Classes, DOM;
  21. type
  22. EXMLReadError = class(Exception);
  23. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  24. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: File); overload;
  25. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream); overload;
  26. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
  27. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
  28. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File); overload;
  29. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
  30. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String); overload;
  31. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  32. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: File); overload;
  33. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
  34. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String); overload;
  35. // =======================================================
  36. implementation
  37. type
  38. TSetOfChar = set of Char;
  39. const
  40. Letter = ['A'..'Z', 'a'..'z'];
  41. Digit = ['0'..'9'];
  42. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  43. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  44. '#', '@', '$', '_', '%'];
  45. WhitespaceChars: TSetOfChar = [#9, #10, #13, ' '];
  46. NmToken: TSetOfChar = Letter + Digit + ['.', '-', '_', ':'];
  47. type
  48. TXMLReaderDocument = class(TXMLDocument)
  49. public
  50. procedure SetDocType(ADocType: TDOMDocumentType);
  51. end;
  52. TXMLReaderDocumentType = class(TDOMDocumentType)
  53. public
  54. constructor Create(ADocument: TXMLReaderDocument);
  55. property Name: DOMString read FNodeName write FNodeName;
  56. end;
  57. { supported encodings }
  58. TEncoding = (enUnknown, enUTF8, enUTF16BE, enUTF16LE);
  59. TXMLReader = class
  60. private
  61. FCurChar: WideChar;
  62. FLine: Integer;
  63. FColumn: Integer;
  64. FSeenCR: Boolean;
  65. FEncoding: TEncoding;
  66. FValue: array of WideChar;
  67. FValueLength: Integer;
  68. FPrologParsed: Boolean;
  69. procedure RaiseExpectedQmark;
  70. procedure InternalGetChar;
  71. function GetChar: WideChar;
  72. procedure AppendValue(wc: WideChar);
  73. procedure DetectEncoding;
  74. protected
  75. buf: PChar;
  76. Filename: String;
  77. procedure RaiseExc(const descr: String); overload;
  78. procedure RaiseExc(Expected, Found: WideChar); overload;
  79. function SkipWhitespace: Boolean;
  80. procedure ExpectWhitespace;
  81. procedure ExpectString(const s: String);
  82. procedure ExpectChar(wc: WideChar);
  83. function CheckForChar(c: WideChar): Boolean;
  84. procedure SkipString(const ValidChars: TSetOfChar);
  85. function GetString(const ValidChars: TSetOfChar): WideString;
  86. procedure RaiseNameNotFound;
  87. function CheckName: Boolean;
  88. function ExpectName: WideString; // [5]
  89. procedure SkipName;
  90. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  91. procedure SkipPubidLiteral; // [12]
  92. procedure ParseComment(AOwner: TDOMNode); // [15]
  93. procedure ParsePI; // [16]
  94. procedure ExpectProlog; // [22]
  95. procedure ParseProlog;
  96. function ParseEq: Boolean; // [25]
  97. procedure ExpectEq;
  98. procedure ParseMisc(AOwner: TDOMNode); // [27]
  99. function ParseMarkupDecl: Boolean; // [29]
  100. procedure ParseCharData(AOwner: TDOMNode); // [14]
  101. procedure ParseCDSect(AOwner: TDOMNode); // [18]
  102. function ParseElementContent(AOwner: TDOMNode): Boolean;
  103. procedure ParseElement(AOwner: TDOMNode); // [39]
  104. procedure ExpectElement(AOwner: TDOMNode);
  105. procedure ParseReference(AOwner: TDOMNode); // [67]
  106. function ParsePEReference: Boolean; // [69]
  107. function ParseExternalID: Boolean; // [75]
  108. procedure ExpectExternalID;
  109. procedure SkipEncodingDecl; // [80]
  110. procedure ParseEntityDecl;
  111. procedure ParseAttlistDecl;
  112. procedure ParseElementDecl;
  113. procedure ParseNotationDecl;
  114. procedure ResolveEntities(RootNode: TDOMNode);
  115. public
  116. doc: TDOMDocument;
  117. procedure ProcessXML(ABuf: PChar; const AFilename: String); // [1]
  118. procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
  119. procedure ProcessDTD(ABuf: PChar; const AFilename: String); // ([29])
  120. end;
  121. { TXMLReaderDocument }
  122. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  123. begin
  124. FDocType := ADocType;
  125. end;
  126. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  127. begin
  128. inherited Create(ADocument);
  129. end;
  130. // TODO: this and others must use table approach for speed-up
  131. function IsNameStartChar(wc: WideChar): Boolean; // [4]
  132. begin
  133. case wc of
  134. // (note) excludes single $D7, $F7, $37E,
  135. ':', 'A'..'Z', '_', 'a'..'z', #$C0..#$D6, #$D8..#$F6, #$F8..#$2FF,
  136. #$370..#$37D, #$37F..#$1FFF, #$200C, #$200D, #$2070..#$218F,
  137. #$2C00..#$2FEF, #$3001..#$D7FF, #$F900..#$FDCF, #$FDF0..#$FFFD: Result := True;
  138. else
  139. Result := False;
  140. end;
  141. end;
  142. function IsNameChar(wc: WideChar): Boolean; // [4a]
  143. begin
  144. Result := IsNameStartChar(wc) or ((wc = '-') or (wc = '.') or ((wc >= '0') and (wc <= '9')) or
  145. (wc = #$B7) or ((wc >= #$300) and (wc <= #$36F)) or (wc = #$203F) or (wc = #$2040));
  146. end;
  147. function IsWhitespace(wc: WideChar): Boolean;
  148. begin
  149. Result := (wc = ' ') or (wc = #10) or (wc = #13) or (wc = #9);
  150. end;
  151. { TXMLReader }
  152. procedure TXMLReader.DetectEncoding;
  153. var
  154. w: Word;
  155. Enc: TEncoding;
  156. function CheckByte(value: Byte): Boolean;
  157. var
  158. cb: Byte;
  159. begin
  160. cb := ord(buf[0]); Inc(buf);
  161. Result := (cb = value);
  162. end;
  163. function CheckWord(value: Word): Boolean;
  164. var
  165. cw: Word;
  166. begin
  167. cw := PWord(buf)^; Inc(buf, sizeof(Word));
  168. {$IFDEF ENDIAN_BIG} Swap(cw); {$ENDIF} // TODO: Is that correct?
  169. Result := (cw = value);
  170. end;
  171. begin
  172. Enc := enUnknown;
  173. w := PWord(Buf)^; Inc(Buf, sizeof(Word));
  174. {$IFDEF ENDIAN_BIG} Swap(w); {$ENDIF} // TODO: Is that correct?
  175. // case of no BOM
  176. if (w = (ord('?') shl 8 + ord('<'))) { $3F3C } then
  177. Enc := enUTF8 // not known, in fact, just a default
  178. else if (w = ord('<')) and CheckWord(ord('?')) then
  179. Enc := enUTF16LE
  180. else if (w = ord('<') shl 8) and CheckWord(ord('?') shl 8) then
  181. Enc := enUTF16BE;
  182. if Enc <> enUnknown then // any of above tests succeeded, must start from '?'
  183. begin
  184. FEncoding := Enc;
  185. FCurChar := '?';
  186. Inc(FColumn);
  187. Exit;
  188. end;
  189. if w = $FFFE then
  190. FEncoding := enUTF16BE
  191. else if w = $FEFF then
  192. FEncoding := enUTF16LE
  193. else if (w = $BBEF) and CheckByte($BF) then
  194. FEncoding := enUTF8;
  195. GetChar;
  196. end;
  197. procedure TXMLReader.InternalGetChar;
  198. var
  199. ch, ch2, ch3: Byte;
  200. procedure BadChar;
  201. begin
  202. RaiseExc('Invalid character in UTF8 sequence');
  203. end;
  204. begin
  205. if FEncoding in [enUnknown, enUTF8] then
  206. begin
  207. ch := ord(buf[0]);
  208. Inc(Buf);
  209. end
  210. else
  211. begin // Endianness: no swapping here; see below
  212. FCurChar := PWideChar(Buf)^;
  213. Inc(Buf, sizeof(WideChar));
  214. end;
  215. case FEncoding of
  216. enUnknown:
  217. FCurChar := WideChar(Ch);
  218. enUTF8:
  219. if Ch < 128 then { ASCII }
  220. FCurChar := WideChar(Ch)
  221. else if (Ch and $E0) = $C0 then { #$0080 - #$07FF }
  222. begin
  223. ch2 := ord(buf[0]); Inc(Buf);
  224. if (Ch2 and $C0) <> $80 then
  225. BadChar;
  226. FCurChar := WideChar((Ch and $1F) shl 6 + (Ch2 and $3F));
  227. end
  228. else if (Ch and $F0) = $E0 then { #$0800 - #$FFFF }
  229. begin
  230. ch2 := ord(buf[0]); Inc(buf);
  231. if (Ch2 and $C0) <> $80 then
  232. BadChar;
  233. ch3 := ord(buf[0]); Inc(buf);
  234. if (Ch3 and $C0) <> $80 then
  235. BadChar;
  236. FCurChar := WideChar(Word((Ch and $0F) shl 12) +
  237. (Ch2 and $3F) shl 6 + (Ch3 and $3F));
  238. end
  239. else
  240. RaiseExc('Unsupported UTF8 character');
  241. {$IFDEF ENDIAN_BIG}
  242. enUTF16LE:
  243. {$ELSE}
  244. enUTF16BE:
  245. {$ENDIF}
  246. FCurChar :=
  247. WideChar((Ord(FCurChar) and $FF) shl 8 + (Ord(FCurChar) shr 8));
  248. end;
  249. end;
  250. function TXMLReader.GetChar: WideChar;
  251. begin
  252. InternalGetChar;
  253. if FSeenCR then
  254. begin
  255. case FCurChar of
  256. #10, #$85: InternalGetChar; // #$85 is xml 1.1 specific
  257. end;
  258. FSeenCR := False;
  259. end;
  260. if FCurChar = #13 then
  261. begin
  262. FSeenCR := True;
  263. FCurChar := #10;
  264. end
  265. else // xml 1.1 specific check
  266. if (FCurChar = #$85) or (FCurChar = #$2028) then
  267. FCurChar := #10;
  268. if FCurChar = #10 then
  269. begin
  270. Inc(FLine);
  271. FColumn := 0;
  272. end
  273. else
  274. Inc(FColumn);
  275. Result := FCurChar;
  276. end;
  277. procedure TXMLReader.AppendValue(wc: WideChar);
  278. var
  279. Alloc: Integer;
  280. begin
  281. Alloc := Length(FValue);
  282. if FValueLength >= Alloc then
  283. begin
  284. if Alloc = 0 then
  285. Alloc := 512
  286. else
  287. Alloc := Alloc * 2;
  288. SetLength(FValue, Alloc);
  289. end;
  290. FValue[FValueLength] := wc;
  291. Inc(FValueLength);
  292. end;
  293. procedure TXMLReader.RaiseExpectedQmark;
  294. begin
  295. RaiseExc('Expected single or double quotation mark');
  296. end;
  297. procedure TXMLReader.RaiseExc(Expected, Found: WideChar);
  298. begin
  299. RaiseExc('Expected "' + Expected + '", but found "' + Found + '",');
  300. end;
  301. procedure TXMLReader.RaiseExc(const descr: String);
  302. begin
  303. raise EXMLReadError.CreateFmt('In %s (line %d pos %d): %s', [Filename, FLine, FColumn, descr]);
  304. end;
  305. function TXMLReader.SkipWhitespace: Boolean;
  306. begin
  307. Result := False;
  308. while IsWhitespace(FCurChar) do
  309. begin
  310. GetChar;
  311. Result := True;
  312. end;
  313. end;
  314. procedure TXMLReader.ExpectWhitespace;
  315. begin
  316. if not SkipWhitespace then
  317. RaiseExc('Expected whitespace');
  318. end;
  319. procedure TXMLReader.ExpectChar(wc: WideChar);
  320. begin
  321. if not CheckForChar(wc) then
  322. RaiseExc(wc, FCurChar);
  323. end;
  324. procedure TXMLReader.ExpectString(const s: String);
  325. procedure RaiseStringNotFound;
  326. begin
  327. RaiseExc('Expected "' + s + '"');
  328. end;
  329. var
  330. I: Integer;
  331. begin
  332. for I := 1 to Length(s) do
  333. begin
  334. if FCurChar <> WideChar(s[i]) then
  335. RaiseStringNotFound;
  336. GetChar;
  337. end;
  338. end;
  339. function TXMLReader.CheckForChar(c: WideChar): Boolean;
  340. begin
  341. Result := (FCurChar = c);
  342. if Result then
  343. GetChar;
  344. end;
  345. procedure TXMLReader.SkipString(const ValidChars: TSetOfChar);
  346. begin
  347. FValueLength := 0;
  348. while (ord(FCurChar) < 256) and (char(FCurChar) in ValidChars) do
  349. begin
  350. AppendValue(FCurChar);
  351. GetChar;
  352. end;
  353. end;
  354. function TXMLReader.GetString(const ValidChars: TSetOfChar): WideString;
  355. begin
  356. SkipString(ValidChars);
  357. SetString(Result, PWideChar(@FValue[0]), FValueLength);
  358. end;
  359. procedure TXMLReader.ProcessXML(ABuf: PChar; const AFilename: String); // [1]
  360. begin
  361. buf := ABuf;
  362. Filename := AFilename;
  363. FLine := 1;
  364. FColumn := 0;
  365. doc := TXMLReaderDocument.Create;
  366. DetectEncoding;
  367. ExpectProlog;
  368. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML A');{$ENDIF}
  369. ExpectElement(doc);
  370. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt('TXMLReader.ProcessXML B');{$ENDIF}
  371. ParseMisc(doc);
  372. if FCurChar <> #0 then
  373. RaiseExc('Text after end of document element found');
  374. end;
  375. procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar; const AFilename: String);
  376. begin
  377. buf := ABuf;
  378. Filename := AFilename;
  379. FLine := 1;
  380. FColumn := 0;
  381. FEncoding := enUTF8; // TODO: Detect it? Not sure for now...
  382. GetChar;
  383. if not ParseElementContent(AOwner) then
  384. ;
  385. end;
  386. function TXMLReader.CheckName: Boolean; // [5]
  387. begin
  388. Result := IsNameStartChar(FCurChar);
  389. if Result then
  390. begin
  391. FValueLength := 0;
  392. repeat
  393. AppendValue(FCurChar);
  394. GetChar;
  395. until (FCurChar = #0) or not IsNameChar(FCurChar);
  396. end;
  397. end;
  398. procedure TXMLReader.RaiseNameNotFound;
  399. begin
  400. RaiseExc('Expected letter, "_" or ":" for name, found "' + FCurChar + '"');
  401. end;
  402. function TXMLReader.ExpectName: WideString; // [5]
  403. begin
  404. if not CheckName then
  405. RaiseNameNotFound;
  406. SetString(Result, PWideChar(@FValue[0]), FValueLength);
  407. end;
  408. procedure TXMLReader.SkipName;
  409. begin
  410. if not CheckName then
  411. RaiseNameNotFound;
  412. end;
  413. // ---------------------
  414. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  415. procedure FlushStringBuffer;
  416. var
  417. s: WideString;
  418. begin
  419. if FValueLength > 0 then
  420. begin
  421. SetString(s, PWideChar(@FValue[0]),FValueLength);
  422. FValueLength := 0;
  423. attr.AppendChild(doc.CreateTextNode(s));
  424. //SetLength(s, 0); // cleared implicitly
  425. end;
  426. end;
  427. var
  428. Delim: WideChar;
  429. begin
  430. if (FCurChar <> '''') and (FCurChar <> '"') then
  431. RaiseExpectedQmark;
  432. Delim := FCurChar;
  433. GetChar; // skip quote
  434. FValueLength := 0;
  435. while (FCurChar <> Delim) and (FCurChar <> #0) do
  436. begin
  437. if FCurChar <> '&' then
  438. begin
  439. AppendValue(FCurChar);
  440. GetChar;
  441. end
  442. else
  443. begin
  444. if FValueLength > 0 then FlushStringBuffer;
  445. ParseReference(attr);
  446. FValueLength := 0;
  447. end;
  448. end;
  449. if FValueLength > 0 then FlushStringBuffer;
  450. GetChar; // skip trailing quote
  451. ResolveEntities(Attr);
  452. end;
  453. procedure TXMLReader.SkipPubidLiteral; // [12]
  454. var
  455. Delim: WideChar;
  456. begin
  457. if (FCurChar = '''') or (FCurChar = '"') then
  458. begin
  459. Delim := FCurChar;
  460. GetChar; // skip quote
  461. SkipString(PubidChars - [Char(Delim)]); // <-- PubidChars do not contain `"`
  462. ExpectChar(Delim);
  463. end
  464. else
  465. RaiseExpectedQMark;
  466. end;
  467. // starting '<!' already consumed, FCurChar = '-'
  468. procedure TXMLReader.ParseComment(AOwner: TDOMNode); // [15]
  469. var
  470. comment: WideString;
  471. begin
  472. ExpectString('--');
  473. FValueLength := 0;
  474. repeat
  475. AppendValue(FCurChar);
  476. GetChar;
  477. if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
  478. (FValue[FValueLength-2] = '-') and (FValue[FValueLength-3] = '-') then
  479. begin
  480. Dec(FValueLength, 3);
  481. Break;
  482. end;
  483. until FCurChar = #0; // should not happen
  484. SetString(comment, PWideChar(@FValue[0]), FValueLength);
  485. AOwner.AppendChild(doc.CreateComment(comment));
  486. end;
  487. // starting '?' contained in FCurChar
  488. procedure TXMLReader.ParsePI; // [16]
  489. begin
  490. GetChar; // skip '?'
  491. SkipName;
  492. // ugly but uses no temp string. Need StrLIComp(PWideChar, PWideChar).
  493. if (FValueLength = 3) and
  494. ((FValue[0] = 'X') or (FValue[0] = 'x')) and
  495. ((FValue[1] = 'M') or (FValue[1] = 'm')) and
  496. ((FValue[2] = 'L') or (FValue[2] = 'l')) then
  497. begin
  498. if not FPrologParsed then
  499. begin
  500. ParseProlog;
  501. FPrologParsed := True;
  502. Exit;
  503. end
  504. else
  505. RaiseExc('"<?xml" processing instruction not allowed here');
  506. end;
  507. if SkipWhitespace then
  508. begin
  509. FValueLength := 0;
  510. repeat
  511. AppendValue(FCurChar);
  512. GetChar;
  513. if (FValueLength >= 2) and (FValue[FValueLength-1] = '>') and
  514. (FValue[FValueLength-2] = '?') then
  515. begin
  516. Dec(FValueLength, 2);
  517. Break;
  518. end;
  519. until FCurChar = #0; // should not happen
  520. end;
  521. end;
  522. // here we come from ParsePI, 'xml' is already consumed
  523. procedure TXMLReader.ParseProlog;
  524. var
  525. Delim: WideChar;
  526. begin
  527. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  528. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  529. SkipWhitespace;
  530. ExpectString('version');
  531. ExpectEq;
  532. if (FCurChar = '''') or (FCurChar = '"') then
  533. begin
  534. Delim := FCurChar;
  535. GetChar; // skip quote
  536. if doc.InheritsFrom(TXMLDocument) then
  537. TXMLDocument(doc).XMLVersion :=
  538. GetString(NmToken); {['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']}
  539. ExpectChar(Delim);
  540. end
  541. else
  542. RaiseExpectedQMark;
  543. // EncodingDecl?
  544. SkipEncodingDecl;
  545. // SDDecl?
  546. SkipWhitespace;
  547. if CheckForChar('s') then
  548. begin
  549. ExpectString('tandalone');
  550. ExpectEq;
  551. if (FCurChar = '''') or (FCurChar = '"') then
  552. begin
  553. Delim := FCurChar;
  554. GetChar; // skip quote
  555. ExpectName; // TODO: must check for 'yes' or 'no'
  556. ExpectChar(Delim);
  557. end
  558. else
  559. RaiseExpectedQMark;
  560. SkipWhitespace;
  561. end;
  562. ExpectString('?>');
  563. end;
  564. procedure TXMLReader.ExpectProlog; // [22]
  565. var
  566. DocType: TXMLReaderDocumentType;
  567. begin
  568. FPrologParsed := False;
  569. // The special case when first chars had been consumed by DetectEncoding()
  570. if FCurChar = '?' then
  571. ParsePI;
  572. // Check for "Misc*"
  573. ParseMisc(doc);
  574. // Check for "(doctypedecl Misc*)?" [28]
  575. if CheckForChar('D') then
  576. begin
  577. ExpectString('OCTYPE');
  578. // create the DTD object
  579. DocType := TXMLReaderDocumentType.Create(doc as TXMLReaderDocument);
  580. if doc.InheritsFrom(TXMLReaderDocument) then
  581. TXMLReaderDocument(doc).SetDocType(DocType);
  582. SkipWhitespace;
  583. DocType.Name := ExpectName;
  584. SkipWhitespace;
  585. ParseExternalID; // may be absent, ignore result
  586. SkipWhitespace;
  587. if CheckForChar('[') then
  588. begin
  589. repeat
  590. SkipWhitespace;
  591. until not (ParseMarkupDecl or ParsePEReference);
  592. ExpectChar(']');
  593. SkipWhitespace;
  594. ExpectChar('>');
  595. end;
  596. ParseMisc(doc);
  597. end;
  598. end;
  599. function TXMLReader.ParseEq: Boolean; // [25]
  600. begin
  601. SkipWhitespace;
  602. Result := CheckForChar('=');
  603. if Result then
  604. SkipWhitespace;
  605. end;
  606. procedure TXMLReader.ExpectEq;
  607. begin
  608. if not ParseEq then
  609. RaiseExc('Expected "="');
  610. end;
  611. // Parse "Misc*":
  612. // Misc ::= Comment | PI | S
  613. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  614. begin
  615. repeat
  616. SkipWhitespace;
  617. if FCurChar <> '<' then
  618. Break;
  619. GetChar;
  620. if FCurChar = '!' then
  621. begin
  622. GetChar;
  623. if FCurChar = '-' then
  624. ParseComment(AOwner)
  625. else
  626. Break;
  627. end
  628. else
  629. if FCurChar = '?' then
  630. ParsePI
  631. else
  632. Break;
  633. until False;
  634. end;
  635. { DTD stuff }
  636. procedure TXMLReader.ParseElementDecl; // [45]
  637. procedure ExpectChoiceOrSeq; // [49], [50]
  638. procedure ExpectCP; // [48]
  639. begin
  640. if CheckForChar('(') then
  641. ExpectChoiceOrSeq
  642. else
  643. SkipName;
  644. if CheckForChar('?') then
  645. else if CheckForChar('*') then
  646. else if CheckForChar('+') then;
  647. end;
  648. var
  649. delimiter: WideChar;
  650. begin
  651. SkipWhitespace;
  652. ExpectCP;
  653. SkipWhitespace;
  654. delimiter := #0;
  655. repeat
  656. if (FCurChar = #0) or CheckForChar(')') then
  657. Break;
  658. if delimiter = #0 then
  659. begin
  660. if (FCurChar = '|') or (FCurChar = ',') then
  661. delimiter := FCurChar
  662. else
  663. RaiseExc('Expected "|" or ","');
  664. end
  665. else
  666. if FCurChar <> delimiter then
  667. RaiseExc(delimiter, FCurChar);
  668. GetChar; // skip delimiter
  669. SkipWhitespace;
  670. ExpectCP;
  671. until False;
  672. end;
  673. begin // starting '<!E' already consumed
  674. ExpectString('LEMENT');
  675. ExpectWhitespace;
  676. SkipName;
  677. ExpectWhitespace;
  678. // Get contentspec [46]
  679. if FCurChar = 'E' then
  680. ExpectString('EMPTY')
  681. else if FCurChar = 'A' then
  682. ExpectString('ANY')
  683. else if CheckForChar('(') then
  684. begin
  685. SkipWhitespace;
  686. if CheckForChar('#') then
  687. begin
  688. // Parse Mixed section [51]
  689. ExpectString('PCDATA');
  690. SkipWhitespace;
  691. if not CheckForChar(')') then
  692. repeat
  693. ExpectChar('|');
  694. SkipWhitespace;
  695. SkipName;
  696. // TODO: verify
  697. until (FCurChar = ')') and (GetChar = '*'); //CheckFor(')*');
  698. end
  699. else
  700. begin
  701. // Parse Children section [47]
  702. ExpectChoiceOrSeq;
  703. if CheckForChar('?') then
  704. else if CheckForChar('*') then
  705. else if CheckForChar('+') then;
  706. end;
  707. end
  708. else
  709. RaiseExc('Invalid content specification');
  710. SkipWhitespace;
  711. ExpectChar('>');
  712. end;
  713. // starting '<!' already consumed
  714. procedure TXMLReader.ParseNotationDecl; // [82]
  715. begin
  716. ExpectString('NOTATION');
  717. ExpectWhitespace;
  718. SkipName;
  719. ExpectWhitespace;
  720. // Unclear rule...
  721. // IE understands 'SYSTEM' followed by literal and 'PUBLIC' followed by 2 literals
  722. // this is what is handled in ParseExternalID().
  723. if ParseExternalID then
  724. (* else if CheckFor('PUBLIC') then
  725. begin // [83]
  726. ExpectWhitespace;
  727. SkipPubidLiteral;
  728. end *) else
  729. RaiseExc('Expected external or public ID');
  730. SkipWhitespace;
  731. ExpectChar('>');
  732. end;
  733. // starting '<!' already consumed
  734. procedure TXMLReader.ParseAttlistDecl; // [52]
  735. var
  736. attr: TDOMAttr;
  737. ValueRequired: Boolean;
  738. begin
  739. ExpectString('ATTLIST');
  740. ExpectWhitespace;
  741. SkipName;
  742. SkipWhitespace;
  743. while not CheckForChar('>') do
  744. begin
  745. SkipName;
  746. ExpectWhitespace;
  747. // Get AttType [54], [55], [56]
  748. // TODO: possibly extract all letters and compare with list...
  749. if FCurChar = 'C' then
  750. ExpectString('CDATA')
  751. else if CheckForChar('I') then // ID, IDREF, IDREFS
  752. begin
  753. ExpectChar('D');
  754. if FCurChar = 'R' then
  755. begin
  756. ExpectString('REF');
  757. CheckForChar('S');
  758. end;
  759. end
  760. else if FCurChar = 'E' then
  761. begin
  762. ExpectString('ENTIT');
  763. if not CheckForChar('Y') then
  764. ExpectString('IES');
  765. end
  766. else if CheckForChar('N') then
  767. begin
  768. if FCurChar = 'M' then
  769. begin
  770. ExpectString('TOKEN');
  771. CheckForChar('S');
  772. end
  773. else if FCurChar = 'O' then // [57], [58]
  774. begin
  775. ExpectString('OTATION');
  776. ExpectWhitespace;
  777. ExpectChar('(');
  778. SkipWhitespace;
  779. SkipName;
  780. SkipWhitespace;
  781. while not CheckForChar(')') do
  782. begin
  783. ExpectChar('|');
  784. SkipWhitespace;
  785. SkipName;
  786. SkipWhitespace;
  787. end;
  788. end;
  789. end
  790. else
  791. if CheckForChar('(') then
  792. begin // [59]
  793. SkipWhitespace;
  794. SkipString(Nmtoken);
  795. SkipWhitespace;
  796. while not CheckForChar(')') do
  797. begin
  798. ExpectChar('|');
  799. SkipWhitespace;
  800. SkipString(Nmtoken);
  801. SkipWhitespace;
  802. end;
  803. end else
  804. RaiseExc('Invalid tokenized type');
  805. ExpectWhitespace;
  806. // Get DefaultDecl [60]
  807. ValueRequired := False;
  808. if CheckForChar('#') then
  809. begin
  810. if FCurChar = 'R' then
  811. ExpectString('REQUIRED')
  812. else if FCurChar = 'I' then
  813. ExpectString('IMPLIED')
  814. else if FCurChar = 'F' then
  815. begin
  816. ExpectString('FIXED');
  817. SkipWhitespace;
  818. ValueRequired := True;
  819. end;
  820. end
  821. else
  822. ValueRequired := True;
  823. if ValueRequired then
  824. begin
  825. attr := doc.CreateAttribute('');
  826. ExpectAttValue(attr);
  827. end;
  828. SkipWhitespace;
  829. end;
  830. end;
  831. // starting '<!' already consumed
  832. procedure TXMLReader.ParseEntityDecl; // [70]
  833. var
  834. NewEntity: TDOMEntity;
  835. function ParseEntityValue: Boolean; // [9]
  836. var
  837. Delim: WideChar;
  838. begin
  839. if (FCurChar = '''') or (FCurChar = '"') then
  840. begin
  841. Delim := FCurChar;
  842. GetChar; // skip quote
  843. while not CheckForChar(Delim) do
  844. if ParsePEReference then
  845. else if FCurChar = '&' then ParseReference(NewEntity)
  846. else begin
  847. GetChar; // Normal character
  848. end;
  849. Result := True;
  850. end
  851. else
  852. Result := False;
  853. end;
  854. begin
  855. ExpectString('NTITY');
  856. ExpectWhitespace;
  857. if CheckForChar('%') then // [72]
  858. begin
  859. ExpectWhitespace;
  860. NewEntity := doc.CreateEntity(ExpectName);
  861. ExpectWhitespace;
  862. // Get PEDef [74]
  863. if ParseEntityValue then
  864. // SYSTEM | PUBLIC
  865. else if ParseExternalID then
  866. else
  867. RaiseExc('Expected entity value or external ID');
  868. end
  869. else // [71]
  870. begin
  871. NewEntity := doc.CreateEntity(ExpectName);
  872. ExpectWhitespace;
  873. // Get EntityDef [73]
  874. if ParseEntityValue then
  875. else
  876. begin
  877. ExpectExternalID;
  878. // Get NDataDecl [76]
  879. SkipWhitespace;
  880. if FCurChar = 'N' then
  881. begin
  882. ExpectString('NDATA');
  883. ExpectWhitespace;
  884. SkipName;
  885. end;
  886. end;
  887. end;
  888. SkipWhitespace;
  889. ExpectChar('>');
  890. end;
  891. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  892. begin
  893. Result := False;
  894. repeat
  895. SkipWhitespace;
  896. if FCurChar <> '<' then // condition is true for #0
  897. Exit;
  898. GetChar;
  899. if FCurChar = '!' then
  900. begin
  901. GetChar;
  902. if FCurChar = 'E' then // either ELEMENT or ENTITY
  903. begin
  904. GetChar;
  905. if FCurChar = 'L' then
  906. ParseElementDecl
  907. else if FCurChar = 'N' then
  908. ParseEntityDecl;
  909. end
  910. else if FCurChar = 'A' then // ATTLIST
  911. ParseAttlistDecl
  912. else if FCurChar = 'N' then // NOTATION
  913. ParseNotationDecl
  914. else if FCurChar = '-' then
  915. ParseComment(Doc);
  916. end
  917. else if FCurChar = '?' then
  918. ParsePI;
  919. until False;
  920. end;
  921. procedure TXMLReader.ProcessDTD(ABuf: PChar; const AFilename: String);
  922. begin
  923. buf := ABuf;
  924. Filename := AFilename;
  925. FLine := 1;
  926. FColumn := 0;
  927. FEncoding := enUTF8; // TODO: Detect? Don't know for sure now...
  928. GetChar;
  929. doc := TXMLReaderDocument.Create;
  930. ParseMarkupDecl; // TODO: PEReferences?
  931. {
  932. if buf[0] <> #0 then begin
  933. DebugLn('=== Unparsed: ===');
  934. //DebugLn(buf);
  935. DebugLn(StrLen(buf), ' chars');
  936. end;
  937. }
  938. end;
  939. procedure TXMLReader.ParseCharData(AOwner: TDOMNode); // [14]
  940. var
  941. nonWs: Boolean;
  942. name: WideString;
  943. begin
  944. FValueLength := 0;
  945. nonWs := False;
  946. while not ((FCurChar = #0) or (FCurChar = '<') or (FCurChar = '&')) do
  947. begin
  948. if not IsWhitespace(FCurChar) then
  949. nonWs := True;
  950. AppendValue(FCurChar);
  951. GetChar;
  952. end;
  953. if nonWs then
  954. begin
  955. SetString(name, PWideChar(@FValue[0]), FValueLength);
  956. AOwner.AppendChild(doc.CreateTextNode(name));
  957. end;
  958. end;
  959. // starting '<!' already consumed
  960. procedure TXMLReader.ParseCDSect(AOwner: TDOMNode); // [18]
  961. var
  962. name: WideString;
  963. begin
  964. ExpectString('[CDATA[');
  965. FValueLength := 0;
  966. repeat
  967. AppendValue(FCurChar);
  968. GetChar;
  969. if (FValueLength >= 3) and (FValue[FValueLength-1] = '>') and
  970. (FValue[FValueLength-2] = ']') and (FValue[FValueLength-3] = ']') then
  971. begin
  972. Dec(FValueLength, 3);
  973. Break;
  974. end;
  975. until FCurChar = #0;
  976. SetString(name, PWideChar(@FValue[0]), FValueLength);
  977. AOwner.AppendChild(doc.CreateCDATASection(name));
  978. end;
  979. {
  980. returns True at end of stream.
  981. this is ok for fragments but error for document
  982. returns False when '<' is followed by ([^![?] | NameStartChar)
  983. this is ok for document (expect ETag then) but error for fragment
  984. }
  985. function TXMLReader.ParseElementContent(AOwner: TDOMNode): Boolean;
  986. begin
  987. Result := False;
  988. repeat
  989. SkipWhitespace;
  990. if FCurChar = '<' then
  991. begin
  992. GetChar;
  993. if FCurChar = '!' then
  994. begin
  995. GetChar;
  996. if FCurChar = '[' then
  997. ParseCDSect(AOwner)
  998. else if FCurChar = '-' then
  999. ParseComment(AOwner);
  1000. end
  1001. else if FCurChar = '?' then
  1002. ParsePI
  1003. else if CheckName then
  1004. ParseElement(AOwner)
  1005. else
  1006. Exit;
  1007. end
  1008. else if FCurChar = '&' then
  1009. ParseReference(AOwner)
  1010. else
  1011. ParseCharData(AOwner);
  1012. until FCurChar = #0;
  1013. Result := True;
  1014. end;
  1015. // Element name already in FValueBuffer
  1016. procedure TXMLReader.ParseElement(AOwner: TDOMNode); // [39] [40] [44]
  1017. var
  1018. NewElem: TDOMElement;
  1019. IsEmpty: Boolean;
  1020. attr: TDOMAttr;
  1021. name: WideString;
  1022. begin
  1023. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement A');{$ENDIF}
  1024. SetString(name, PWideChar(@FValue[0]), FValueLength);
  1025. NewElem := doc.CreateElement(name);
  1026. AOwner.AppendChild(NewElem);
  1027. SkipWhitespace;
  1028. IsEmpty := False;
  1029. while not CheckForChar('>') do
  1030. begin
  1031. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement E');{$ENDIF}
  1032. if CheckForChar('/') then
  1033. begin
  1034. ExpectChar('>'); // '>' should follow, otherwise it's fatal error
  1035. IsEmpty := True;
  1036. Break;
  1037. end;
  1038. // Get Attribute [41]
  1039. attr := doc.CreateAttribute(ExpectName);
  1040. NewElem.Attributes.SetNamedItem(attr);
  1041. ExpectEq;
  1042. ExpectAttValue(attr);
  1043. SkipWhitespace;
  1044. end;
  1045. if not IsEmpty then
  1046. begin
  1047. if not ParseElementContent(NewElem) then
  1048. begin
  1049. if CheckForChar('/') then // Get ETag [42]
  1050. begin
  1051. if ExpectName <> NewElem.NodeName then
  1052. RaiseExc('Unmatching element end tag (expected "</' + NewElem.NodeName + '>")');
  1053. SkipWhitespace;
  1054. ExpectChar('>');
  1055. end
  1056. else
  1057. RaiseExc('Invalid name start character');
  1058. end;
  1059. end;
  1060. {$IFDEF MEM_CHECK}CheckHeapWrtMemCnt(' ParseElement END');{$ENDIF}
  1061. ResolveEntities(NewElem);
  1062. end;
  1063. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  1064. begin
  1065. if CheckName then
  1066. ParseElement(AOwner)
  1067. else
  1068. RaiseExc('Expected element');
  1069. end;
  1070. function TXMLReader.ParsePEReference: Boolean; // [69]
  1071. begin
  1072. Result := CheckForChar('%');
  1073. if Result then
  1074. begin
  1075. SkipName;
  1076. ExpectChar(';');
  1077. end;
  1078. end;
  1079. // FCurChar = '&' here
  1080. procedure TXMLReader.ParseReference(AOwner: TDOMNode); // [67] [68]
  1081. var
  1082. StrBuf: array[0..31] of char;
  1083. StrLength: Integer;
  1084. s: string;
  1085. Value: Integer;
  1086. PrevNode: TDomNode;
  1087. procedure AppendChar(c: WideChar);
  1088. begin
  1089. if StrLength < High(StrBuf) then
  1090. begin
  1091. StrBuf[StrLength] := char(c);
  1092. Inc(StrLength);
  1093. end;
  1094. GetChar;
  1095. end;
  1096. begin
  1097. GetChar; // skip '&'
  1098. if CheckForChar('#') then
  1099. begin // Test for CharRef [66]
  1100. StrLength := 0;
  1101. if CheckForChar('x') then
  1102. begin
  1103. AppendChar('$');
  1104. while ((ord(FCurChar) < 256) and (char(FCurChar) in ['0'..'9', 'a'..'f', 'A'..'F'])) do
  1105. AppendChar(FCurChar);
  1106. end else
  1107. while ((ord(FCurChar) < 256) and (char(FCurChar) in ['0'..'9'])) do
  1108. AppendChar(FCurChar);
  1109. // TODO: get rid of temp string here
  1110. SetString(s, StrBuf, StrLength);
  1111. // This will handle case of no digits present
  1112. Value := StrToIntDef(s, -1);
  1113. if (Value < 0) or (Value > $FFFF) then
  1114. RaiseExc('Invalid character reference')
  1115. else
  1116. begin
  1117. PrevNode := AOwner.LastChild;
  1118. // TODO: partial solution, check other similar cases
  1119. if Assigned(PrevNode) and (PrevNode.NodeType = TEXT_NODE) then
  1120. TDomCharacterData(PrevNode).AppendData(WideChar(Value))
  1121. else
  1122. AOwner.AppendChild(doc.CreateTextNode(WideChar(Value)));
  1123. end;
  1124. end else
  1125. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  1126. ExpectChar(';');
  1127. end;
  1128. function TXMLReader.ParseExternalID: Boolean; // [75]
  1129. procedure SkipSystemLiteral;
  1130. var
  1131. Delim: WideChar;
  1132. begin
  1133. if (FCurChar = '''') or (FCurChar = '"') then
  1134. begin
  1135. Delim := FCurChar;
  1136. GetChar; // skip quote
  1137. while (FCurChar <> Delim) and (FCurChar <> #0) do
  1138. begin
  1139. GetChar;
  1140. end;
  1141. ExpectChar(Delim); // <-- to check the EOF only
  1142. end;
  1143. end;
  1144. begin
  1145. if FCurChar = 'S' then
  1146. begin
  1147. ExpectString('SYSTEM');
  1148. ExpectWhitespace;
  1149. SkipSystemLiteral;
  1150. Result := True;
  1151. end
  1152. else
  1153. if FCurChar = 'P' then
  1154. begin
  1155. ExpectString('PUBLIC');
  1156. ExpectWhitespace;
  1157. SkipPubidLiteral;
  1158. ExpectWhitespace;
  1159. SkipSystemLiteral;
  1160. Result := True;
  1161. end else
  1162. Result := False;
  1163. end;
  1164. procedure TXMLReader.ExpectExternalID;
  1165. begin
  1166. if not ParseExternalID then
  1167. RaiseExc('Expected external ID');
  1168. end;
  1169. procedure TXMLReader.SkipEncodingDecl; // [80]
  1170. var
  1171. Delim: WideChar;
  1172. begin
  1173. SkipWhitespace;
  1174. if CheckForChar('e') then
  1175. begin
  1176. ExpectString('ncoding');
  1177. ExpectEq;
  1178. if (FCurChar = '''') or (FCurChar = '"') then
  1179. begin
  1180. Delim := FCurChar;
  1181. GetChar; // skip quote
  1182. if not ((ord(FCurChar) < 256) and (char(FCurChar) in ['A'..'Z', 'a'..'z'])) then
  1183. RaiseExc('Expected character (A-Z, a-z)');
  1184. SkipString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  1185. ExpectChar(Delim);
  1186. end;
  1187. end;
  1188. end;
  1189. { Currently, this method will only resolve the entities which are
  1190. predefined in XML: }
  1191. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  1192. var
  1193. Node, NextNode: TDOMNode;
  1194. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: WideString);
  1195. var
  1196. PrevSibling, NextSibling: TDOMNode;
  1197. begin
  1198. PrevSibling := EntityNode.PreviousSibling;
  1199. NextSibling := EntityNode.NextSibling;
  1200. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  1201. begin
  1202. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  1203. RootNode.RemoveChild(EntityNode);
  1204. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  1205. begin
  1206. // next sibling is to be removed, so we can't use it anymore
  1207. NextNode := NextSibling.NextSibling;
  1208. TDOMCharacterData(PrevSibling).AppendData(
  1209. TDOMCharacterData(NextSibling).Data);
  1210. RootNode.RemoveChild(NextSibling);
  1211. end
  1212. end else
  1213. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  1214. begin
  1215. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  1216. RootNode.RemoveChild(EntityNode);
  1217. end else
  1218. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  1219. end;
  1220. begin
  1221. Node := RootNode.FirstChild;
  1222. while Assigned(Node) do
  1223. begin
  1224. NextNode := Node.NextSibling;
  1225. if Node.NodeType = ENTITY_REFERENCE_NODE then
  1226. if Node.NodeName = 'amp' then
  1227. ReplaceEntityRef(Node, '&')
  1228. else if Node.NodeName = 'apos' then
  1229. ReplaceEntityRef(Node, '''')
  1230. else if Node.NodeName = 'gt' then
  1231. ReplaceEntityRef(Node, '>')
  1232. else if Node.NodeName = 'lt' then
  1233. ReplaceEntityRef(Node, '<')
  1234. else if Node.NodeName = 'quot' then
  1235. ReplaceEntityRef(Node, '"');
  1236. Node := NextNode;
  1237. end;
  1238. end;
  1239. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: File);
  1240. var
  1241. reader: TXMLReader;
  1242. buf: PChar;
  1243. BufSize: LongInt;
  1244. begin
  1245. ADoc := nil;
  1246. BufSize := FileSize(f) + 2; // need double termination for the case of Unicode
  1247. if BufSize <= 2 then
  1248. exit;
  1249. GetMem(buf, BufSize);
  1250. try
  1251. BlockRead(f, buf^, BufSize - 2);
  1252. buf[BufSize - 1] := #0;
  1253. buf[BufSize] := #0;
  1254. Reader := TXMLReader.Create;
  1255. try
  1256. Reader.ProcessXML(buf, TFileRec(f).name);
  1257. ADoc := TXMLDocument(Reader.doc);
  1258. finally
  1259. Reader.Free;
  1260. end;
  1261. finally
  1262. FreeMem(buf);
  1263. end;
  1264. end;
  1265. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String);
  1266. var
  1267. reader: TXMLReader;
  1268. buf: PChar;
  1269. StreamSize: Int64;
  1270. begin
  1271. ADoc := nil;
  1272. StreamSize := f.Size; // access to Size causes at least two seeks...
  1273. if StreamSize = 0 then exit;
  1274. GetMem(buf, StreamSize + 2);
  1275. try
  1276. f.Read(buf^, StreamSize);
  1277. buf[StreamSize] := #0;
  1278. buf[StreamSize+1] := #0;
  1279. Reader := TXMLReader.Create;
  1280. try
  1281. Reader.ProcessXML(buf, AFilename);
  1282. finally
  1283. ADoc := TXMLDocument(Reader.doc);
  1284. Reader.Free;
  1285. end;
  1286. finally
  1287. FreeMem(buf);
  1288. end;
  1289. end;
  1290. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream);
  1291. begin
  1292. ReadXMLFile(ADoc, f, '<Stream>');
  1293. end;
  1294. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
  1295. var
  1296. FileStream: TStream;
  1297. begin
  1298. ADoc := nil;
  1299. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1300. if FileStream = nil then exit; //? it throws exception if cannot be created...
  1301. try
  1302. ReadXMLFile(ADoc, FileStream, AFilename);
  1303. finally
  1304. FileStream.Free;
  1305. end;
  1306. end;
  1307. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  1308. var
  1309. Reader: TXMLReader;
  1310. buf: PChar;
  1311. BufSize: LongInt;
  1312. begin
  1313. BufSize := FileSize(f) + 2;
  1314. if BufSize <= 2 then
  1315. exit;
  1316. GetMem(buf, BufSize);
  1317. try
  1318. BlockRead(f, buf^, BufSize - 2);
  1319. buf[BufSize - 1] := #0;
  1320. buf[BufSize] := #0;
  1321. Reader := TXMLReader.Create;
  1322. try
  1323. Reader.Doc := AParentNode.OwnerDocument;
  1324. Reader.ProcessFragment(AParentNode, buf, TFileRec(f).name);
  1325. finally
  1326. Reader.Free;
  1327. end;
  1328. finally
  1329. FreeMem(buf);
  1330. end;
  1331. end;
  1332. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const AFilename: String);
  1333. var
  1334. Reader: TXMLReader;
  1335. buf: PChar;
  1336. StreamSize: Int64;
  1337. begin
  1338. StreamSize := f.Size;
  1339. if StreamSize = 0 then
  1340. exit;
  1341. GetMem(buf, StreamSize + 2);
  1342. try
  1343. f.Read(buf^, StreamSize);
  1344. buf[StreamSize] := #0;
  1345. buf[StreamSize+1] := #0;
  1346. Reader := TXMLReader.Create;
  1347. Reader.Doc := AParentNode.OwnerDocument;
  1348. try
  1349. Reader.ProcessFragment(AParentNode, buf, AFilename);
  1350. finally
  1351. Reader.Free;
  1352. end;
  1353. finally
  1354. FreeMem(buf);
  1355. end;
  1356. end;
  1357. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  1358. begin
  1359. ReadXMLFragment(AParentNode, f, '<Stream>');
  1360. end;
  1361. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  1362. var
  1363. Stream: TStream;
  1364. begin
  1365. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1366. try
  1367. ReadXMLFragment(AParentNode, Stream, AFilename);
  1368. finally
  1369. Stream.Free;
  1370. end;
  1371. end;
  1372. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: File);
  1373. var
  1374. Reader: TXMLReader;
  1375. buf: PChar;
  1376. BufSize: LongInt;
  1377. begin
  1378. ADoc := nil;
  1379. BufSize := FileSize(f) + 1;
  1380. if BufSize <= 1 then
  1381. exit;
  1382. GetMem(buf, BufSize);
  1383. try
  1384. BlockRead(f, buf^, BufSize - 1);
  1385. buf[BufSize - 1] := #0;
  1386. Reader := TXMLReader.Create;
  1387. try
  1388. Reader.ProcessDTD(buf, TFileRec(f).name);
  1389. ADoc := TXMLDocument(Reader.doc);
  1390. finally
  1391. Reader.Free;
  1392. end;
  1393. finally
  1394. FreeMem(buf);
  1395. end;
  1396. end;
  1397. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const AFilename: String);
  1398. var
  1399. Reader: TXMLReader;
  1400. buf: PChar;
  1401. begin
  1402. ADoc := nil;
  1403. if f.Size = 0 then
  1404. exit;
  1405. GetMem(buf, f.Size + 1);
  1406. try
  1407. f.Read(buf^, f.Size);
  1408. buf[f.Size] := #0;
  1409. Reader := TXMLReader.Create;
  1410. try
  1411. Reader.ProcessDTD(buf, AFilename);
  1412. ADoc := TXMLDocument(Reader.doc);
  1413. finally
  1414. Reader.Free;
  1415. end;
  1416. finally
  1417. FreeMem(buf);
  1418. end;
  1419. end;
  1420. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
  1421. begin
  1422. ReadDTDFile(ADoc, f, '<Stream>');
  1423. end;
  1424. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
  1425. var
  1426. Stream: TStream;
  1427. begin
  1428. ADoc := nil;
  1429. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  1430. try
  1431. ReadDTDFile(ADoc, Stream, AFilename);
  1432. finally
  1433. Stream.Free;
  1434. end;
  1435. end;
  1436. end.