xmlread.pp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. XML reading routines.
  5. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit XMLRead;
  15. interface
  16. uses SysUtils, Classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. WhitespaceChars: set of Char = [#9, #10, #13, ' '];
  38. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  39. type
  40. TXMLReaderDocument = class(TXMLDocument)
  41. public
  42. procedure SetDocType(ADocType: TDOMDocumentType);
  43. end;
  44. TXMLReaderDocumentType = class(TDOMDocumentType)
  45. public
  46. constructor Create(ADocument: TXMLReaderDocument);
  47. property Name: DOMString read FNodeName write FNodeName;
  48. end;
  49. TSetOfChar = set of Char;
  50. TXMLReader = class
  51. protected
  52. buf, BufStart: PChar;
  53. Filename: String;
  54. procedure RaiseExc(descr: String);
  55. function SkipWhitespace: Boolean;
  56. procedure ExpectWhitespace;
  57. procedure ExpectString(s: String);
  58. function CheckFor(s: PChar): Boolean;
  59. function GetString(ValidChars: TSetOfChar): String;
  60. function GetName(var s: String): Boolean;
  61. function ExpectName: String; // [5]
  62. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  63. function ExpectPubidLiteral: String; // [12]
  64. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  65. function ParsePI: Boolean; // [16]
  66. procedure ExpectProlog; // [22]
  67. function ParseEq: Boolean; // [25]
  68. procedure ExpectEq;
  69. procedure ParseMisc(AOwner: TDOMNode); // [27]
  70. function ParseMarkupDecl: Boolean; // [29]
  71. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  72. procedure ExpectElement(AOwner: TDOMNode);
  73. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  74. procedure ExpectReference(AOwner: TDOMNode);
  75. function ParsePEReference: Boolean; // [69]
  76. function ParseExternalID: Boolean; // [75]
  77. procedure ExpectExternalID;
  78. function ParseEncodingDecl: String; // [80]
  79. procedure ResolveEntities(RootNode: TDOMNode);
  80. public
  81. doc: TXMLReaderDocument;
  82. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  83. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  84. end;
  85. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  86. begin
  87. FDocType := ADocType;
  88. end;
  89. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  90. begin
  91. inherited Create(ADocument);
  92. end;
  93. procedure TXMLReader.RaiseExc(descr: String);
  94. var
  95. apos: PChar;
  96. x, y: Integer;
  97. begin
  98. // find out the line in which the error occured
  99. apos := BufStart;
  100. x := 1;
  101. y := 1;
  102. while apos < buf do begin
  103. if apos[0] = #10 then begin
  104. Inc(y);
  105. x := 1;
  106. end else
  107. Inc(x);
  108. Inc(apos);
  109. end;
  110. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  111. IntToStr(x) + '): ' + descr);
  112. end;
  113. function TXMLReader.SkipWhitespace: Boolean;
  114. begin
  115. Result := False;
  116. while buf[0] in WhitespaceChars do
  117. begin
  118. Inc(buf);
  119. Result := True;
  120. end;
  121. end;
  122. procedure TXMLReader.ExpectWhitespace;
  123. begin
  124. if not SkipWhitespace then
  125. RaiseExc('Expected whitespace');
  126. end;
  127. procedure TXMLReader.ExpectString(s: String);
  128. var
  129. i: Integer;
  130. s2: PChar;
  131. s3: String;
  132. begin
  133. for i := 1 to Length(s) do
  134. if buf[i - 1] <> s[i] then begin
  135. GetMem(s2, Length(s) + 1);
  136. StrLCopy(s2, buf, Length(s));
  137. s3 := StrPas(s2);
  138. FreeMem(s2, Length(s) + 1);
  139. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  140. end;
  141. Inc(buf, Length(s));
  142. end;
  143. function TXMLReader.CheckFor(s: PChar): Boolean;
  144. begin
  145. if buf[0] = #0 then begin
  146. Result := False;
  147. exit;
  148. end;
  149. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  150. Inc(buf, StrLen(s));
  151. Result := True;
  152. end else
  153. Result := False;
  154. end;
  155. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  156. begin
  157. SetLength(Result, 0);
  158. while buf[0] in ValidChars do begin
  159. Result := Result + buf[0];
  160. Inc(buf);
  161. end;
  162. end;
  163. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  164. var
  165. LastNodeBeforeDoc: TDOMNode;
  166. begin
  167. buf := ABuf;
  168. BufStart := ABuf;
  169. Filename := AFilename;
  170. doc := TXMLReaderDocument.Create;
  171. ExpectProlog;
  172. LastNodeBeforeDoc := doc.LastChild;
  173. ExpectElement(doc);
  174. ParseMisc(doc);
  175. if buf[0] <> #0 then
  176. RaiseExc('Text after end of document element found');
  177. {
  178. if buf[0] <> #0 then begin
  179. WriteLn('=== Unparsed: ===');
  180. //WriteLn(buf);
  181. WriteLn(StrLen(buf), ' chars');
  182. end;
  183. }
  184. end;
  185. function TXMLReader.GetName(var s: String): Boolean; // [5]
  186. begin
  187. SetLength(s, 0);
  188. if not (buf[0] in (Letter + ['_', ':'])) then begin
  189. Result := False;
  190. exit;
  191. end;
  192. s := buf[0];
  193. Inc(buf);
  194. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  195. Result := True;
  196. end;
  197. function TXMLReader.ExpectName: String; // [5]
  198. begin
  199. if not (buf[0] in (Letter + ['_', ':'])) then
  200. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  201. Result := buf[0];
  202. Inc(buf);
  203. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  204. end;
  205. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  206. var
  207. s: String;
  208. procedure FlushStringBuffer;
  209. begin
  210. if Length(s) > 0 then
  211. begin
  212. attr.AppendChild(doc.CreateTextNode(s));
  213. SetLength(s, 0);
  214. end;
  215. end;
  216. var
  217. StrDel: array[0..1] of Char; // String delimiter
  218. begin
  219. if (buf[0] <> '''') and (buf[0] <> '"') then
  220. RaiseExc('Expected quotation marks');
  221. StrDel[0] := buf[0];
  222. StrDel[1] := #0;
  223. Inc(buf);
  224. SetLength(s, 0);
  225. while not CheckFor(StrDel) do
  226. if buf[0] = '&' then
  227. begin
  228. FlushStringBuffer;
  229. ParseReference(attr);
  230. end else
  231. begin
  232. s := s + buf[0];
  233. Inc(buf);
  234. end;
  235. FlushStringBuffer;
  236. ResolveEntities(Attr);
  237. end;
  238. function TXMLReader.ExpectPubidLiteral: String;
  239. begin
  240. SetLength(Result, 0);
  241. if CheckFor('''') then begin
  242. GetString(PubidChars - ['''']);
  243. ExpectString('''');
  244. end else if CheckFor('"') then begin
  245. GetString(PubidChars - ['"']);
  246. ExpectString('"');
  247. end else
  248. RaiseExc('Expected quotation marks');
  249. end;
  250. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  251. var
  252. comment: String;
  253. begin
  254. if CheckFor('<!--') then begin
  255. SetLength(comment, 0);
  256. while (buf[0] <> #0) and (buf[1] <> #0) and
  257. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  258. comment := comment + buf[0];
  259. Inc(buf);
  260. end;
  261. AOwner.AppendChild(doc.CreateComment(comment));
  262. ExpectString('-->');
  263. Result := True;
  264. end else
  265. Result := False;
  266. end;
  267. function TXMLReader.ParsePI: Boolean; // [16]
  268. var
  269. checkbuf: array[0..3] of char;
  270. begin
  271. if CheckFor('<?') then begin
  272. StrLCopy(checkbuf, buf, 3);
  273. if UpCase(StrPas(checkbuf)) = 'XML' then
  274. RaiseExc('"<?xml" processing instruction not allowed here');
  275. ExpectName;
  276. if SkipWhitespace then
  277. while (buf[0] <> #0) and (buf[1] <> #0) and not
  278. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  279. ExpectString('?>');
  280. Result := True;
  281. end else
  282. Result := False;
  283. end;
  284. procedure TXMLReader.ExpectProlog; // [22]
  285. procedure ParseVersionNum;
  286. begin
  287. doc.XMLVersion :=
  288. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  289. end;
  290. procedure ParseDoctypeDecls;
  291. begin
  292. repeat
  293. SkipWhitespace;
  294. until not (ParseMarkupDecl or ParsePEReference);
  295. ExpectString(']');
  296. end;
  297. var
  298. DocType: TXMLReaderDocumentType;
  299. begin
  300. if CheckFor('<?xml') then
  301. begin
  302. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  303. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  304. SkipWhitespace;
  305. ExpectString('version');
  306. ParseEq;
  307. if buf[0] = '''' then
  308. begin
  309. Inc(buf);
  310. ParseVersionNum;
  311. ExpectString('''');
  312. end else if buf[0] = '"' then
  313. begin
  314. Inc(buf);
  315. ParseVersionNum;
  316. ExpectString('"');
  317. end else
  318. RaiseExc('Expected single or double quotation mark');
  319. // EncodingDecl?
  320. ParseEncodingDecl;
  321. // SDDecl?
  322. SkipWhitespace;
  323. if CheckFor('standalone') then
  324. begin
  325. ExpectEq;
  326. if buf[0] = '''' then
  327. begin
  328. Inc(buf);
  329. if not (CheckFor('yes''') or CheckFor('no''')) then
  330. RaiseExc('Expected ''yes'' or ''no''');
  331. end else if buf[0] = '''' then
  332. begin
  333. Inc(buf);
  334. if not (CheckFor('yes"') or CheckFor('no"')) then
  335. RaiseExc('Expected "yes" or "no"');
  336. end;
  337. SkipWhitespace;
  338. end;
  339. ExpectString('?>');
  340. end;
  341. // Check for "Misc*"
  342. ParseMisc(doc);
  343. // Check for "(doctypedecl Misc*)?" [28]
  344. if CheckFor('<!DOCTYPE') then
  345. begin
  346. DocType := TXMLReaderDocumentType.Create(doc);
  347. doc.SetDocType(DocType);
  348. SkipWhitespace;
  349. DocType.Name := ExpectName;
  350. SkipWhitespace;
  351. if CheckFor('[') then
  352. begin
  353. ParseDoctypeDecls;
  354. SkipWhitespace;
  355. ExpectString('>');
  356. end else if not CheckFor('>') then
  357. begin
  358. ParseExternalID;
  359. SkipWhitespace;
  360. if CheckFor('[') then
  361. begin
  362. ParseDoctypeDecls;
  363. SkipWhitespace;
  364. end;
  365. ExpectString('>');
  366. end;
  367. ParseMisc(doc);
  368. end;
  369. end;
  370. function TXMLReader.ParseEq: Boolean; // [25]
  371. var
  372. savedbuf: PChar;
  373. begin
  374. savedbuf := buf;
  375. SkipWhitespace;
  376. if buf[0] = '=' then begin
  377. Inc(buf);
  378. SkipWhitespace;
  379. Result := True;
  380. end else begin
  381. buf := savedbuf;
  382. Result := False;
  383. end;
  384. end;
  385. procedure TXMLReader.ExpectEq;
  386. begin
  387. if not ParseEq then
  388. RaiseExc('Expected "="');
  389. end;
  390. // Parse "Misc*":
  391. // Misc ::= Comment | PI | S
  392. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  393. begin
  394. repeat
  395. SkipWhitespace;
  396. until not (ParseComment(AOwner) or ParsePI);
  397. end;
  398. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  399. function ParseElementDecl: Boolean; // [45]
  400. procedure ExpectChoiceOrSeq; // [49], [50]
  401. procedure ExpectCP; // [48]
  402. begin
  403. if CheckFor('(') then
  404. ExpectChoiceOrSeq
  405. else
  406. ExpectName;
  407. if CheckFor('?') then
  408. else if CheckFor('*') then
  409. else if CheckFor('+') then;
  410. end;
  411. var
  412. delimiter: Char;
  413. begin
  414. SkipWhitespace;
  415. ExpectCP;
  416. SkipWhitespace;
  417. delimiter := #0;
  418. while not CheckFor(')') do begin
  419. if delimiter = #0 then begin
  420. if (buf[0] = '|') or (buf[0] = ',') then
  421. delimiter := buf[0]
  422. else
  423. RaiseExc('Expected "|" or ","');
  424. Inc(buf);
  425. end else
  426. ExpectString(delimiter);
  427. SkipWhitespace;
  428. ExpectCP;
  429. end;
  430. end;
  431. begin
  432. if CheckFor('<!ELEMENT') then begin
  433. ExpectWhitespace;
  434. ExpectName;
  435. ExpectWhitespace;
  436. // Get contentspec [46]
  437. if CheckFor('EMPTY') then
  438. else if CheckFor('ANY') then
  439. else if CheckFor('(') then begin
  440. SkipWhitespace;
  441. if CheckFor('#PCDATA') then begin
  442. // Parse Mixed section [51]
  443. SkipWhitespace;
  444. if not CheckFor(')') then
  445. repeat
  446. ExpectString('|');
  447. SkipWhitespace;
  448. ExpectName;
  449. until CheckFor(')*');
  450. end else begin
  451. // Parse Children section [47]
  452. ExpectChoiceOrSeq;
  453. if CheckFor('?') then
  454. else if CheckFor('*') then
  455. else if CheckFor('+') then;
  456. end;
  457. end else
  458. RaiseExc('Invalid content specification');
  459. SkipWhitespace;
  460. ExpectString('>');
  461. Result := True;
  462. end else
  463. Result := False;
  464. end;
  465. function ParseAttlistDecl: Boolean; // [52]
  466. var
  467. attr: TDOMAttr;
  468. begin
  469. if CheckFor('<!ATTLIST') then begin
  470. ExpectWhitespace;
  471. ExpectName;
  472. SkipWhitespace;
  473. while not CheckFor('>') do begin
  474. ExpectName;
  475. ExpectWhitespace;
  476. // Get AttType [54], [55], [56]
  477. if CheckFor('CDATA') then
  478. else if CheckFor('ID') then
  479. else if CheckFor('IDREF') then
  480. else if CheckFor('IDREFS') then
  481. else if CheckFor('ENTITTY') then
  482. else if CheckFor('ENTITIES') then
  483. else if CheckFor('NMTOKEN') then
  484. else if CheckFor('NMTOKENS') then
  485. else if CheckFor('NOTATION') then begin // [57], [58]
  486. ExpectWhitespace;
  487. ExpectString('(');
  488. SkipWhitespace;
  489. ExpectName;
  490. SkipWhitespace;
  491. while not CheckFor(')') do begin
  492. ExpectString('|');
  493. SkipWhitespace;
  494. ExpectName;
  495. SkipWhitespace;
  496. end;
  497. end else if CheckFor('(') then begin // [59]
  498. SkipWhitespace;
  499. GetString(Nmtoken);
  500. SkipWhitespace;
  501. while not CheckFor(')') do begin
  502. ExpectString('|');
  503. SkipWhitespace;
  504. GetString(Nmtoken);
  505. SkipWhitespace;
  506. end;
  507. end else
  508. RaiseExc('Invalid tokenized type');
  509. ExpectWhitespace;
  510. // Get DefaultDecl [60]
  511. if CheckFor('#REQUIRED') then
  512. else if CheckFor('#IMPLIED') then
  513. else begin
  514. if CheckFor('#FIXED') then
  515. SkipWhitespace;
  516. attr := doc.CreateAttribute('');
  517. ExpectAttValue(attr);
  518. end;
  519. SkipWhitespace;
  520. end;
  521. Result := True;
  522. end else
  523. Result := False;
  524. end;
  525. function ParseEntityDecl: Boolean; // [70]
  526. var
  527. NewEntity: TDOMEntity;
  528. function ParseEntityValue: Boolean; // [9]
  529. var
  530. strdel: array[0..1] of Char;
  531. begin
  532. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  533. Result := False;
  534. exit;
  535. end;
  536. strdel[0] := buf[0];
  537. strdel[1] := #0;
  538. Inc(buf);
  539. while not CheckFor(strdel) do
  540. if ParsePEReference then
  541. else if ParseReference(NewEntity) then
  542. else begin
  543. Inc(buf); // Normal haracter
  544. end;
  545. Result := True;
  546. end;
  547. begin
  548. if CheckFor('<!ENTITY') then begin
  549. ExpectWhitespace;
  550. if CheckFor('%') then begin // [72]
  551. ExpectWhitespace;
  552. NewEntity := doc.CreateEntity(ExpectName);
  553. ExpectWhitespace;
  554. // Get PEDef [74]
  555. if ParseEntityValue then
  556. else if ParseExternalID then
  557. else
  558. RaiseExc('Expected entity value or external ID');
  559. end else begin // [71]
  560. NewEntity := doc.CreateEntity(ExpectName);
  561. ExpectWhitespace;
  562. // Get EntityDef [73]
  563. if ParseEntityValue then
  564. else begin
  565. ExpectExternalID;
  566. // Get NDataDecl [76]
  567. ExpectWhitespace;
  568. ExpectString('NDATA');
  569. ExpectWhitespace;
  570. ExpectName;
  571. end;
  572. end;
  573. SkipWhitespace;
  574. ExpectString('>');
  575. Result := True;
  576. end else
  577. Result := False;
  578. end;
  579. function ParseNotationDecl: Boolean; // [82]
  580. begin
  581. if CheckFor('<!NOTATION') then begin
  582. ExpectWhitespace;
  583. ExpectName;
  584. ExpectWhitespace;
  585. if ParseExternalID then
  586. else if CheckFor('PUBLIC') then begin // [83]
  587. ExpectWhitespace;
  588. ExpectPubidLiteral;
  589. end else
  590. RaiseExc('Expected external or public ID');
  591. SkipWhitespace;
  592. ExpectString('>');
  593. Result := True;
  594. end else
  595. Result := False;
  596. end;
  597. begin
  598. Result := False;
  599. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  600. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  601. Result := True;
  602. end;
  603. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  604. begin
  605. buf := ABuf;
  606. BufStart := ABuf;
  607. Filename := AFilename;
  608. doc := TXMLReaderDocument.Create;
  609. ParseMarkupDecl;
  610. {
  611. if buf[0] <> #0 then begin
  612. WriteLn('=== Unparsed: ===');
  613. //WriteLn(buf);
  614. WriteLn(StrLen(buf), ' chars');
  615. end;
  616. }
  617. end;
  618. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  619. var
  620. NewElem: TDOMElement;
  621. function ParseCharData: Boolean; // [14]
  622. var
  623. s: String;
  624. i: Integer;
  625. begin
  626. SetLength(s, 0);
  627. while not (buf[0] in [#0, '<', '&']) do
  628. begin
  629. s := s + buf[0];
  630. Inc(buf);
  631. end;
  632. if Length(s) > 0 then
  633. begin
  634. // Check if s has non-whitespace content
  635. i := Length(s);
  636. while (i > 0) and (s[i] in WhitespaceChars) do
  637. Dec(i);
  638. if i > 0 then
  639. NewElem.AppendChild(doc.CreateTextNode(s));
  640. Result := True;
  641. end else
  642. Result := False;
  643. end;
  644. function ParseCDSect: Boolean; // [18]
  645. var
  646. cdata: String;
  647. begin
  648. if CheckFor('<![CDATA[') then
  649. begin
  650. SetLength(cdata, 0);
  651. while not CheckFor(']]>') do
  652. begin
  653. cdata := cdata + buf[0];
  654. Inc(buf);
  655. end;
  656. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  657. Result := True;
  658. end else
  659. Result := False;
  660. end;
  661. var
  662. IsEmpty: Boolean;
  663. name: String;
  664. oldpos: PChar;
  665. attr: TDOMAttr;
  666. begin
  667. oldpos := buf;
  668. if CheckFor('<') then
  669. begin
  670. if not GetName(name) then
  671. begin
  672. buf := oldpos;
  673. Result := False;
  674. exit;
  675. end;
  676. NewElem := doc.CreateElement(name);
  677. AOwner.AppendChild(NewElem);
  678. SkipWhitespace;
  679. IsEmpty := False;
  680. while True do
  681. begin
  682. if CheckFor('/>') then
  683. begin
  684. IsEmpty := True;
  685. break;
  686. end;
  687. if CheckFor('>') then
  688. break;
  689. // Get Attribute [41]
  690. attr := doc.CreateAttribute(ExpectName);
  691. NewElem.Attributes.SetNamedItem(attr);
  692. ExpectEq;
  693. ExpectAttValue(attr);
  694. SkipWhitespace;
  695. end;
  696. if not IsEmpty then
  697. begin
  698. // Get content
  699. SkipWhitespace;
  700. while ParseCharData or ParseCDSect or ParsePI or
  701. ParseComment(NewElem) or ParseElement(NewElem) or
  702. ParseReference(NewElem) do;
  703. // Get ETag [42]
  704. ExpectString('</');
  705. if ExpectName <> name then
  706. RaiseExc('Unmatching element end tag (expected "</' + name + '>")');
  707. SkipWhitespace;
  708. ExpectString('>');
  709. end;
  710. ResolveEntities(NewElem);
  711. Result := True;
  712. end else
  713. Result := False;
  714. end;
  715. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  716. begin
  717. if not ParseElement(AOwner) then
  718. RaiseExc('Expected element');
  719. end;
  720. function TXMLReader.ParsePEReference: Boolean; // [69]
  721. begin
  722. if CheckFor('%') then begin
  723. ExpectName;
  724. ExpectString(';');
  725. Result := True;
  726. end else
  727. Result := False;
  728. end;
  729. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  730. begin
  731. if not CheckFor('&') then begin
  732. Result := False;
  733. exit;
  734. end;
  735. if CheckFor('#') then begin // Test for CharRef [66]
  736. if CheckFor('x') then begin
  737. // !!!: there must be at least one digit
  738. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  739. end else
  740. // !!!: there must be at least one digit
  741. while buf[0] in ['0'..'9'] do Inc(buf);
  742. end else
  743. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  744. ExpectString(';');
  745. Result := True;
  746. end;
  747. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  748. begin
  749. if not ParseReference(AOwner) then
  750. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  751. end;
  752. function TXMLReader.ParseExternalID: Boolean; // [75]
  753. function GetSystemLiteral: String;
  754. begin
  755. SetLength(Result, 0);
  756. if buf[0] = '''' then begin
  757. Inc(buf);
  758. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  759. Result := Result + buf[0];
  760. Inc(buf);
  761. end;
  762. ExpectString('''');
  763. end else if buf[0] = '"' then begin
  764. Inc(buf);
  765. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  766. Result := Result + buf[0];
  767. Inc(buf);
  768. end;
  769. ExpectString('"');
  770. end;
  771. end;
  772. begin
  773. if CheckFor('SYSTEM') then begin
  774. ExpectWhitespace;
  775. GetSystemLiteral;
  776. Result := True;
  777. end else if CheckFor('PUBLIC') then begin
  778. ExpectWhitespace;
  779. ExpectPubidLiteral;
  780. ExpectWhitespace;
  781. GetSystemLiteral;
  782. Result := True;
  783. end else
  784. Result := False;
  785. end;
  786. procedure TXMLReader.ExpectExternalID;
  787. begin
  788. if not ParseExternalID then
  789. RaiseExc('Expected external ID');
  790. end;
  791. function TXMLReader.ParseEncodingDecl: String; // [80]
  792. function ParseEncName: String;
  793. begin
  794. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  795. RaiseExc('Expected character (A-Z, a-z)');
  796. Result := buf[0];
  797. Inc(buf);
  798. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  799. end;
  800. begin
  801. SetLength(Result, 0);
  802. SkipWhitespace;
  803. if CheckFor('encoding') then begin
  804. ExpectEq;
  805. if buf[0] = '''' then begin
  806. Inc(buf);
  807. Result := ParseEncName;
  808. ExpectString('''');
  809. end else if buf[0] = '"' then begin
  810. Inc(buf);
  811. Result := ParseEncName;
  812. ExpectString('"');
  813. end;
  814. end;
  815. end;
  816. { Currently, this method will only resolve the entities which are
  817. predefined in XML: }
  818. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  819. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: String);
  820. var
  821. PrevSibling, NextSibling: TDOMNode;
  822. begin
  823. PrevSibling := EntityNode.PreviousSibling;
  824. NextSibling := EntityNode.NextSibling;
  825. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  826. begin
  827. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  828. RootNode.RemoveChild(EntityNode);
  829. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  830. begin
  831. TDOMCharacterData(PrevSibling).AppendData(
  832. TDOMCharacterData(NextSibling).Data);
  833. RootNode.RemoveChild(NextSibling);
  834. end
  835. end else
  836. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  837. begin
  838. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  839. RootNode.RemoveChild(EntityNode);
  840. end else
  841. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  842. end;
  843. var
  844. Node, NextSibling: TDOMNode;
  845. begin
  846. Node := RootNode.FirstChild;
  847. while Assigned(Node) do
  848. begin
  849. NextSibling := Node.NextSibling;
  850. if Node.NodeType = ENTITY_REFERENCE_NODE then
  851. if Node.NodeName = 'amp' then
  852. ReplaceEntityRef(Node, '&')
  853. else if Node.NodeName = 'apos' then
  854. ReplaceEntityRef(Node, '''')
  855. else if Node.NodeName = 'gt' then
  856. ReplaceEntityRef(Node, '>')
  857. else if Node.NodeName = 'lt' then
  858. ReplaceEntityRef(Node, '<')
  859. else if Node.NodeName = 'quot' then
  860. ReplaceEntityRef(Node, '"');
  861. Node := NextSibling;
  862. end;
  863. end;
  864. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  865. var
  866. reader: TXMLReader;
  867. buf: PChar;
  868. BufSize: LongInt;
  869. begin
  870. ADoc := nil;
  871. BufSize := FileSize(f) + 1;
  872. if BufSize <= 1 then exit;
  873. GetMem(buf, BufSize);
  874. BlockRead(f, buf^, BufSize - 1);
  875. buf[BufSize - 1] := #0;
  876. reader := TXMLReader.Create;
  877. reader.ProcessXML(buf, Filerec(f).name);
  878. FreeMem(buf, BufSize);
  879. ADoc := reader.doc;
  880. reader.Free;
  881. end;
  882. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  883. const AFilename: String);
  884. var
  885. reader: TXMLReader;
  886. buf: PChar;
  887. begin
  888. ADoc := nil;
  889. if f.Size = 0 then exit;
  890. GetMem(buf, f.Size + 1);
  891. f.Read(buf^, f.Size);
  892. buf[f.Size] := #0;
  893. reader := TXMLReader.Create;
  894. reader.ProcessXML(buf, AFilename);
  895. FreeMem(buf, f.Size + 1);
  896. ADoc := reader.doc;
  897. reader.Free;
  898. end;
  899. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  900. begin
  901. ReadXMLFile(ADoc, f, '<Stream>');
  902. end;
  903. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  904. var
  905. stream: TFileStream;
  906. begin
  907. ADoc := nil;
  908. stream := TFileStream.Create(AFilename, fmOpenRead);
  909. try
  910. ReadXMLFile(ADoc, stream, AFilename);
  911. finally
  912. stream.Free;
  913. end;
  914. end;
  915. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  916. var
  917. reader: TXMLReader;
  918. buf: PChar;
  919. BufSize: LongInt;
  920. begin
  921. ADoc := nil;
  922. BufSize := FileSize(f) + 1;
  923. if BufSize <= 1 then exit;
  924. GetMem(buf, BufSize + 1);
  925. BlockRead(f, buf^, BufSize - 1);
  926. buf[BufSize - 1] := #0;
  927. reader := TXMLReader.Create;
  928. reader.ProcessDTD(buf, Filerec(f).name);
  929. FreeMem(buf, BufSize);
  930. ADoc := reader.doc;
  931. reader.Free;
  932. end;
  933. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  934. const AFilename: String);
  935. var
  936. reader: TXMLReader;
  937. buf: PChar;
  938. begin
  939. ADoc := nil;
  940. if f.Size = 0 then exit;
  941. GetMem(buf, f.Size + 1);
  942. f.Read(buf^, f.Size);
  943. buf[f.Size] := #0;
  944. reader := TXMLReader.Create;
  945. reader.ProcessDTD(buf, AFilename);
  946. FreeMem(buf, f.Size + 1);
  947. ADoc := reader.doc;
  948. reader.Free;
  949. end;
  950. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  951. begin
  952. ReadDTDFile(ADoc, f, '<Stream>');
  953. end;
  954. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  955. var
  956. stream: TFileStream;
  957. begin
  958. ADoc := nil;
  959. stream := TFileStream.Create(AFilename, fmOpenRead);
  960. try
  961. ReadDTDFile(ADoc, stream, AFilename);
  962. finally
  963. stream.Free;
  964. end;
  965. end;
  966. end.
  967. {
  968. $Log$
  969. Revision 1.5 2000-10-14 09:41:45 sg
  970. * Fixed typo in previous fix. (forgot closing bracket. Oops.)
  971. Revision 1.4 2000/10/14 09:40:44 sg
  972. * Extended the "Unmatching element end tag" exception, now the expected
  973. tag name is included in the message string.
  974. Revision 1.3 2000/07/29 14:52:25 sg
  975. * Modified the copyright notice to remove ambiguities
  976. Revision 1.2 2000/07/13 11:33:07 michael
  977. + removed logs
  978. }