xmlread.pp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999 Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit xmlread;
  15. interface
  16. uses classes, DOM;
  17. function ReadXMLFile(const AFileName: String): TXMLDocument;
  18. function ReadXMLFile(var f: File): TXMLDocument;
  19. function ReadXMLFile(var f: TStream): TXMLDocument;
  20. function ReadDTDFile(const AFileName: String): TXMLDocument;
  21. function ReadDTDFile(var f: File): TXMLDocument;
  22. function ReadDTDFile(var f: TStream): TXMLDocument;
  23. // =======================================================
  24. implementation
  25. uses sysutils;
  26. const
  27. Letter = ['A'..'Z', 'a'..'z'];
  28. Digit = ['0'..'9'];
  29. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  30. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  31. '#', '@', '$', '_', '%'];
  32. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  33. type
  34. TSetOfChar = set of Char;
  35. TXMLReader = class
  36. protected
  37. doc: TXMLDocument;
  38. buf, BufStart: PChar;
  39. procedure RaiseExc(descr: String);
  40. function SkipWhitespace: Boolean;
  41. procedure ExpectWhitespace;
  42. procedure ExpectString(s: String);
  43. function CheckFor(s: PChar): Boolean;
  44. function GetString(ValidChars: TSetOfChar): String;
  45. function GetName(var s: String): Boolean;
  46. function ExpectName: String; // [5]
  47. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  48. function ExpectPubidLiteral: String; // [12]
  49. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  50. function ParsePI: Boolean; // [16]
  51. procedure ExpectProlog; // [22]
  52. function ParseEq: Boolean; // [25]
  53. procedure ExpectEq;
  54. procedure ParseMisc(AOwner: TDOMNode); // [27]
  55. function ParseMarkupDecl: Boolean; // [29]
  56. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  57. procedure ExpectElement(AOwner: TDOMNode);
  58. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  59. procedure ExpectReference(AOwner: TDOMNode);
  60. function ParsePEReference: Boolean; // [69]
  61. function ParseExternalID: Boolean; // [75]
  62. procedure ExpectExternalID;
  63. function ParseEncodingDecl: String; // [80]
  64. public
  65. function ProcessXML(ABuf: PChar): TXMLDocument; // [1]
  66. function ProcessDTD(ABuf: PChar): TXMLDocument; // ([29])
  67. end;
  68. procedure TXMLReader.RaiseExc(descr: String);
  69. var
  70. apos: PChar;
  71. x, y: Integer;
  72. begin
  73. // find out the line in which the error occured
  74. apos := BufStart;
  75. x := 1;
  76. y := 1;
  77. while apos < buf do begin
  78. if apos[0] = #10 then begin
  79. Inc(y);
  80. x := 1;
  81. end else
  82. Inc(x);
  83. Inc(apos);
  84. end;
  85. raise Exception.Create('In XML reader (line ' + IntToStr(y) + ' pos ' +
  86. IntToStr(x) + '): ' + descr);
  87. end;
  88. function TXMLReader.SkipWhitespace: Boolean;
  89. begin
  90. Result := False;
  91. while buf[0] in [#9, #10, #13, ' '] do begin
  92. Inc(buf);
  93. Result := True;
  94. end;
  95. end;
  96. procedure TXMLReader.ExpectWhitespace;
  97. begin
  98. if not SkipWhitespace then
  99. RaiseExc('Expected whitespace');
  100. end;
  101. procedure TXMLReader.ExpectString(s: String);
  102. var
  103. i: Integer;
  104. s2: PChar;
  105. s3: String;
  106. begin
  107. for i := 1 to Length(s) do
  108. if buf[i - 1] <> s[i] then begin
  109. GetMem(s2, Length(s) + 1);
  110. StrLCopy(s2, buf, Length(s));
  111. s3 := StrPas(s2);
  112. FreeMem(s2, Length(s) + 1);
  113. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  114. end;
  115. Inc(buf, Length(s));
  116. end;
  117. function TXMLReader.CheckFor(s: PChar): Boolean;
  118. begin
  119. if buf[0] = #0 then begin
  120. Result := False;
  121. exit;
  122. end;
  123. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  124. Inc(buf, StrLen(s));
  125. Result := True;
  126. end else
  127. Result := False;
  128. end;
  129. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  130. begin
  131. Result := '';
  132. while buf[0] in ValidChars do begin
  133. Result := Result + buf[0];
  134. Inc(buf);
  135. end;
  136. end;
  137. function TXMLReader.ProcessXML(ABuf: PChar): TXMLDocument; // [1]
  138. var
  139. LastNodeBeforeDoc: TDOMNode;
  140. begin
  141. buf := ABuf;
  142. BufStart := ABuf;
  143. doc := TXMLDocument.Create;
  144. ExpectProlog;
  145. LastNodeBeforeDoc := doc.LastChild;
  146. ExpectElement(doc);
  147. ParseMisc(doc);
  148. {
  149. if buf[0] <> #0 then begin
  150. WriteLn('=== Unparsed: ===');
  151. //WriteLn(buf);
  152. WriteLn(StrLen(buf), ' chars');
  153. end;
  154. }
  155. Result := doc;
  156. end;
  157. function TXMLReader.GetName(var s: String): Boolean; // [5]
  158. begin
  159. s := '';
  160. if not (buf[0] in (Letter + ['_', ':'])) then begin
  161. Result := False;
  162. exit;
  163. end;
  164. s := buf[0];
  165. Inc(buf);
  166. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  167. Result := True;
  168. end;
  169. function TXMLReader.ExpectName: String; // [5]
  170. begin
  171. if not (buf[0] in (Letter + ['_', ':'])) then
  172. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  173. Result := buf[0];
  174. Inc(buf);
  175. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  176. end;
  177. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  178. var
  179. strdel: array[0..1] of Char;
  180. s: String;
  181. begin
  182. if (buf[0] <> '''') and (buf[0] <> '"') then
  183. RaiseExc('Expected quotation marks');
  184. strdel[0] := buf[0];
  185. strdel[1] := #0;
  186. Inc(buf);
  187. s := '';
  188. while not CheckFor(strdel) do
  189. if not ParseReference(attr) then begin
  190. s := s + buf[0];
  191. Inc(buf);
  192. end else begin
  193. if s <> '' then begin
  194. attr.AppendChild(doc.CreateTextNode(s));
  195. s := '';
  196. end;
  197. end;
  198. if s <> '' then
  199. attr.AppendChild(doc.CreateTextNode(s));
  200. end;
  201. function TXMLReader.ExpectPubidLiteral: String;
  202. begin
  203. Result := '';
  204. if CheckFor('''') then begin
  205. GetString(PubidChars - ['''']);
  206. ExpectString('''');
  207. end else if CheckFor('"') then begin
  208. GetString(PubidChars - ['"']);
  209. ExpectString('"');
  210. end else
  211. RaiseExc('Expected quotation marks');
  212. end;
  213. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  214. var
  215. comment: String;
  216. begin
  217. if CheckFor('<!--') then begin
  218. comment := '';
  219. while (buf[0] <> #0) and (buf[1] <> #0) and
  220. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  221. comment := comment + buf[0];
  222. Inc(buf);
  223. end;
  224. AOwner.AppendChild(doc.CreateComment(comment));
  225. ExpectString('-->');
  226. Result := True;
  227. end else
  228. Result := False;
  229. end;
  230. function TXMLReader.ParsePI: Boolean; // [16]
  231. var
  232. checkbuf: array[0..3] of char;
  233. begin
  234. if CheckFor('<?') then begin
  235. StrLCopy(checkbuf, buf, 3);
  236. if UpCase(StrPas(checkbuf)) = 'XML' then
  237. RaiseExc('"<?xml" processing instruction not allowed here');
  238. ExpectName;
  239. if SkipWhitespace then
  240. while (buf[0] <> #0) and (buf[1] <> #0) and not
  241. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  242. ExpectString('?>');
  243. Result := True;
  244. end else
  245. Result := False;
  246. end;
  247. procedure TXMLReader.ExpectProlog; // [22]
  248. procedure ParseVersionNum;
  249. begin
  250. doc.XMLVersion :=
  251. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  252. end;
  253. begin
  254. if CheckFor('<?xml') then begin
  255. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  256. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  257. SkipWhitespace;
  258. ExpectString('version');
  259. ParseEq;
  260. if buf[0] = '''' then begin
  261. Inc(buf);
  262. ParseVersionNum;
  263. ExpectString('''');
  264. end else if buf[0] = '"' then begin
  265. Inc(buf);
  266. ParseVersionNum;
  267. ExpectString('"');
  268. end else
  269. RaiseExc('Expected single or double quotation mark');
  270. // EncodingDecl?
  271. ParseEncodingDecl;
  272. // SDDecl?
  273. SkipWhitespace;
  274. if CheckFor('standalone') then begin
  275. ExpectEq;
  276. if buf[0] = '''' then begin
  277. Inc(buf);
  278. if not (CheckFor('yes''') or CheckFor('no''')) then
  279. RaiseExc('Expected ''yes'' or ''no''');
  280. end else if buf[0] = '''' then begin
  281. Inc(buf);
  282. if not (CheckFor('yes"') or CheckFor('no"')) then
  283. RaiseExc('Expected "yes" or "no"');
  284. end;
  285. SkipWhitespace;
  286. end;
  287. ExpectString('?>');
  288. end;
  289. // Check for "Misc*"
  290. ParseMisc(doc);
  291. // Check for "(doctypedecl Misc*)?" [28]
  292. if CheckFor('<!DOCTYPE') then begin
  293. SkipWhitespace;
  294. ExpectName;
  295. SkipWhitespace;
  296. ParseExternalID;
  297. SkipWhitespace;
  298. if CheckFor('[') then begin
  299. repeat
  300. SkipWhitespace;
  301. until not (ParseMarkupDecl or ParsePEReference);
  302. ExpectString(']');
  303. SkipWhitespace;
  304. end;
  305. ExpectString('>');
  306. ParseMisc(doc);
  307. end;
  308. end;
  309. function TXMLReader.ParseEq: Boolean; // [25]
  310. var
  311. savedbuf: PChar;
  312. begin
  313. savedbuf := buf;
  314. SkipWhitespace;
  315. if buf[0] = '=' then begin
  316. Inc(buf);
  317. SkipWhitespace;
  318. Result := True;
  319. end else begin
  320. buf := savedbuf;
  321. Result := False;
  322. end;
  323. end;
  324. procedure TXMLReader.ExpectEq;
  325. begin
  326. if not ParseEq then
  327. RaiseExc('Expected "="');
  328. end;
  329. // Parse "Misc*":
  330. // Misc ::= Comment | PI | S
  331. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  332. begin
  333. repeat
  334. SkipWhitespace;
  335. until not (ParseComment(AOwner) or ParsePI);
  336. end;
  337. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  338. function ParseElementDecl: Boolean; // [45]
  339. procedure ExpectChoiceOrSeq; // [49], [50]
  340. procedure ExpectCP; // [48]
  341. begin
  342. if CheckFor('(') then
  343. ExpectChoiceOrSeq
  344. else
  345. ExpectName;
  346. if CheckFor('?') then
  347. else if CheckFor('*') then
  348. else if CheckFor('+') then;
  349. end;
  350. var
  351. delimiter: Char;
  352. begin
  353. SkipWhitespace;
  354. ExpectCP;
  355. SkipWhitespace;
  356. delimiter := #0;
  357. while not CheckFor(')') do begin
  358. if delimiter = #0 then begin
  359. if (buf[0] = '|') or (buf[0] = ',') then
  360. delimiter := buf[0]
  361. else
  362. RaiseExc('Expected "|" or ","');
  363. Inc(buf);
  364. end else
  365. ExpectString(delimiter);
  366. SkipWhitespace;
  367. ExpectCP;
  368. end;
  369. end;
  370. begin
  371. if CheckFor('<!ELEMENT') then begin
  372. ExpectWhitespace;
  373. ExpectName;
  374. ExpectWhitespace;
  375. // Get contentspec [46]
  376. if CheckFor('EMPTY') then
  377. else if CheckFor('ANY') then
  378. else if CheckFor('(') then begin
  379. SkipWhitespace;
  380. if CheckFor('#PCDATA') then begin
  381. // Parse Mixed section [51]
  382. SkipWhitespace;
  383. if not CheckFor(')') then
  384. repeat
  385. ExpectString('|');
  386. SkipWhitespace;
  387. ExpectName;
  388. until CheckFor(')*');
  389. end else begin
  390. // Parse Children section [47]
  391. ExpectChoiceOrSeq;
  392. if CheckFor('?') then
  393. else if CheckFor('*') then
  394. else if CheckFor('+') then;
  395. end;
  396. end else
  397. RaiseExc('Invalid content specification');
  398. SkipWhitespace;
  399. ExpectString('>');
  400. Result := True;
  401. end else
  402. Result := False;
  403. end;
  404. function ParseAttlistDecl: Boolean; // [52]
  405. var
  406. attr: TDOMAttr;
  407. begin
  408. if CheckFor('<!ATTLIST') then begin
  409. ExpectWhitespace;
  410. ExpectName;
  411. SkipWhitespace;
  412. while not CheckFor('>') do begin
  413. ExpectName;
  414. ExpectWhitespace;
  415. // Get AttType [54], [55], [56]
  416. if CheckFor('CDATA') then
  417. else if CheckFor('ID') then
  418. else if CheckFor('IDREF') then
  419. else if CheckFor('IDREFS') then
  420. else if CheckFor('ENTITTY') then
  421. else if CheckFor('ENTITIES') then
  422. else if CheckFor('NMTOKEN') then
  423. else if CheckFor('NMTOKENS') then
  424. else if CheckFor('NOTATION') then begin // [57], [58]
  425. ExpectWhitespace;
  426. ExpectString('(');
  427. SkipWhitespace;
  428. ExpectName;
  429. SkipWhitespace;
  430. while not CheckFor(')') do begin
  431. ExpectString('|');
  432. SkipWhitespace;
  433. ExpectName;
  434. SkipWhitespace;
  435. end;
  436. end else if CheckFor('(') then begin // [59]
  437. SkipWhitespace;
  438. GetString(Nmtoken);
  439. SkipWhitespace;
  440. while not CheckFor(')') do begin
  441. ExpectString('|');
  442. SkipWhitespace;
  443. GetString(Nmtoken);
  444. SkipWhitespace;
  445. end;
  446. end else
  447. RaiseExc('Invalid tokenized type');
  448. ExpectWhitespace;
  449. // Get DefaultDecl [60]
  450. if CheckFor('#REQUIRED') then
  451. else if CheckFor('#IMPLIED') then
  452. else begin
  453. if CheckFor('#FIXED') then
  454. SkipWhitespace;
  455. attr := doc.CreateAttribute('');
  456. ExpectAttValue(attr);
  457. end;
  458. SkipWhitespace;
  459. end;
  460. Result := True;
  461. end else
  462. Result := False;
  463. end;
  464. function ParseEntityDecl: Boolean; // [70]
  465. var
  466. NewEntity: TDOMEntity;
  467. function ParseEntityValue: Boolean; // [9]
  468. var
  469. strdel: array[0..1] of Char;
  470. begin
  471. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  472. Result := False;
  473. exit;
  474. end;
  475. strdel[0] := buf[0];
  476. strdel[1] := #0;
  477. Inc(buf);
  478. while not CheckFor(strdel) do
  479. if ParsePEReference then
  480. else if ParseReference(NewEntity) then
  481. else begin
  482. Inc(buf); // Normal haracter
  483. end;
  484. Result := True;
  485. end;
  486. begin
  487. if CheckFor('<!ENTITY') then begin
  488. ExpectWhitespace;
  489. if CheckFor('%') then begin // [72]
  490. ExpectWhitespace;
  491. NewEntity := doc.CreateEntity(ExpectName);
  492. ExpectWhitespace;
  493. // Get PEDef [74]
  494. if ParseEntityValue then
  495. else if ParseExternalID then
  496. else
  497. RaiseExc('Expected entity value or external ID');
  498. end else begin // [71]
  499. NewEntity := doc.CreateEntity(ExpectName);
  500. ExpectWhitespace;
  501. // Get EntityDef [73]
  502. if ParseEntityValue then
  503. else begin
  504. ExpectExternalID;
  505. // Get NDataDecl [76]
  506. ExpectWhitespace;
  507. ExpectString('NDATA');
  508. ExpectWhitespace;
  509. ExpectName;
  510. end;
  511. end;
  512. SkipWhitespace;
  513. ExpectString('>');
  514. Result := True;
  515. end else
  516. Result := False;
  517. end;
  518. function ParseNotationDecl: Boolean; // [82]
  519. begin
  520. if CheckFor('<!NOTATION') then begin
  521. ExpectWhitespace;
  522. ExpectName;
  523. ExpectWhitespace;
  524. if ParseExternalID then
  525. else if CheckFor('PUBLIC') then begin // [83]
  526. ExpectWhitespace;
  527. ExpectPubidLiteral;
  528. end else
  529. RaiseExc('Expected external or public ID');
  530. SkipWhitespace;
  531. ExpectString('>');
  532. Result := True;
  533. end else
  534. Result := False;
  535. end;
  536. begin
  537. Result := False;
  538. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  539. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  540. Result := True;
  541. end;
  542. function TXMLReader.ProcessDTD(ABuf: PChar): TXMLDocument;
  543. begin
  544. buf := ABuf;
  545. BufStart := ABuf;
  546. doc := TXMLDocument.Create;
  547. ParseMarkupDecl;
  548. {
  549. if buf[0] <> #0 then begin
  550. WriteLn('=== Unparsed: ===');
  551. //WriteLn(buf);
  552. WriteLn(StrLen(buf), ' chars');
  553. end;
  554. }
  555. Result := doc;
  556. end;
  557. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  558. var
  559. NewElem: TDOMElement;
  560. function ParseCharData: Boolean; // [14]
  561. var
  562. s: String;
  563. i: Integer;
  564. begin
  565. s := '';
  566. while not (buf[0] in [#0, '<', '&']) do begin
  567. s := s + buf[0];
  568. Inc(buf);
  569. end;
  570. if s <> '' then begin
  571. // Strip whitespace from end of s
  572. i := Length(s);
  573. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  574. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  575. Result := True;
  576. end else
  577. Result := False;
  578. end;
  579. function ParseCDSect: Boolean; // [18]
  580. var
  581. cdata: String;
  582. begin
  583. if CheckFor('<![CDATA[') then begin
  584. cdata := '';
  585. while not CheckFor(']]>') do begin
  586. cdata := cdata + buf[0];
  587. Inc(buf);
  588. end;
  589. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  590. Result := True;
  591. end else
  592. Result := False;
  593. end;
  594. var
  595. IsEmpty: Boolean;
  596. name: String;
  597. oldpos: PChar;
  598. attr: TDOMAttr;
  599. begin
  600. oldpos := buf;
  601. if CheckFor('<') then begin
  602. if not GetName(name) then begin
  603. buf := oldpos;
  604. Result := False;
  605. exit;
  606. end;
  607. NewElem := doc.CreateElement(name);
  608. AOwner.AppendChild(NewElem);
  609. SkipWhitespace;
  610. IsEmpty := False;
  611. while True do begin
  612. if CheckFor('/>') then begin
  613. IsEmpty := True;
  614. break;
  615. end;
  616. if CheckFor('>') then break;
  617. // Get Attribute [41]
  618. attr := doc.CreateAttribute(ExpectName);
  619. NewElem.Attributes.SetNamedItem(attr);
  620. ExpectEq;
  621. ExpectAttValue(attr);
  622. SkipWhitespace;
  623. end;
  624. if not IsEmpty then begin
  625. // Get content
  626. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  627. ParseComment(NewElem) or ParseElement(NewElem) or
  628. ParseReference(NewElem) do;
  629. // Get ETag [42]
  630. ExpectString('</');
  631. ExpectName;
  632. SkipWhitespace;
  633. ExpectString('>');
  634. end;
  635. Result := True;
  636. end else
  637. Result := False;
  638. end;
  639. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  640. begin
  641. if not ParseElement(AOwner) then
  642. RaiseExc('Expected element');
  643. end;
  644. function TXMLReader.ParsePEReference: Boolean; // [69]
  645. begin
  646. if CheckFor('%') then begin
  647. ExpectName;
  648. ExpectString(';');
  649. Result := True;
  650. end else
  651. Result := False;
  652. end;
  653. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  654. begin
  655. if not CheckFor('&') then begin
  656. Result := False;
  657. exit;
  658. end;
  659. if CheckFor('#') then begin // Test for CharRef [66]
  660. if CheckFor('x') then begin
  661. // *** there must be at leat one digit
  662. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  663. end else
  664. // *** there must be at leat one digit
  665. while buf[0] in ['0'..'9'] do Inc(buf);
  666. end else
  667. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  668. ExpectString(';');
  669. Result := True;
  670. end;
  671. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  672. begin
  673. if not ParseReference(AOwner) then
  674. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  675. end;
  676. function TXMLReader.ParseExternalID: Boolean; // [75]
  677. function GetSystemLiteral: String;
  678. begin
  679. if buf[0] = '''' then begin
  680. Inc(buf);
  681. Result := '';
  682. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  683. Result := Result + buf[0];
  684. Inc(buf);
  685. end;
  686. ExpectString('''');
  687. end else if buf[0] = '"' then begin
  688. Inc(buf);
  689. Result := '';
  690. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  691. Result := Result + buf[0];
  692. Inc(buf);
  693. end;
  694. ExpectString('"');
  695. end;
  696. end;
  697. begin
  698. if CheckFor('SYSTEM') then begin
  699. ExpectWhitespace;
  700. GetSystemLiteral;
  701. Result := True;
  702. end else if CheckFor('PUBLIC') then begin
  703. ExpectWhitespace;
  704. ExpectPubidLiteral;
  705. ExpectWhitespace;
  706. GetSystemLiteral;
  707. Result := True;
  708. end else
  709. Result := False;
  710. end;
  711. procedure TXMLReader.ExpectExternalID;
  712. begin
  713. if not ParseExternalID then
  714. RaiseExc('Expected external ID');
  715. end;
  716. function TXMLReader.ParseEncodingDecl: String; // [80]
  717. function ParseEncName: String;
  718. begin
  719. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  720. RaiseExc('Expected character (A-Z, a-z)');
  721. Result := buf[0];
  722. Inc(buf);
  723. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  724. end;
  725. begin
  726. Result := '';
  727. SkipWhitespace;
  728. if CheckFor('encoding') then begin
  729. ExpectEq;
  730. if buf[0] = '''' then begin
  731. Inc(buf);
  732. Result := ParseEncName;
  733. ExpectString('''');
  734. end else if buf[0] = '"' then begin
  735. Inc(buf);
  736. Result := ParseEncName;
  737. ExpectString('"');
  738. end;
  739. end;
  740. end;
  741. function ReadXMLFile(var f: File): TXMLDocument;
  742. var
  743. reader: TXMLReader;
  744. buf: PChar;
  745. BufSize: LongInt;
  746. begin
  747. BufSize := FileSize(f) + 1;
  748. if BufSize <= 1 then begin
  749. Result := nil;
  750. exit;
  751. end;
  752. GetMem(buf, BufSize);
  753. BlockRead(f, buf^, BufSize - 1);
  754. buf[BufSize - 1] := #0;
  755. reader := TXMLReader.Create;
  756. Result := reader.ProcessXML(buf);
  757. FreeMem(buf, BufSize);
  758. reader.Free;
  759. end;
  760. function ReadXMLFile(var f: TStream): TXMLDocument;
  761. var
  762. reader: TXMLReader;
  763. buf: PChar;
  764. begin
  765. if f.Size = 0 then begin
  766. Result := nil;
  767. exit;
  768. end;
  769. GetMem(buf, f.Size + 1);
  770. f.Read(buf^, f.Size);
  771. buf[f.Size] := #0;
  772. reader := TXMLReader.Create;
  773. Result := reader.ProcessXML(buf);
  774. FreeMem(buf, f.Size + 1);
  775. reader.Free;
  776. end;
  777. function ReadXMLFile(const AFileName: String): TXMLDocument;
  778. var
  779. stream: TFileStream;
  780. begin
  781. stream := TFileStream.Create(AFileName, fmOpenRead);
  782. Result := ReadXMLFile(stream);
  783. stream.Free;
  784. end;
  785. function ReadDTDFile(var f: File): TXMLDocument;
  786. var
  787. reader: TXMLReader;
  788. buf: PChar;
  789. BufSize: LongInt;
  790. begin
  791. BufSize := FileSize(f) + 1;
  792. if BufSize <= 1 then begin
  793. Result := nil;
  794. end;
  795. GetMem(buf, BufSize + 1);
  796. BlockRead(f, buf^, BufSize - 1);
  797. buf[BufSize - 1] := #0;
  798. reader := TXMLReader.Create;
  799. Result := reader.ProcessDTD(buf);
  800. FreeMem(buf, BufSize);
  801. reader.Free;
  802. end;
  803. function ReadDTDFile(var f: TStream): TXMLDocument;
  804. var
  805. reader: TXMLReader;
  806. buf: PChar;
  807. begin
  808. if f.Size = 0 then begin
  809. Result := nil;
  810. exit;
  811. end;
  812. GetMem(buf, f.Size + 1);
  813. f.Read(buf^, f.Size);
  814. buf[f.Size] := #0;
  815. reader := TXMLReader.Create;
  816. Result := reader.ProcessDTD(buf);
  817. FreeMem(buf, f.Size + 1);
  818. reader.Free;
  819. end;
  820. function ReadDTDFile(const AFileName: String): TXMLDocument;
  821. var
  822. stream: TFileStream;
  823. begin
  824. stream := TFileStream.Create(AFileName, fmOpenRead);
  825. Result := ReadDTDFile(stream);
  826. stream.Free;
  827. end;
  828. end.
  829. {
  830. $Log$
  831. Revision 1.4 1999-07-11 20:20:12 michael
  832. + Fixes from Sebastian Guenther
  833. Revision 1.3 1999/07/09 21:05:51 michael
  834. + fixes from Guenther Sebastian
  835. Revision 1.2 1999/07/09 10:42:50 michael
  836. * Removed debug statements
  837. Revision 1.1 1999/07/09 08:35:09 michael
  838. + Initial implementation by Sebastian Guenther
  839. }