xmlread.pp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999 Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit xmlread;
  15. interface
  16. uses sysutils, classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  38. type
  39. TSetOfChar = set of Char;
  40. TXMLReader = class
  41. protected
  42. buf, BufStart: PChar;
  43. Filename: String;
  44. procedure RaiseExc(descr: String);
  45. function SkipWhitespace: Boolean;
  46. procedure ExpectWhitespace;
  47. procedure ExpectString(s: String);
  48. function CheckFor(s: PChar): Boolean;
  49. function GetString(ValidChars: TSetOfChar): String;
  50. function GetName(var s: String): Boolean;
  51. function ExpectName: String; // [5]
  52. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  53. function ExpectPubidLiteral: String; // [12]
  54. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  55. function ParsePI: Boolean; // [16]
  56. procedure ExpectProlog; // [22]
  57. function ParseEq: Boolean; // [25]
  58. procedure ExpectEq;
  59. procedure ParseMisc(AOwner: TDOMNode); // [27]
  60. function ParseMarkupDecl: Boolean; // [29]
  61. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  62. procedure ExpectElement(AOwner: TDOMNode);
  63. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  64. procedure ExpectReference(AOwner: TDOMNode);
  65. function ParsePEReference: Boolean; // [69]
  66. function ParseExternalID: Boolean; // [75]
  67. procedure ExpectExternalID;
  68. function ParseEncodingDecl: String; // [80]
  69. public
  70. doc: TXMLDocument;
  71. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  72. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  73. end;
  74. procedure TXMLReader.RaiseExc(descr: String);
  75. var
  76. apos: PChar;
  77. x, y: Integer;
  78. begin
  79. // find out the line in which the error occured
  80. apos := BufStart;
  81. x := 1;
  82. y := 1;
  83. while apos < buf do begin
  84. if apos[0] = #10 then begin
  85. Inc(y);
  86. x := 1;
  87. end else
  88. Inc(x);
  89. Inc(apos);
  90. end;
  91. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  92. IntToStr(x) + '): ' + descr);
  93. end;
  94. function TXMLReader.SkipWhitespace: Boolean;
  95. begin
  96. Result := False;
  97. while buf[0] in [#9, #10, #13, ' '] do begin
  98. Inc(buf);
  99. Result := True;
  100. end;
  101. end;
  102. procedure TXMLReader.ExpectWhitespace;
  103. begin
  104. if not SkipWhitespace then
  105. RaiseExc('Expected whitespace');
  106. end;
  107. procedure TXMLReader.ExpectString(s: String);
  108. var
  109. i: Integer;
  110. s2: PChar;
  111. s3: String;
  112. begin
  113. for i := 1 to Length(s) do
  114. if buf[i - 1] <> s[i] then begin
  115. GetMem(s2, Length(s) + 1);
  116. StrLCopy(s2, buf, Length(s));
  117. s3 := StrPas(s2);
  118. FreeMem(s2, Length(s) + 1);
  119. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  120. end;
  121. Inc(buf, Length(s));
  122. end;
  123. function TXMLReader.CheckFor(s: PChar): Boolean;
  124. begin
  125. if buf[0] = #0 then begin
  126. Result := False;
  127. exit;
  128. end;
  129. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  130. Inc(buf, StrLen(s));
  131. Result := True;
  132. end else
  133. Result := False;
  134. end;
  135. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  136. begin
  137. Result := '';
  138. while buf[0] in ValidChars do begin
  139. Result := Result + buf[0];
  140. Inc(buf);
  141. end;
  142. end;
  143. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  144. var
  145. LastNodeBeforeDoc: TDOMNode;
  146. begin
  147. buf := ABuf;
  148. BufStart := ABuf;
  149. Filename := AFilename;
  150. doc := TXMLDocument.Create;
  151. ExpectProlog;
  152. LastNodeBeforeDoc := doc.LastChild;
  153. ExpectElement(doc);
  154. ParseMisc(doc);
  155. if Assigned(LastNodeBeforeDoc) then
  156. doc.SetDocumentElement(LastNodeBeforeDoc.NextSibling as TDOMElement)
  157. else
  158. doc.SetDocumentElement(doc.FirstChild as TDOMElement);
  159. if buf[0] <> #0 then
  160. RaiseExc('Text after end of document element found');
  161. {
  162. if buf[0] <> #0 then begin
  163. WriteLn('=== Unparsed: ===');
  164. //WriteLn(buf);
  165. WriteLn(StrLen(buf), ' chars');
  166. end;
  167. }
  168. end;
  169. function TXMLReader.GetName(var s: String): Boolean; // [5]
  170. begin
  171. s := '';
  172. if not (buf[0] in (Letter + ['_', ':'])) then begin
  173. Result := False;
  174. exit;
  175. end;
  176. s := buf[0];
  177. Inc(buf);
  178. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  179. Result := True;
  180. end;
  181. function TXMLReader.ExpectName: String; // [5]
  182. begin
  183. if not (buf[0] in (Letter + ['_', ':'])) then
  184. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  185. Result := buf[0];
  186. Inc(buf);
  187. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  188. end;
  189. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  190. var
  191. strdel: array[0..1] of Char;
  192. s: String;
  193. begin
  194. if (buf[0] <> '''') and (buf[0] <> '"') then
  195. RaiseExc('Expected quotation marks');
  196. strdel[0] := buf[0];
  197. strdel[1] := #0;
  198. Inc(buf);
  199. s := '';
  200. while not CheckFor(strdel) do
  201. if not ParseReference(attr) then begin
  202. s := s + buf[0];
  203. Inc(buf);
  204. end else begin
  205. if s <> '' then begin
  206. attr.AppendChild(doc.CreateTextNode(s));
  207. s := '';
  208. end;
  209. end;
  210. if s <> '' then
  211. attr.AppendChild(doc.CreateTextNode(s));
  212. end;
  213. function TXMLReader.ExpectPubidLiteral: String;
  214. begin
  215. Result := '';
  216. if CheckFor('''') then begin
  217. GetString(PubidChars - ['''']);
  218. ExpectString('''');
  219. end else if CheckFor('"') then begin
  220. GetString(PubidChars - ['"']);
  221. ExpectString('"');
  222. end else
  223. RaiseExc('Expected quotation marks');
  224. end;
  225. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  226. var
  227. comment: String;
  228. begin
  229. if CheckFor('<!--') then begin
  230. comment := '';
  231. while (buf[0] <> #0) and (buf[1] <> #0) and
  232. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  233. comment := comment + buf[0];
  234. Inc(buf);
  235. end;
  236. AOwner.AppendChild(doc.CreateComment(comment));
  237. ExpectString('-->');
  238. Result := True;
  239. end else
  240. Result := False;
  241. end;
  242. function TXMLReader.ParsePI: Boolean; // [16]
  243. var
  244. checkbuf: array[0..3] of char;
  245. begin
  246. if CheckFor('<?') then begin
  247. StrLCopy(checkbuf, buf, 3);
  248. if UpCase(StrPas(checkbuf)) = 'XML' then
  249. RaiseExc('"<?xml" processing instruction not allowed here');
  250. ExpectName;
  251. if SkipWhitespace then
  252. while (buf[0] <> #0) and (buf[1] <> #0) and not
  253. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  254. ExpectString('?>');
  255. Result := True;
  256. end else
  257. Result := False;
  258. end;
  259. procedure TXMLReader.ExpectProlog; // [22]
  260. procedure ParseVersionNum;
  261. begin
  262. doc.XMLVersion :=
  263. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  264. end;
  265. begin
  266. if CheckFor('<?xml') then begin
  267. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  268. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  269. SkipWhitespace;
  270. ExpectString('version');
  271. ParseEq;
  272. if buf[0] = '''' then begin
  273. Inc(buf);
  274. ParseVersionNum;
  275. ExpectString('''');
  276. end else if buf[0] = '"' then begin
  277. Inc(buf);
  278. ParseVersionNum;
  279. ExpectString('"');
  280. end else
  281. RaiseExc('Expected single or double quotation mark');
  282. // EncodingDecl?
  283. ParseEncodingDecl;
  284. // SDDecl?
  285. SkipWhitespace;
  286. if CheckFor('standalone') then begin
  287. ExpectEq;
  288. if buf[0] = '''' then begin
  289. Inc(buf);
  290. if not (CheckFor('yes''') or CheckFor('no''')) then
  291. RaiseExc('Expected ''yes'' or ''no''');
  292. end else if buf[0] = '''' then begin
  293. Inc(buf);
  294. if not (CheckFor('yes"') or CheckFor('no"')) then
  295. RaiseExc('Expected "yes" or "no"');
  296. end;
  297. SkipWhitespace;
  298. end;
  299. ExpectString('?>');
  300. end;
  301. // Check for "Misc*"
  302. ParseMisc(doc);
  303. // Check for "(doctypedecl Misc*)?" [28]
  304. if CheckFor('<!DOCTYPE') then begin
  305. SkipWhitespace;
  306. ExpectName;
  307. SkipWhitespace;
  308. ParseExternalID;
  309. SkipWhitespace;
  310. if CheckFor('[') then begin
  311. repeat
  312. SkipWhitespace;
  313. until not (ParseMarkupDecl or ParsePEReference);
  314. ExpectString(']');
  315. SkipWhitespace;
  316. end;
  317. ExpectString('>');
  318. ParseMisc(doc);
  319. end;
  320. end;
  321. function TXMLReader.ParseEq: Boolean; // [25]
  322. var
  323. savedbuf: PChar;
  324. begin
  325. savedbuf := buf;
  326. SkipWhitespace;
  327. if buf[0] = '=' then begin
  328. Inc(buf);
  329. SkipWhitespace;
  330. Result := True;
  331. end else begin
  332. buf := savedbuf;
  333. Result := False;
  334. end;
  335. end;
  336. procedure TXMLReader.ExpectEq;
  337. begin
  338. if not ParseEq then
  339. RaiseExc('Expected "="');
  340. end;
  341. // Parse "Misc*":
  342. // Misc ::= Comment | PI | S
  343. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  344. begin
  345. repeat
  346. SkipWhitespace;
  347. until not (ParseComment(AOwner) or ParsePI);
  348. end;
  349. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  350. function ParseElementDecl: Boolean; // [45]
  351. procedure ExpectChoiceOrSeq; // [49], [50]
  352. procedure ExpectCP; // [48]
  353. begin
  354. if CheckFor('(') then
  355. ExpectChoiceOrSeq
  356. else
  357. ExpectName;
  358. if CheckFor('?') then
  359. else if CheckFor('*') then
  360. else if CheckFor('+') then;
  361. end;
  362. var
  363. delimiter: Char;
  364. begin
  365. SkipWhitespace;
  366. ExpectCP;
  367. SkipWhitespace;
  368. delimiter := #0;
  369. while not CheckFor(')') do begin
  370. if delimiter = #0 then begin
  371. if (buf[0] = '|') or (buf[0] = ',') then
  372. delimiter := buf[0]
  373. else
  374. RaiseExc('Expected "|" or ","');
  375. Inc(buf);
  376. end else
  377. ExpectString(delimiter);
  378. SkipWhitespace;
  379. ExpectCP;
  380. end;
  381. end;
  382. begin
  383. if CheckFor('<!ELEMENT') then begin
  384. ExpectWhitespace;
  385. ExpectName;
  386. ExpectWhitespace;
  387. // Get contentspec [46]
  388. if CheckFor('EMPTY') then
  389. else if CheckFor('ANY') then
  390. else if CheckFor('(') then begin
  391. SkipWhitespace;
  392. if CheckFor('#PCDATA') then begin
  393. // Parse Mixed section [51]
  394. SkipWhitespace;
  395. if not CheckFor(')') then
  396. repeat
  397. ExpectString('|');
  398. SkipWhitespace;
  399. ExpectName;
  400. until CheckFor(')*');
  401. end else begin
  402. // Parse Children section [47]
  403. ExpectChoiceOrSeq;
  404. if CheckFor('?') then
  405. else if CheckFor('*') then
  406. else if CheckFor('+') then;
  407. end;
  408. end else
  409. RaiseExc('Invalid content specification');
  410. SkipWhitespace;
  411. ExpectString('>');
  412. Result := True;
  413. end else
  414. Result := False;
  415. end;
  416. function ParseAttlistDecl: Boolean; // [52]
  417. var
  418. attr: TDOMAttr;
  419. begin
  420. if CheckFor('<!ATTLIST') then begin
  421. ExpectWhitespace;
  422. ExpectName;
  423. SkipWhitespace;
  424. while not CheckFor('>') do begin
  425. ExpectName;
  426. ExpectWhitespace;
  427. // Get AttType [54], [55], [56]
  428. if CheckFor('CDATA') then
  429. else if CheckFor('ID') then
  430. else if CheckFor('IDREF') then
  431. else if CheckFor('IDREFS') then
  432. else if CheckFor('ENTITTY') then
  433. else if CheckFor('ENTITIES') then
  434. else if CheckFor('NMTOKEN') then
  435. else if CheckFor('NMTOKENS') then
  436. else if CheckFor('NOTATION') then begin // [57], [58]
  437. ExpectWhitespace;
  438. ExpectString('(');
  439. SkipWhitespace;
  440. ExpectName;
  441. SkipWhitespace;
  442. while not CheckFor(')') do begin
  443. ExpectString('|');
  444. SkipWhitespace;
  445. ExpectName;
  446. SkipWhitespace;
  447. end;
  448. end else if CheckFor('(') then begin // [59]
  449. SkipWhitespace;
  450. GetString(Nmtoken);
  451. SkipWhitespace;
  452. while not CheckFor(')') do begin
  453. ExpectString('|');
  454. SkipWhitespace;
  455. GetString(Nmtoken);
  456. SkipWhitespace;
  457. end;
  458. end else
  459. RaiseExc('Invalid tokenized type');
  460. ExpectWhitespace;
  461. // Get DefaultDecl [60]
  462. if CheckFor('#REQUIRED') then
  463. else if CheckFor('#IMPLIED') then
  464. else begin
  465. if CheckFor('#FIXED') then
  466. SkipWhitespace;
  467. attr := doc.CreateAttribute('');
  468. ExpectAttValue(attr);
  469. end;
  470. SkipWhitespace;
  471. end;
  472. Result := True;
  473. end else
  474. Result := False;
  475. end;
  476. function ParseEntityDecl: Boolean; // [70]
  477. var
  478. NewEntity: TDOMEntity;
  479. function ParseEntityValue: Boolean; // [9]
  480. var
  481. strdel: array[0..1] of Char;
  482. begin
  483. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  484. Result := False;
  485. exit;
  486. end;
  487. strdel[0] := buf[0];
  488. strdel[1] := #0;
  489. Inc(buf);
  490. while not CheckFor(strdel) do
  491. if ParsePEReference then
  492. else if ParseReference(NewEntity) then
  493. else begin
  494. Inc(buf); // Normal haracter
  495. end;
  496. Result := True;
  497. end;
  498. begin
  499. if CheckFor('<!ENTITY') then begin
  500. ExpectWhitespace;
  501. if CheckFor('%') then begin // [72]
  502. ExpectWhitespace;
  503. NewEntity := doc.CreateEntity(ExpectName);
  504. ExpectWhitespace;
  505. // Get PEDef [74]
  506. if ParseEntityValue then
  507. else if ParseExternalID then
  508. else
  509. RaiseExc('Expected entity value or external ID');
  510. end else begin // [71]
  511. NewEntity := doc.CreateEntity(ExpectName);
  512. ExpectWhitespace;
  513. // Get EntityDef [73]
  514. if ParseEntityValue then
  515. else begin
  516. ExpectExternalID;
  517. // Get NDataDecl [76]
  518. ExpectWhitespace;
  519. ExpectString('NDATA');
  520. ExpectWhitespace;
  521. ExpectName;
  522. end;
  523. end;
  524. SkipWhitespace;
  525. ExpectString('>');
  526. Result := True;
  527. end else
  528. Result := False;
  529. end;
  530. function ParseNotationDecl: Boolean; // [82]
  531. begin
  532. if CheckFor('<!NOTATION') then begin
  533. ExpectWhitespace;
  534. ExpectName;
  535. ExpectWhitespace;
  536. if ParseExternalID then
  537. else if CheckFor('PUBLIC') then begin // [83]
  538. ExpectWhitespace;
  539. ExpectPubidLiteral;
  540. end else
  541. RaiseExc('Expected external or public ID');
  542. SkipWhitespace;
  543. ExpectString('>');
  544. Result := True;
  545. end else
  546. Result := False;
  547. end;
  548. begin
  549. Result := False;
  550. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  551. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  552. Result := True;
  553. end;
  554. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  555. begin
  556. buf := ABuf;
  557. BufStart := ABuf;
  558. Filename := AFilename;
  559. doc := TXMLDocument.Create;
  560. ParseMarkupDecl;
  561. {
  562. if buf[0] <> #0 then begin
  563. WriteLn('=== Unparsed: ===');
  564. //WriteLn(buf);
  565. WriteLn(StrLen(buf), ' chars');
  566. end;
  567. }
  568. end;
  569. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  570. var
  571. NewElem: TDOMElement;
  572. function ParseCharData: Boolean; // [14]
  573. var
  574. s: String;
  575. i: Integer;
  576. begin
  577. s := '';
  578. while not (buf[0] in [#0, '<', '&']) do begin
  579. s := s + buf[0];
  580. Inc(buf);
  581. end;
  582. if s <> '' then begin
  583. // Strip whitespace from end of s
  584. i := Length(s);
  585. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  586. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  587. Result := True;
  588. end else
  589. Result := False;
  590. end;
  591. function ParseCDSect: Boolean; // [18]
  592. var
  593. cdata: String;
  594. begin
  595. if CheckFor('<![CDATA[') then begin
  596. cdata := '';
  597. while not CheckFor(']]>') do begin
  598. cdata := cdata + buf[0];
  599. Inc(buf);
  600. end;
  601. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  602. Result := True;
  603. end else
  604. Result := False;
  605. end;
  606. var
  607. IsEmpty: Boolean;
  608. name: String;
  609. oldpos: PChar;
  610. attr: TDOMAttr;
  611. begin
  612. oldpos := buf;
  613. if CheckFor('<') then begin
  614. if not GetName(name) then begin
  615. buf := oldpos;
  616. Result := False;
  617. exit;
  618. end;
  619. NewElem := doc.CreateElement(name);
  620. AOwner.AppendChild(NewElem);
  621. SkipWhitespace;
  622. IsEmpty := False;
  623. while True do begin
  624. if CheckFor('/>') then begin
  625. IsEmpty := True;
  626. break;
  627. end;
  628. if CheckFor('>') then break;
  629. // Get Attribute [41]
  630. attr := doc.CreateAttribute(ExpectName);
  631. NewElem.Attributes.SetNamedItem(attr);
  632. ExpectEq;
  633. ExpectAttValue(attr);
  634. SkipWhitespace;
  635. end;
  636. if not IsEmpty then begin
  637. // Get content
  638. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  639. ParseComment(NewElem) or ParseElement(NewElem) or
  640. ParseReference(NewElem) do;
  641. // Get ETag [42]
  642. ExpectString('</');
  643. ExpectName;
  644. SkipWhitespace;
  645. ExpectString('>');
  646. end;
  647. Result := True;
  648. end else
  649. Result := False;
  650. end;
  651. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  652. begin
  653. if not ParseElement(AOwner) then
  654. RaiseExc('Expected element');
  655. end;
  656. function TXMLReader.ParsePEReference: Boolean; // [69]
  657. begin
  658. if CheckFor('%') then begin
  659. ExpectName;
  660. ExpectString(';');
  661. Result := True;
  662. end else
  663. Result := False;
  664. end;
  665. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  666. begin
  667. if not CheckFor('&') then begin
  668. Result := False;
  669. exit;
  670. end;
  671. if CheckFor('#') then begin // Test for CharRef [66]
  672. if CheckFor('x') then begin
  673. // *** there must be at leat one digit
  674. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  675. end else
  676. // *** there must be at leat one digit
  677. while buf[0] in ['0'..'9'] do Inc(buf);
  678. end else
  679. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  680. ExpectString(';');
  681. Result := True;
  682. end;
  683. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  684. begin
  685. if not ParseReference(AOwner) then
  686. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  687. end;
  688. function TXMLReader.ParseExternalID: Boolean; // [75]
  689. function GetSystemLiteral: String;
  690. begin
  691. if buf[0] = '''' then begin
  692. Inc(buf);
  693. Result := '';
  694. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  695. Result := Result + buf[0];
  696. Inc(buf);
  697. end;
  698. ExpectString('''');
  699. end else if buf[0] = '"' then begin
  700. Inc(buf);
  701. Result := '';
  702. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  703. Result := Result + buf[0];
  704. Inc(buf);
  705. end;
  706. ExpectString('"');
  707. end;
  708. end;
  709. begin
  710. if CheckFor('SYSTEM') then begin
  711. ExpectWhitespace;
  712. GetSystemLiteral;
  713. Result := True;
  714. end else if CheckFor('PUBLIC') then begin
  715. ExpectWhitespace;
  716. ExpectPubidLiteral;
  717. ExpectWhitespace;
  718. GetSystemLiteral;
  719. Result := True;
  720. end else
  721. Result := False;
  722. end;
  723. procedure TXMLReader.ExpectExternalID;
  724. begin
  725. if not ParseExternalID then
  726. RaiseExc('Expected external ID');
  727. end;
  728. function TXMLReader.ParseEncodingDecl: String; // [80]
  729. function ParseEncName: String;
  730. begin
  731. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  732. RaiseExc('Expected character (A-Z, a-z)');
  733. Result := buf[0];
  734. Inc(buf);
  735. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  736. end;
  737. begin
  738. Result := '';
  739. SkipWhitespace;
  740. if CheckFor('encoding') then begin
  741. ExpectEq;
  742. if buf[0] = '''' then begin
  743. Inc(buf);
  744. Result := ParseEncName;
  745. ExpectString('''');
  746. end else if buf[0] = '"' then begin
  747. Inc(buf);
  748. Result := ParseEncName;
  749. ExpectString('"');
  750. end;
  751. end;
  752. end;
  753. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  754. var
  755. reader: TXMLReader;
  756. buf: PChar;
  757. BufSize: LongInt;
  758. begin
  759. ADoc := nil;
  760. BufSize := FileSize(f) + 1;
  761. if BufSize <= 1 then exit;
  762. GetMem(buf, BufSize);
  763. BlockRead(f, buf^, BufSize - 1);
  764. buf[BufSize - 1] := #0;
  765. reader := TXMLReader.Create;
  766. reader.ProcessXML(buf, Filerec(f).name);
  767. FreeMem(buf, BufSize);
  768. ADoc := reader.doc;
  769. reader.Free;
  770. end;
  771. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  772. const AFilename: String);
  773. var
  774. reader: TXMLReader;
  775. buf: PChar;
  776. begin
  777. ADoc := nil;
  778. if f.Size = 0 then exit;
  779. GetMem(buf, f.Size + 1);
  780. f.Read(buf^, f.Size);
  781. buf[f.Size] := #0;
  782. reader := TXMLReader.Create;
  783. reader.ProcessXML(buf, AFilename);
  784. FreeMem(buf, f.Size + 1);
  785. ADoc := reader.doc;
  786. reader.Free;
  787. end;
  788. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  789. begin
  790. ReadXMLFile(ADoc, f, '<Stream>');
  791. end;
  792. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  793. var
  794. stream: TFileStream;
  795. begin
  796. ADoc := nil;
  797. stream := TFileStream.Create(AFilename, fmOpenRead);
  798. try
  799. ReadXMLFile(ADoc, stream, AFilename);
  800. finally
  801. stream.Free;
  802. end;
  803. end;
  804. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  805. var
  806. reader: TXMLReader;
  807. buf: PChar;
  808. BufSize: LongInt;
  809. begin
  810. ADoc := nil;
  811. BufSize := FileSize(f) + 1;
  812. if BufSize <= 1 then exit;
  813. GetMem(buf, BufSize + 1);
  814. BlockRead(f, buf^, BufSize - 1);
  815. buf[BufSize - 1] := #0;
  816. reader := TXMLReader.Create;
  817. reader.ProcessDTD(buf, Filerec(f).name);
  818. FreeMem(buf, BufSize);
  819. ADoc := reader.doc;
  820. reader.Free;
  821. end;
  822. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  823. const AFilename: String);
  824. var
  825. reader: TXMLReader;
  826. buf: PChar;
  827. begin
  828. ADoc := nil;
  829. if f.Size = 0 then exit;
  830. GetMem(buf, f.Size + 1);
  831. f.Read(buf^, f.Size);
  832. buf[f.Size] := #0;
  833. reader := TXMLReader.Create;
  834. reader.ProcessDTD(buf, AFilename);
  835. FreeMem(buf, f.Size + 1);
  836. ADoc := reader.doc;
  837. reader.Free;
  838. end;
  839. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  840. begin
  841. ReadDTDFile(ADoc, f, '<Stream>');
  842. end;
  843. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  844. var
  845. stream: TFileStream;
  846. begin
  847. ADoc := nil;
  848. stream := TFileStream.Create(AFilename, fmOpenRead);
  849. try
  850. ReadDTDFile(ADoc, stream, AFilename);
  851. finally
  852. stream.Free;
  853. end;
  854. end;
  855. end.
  856. {
  857. $Log$
  858. Revision 1.10 1999-12-22 13:39:55 sg
  859. * Fixed parser bug: SetDocumentElement failed if the XML document contains
  860. only a single element at the top hierarchy level
  861. * Changed the error message if there is text after the end of the main
  862. XML element
  863. Revision 1.9 1999/12/05 22:02:11 sg
  864. * The reader now sets the DocumentElement for a DOM document
  865. * The XML parser raises an exception if there is additional data after
  866. the end of the XML document element
  867. Revision 1.8 1999/08/10 15:39:59 michael
  868. * restored previous setting
  869. Revision 1.6 1999/07/27 13:01:59 peter
  870. * remove filerec.inc, it was missing from sysutils! You shouldn't need
  871. to compile with -Irtl/inc !!
  872. Revision 1.5 1999/07/25 16:24:14 michael
  873. + Fixes from Sebastiam Guenther - more error-proof
  874. Revision 1.4 1999/07/11 20:20:12 michael
  875. + Fixes from Sebastian Guenther
  876. Revision 1.3 1999/07/09 21:05:51 michael
  877. + fixes from Guenther Sebastian
  878. Revision 1.2 1999/07/09 10:42:50 michael
  879. * Removed debug statements
  880. Revision 1.1 1999/07/09 08:35:09 michael
  881. + Initial implementation by Sebastian Guenther
  882. }
  883. --------------ECFEA19D0E6E5FF5CDAF6681--)