xmlread.pp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. Copyright (c) 1999-2000 by Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit xmlread;
  15. interface
  16. uses sysutils, classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  38. type
  39. TSetOfChar = set of Char;
  40. TXMLReader = class
  41. protected
  42. buf, BufStart: PChar;
  43. Filename: String;
  44. procedure RaiseExc(descr: String);
  45. function SkipWhitespace: Boolean;
  46. procedure ExpectWhitespace;
  47. procedure ExpectString(s: String);
  48. function CheckFor(s: PChar): Boolean;
  49. function GetString(ValidChars: TSetOfChar): String;
  50. function GetName(var s: String): Boolean;
  51. function ExpectName: String; // [5]
  52. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  53. function ExpectPubidLiteral: String; // [12]
  54. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  55. function ParsePI: Boolean; // [16]
  56. procedure ExpectProlog; // [22]
  57. function ParseEq: Boolean; // [25]
  58. procedure ExpectEq;
  59. procedure ParseMisc(AOwner: TDOMNode); // [27]
  60. function ParseMarkupDecl: Boolean; // [29]
  61. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  62. procedure ExpectElement(AOwner: TDOMNode);
  63. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  64. procedure ExpectReference(AOwner: TDOMNode);
  65. function ParsePEReference: Boolean; // [69]
  66. function ParseExternalID: Boolean; // [75]
  67. procedure ExpectExternalID;
  68. function ParseEncodingDecl: String; // [80]
  69. public
  70. doc: TXMLDocument;
  71. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  72. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  73. end;
  74. procedure TXMLReader.RaiseExc(descr: String);
  75. var
  76. apos: PChar;
  77. x, y: Integer;
  78. begin
  79. // find out the line in which the error occured
  80. apos := BufStart;
  81. x := 1;
  82. y := 1;
  83. while apos < buf do begin
  84. if apos[0] = #10 then begin
  85. Inc(y);
  86. x := 1;
  87. end else
  88. Inc(x);
  89. Inc(apos);
  90. end;
  91. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  92. IntToStr(x) + '): ' + descr);
  93. end;
  94. function TXMLReader.SkipWhitespace: Boolean;
  95. begin
  96. Result := False;
  97. while buf[0] in [#9, #10, #13, ' '] do begin
  98. Inc(buf);
  99. Result := True;
  100. end;
  101. end;
  102. procedure TXMLReader.ExpectWhitespace;
  103. begin
  104. if not SkipWhitespace then
  105. RaiseExc('Expected whitespace');
  106. end;
  107. procedure TXMLReader.ExpectString(s: String);
  108. var
  109. i: Integer;
  110. s2: PChar;
  111. s3: String;
  112. begin
  113. for i := 1 to Length(s) do
  114. if buf[i - 1] <> s[i] then begin
  115. GetMem(s2, Length(s) + 1);
  116. StrLCopy(s2, buf, Length(s));
  117. s3 := StrPas(s2);
  118. FreeMem(s2, Length(s) + 1);
  119. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  120. end;
  121. Inc(buf, Length(s));
  122. end;
  123. function TXMLReader.CheckFor(s: PChar): Boolean;
  124. begin
  125. if buf[0] = #0 then begin
  126. Result := False;
  127. exit;
  128. end;
  129. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  130. Inc(buf, StrLen(s));
  131. Result := True;
  132. end else
  133. Result := False;
  134. end;
  135. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  136. begin
  137. SetLength(Result, 0);
  138. while buf[0] in ValidChars do begin
  139. Result := Result + buf[0];
  140. Inc(buf);
  141. end;
  142. end;
  143. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  144. var
  145. LastNodeBeforeDoc: TDOMNode;
  146. begin
  147. buf := ABuf;
  148. BufStart := ABuf;
  149. Filename := AFilename;
  150. doc := TXMLDocument.Create;
  151. ExpectProlog;
  152. LastNodeBeforeDoc := doc.LastChild;
  153. ExpectElement(doc);
  154. ParseMisc(doc);
  155. if Assigned(LastNodeBeforeDoc) then
  156. doc.SetDocumentElement(LastNodeBeforeDoc.NextSibling as TDOMElement)
  157. else
  158. doc.SetDocumentElement(doc.FirstChild as TDOMElement);
  159. if buf[0] <> #0 then
  160. RaiseExc('Text after end of document element found');
  161. {
  162. if buf[0] <> #0 then begin
  163. WriteLn('=== Unparsed: ===');
  164. //WriteLn(buf);
  165. WriteLn(StrLen(buf), ' chars');
  166. end;
  167. }
  168. end;
  169. function TXMLReader.GetName(var s: String): Boolean; // [5]
  170. begin
  171. SetLength(s, 0);
  172. if not (buf[0] in (Letter + ['_', ':'])) then begin
  173. Result := False;
  174. exit;
  175. end;
  176. s := buf[0];
  177. Inc(buf);
  178. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  179. Result := True;
  180. end;
  181. function TXMLReader.ExpectName: String; // [5]
  182. begin
  183. if not (buf[0] in (Letter + ['_', ':'])) then
  184. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  185. Result := buf[0];
  186. Inc(buf);
  187. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  188. end;
  189. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  190. var
  191. strdel: array[0..1] of Char;
  192. s: String;
  193. begin
  194. if (buf[0] <> '''') and (buf[0] <> '"') then
  195. RaiseExc('Expected quotation marks');
  196. strdel[0] := buf[0];
  197. strdel[1] := #0;
  198. Inc(buf);
  199. SetLength(s, 0);
  200. while not CheckFor(strdel) do
  201. if not ParseReference(attr) then begin
  202. s := s + buf[0];
  203. Inc(buf);
  204. end else begin
  205. if Length(s) > 0 then begin
  206. attr.AppendChild(doc.CreateTextNode(s));
  207. SetLength(s, 0);
  208. end;
  209. end;
  210. if Length(s) > 0 then
  211. //attr.AppendChild(doc.CreateTextNode(s));
  212. attr.NodeValue := s;
  213. end;
  214. function TXMLReader.ExpectPubidLiteral: String;
  215. begin
  216. SetLength(Result, 0);
  217. if CheckFor('''') then begin
  218. GetString(PubidChars - ['''']);
  219. ExpectString('''');
  220. end else if CheckFor('"') then begin
  221. GetString(PubidChars - ['"']);
  222. ExpectString('"');
  223. end else
  224. RaiseExc('Expected quotation marks');
  225. end;
  226. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  227. var
  228. comment: String;
  229. begin
  230. if CheckFor('<!--') then begin
  231. SetLength(comment, 0);
  232. while (buf[0] <> #0) and (buf[1] <> #0) and
  233. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  234. comment := comment + buf[0];
  235. Inc(buf);
  236. end;
  237. AOwner.AppendChild(doc.CreateComment(comment));
  238. ExpectString('-->');
  239. Result := True;
  240. end else
  241. Result := False;
  242. end;
  243. function TXMLReader.ParsePI: Boolean; // [16]
  244. var
  245. checkbuf: array[0..3] of char;
  246. begin
  247. if CheckFor('<?') then begin
  248. StrLCopy(checkbuf, buf, 3);
  249. if UpCase(StrPas(checkbuf)) = 'XML' then
  250. RaiseExc('"<?xml" processing instruction not allowed here');
  251. ExpectName;
  252. if SkipWhitespace then
  253. while (buf[0] <> #0) and (buf[1] <> #0) and not
  254. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  255. ExpectString('?>');
  256. Result := True;
  257. end else
  258. Result := False;
  259. end;
  260. procedure TXMLReader.ExpectProlog; // [22]
  261. procedure ParseVersionNum;
  262. begin
  263. doc.XMLVersion :=
  264. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  265. end;
  266. begin
  267. if CheckFor('<?xml') then begin
  268. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  269. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  270. SkipWhitespace;
  271. ExpectString('version');
  272. ParseEq;
  273. if buf[0] = '''' then begin
  274. Inc(buf);
  275. ParseVersionNum;
  276. ExpectString('''');
  277. end else if buf[0] = '"' then begin
  278. Inc(buf);
  279. ParseVersionNum;
  280. ExpectString('"');
  281. end else
  282. RaiseExc('Expected single or double quotation mark');
  283. // EncodingDecl?
  284. ParseEncodingDecl;
  285. // SDDecl?
  286. SkipWhitespace;
  287. if CheckFor('standalone') then begin
  288. ExpectEq;
  289. if buf[0] = '''' then begin
  290. Inc(buf);
  291. if not (CheckFor('yes''') or CheckFor('no''')) then
  292. RaiseExc('Expected ''yes'' or ''no''');
  293. end else if buf[0] = '''' then begin
  294. Inc(buf);
  295. if not (CheckFor('yes"') or CheckFor('no"')) then
  296. RaiseExc('Expected "yes" or "no"');
  297. end;
  298. SkipWhitespace;
  299. end;
  300. ExpectString('?>');
  301. end;
  302. // Check for "Misc*"
  303. ParseMisc(doc);
  304. // Check for "(doctypedecl Misc*)?" [28]
  305. if CheckFor('<!DOCTYPE') then begin
  306. SkipWhitespace;
  307. ExpectName;
  308. SkipWhitespace;
  309. ParseExternalID;
  310. SkipWhitespace;
  311. if CheckFor('[') then begin
  312. repeat
  313. SkipWhitespace;
  314. until not (ParseMarkupDecl or ParsePEReference);
  315. ExpectString(']');
  316. SkipWhitespace;
  317. end;
  318. ExpectString('>');
  319. ParseMisc(doc);
  320. end;
  321. end;
  322. function TXMLReader.ParseEq: Boolean; // [25]
  323. var
  324. savedbuf: PChar;
  325. begin
  326. savedbuf := buf;
  327. SkipWhitespace;
  328. if buf[0] = '=' then begin
  329. Inc(buf);
  330. SkipWhitespace;
  331. Result := True;
  332. end else begin
  333. buf := savedbuf;
  334. Result := False;
  335. end;
  336. end;
  337. procedure TXMLReader.ExpectEq;
  338. begin
  339. if not ParseEq then
  340. RaiseExc('Expected "="');
  341. end;
  342. // Parse "Misc*":
  343. // Misc ::= Comment | PI | S
  344. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  345. begin
  346. repeat
  347. SkipWhitespace;
  348. until not (ParseComment(AOwner) or ParsePI);
  349. end;
  350. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  351. function ParseElementDecl: Boolean; // [45]
  352. procedure ExpectChoiceOrSeq; // [49], [50]
  353. procedure ExpectCP; // [48]
  354. begin
  355. if CheckFor('(') then
  356. ExpectChoiceOrSeq
  357. else
  358. ExpectName;
  359. if CheckFor('?') then
  360. else if CheckFor('*') then
  361. else if CheckFor('+') then;
  362. end;
  363. var
  364. delimiter: Char;
  365. begin
  366. SkipWhitespace;
  367. ExpectCP;
  368. SkipWhitespace;
  369. delimiter := #0;
  370. while not CheckFor(')') do begin
  371. if delimiter = #0 then begin
  372. if (buf[0] = '|') or (buf[0] = ',') then
  373. delimiter := buf[0]
  374. else
  375. RaiseExc('Expected "|" or ","');
  376. Inc(buf);
  377. end else
  378. ExpectString(delimiter);
  379. SkipWhitespace;
  380. ExpectCP;
  381. end;
  382. end;
  383. begin
  384. if CheckFor('<!ELEMENT') then begin
  385. ExpectWhitespace;
  386. ExpectName;
  387. ExpectWhitespace;
  388. // Get contentspec [46]
  389. if CheckFor('EMPTY') then
  390. else if CheckFor('ANY') then
  391. else if CheckFor('(') then begin
  392. SkipWhitespace;
  393. if CheckFor('#PCDATA') then begin
  394. // Parse Mixed section [51]
  395. SkipWhitespace;
  396. if not CheckFor(')') then
  397. repeat
  398. ExpectString('|');
  399. SkipWhitespace;
  400. ExpectName;
  401. until CheckFor(')*');
  402. end else begin
  403. // Parse Children section [47]
  404. ExpectChoiceOrSeq;
  405. if CheckFor('?') then
  406. else if CheckFor('*') then
  407. else if CheckFor('+') then;
  408. end;
  409. end else
  410. RaiseExc('Invalid content specification');
  411. SkipWhitespace;
  412. ExpectString('>');
  413. Result := True;
  414. end else
  415. Result := False;
  416. end;
  417. function ParseAttlistDecl: Boolean; // [52]
  418. var
  419. attr: TDOMAttr;
  420. begin
  421. if CheckFor('<!ATTLIST') then begin
  422. ExpectWhitespace;
  423. ExpectName;
  424. SkipWhitespace;
  425. while not CheckFor('>') do begin
  426. ExpectName;
  427. ExpectWhitespace;
  428. // Get AttType [54], [55], [56]
  429. if CheckFor('CDATA') then
  430. else if CheckFor('ID') then
  431. else if CheckFor('IDREF') then
  432. else if CheckFor('IDREFS') then
  433. else if CheckFor('ENTITTY') then
  434. else if CheckFor('ENTITIES') then
  435. else if CheckFor('NMTOKEN') then
  436. else if CheckFor('NMTOKENS') then
  437. else if CheckFor('NOTATION') then begin // [57], [58]
  438. ExpectWhitespace;
  439. ExpectString('(');
  440. SkipWhitespace;
  441. ExpectName;
  442. SkipWhitespace;
  443. while not CheckFor(')') do begin
  444. ExpectString('|');
  445. SkipWhitespace;
  446. ExpectName;
  447. SkipWhitespace;
  448. end;
  449. end else if CheckFor('(') then begin // [59]
  450. SkipWhitespace;
  451. GetString(Nmtoken);
  452. SkipWhitespace;
  453. while not CheckFor(')') do begin
  454. ExpectString('|');
  455. SkipWhitespace;
  456. GetString(Nmtoken);
  457. SkipWhitespace;
  458. end;
  459. end else
  460. RaiseExc('Invalid tokenized type');
  461. ExpectWhitespace;
  462. // Get DefaultDecl [60]
  463. if CheckFor('#REQUIRED') then
  464. else if CheckFor('#IMPLIED') then
  465. else begin
  466. if CheckFor('#FIXED') then
  467. SkipWhitespace;
  468. attr := doc.CreateAttribute('');
  469. ExpectAttValue(attr);
  470. end;
  471. SkipWhitespace;
  472. end;
  473. Result := True;
  474. end else
  475. Result := False;
  476. end;
  477. function ParseEntityDecl: Boolean; // [70]
  478. var
  479. NewEntity: TDOMEntity;
  480. function ParseEntityValue: Boolean; // [9]
  481. var
  482. strdel: array[0..1] of Char;
  483. begin
  484. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  485. Result := False;
  486. exit;
  487. end;
  488. strdel[0] := buf[0];
  489. strdel[1] := #0;
  490. Inc(buf);
  491. while not CheckFor(strdel) do
  492. if ParsePEReference then
  493. else if ParseReference(NewEntity) then
  494. else begin
  495. Inc(buf); // Normal haracter
  496. end;
  497. Result := True;
  498. end;
  499. begin
  500. if CheckFor('<!ENTITY') then begin
  501. ExpectWhitespace;
  502. if CheckFor('%') then begin // [72]
  503. ExpectWhitespace;
  504. NewEntity := doc.CreateEntity(ExpectName);
  505. ExpectWhitespace;
  506. // Get PEDef [74]
  507. if ParseEntityValue then
  508. else if ParseExternalID then
  509. else
  510. RaiseExc('Expected entity value or external ID');
  511. end else begin // [71]
  512. NewEntity := doc.CreateEntity(ExpectName);
  513. ExpectWhitespace;
  514. // Get EntityDef [73]
  515. if ParseEntityValue then
  516. else begin
  517. ExpectExternalID;
  518. // Get NDataDecl [76]
  519. ExpectWhitespace;
  520. ExpectString('NDATA');
  521. ExpectWhitespace;
  522. ExpectName;
  523. end;
  524. end;
  525. SkipWhitespace;
  526. ExpectString('>');
  527. Result := True;
  528. end else
  529. Result := False;
  530. end;
  531. function ParseNotationDecl: Boolean; // [82]
  532. begin
  533. if CheckFor('<!NOTATION') then begin
  534. ExpectWhitespace;
  535. ExpectName;
  536. ExpectWhitespace;
  537. if ParseExternalID then
  538. else if CheckFor('PUBLIC') then begin // [83]
  539. ExpectWhitespace;
  540. ExpectPubidLiteral;
  541. end else
  542. RaiseExc('Expected external or public ID');
  543. SkipWhitespace;
  544. ExpectString('>');
  545. Result := True;
  546. end else
  547. Result := False;
  548. end;
  549. begin
  550. Result := False;
  551. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  552. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  553. Result := True;
  554. end;
  555. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  556. begin
  557. buf := ABuf;
  558. BufStart := ABuf;
  559. Filename := AFilename;
  560. doc := TXMLDocument.Create;
  561. ParseMarkupDecl;
  562. {
  563. if buf[0] <> #0 then begin
  564. WriteLn('=== Unparsed: ===');
  565. //WriteLn(buf);
  566. WriteLn(StrLen(buf), ' chars');
  567. end;
  568. }
  569. end;
  570. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  571. var
  572. NewElem: TDOMElement;
  573. function ParseCharData: Boolean; // [14]
  574. var
  575. s: String;
  576. i: Integer;
  577. begin
  578. SetLength(s, 0);
  579. while not (buf[0] in [#0, '<', '&']) do begin
  580. s := s + buf[0];
  581. Inc(buf);
  582. end;
  583. if Length(s) > 0 then begin
  584. // Strip whitespace from end of s
  585. i := Length(s);
  586. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  587. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  588. Result := True;
  589. end else
  590. Result := False;
  591. end;
  592. function ParseCDSect: Boolean; // [18]
  593. var
  594. cdata: String;
  595. begin
  596. if CheckFor('<![CDATA[') then begin
  597. SetLength(cdata, 0);
  598. while not CheckFor(']]>') do begin
  599. cdata := cdata + buf[0];
  600. Inc(buf);
  601. end;
  602. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  603. Result := True;
  604. end else
  605. Result := False;
  606. end;
  607. var
  608. IsEmpty: Boolean;
  609. name: String;
  610. oldpos: PChar;
  611. attr: TDOMAttr;
  612. begin
  613. oldpos := buf;
  614. if CheckFor('<') then begin
  615. if not GetName(name) then begin
  616. buf := oldpos;
  617. Result := False;
  618. exit;
  619. end;
  620. NewElem := doc.CreateElement(name);
  621. AOwner.AppendChild(NewElem);
  622. SkipWhitespace;
  623. IsEmpty := False;
  624. while True do begin
  625. if CheckFor('/>') then begin
  626. IsEmpty := True;
  627. break;
  628. end;
  629. if CheckFor('>') then break;
  630. // Get Attribute [41]
  631. attr := doc.CreateAttribute(ExpectName);
  632. NewElem.Attributes.SetNamedItem(attr);
  633. ExpectEq;
  634. ExpectAttValue(attr);
  635. SkipWhitespace;
  636. end;
  637. if not IsEmpty then begin
  638. // Get content
  639. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  640. ParseComment(NewElem) or ParseElement(NewElem) or
  641. ParseReference(NewElem) do;
  642. // Get ETag [42]
  643. ExpectString('</');
  644. ExpectName;
  645. SkipWhitespace;
  646. ExpectString('>');
  647. end;
  648. Result := True;
  649. end else
  650. Result := False;
  651. end;
  652. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  653. begin
  654. if not ParseElement(AOwner) then
  655. RaiseExc('Expected element');
  656. end;
  657. function TXMLReader.ParsePEReference: Boolean; // [69]
  658. begin
  659. if CheckFor('%') then begin
  660. ExpectName;
  661. ExpectString(';');
  662. Result := True;
  663. end else
  664. Result := False;
  665. end;
  666. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  667. begin
  668. if not CheckFor('&') then begin
  669. Result := False;
  670. exit;
  671. end;
  672. if CheckFor('#') then begin // Test for CharRef [66]
  673. if CheckFor('x') then begin
  674. // *** there must be at leat one digit
  675. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  676. end else
  677. // *** there must be at leat one digit
  678. while buf[0] in ['0'..'9'] do Inc(buf);
  679. end else
  680. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  681. ExpectString(';');
  682. Result := True;
  683. end;
  684. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  685. begin
  686. if not ParseReference(AOwner) then
  687. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  688. end;
  689. function TXMLReader.ParseExternalID: Boolean; // [75]
  690. function GetSystemLiteral: String;
  691. begin
  692. SetLength(Result, 0);
  693. if buf[0] = '''' then begin
  694. Inc(buf);
  695. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  696. Result := Result + buf[0];
  697. Inc(buf);
  698. end;
  699. ExpectString('''');
  700. end else if buf[0] = '"' then begin
  701. Inc(buf);
  702. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  703. Result := Result + buf[0];
  704. Inc(buf);
  705. end;
  706. ExpectString('"');
  707. end;
  708. end;
  709. begin
  710. if CheckFor('SYSTEM') then begin
  711. ExpectWhitespace;
  712. GetSystemLiteral;
  713. Result := True;
  714. end else if CheckFor('PUBLIC') then begin
  715. ExpectWhitespace;
  716. ExpectPubidLiteral;
  717. ExpectWhitespace;
  718. GetSystemLiteral;
  719. Result := True;
  720. end else
  721. Result := False;
  722. end;
  723. procedure TXMLReader.ExpectExternalID;
  724. begin
  725. if not ParseExternalID then
  726. RaiseExc('Expected external ID');
  727. end;
  728. function TXMLReader.ParseEncodingDecl: String; // [80]
  729. function ParseEncName: String;
  730. begin
  731. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  732. RaiseExc('Expected character (A-Z, a-z)');
  733. Result := buf[0];
  734. Inc(buf);
  735. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  736. end;
  737. begin
  738. SetLength(Result, 0);
  739. SkipWhitespace;
  740. if CheckFor('encoding') then begin
  741. ExpectEq;
  742. if buf[0] = '''' then begin
  743. Inc(buf);
  744. Result := ParseEncName;
  745. ExpectString('''');
  746. end else if buf[0] = '"' then begin
  747. Inc(buf);
  748. Result := ParseEncName;
  749. ExpectString('"');
  750. end;
  751. end;
  752. end;
  753. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  754. var
  755. reader: TXMLReader;
  756. buf: PChar;
  757. BufSize: LongInt;
  758. begin
  759. ADoc := nil;
  760. BufSize := FileSize(f) + 1;
  761. if BufSize <= 1 then exit;
  762. GetMem(buf, BufSize);
  763. BlockRead(f, buf^, BufSize - 1);
  764. buf[BufSize - 1] := #0;
  765. reader := TXMLReader.Create;
  766. reader.ProcessXML(buf, Filerec(f).name);
  767. FreeMem(buf, BufSize);
  768. ADoc := reader.doc;
  769. reader.Free;
  770. end;
  771. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  772. const AFilename: String);
  773. var
  774. reader: TXMLReader;
  775. buf: PChar;
  776. begin
  777. ADoc := nil;
  778. if f.Size = 0 then exit;
  779. GetMem(buf, f.Size + 1);
  780. f.Read(buf^, f.Size);
  781. buf[f.Size] := #0;
  782. reader := TXMLReader.Create;
  783. reader.ProcessXML(buf, AFilename);
  784. FreeMem(buf, f.Size + 1);
  785. ADoc := reader.doc;
  786. reader.Free;
  787. end;
  788. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  789. begin
  790. ReadXMLFile(ADoc, f, '<Stream>');
  791. end;
  792. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  793. var
  794. stream: TFileStream;
  795. begin
  796. ADoc := nil;
  797. stream := TFileStream.Create(AFilename, fmOpenRead);
  798. try
  799. ReadXMLFile(ADoc, stream, AFilename);
  800. finally
  801. stream.Free;
  802. end;
  803. end;
  804. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  805. var
  806. reader: TXMLReader;
  807. buf: PChar;
  808. BufSize: LongInt;
  809. begin
  810. ADoc := nil;
  811. BufSize := FileSize(f) + 1;
  812. if BufSize <= 1 then exit;
  813. GetMem(buf, BufSize + 1);
  814. BlockRead(f, buf^, BufSize - 1);
  815. buf[BufSize - 1] := #0;
  816. reader := TXMLReader.Create;
  817. reader.ProcessDTD(buf, Filerec(f).name);
  818. FreeMem(buf, BufSize);
  819. ADoc := reader.doc;
  820. reader.Free;
  821. end;
  822. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  823. const AFilename: String);
  824. var
  825. reader: TXMLReader;
  826. buf: PChar;
  827. begin
  828. ADoc := nil;
  829. if f.Size = 0 then exit;
  830. GetMem(buf, f.Size + 1);
  831. f.Read(buf^, f.Size);
  832. buf[f.Size] := #0;
  833. reader := TXMLReader.Create;
  834. reader.ProcessDTD(buf, AFilename);
  835. FreeMem(buf, f.Size + 1);
  836. ADoc := reader.doc;
  837. reader.Free;
  838. end;
  839. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  840. begin
  841. ReadDTDFile(ADoc, f, '<Stream>');
  842. end;
  843. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  844. var
  845. stream: TFileStream;
  846. begin
  847. ADoc := nil;
  848. stream := TFileStream.Create(AFilename, fmOpenRead);
  849. try
  850. ReadDTDFile(ADoc, stream, AFilename);
  851. finally
  852. stream.Free;
  853. end;
  854. end;
  855. end.
  856. {
  857. $Log$
  858. Revision 1.14 2000-01-30 22:19:13 sg
  859. * Made some optimizations and cosmetic changes
  860. Revision 1.13 2000/01/07 01:24:34 peter
  861. * updated copyright to 2000
  862. Revision 1.12 2000/01/06 01:20:37 peter
  863. * moved out of packages/ back to topdir
  864. Revision 1.1 2000/01/03 19:33:11 peter
  865. * moved to packages dir
  866. Revision 1.10 1999/12/22 13:39:55 sg
  867. * Fixed parser bug: SetDocumentElement failed if the XML document contains
  868. only a single element at the top hierarchy level
  869. * Changed the error message if there is text after the end of the main
  870. XML element
  871. Revision 1.9 1999/12/05 22:02:11 sg
  872. * The reader now sets the DocumentElement for a DOM document
  873. * The XML parser raises an exception if there is additional data after
  874. the end of the XML document element
  875. Revision 1.8 1999/08/10 15:39:59 michael
  876. * restored previous setting
  877. Revision 1.6 1999/07/27 13:01:59 peter
  878. * remove filerec.inc, it was missing from sysutils! You shouldn't need
  879. to compile with -Irtl/inc !!
  880. Revision 1.5 1999/07/25 16:24:14 michael
  881. + Fixes from Sebastiam Guenther - more error-proof
  882. Revision 1.4 1999/07/11 20:20:12 michael
  883. + Fixes from Sebastian Guenther
  884. Revision 1.3 1999/07/09 21:05:51 michael
  885. + fixes from Guenther Sebastian
  886. Revision 1.2 1999/07/09 10:42:50 michael
  887. * Removed debug statements
  888. Revision 1.1 1999/07/09 08:35:09 michael
  889. + Initial implementation by Sebastian Guenther
  890. }
  891. --------------ECFEA19D0E6E5FF5CDAF6681--)