xmlread.pp 25 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. Copyright (c) 1999-2000 by Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit xmlread;
  15. interface
  16. uses sysutils, classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  38. type
  39. TSetOfChar = set of Char;
  40. TXMLReader = class
  41. protected
  42. buf, BufStart: PChar;
  43. Filename: String;
  44. procedure RaiseExc(descr: String);
  45. function SkipWhitespace: Boolean;
  46. procedure ExpectWhitespace;
  47. procedure ExpectString(s: String);
  48. function CheckFor(s: PChar): Boolean;
  49. function GetString(ValidChars: TSetOfChar): String;
  50. function GetName(var s: String): Boolean;
  51. function ExpectName: String; // [5]
  52. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  53. function ExpectPubidLiteral: String; // [12]
  54. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  55. function ParsePI: Boolean; // [16]
  56. procedure ExpectProlog; // [22]
  57. function ParseEq: Boolean; // [25]
  58. procedure ExpectEq;
  59. procedure ParseMisc(AOwner: TDOMNode); // [27]
  60. function ParseMarkupDecl: Boolean; // [29]
  61. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  62. procedure ExpectElement(AOwner: TDOMNode);
  63. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  64. procedure ExpectReference(AOwner: TDOMNode);
  65. function ParsePEReference: Boolean; // [69]
  66. function ParseExternalID: Boolean; // [75]
  67. procedure ExpectExternalID;
  68. function ParseEncodingDecl: String; // [80]
  69. public
  70. doc: TXMLDocument;
  71. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  72. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  73. end;
  74. procedure TXMLReader.RaiseExc(descr: String);
  75. var
  76. apos: PChar;
  77. x, y: Integer;
  78. begin
  79. // find out the line in which the error occured
  80. apos := BufStart;
  81. x := 1;
  82. y := 1;
  83. while apos < buf do begin
  84. if apos[0] = #10 then begin
  85. Inc(y);
  86. x := 1;
  87. end else
  88. Inc(x);
  89. Inc(apos);
  90. end;
  91. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  92. IntToStr(x) + '): ' + descr);
  93. end;
  94. function TXMLReader.SkipWhitespace: Boolean;
  95. begin
  96. Result := False;
  97. while buf[0] in [#9, #10, #13, ' '] do begin
  98. Inc(buf);
  99. Result := True;
  100. end;
  101. end;
  102. procedure TXMLReader.ExpectWhitespace;
  103. begin
  104. if not SkipWhitespace then
  105. RaiseExc('Expected whitespace');
  106. end;
  107. procedure TXMLReader.ExpectString(s: String);
  108. var
  109. i: Integer;
  110. s2: PChar;
  111. s3: String;
  112. begin
  113. for i := 1 to Length(s) do
  114. if buf[i - 1] <> s[i] then begin
  115. GetMem(s2, Length(s) + 1);
  116. StrLCopy(s2, buf, Length(s));
  117. s3 := StrPas(s2);
  118. FreeMem(s2, Length(s) + 1);
  119. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  120. end;
  121. Inc(buf, Length(s));
  122. end;
  123. function TXMLReader.CheckFor(s: PChar): Boolean;
  124. begin
  125. if buf[0] = #0 then begin
  126. Result := False;
  127. exit;
  128. end;
  129. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  130. Inc(buf, StrLen(s));
  131. Result := True;
  132. end else
  133. Result := False;
  134. end;
  135. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  136. begin
  137. SetLength(Result, 0);
  138. while buf[0] in ValidChars do begin
  139. Result := Result + buf[0];
  140. Inc(buf);
  141. end;
  142. end;
  143. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  144. var
  145. LastNodeBeforeDoc: TDOMNode;
  146. begin
  147. buf := ABuf;
  148. BufStart := ABuf;
  149. Filename := AFilename;
  150. doc := TXMLDocument.Create;
  151. ExpectProlog;
  152. LastNodeBeforeDoc := doc.LastChild;
  153. ExpectElement(doc);
  154. ParseMisc(doc);
  155. if buf[0] <> #0 then
  156. RaiseExc('Text after end of document element found');
  157. {
  158. if buf[0] <> #0 then begin
  159. WriteLn('=== Unparsed: ===');
  160. //WriteLn(buf);
  161. WriteLn(StrLen(buf), ' chars');
  162. end;
  163. }
  164. end;
  165. function TXMLReader.GetName(var s: String): Boolean; // [5]
  166. begin
  167. SetLength(s, 0);
  168. if not (buf[0] in (Letter + ['_', ':'])) then begin
  169. Result := False;
  170. exit;
  171. end;
  172. s := buf[0];
  173. Inc(buf);
  174. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  175. Result := True;
  176. end;
  177. function TXMLReader.ExpectName: String; // [5]
  178. begin
  179. if not (buf[0] in (Letter + ['_', ':'])) then
  180. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  181. Result := buf[0];
  182. Inc(buf);
  183. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  184. end;
  185. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  186. var
  187. strdel: array[0..1] of Char;
  188. s: String;
  189. begin
  190. if (buf[0] <> '''') and (buf[0] <> '"') then
  191. RaiseExc('Expected quotation marks');
  192. strdel[0] := buf[0];
  193. strdel[1] := #0;
  194. Inc(buf);
  195. SetLength(s, 0);
  196. while not CheckFor(strdel) do
  197. if not ParseReference(attr) then begin
  198. s := s + buf[0];
  199. Inc(buf);
  200. end else begin
  201. if Length(s) > 0 then begin
  202. attr.AppendChild(doc.CreateTextNode(s));
  203. SetLength(s, 0);
  204. end;
  205. end;
  206. if Length(s) > 0 then
  207. //attr.AppendChild(doc.CreateTextNode(s));
  208. attr.NodeValue := s;
  209. end;
  210. function TXMLReader.ExpectPubidLiteral: String;
  211. begin
  212. SetLength(Result, 0);
  213. if CheckFor('''') then begin
  214. GetString(PubidChars - ['''']);
  215. ExpectString('''');
  216. end else if CheckFor('"') then begin
  217. GetString(PubidChars - ['"']);
  218. ExpectString('"');
  219. end else
  220. RaiseExc('Expected quotation marks');
  221. end;
  222. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  223. var
  224. comment: String;
  225. begin
  226. if CheckFor('<!--') then begin
  227. SetLength(comment, 0);
  228. while (buf[0] <> #0) and (buf[1] <> #0) and
  229. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  230. comment := comment + buf[0];
  231. Inc(buf);
  232. end;
  233. AOwner.AppendChild(doc.CreateComment(comment));
  234. ExpectString('-->');
  235. Result := True;
  236. end else
  237. Result := False;
  238. end;
  239. function TXMLReader.ParsePI: Boolean; // [16]
  240. var
  241. checkbuf: array[0..3] of char;
  242. begin
  243. if CheckFor('<?') then begin
  244. StrLCopy(checkbuf, buf, 3);
  245. if UpCase(StrPas(checkbuf)) = 'XML' then
  246. RaiseExc('"<?xml" processing instruction not allowed here');
  247. ExpectName;
  248. if SkipWhitespace then
  249. while (buf[0] <> #0) and (buf[1] <> #0) and not
  250. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  251. ExpectString('?>');
  252. Result := True;
  253. end else
  254. Result := False;
  255. end;
  256. procedure TXMLReader.ExpectProlog; // [22]
  257. procedure ParseVersionNum;
  258. begin
  259. doc.XMLVersion :=
  260. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  261. end;
  262. begin
  263. if CheckFor('<?xml') then begin
  264. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  265. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  266. SkipWhitespace;
  267. ExpectString('version');
  268. ParseEq;
  269. if buf[0] = '''' then begin
  270. Inc(buf);
  271. ParseVersionNum;
  272. ExpectString('''');
  273. end else if buf[0] = '"' then begin
  274. Inc(buf);
  275. ParseVersionNum;
  276. ExpectString('"');
  277. end else
  278. RaiseExc('Expected single or double quotation mark');
  279. // EncodingDecl?
  280. ParseEncodingDecl;
  281. // SDDecl?
  282. SkipWhitespace;
  283. if CheckFor('standalone') then begin
  284. ExpectEq;
  285. if buf[0] = '''' then begin
  286. Inc(buf);
  287. if not (CheckFor('yes''') or CheckFor('no''')) then
  288. RaiseExc('Expected ''yes'' or ''no''');
  289. end else if buf[0] = '''' then begin
  290. Inc(buf);
  291. if not (CheckFor('yes"') or CheckFor('no"')) then
  292. RaiseExc('Expected "yes" or "no"');
  293. end;
  294. SkipWhitespace;
  295. end;
  296. ExpectString('?>');
  297. end;
  298. // Check for "Misc*"
  299. ParseMisc(doc);
  300. // Check for "(doctypedecl Misc*)?" [28]
  301. if CheckFor('<!DOCTYPE') then begin
  302. SkipWhitespace;
  303. ExpectName;
  304. SkipWhitespace;
  305. ParseExternalID;
  306. SkipWhitespace;
  307. if CheckFor('[') then begin
  308. repeat
  309. SkipWhitespace;
  310. until not (ParseMarkupDecl or ParsePEReference);
  311. ExpectString(']');
  312. SkipWhitespace;
  313. end;
  314. ExpectString('>');
  315. ParseMisc(doc);
  316. end;
  317. end;
  318. function TXMLReader.ParseEq: Boolean; // [25]
  319. var
  320. savedbuf: PChar;
  321. begin
  322. savedbuf := buf;
  323. SkipWhitespace;
  324. if buf[0] = '=' then begin
  325. Inc(buf);
  326. SkipWhitespace;
  327. Result := True;
  328. end else begin
  329. buf := savedbuf;
  330. Result := False;
  331. end;
  332. end;
  333. procedure TXMLReader.ExpectEq;
  334. begin
  335. if not ParseEq then
  336. RaiseExc('Expected "="');
  337. end;
  338. // Parse "Misc*":
  339. // Misc ::= Comment | PI | S
  340. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  341. begin
  342. repeat
  343. SkipWhitespace;
  344. until not (ParseComment(AOwner) or ParsePI);
  345. end;
  346. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  347. function ParseElementDecl: Boolean; // [45]
  348. procedure ExpectChoiceOrSeq; // [49], [50]
  349. procedure ExpectCP; // [48]
  350. begin
  351. if CheckFor('(') then
  352. ExpectChoiceOrSeq
  353. else
  354. ExpectName;
  355. if CheckFor('?') then
  356. else if CheckFor('*') then
  357. else if CheckFor('+') then;
  358. end;
  359. var
  360. delimiter: Char;
  361. begin
  362. SkipWhitespace;
  363. ExpectCP;
  364. SkipWhitespace;
  365. delimiter := #0;
  366. while not CheckFor(')') do begin
  367. if delimiter = #0 then begin
  368. if (buf[0] = '|') or (buf[0] = ',') then
  369. delimiter := buf[0]
  370. else
  371. RaiseExc('Expected "|" or ","');
  372. Inc(buf);
  373. end else
  374. ExpectString(delimiter);
  375. SkipWhitespace;
  376. ExpectCP;
  377. end;
  378. end;
  379. begin
  380. if CheckFor('<!ELEMENT') then begin
  381. ExpectWhitespace;
  382. ExpectName;
  383. ExpectWhitespace;
  384. // Get contentspec [46]
  385. if CheckFor('EMPTY') then
  386. else if CheckFor('ANY') then
  387. else if CheckFor('(') then begin
  388. SkipWhitespace;
  389. if CheckFor('#PCDATA') then begin
  390. // Parse Mixed section [51]
  391. SkipWhitespace;
  392. if not CheckFor(')') then
  393. repeat
  394. ExpectString('|');
  395. SkipWhitespace;
  396. ExpectName;
  397. until CheckFor(')*');
  398. end else begin
  399. // Parse Children section [47]
  400. ExpectChoiceOrSeq;
  401. if CheckFor('?') then
  402. else if CheckFor('*') then
  403. else if CheckFor('+') then;
  404. end;
  405. end else
  406. RaiseExc('Invalid content specification');
  407. SkipWhitespace;
  408. ExpectString('>');
  409. Result := True;
  410. end else
  411. Result := False;
  412. end;
  413. function ParseAttlistDecl: Boolean; // [52]
  414. var
  415. attr: TDOMAttr;
  416. begin
  417. if CheckFor('<!ATTLIST') then begin
  418. ExpectWhitespace;
  419. ExpectName;
  420. SkipWhitespace;
  421. while not CheckFor('>') do begin
  422. ExpectName;
  423. ExpectWhitespace;
  424. // Get AttType [54], [55], [56]
  425. if CheckFor('CDATA') then
  426. else if CheckFor('ID') then
  427. else if CheckFor('IDREF') then
  428. else if CheckFor('IDREFS') then
  429. else if CheckFor('ENTITTY') then
  430. else if CheckFor('ENTITIES') then
  431. else if CheckFor('NMTOKEN') then
  432. else if CheckFor('NMTOKENS') then
  433. else if CheckFor('NOTATION') then begin // [57], [58]
  434. ExpectWhitespace;
  435. ExpectString('(');
  436. SkipWhitespace;
  437. ExpectName;
  438. SkipWhitespace;
  439. while not CheckFor(')') do begin
  440. ExpectString('|');
  441. SkipWhitespace;
  442. ExpectName;
  443. SkipWhitespace;
  444. end;
  445. end else if CheckFor('(') then begin // [59]
  446. SkipWhitespace;
  447. GetString(Nmtoken);
  448. SkipWhitespace;
  449. while not CheckFor(')') do begin
  450. ExpectString('|');
  451. SkipWhitespace;
  452. GetString(Nmtoken);
  453. SkipWhitespace;
  454. end;
  455. end else
  456. RaiseExc('Invalid tokenized type');
  457. ExpectWhitespace;
  458. // Get DefaultDecl [60]
  459. if CheckFor('#REQUIRED') then
  460. else if CheckFor('#IMPLIED') then
  461. else begin
  462. if CheckFor('#FIXED') then
  463. SkipWhitespace;
  464. attr := doc.CreateAttribute('');
  465. ExpectAttValue(attr);
  466. end;
  467. SkipWhitespace;
  468. end;
  469. Result := True;
  470. end else
  471. Result := False;
  472. end;
  473. function ParseEntityDecl: Boolean; // [70]
  474. var
  475. NewEntity: TDOMEntity;
  476. function ParseEntityValue: Boolean; // [9]
  477. var
  478. strdel: array[0..1] of Char;
  479. begin
  480. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  481. Result := False;
  482. exit;
  483. end;
  484. strdel[0] := buf[0];
  485. strdel[1] := #0;
  486. Inc(buf);
  487. while not CheckFor(strdel) do
  488. if ParsePEReference then
  489. else if ParseReference(NewEntity) then
  490. else begin
  491. Inc(buf); // Normal haracter
  492. end;
  493. Result := True;
  494. end;
  495. begin
  496. if CheckFor('<!ENTITY') then begin
  497. ExpectWhitespace;
  498. if CheckFor('%') then begin // [72]
  499. ExpectWhitespace;
  500. NewEntity := doc.CreateEntity(ExpectName);
  501. ExpectWhitespace;
  502. // Get PEDef [74]
  503. if ParseEntityValue then
  504. else if ParseExternalID then
  505. else
  506. RaiseExc('Expected entity value or external ID');
  507. end else begin // [71]
  508. NewEntity := doc.CreateEntity(ExpectName);
  509. ExpectWhitespace;
  510. // Get EntityDef [73]
  511. if ParseEntityValue then
  512. else begin
  513. ExpectExternalID;
  514. // Get NDataDecl [76]
  515. ExpectWhitespace;
  516. ExpectString('NDATA');
  517. ExpectWhitespace;
  518. ExpectName;
  519. end;
  520. end;
  521. SkipWhitespace;
  522. ExpectString('>');
  523. Result := True;
  524. end else
  525. Result := False;
  526. end;
  527. function ParseNotationDecl: Boolean; // [82]
  528. begin
  529. if CheckFor('<!NOTATION') then begin
  530. ExpectWhitespace;
  531. ExpectName;
  532. ExpectWhitespace;
  533. if ParseExternalID then
  534. else if CheckFor('PUBLIC') then begin // [83]
  535. ExpectWhitespace;
  536. ExpectPubidLiteral;
  537. end else
  538. RaiseExc('Expected external or public ID');
  539. SkipWhitespace;
  540. ExpectString('>');
  541. Result := True;
  542. end else
  543. Result := False;
  544. end;
  545. begin
  546. Result := False;
  547. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  548. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  549. Result := True;
  550. end;
  551. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  552. begin
  553. buf := ABuf;
  554. BufStart := ABuf;
  555. Filename := AFilename;
  556. doc := TXMLDocument.Create;
  557. ParseMarkupDecl;
  558. {
  559. if buf[0] <> #0 then begin
  560. WriteLn('=== Unparsed: ===');
  561. //WriteLn(buf);
  562. WriteLn(StrLen(buf), ' chars');
  563. end;
  564. }
  565. end;
  566. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  567. var
  568. NewElem: TDOMElement;
  569. function ParseCharData: Boolean; // [14]
  570. var
  571. s: String;
  572. i: Integer;
  573. begin
  574. SetLength(s, 0);
  575. while not (buf[0] in [#0, '<', '&']) do begin
  576. s := s + buf[0];
  577. Inc(buf);
  578. end;
  579. if Length(s) > 0 then begin
  580. // Strip whitespace from end of s
  581. i := Length(s);
  582. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  583. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  584. Result := True;
  585. end else
  586. Result := False;
  587. end;
  588. function ParseCDSect: Boolean; // [18]
  589. var
  590. cdata: String;
  591. begin
  592. if CheckFor('<![CDATA[') then begin
  593. SetLength(cdata, 0);
  594. while not CheckFor(']]>') do begin
  595. cdata := cdata + buf[0];
  596. Inc(buf);
  597. end;
  598. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  599. Result := True;
  600. end else
  601. Result := False;
  602. end;
  603. var
  604. IsEmpty: Boolean;
  605. name: String;
  606. oldpos: PChar;
  607. attr: TDOMAttr;
  608. begin
  609. oldpos := buf;
  610. if CheckFor('<') then begin
  611. if not GetName(name) then begin
  612. buf := oldpos;
  613. Result := False;
  614. exit;
  615. end;
  616. NewElem := doc.CreateElement(name);
  617. AOwner.AppendChild(NewElem);
  618. SkipWhitespace;
  619. IsEmpty := False;
  620. while True do begin
  621. if CheckFor('/>') then begin
  622. IsEmpty := True;
  623. break;
  624. end;
  625. if CheckFor('>') then break;
  626. // Get Attribute [41]
  627. attr := doc.CreateAttribute(ExpectName);
  628. NewElem.Attributes.SetNamedItem(attr);
  629. ExpectEq;
  630. ExpectAttValue(attr);
  631. SkipWhitespace;
  632. end;
  633. if not IsEmpty then begin
  634. // Get content
  635. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  636. ParseComment(NewElem) or ParseElement(NewElem) or
  637. ParseReference(NewElem) do;
  638. // Get ETag [42]
  639. ExpectString('</');
  640. ExpectName;
  641. SkipWhitespace;
  642. ExpectString('>');
  643. end;
  644. Result := True;
  645. end else
  646. Result := False;
  647. end;
  648. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  649. begin
  650. if not ParseElement(AOwner) then
  651. RaiseExc('Expected element');
  652. end;
  653. function TXMLReader.ParsePEReference: Boolean; // [69]
  654. begin
  655. if CheckFor('%') then begin
  656. ExpectName;
  657. ExpectString(';');
  658. Result := True;
  659. end else
  660. Result := False;
  661. end;
  662. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  663. begin
  664. if not CheckFor('&') then begin
  665. Result := False;
  666. exit;
  667. end;
  668. if CheckFor('#') then begin // Test for CharRef [66]
  669. if CheckFor('x') then begin
  670. // *** there must be at leat one digit
  671. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  672. end else
  673. // *** there must be at leat one digit
  674. while buf[0] in ['0'..'9'] do Inc(buf);
  675. end else
  676. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  677. ExpectString(';');
  678. Result := True;
  679. end;
  680. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  681. begin
  682. if not ParseReference(AOwner) then
  683. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  684. end;
  685. function TXMLReader.ParseExternalID: Boolean; // [75]
  686. function GetSystemLiteral: String;
  687. begin
  688. SetLength(Result, 0);
  689. if buf[0] = '''' then begin
  690. Inc(buf);
  691. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  692. Result := Result + buf[0];
  693. Inc(buf);
  694. end;
  695. ExpectString('''');
  696. end else if buf[0] = '"' then begin
  697. Inc(buf);
  698. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  699. Result := Result + buf[0];
  700. Inc(buf);
  701. end;
  702. ExpectString('"');
  703. end;
  704. end;
  705. begin
  706. if CheckFor('SYSTEM') then begin
  707. ExpectWhitespace;
  708. GetSystemLiteral;
  709. Result := True;
  710. end else if CheckFor('PUBLIC') then begin
  711. ExpectWhitespace;
  712. ExpectPubidLiteral;
  713. ExpectWhitespace;
  714. GetSystemLiteral;
  715. Result := True;
  716. end else
  717. Result := False;
  718. end;
  719. procedure TXMLReader.ExpectExternalID;
  720. begin
  721. if not ParseExternalID then
  722. RaiseExc('Expected external ID');
  723. end;
  724. function TXMLReader.ParseEncodingDecl: String; // [80]
  725. function ParseEncName: String;
  726. begin
  727. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  728. RaiseExc('Expected character (A-Z, a-z)');
  729. Result := buf[0];
  730. Inc(buf);
  731. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  732. end;
  733. begin
  734. SetLength(Result, 0);
  735. SkipWhitespace;
  736. if CheckFor('encoding') then begin
  737. ExpectEq;
  738. if buf[0] = '''' then begin
  739. Inc(buf);
  740. Result := ParseEncName;
  741. ExpectString('''');
  742. end else if buf[0] = '"' then begin
  743. Inc(buf);
  744. Result := ParseEncName;
  745. ExpectString('"');
  746. end;
  747. end;
  748. end;
  749. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  750. var
  751. reader: TXMLReader;
  752. buf: PChar;
  753. BufSize: LongInt;
  754. begin
  755. ADoc := nil;
  756. BufSize := FileSize(f) + 1;
  757. if BufSize <= 1 then exit;
  758. GetMem(buf, BufSize);
  759. BlockRead(f, buf^, BufSize - 1);
  760. buf[BufSize - 1] := #0;
  761. reader := TXMLReader.Create;
  762. reader.ProcessXML(buf, Filerec(f).name);
  763. FreeMem(buf, BufSize);
  764. ADoc := reader.doc;
  765. reader.Free;
  766. end;
  767. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  768. const AFilename: String);
  769. var
  770. reader: TXMLReader;
  771. buf: PChar;
  772. begin
  773. ADoc := nil;
  774. if f.Size = 0 then exit;
  775. GetMem(buf, f.Size + 1);
  776. f.Read(buf^, f.Size);
  777. buf[f.Size] := #0;
  778. reader := TXMLReader.Create;
  779. reader.ProcessXML(buf, AFilename);
  780. FreeMem(buf, f.Size + 1);
  781. ADoc := reader.doc;
  782. reader.Free;
  783. end;
  784. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  785. begin
  786. ReadXMLFile(ADoc, f, '<Stream>');
  787. end;
  788. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  789. var
  790. stream: TFileStream;
  791. begin
  792. ADoc := nil;
  793. stream := TFileStream.Create(AFilename, fmOpenRead);
  794. try
  795. ReadXMLFile(ADoc, stream, AFilename);
  796. finally
  797. stream.Free;
  798. end;
  799. end;
  800. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  801. var
  802. reader: TXMLReader;
  803. buf: PChar;
  804. BufSize: LongInt;
  805. begin
  806. ADoc := nil;
  807. BufSize := FileSize(f) + 1;
  808. if BufSize <= 1 then exit;
  809. GetMem(buf, BufSize + 1);
  810. BlockRead(f, buf^, BufSize - 1);
  811. buf[BufSize - 1] := #0;
  812. reader := TXMLReader.Create;
  813. reader.ProcessDTD(buf, Filerec(f).name);
  814. FreeMem(buf, BufSize);
  815. ADoc := reader.doc;
  816. reader.Free;
  817. end;
  818. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  819. const AFilename: String);
  820. var
  821. reader: TXMLReader;
  822. buf: PChar;
  823. begin
  824. ADoc := nil;
  825. if f.Size = 0 then exit;
  826. GetMem(buf, f.Size + 1);
  827. f.Read(buf^, f.Size);
  828. buf[f.Size] := #0;
  829. reader := TXMLReader.Create;
  830. reader.ProcessDTD(buf, AFilename);
  831. FreeMem(buf, f.Size + 1);
  832. ADoc := reader.doc;
  833. reader.Free;
  834. end;
  835. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  836. begin
  837. ReadDTDFile(ADoc, f, '<Stream>');
  838. end;
  839. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  840. var
  841. stream: TFileStream;
  842. begin
  843. ADoc := nil;
  844. stream := TFileStream.Create(AFilename, fmOpenRead);
  845. try
  846. ReadDTDFile(ADoc, stream, AFilename);
  847. finally
  848. stream.Free;
  849. end;
  850. end;
  851. end.
  852. {
  853. $Log$
  854. Revision 1.15 2000-02-13 10:03:31 sg
  855. * Hopefully final fix for TDOMDocument.DocumentElement:
  856. - Reading this property always delivers the first element in the document
  857. - Removed SetDocumentElement. Use "AppendChild" or one of the other
  858. generic methods for TDOMNode instead.
  859. Revision 1.14 2000/01/30 22:19:13 sg
  860. * Made some optimizations and cosmetic changes
  861. Revision 1.13 2000/01/07 01:24:34 peter
  862. * updated copyright to 2000
  863. Revision 1.12 2000/01/06 01:20:37 peter
  864. * moved out of packages/ back to topdir
  865. Revision 1.1 2000/01/03 19:33:11 peter
  866. * moved to packages dir
  867. Revision 1.10 1999/12/22 13:39:55 sg
  868. * Fixed parser bug: SetDocumentElement failed if the XML document contains
  869. only a single element at the top hierarchy level
  870. * Changed the error message if there is text after the end of the main
  871. XML element
  872. Revision 1.9 1999/12/05 22:02:11 sg
  873. * The reader now sets the DocumentElement for a DOM document
  874. * The XML parser raises an exception if there is additional data after
  875. the end of the XML document element
  876. Revision 1.8 1999/08/10 15:39:59 michael
  877. * restored previous setting
  878. Revision 1.6 1999/07/27 13:01:59 peter
  879. * remove filerec.inc, it was missing from sysutils! You shouldn't need
  880. to compile with -Irtl/inc !!
  881. Revision 1.5 1999/07/25 16:24:14 michael
  882. + Fixes from Sebastiam Guenther - more error-proof
  883. Revision 1.4 1999/07/11 20:20:12 michael
  884. + Fixes from Sebastian Guenther
  885. Revision 1.3 1999/07/09 21:05:51 michael
  886. + fixes from Guenther Sebastian
  887. Revision 1.2 1999/07/09 10:42:50 michael
  888. * Removed debug statements
  889. Revision 1.1 1999/07/09 08:35:09 michael
  890. + Initial implementation by Sebastian Guenther
  891. }
  892. --------------ECFEA19D0E6E5FF5CDAF6681--)