xmlread.pp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004
  1. {
  2. $Id$
  3. This file is part of the Free Pascal run time library.
  4. Copyright (c) 1999 Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit xmlread;
  15. interface
  16. uses sysutils, classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  38. type
  39. TSetOfChar = set of Char;
  40. TXMLReader = class
  41. protected
  42. buf, BufStart: PChar;
  43. Filename: String;
  44. procedure RaiseExc(descr: String);
  45. function SkipWhitespace: Boolean;
  46. procedure ExpectWhitespace;
  47. procedure ExpectString(s: String);
  48. function CheckFor(s: PChar): Boolean;
  49. function GetString(ValidChars: TSetOfChar): String;
  50. function GetName(var s: String): Boolean;
  51. function ExpectName: String; // [5]
  52. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  53. function ExpectPubidLiteral: String; // [12]
  54. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  55. function ParsePI: Boolean; // [16]
  56. procedure ExpectProlog; // [22]
  57. function ParseEq: Boolean; // [25]
  58. procedure ExpectEq;
  59. procedure ParseMisc(AOwner: TDOMNode); // [27]
  60. function ParseMarkupDecl: Boolean; // [29]
  61. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  62. procedure ExpectElement(AOwner: TDOMNode);
  63. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  64. procedure ExpectReference(AOwner: TDOMNode);
  65. function ParsePEReference: Boolean; // [69]
  66. function ParseExternalID: Boolean; // [75]
  67. procedure ExpectExternalID;
  68. function ParseEncodingDecl: String; // [80]
  69. public
  70. doc: TXMLDocument;
  71. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  72. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  73. end;
  74. procedure TXMLReader.RaiseExc(descr: String);
  75. var
  76. apos: PChar;
  77. x, y: Integer;
  78. begin
  79. // find out the line in which the error occured
  80. apos := BufStart;
  81. x := 1;
  82. y := 1;
  83. while apos < buf do begin
  84. if apos[0] = #10 then begin
  85. Inc(y);
  86. x := 1;
  87. end else
  88. Inc(x);
  89. Inc(apos);
  90. end;
  91. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  92. IntToStr(x) + '): ' + descr);
  93. end;
  94. function TXMLReader.SkipWhitespace: Boolean;
  95. begin
  96. Result := False;
  97. while buf[0] in [#9, #10, #13, ' '] do begin
  98. Inc(buf);
  99. Result := True;
  100. end;
  101. end;
  102. procedure TXMLReader.ExpectWhitespace;
  103. begin
  104. if not SkipWhitespace then
  105. RaiseExc('Expected whitespace');
  106. end;
  107. procedure TXMLReader.ExpectString(s: String);
  108. var
  109. i: Integer;
  110. s2: PChar;
  111. s3: String;
  112. begin
  113. for i := 1 to Length(s) do
  114. if buf[i - 1] <> s[i] then begin
  115. GetMem(s2, Length(s) + 1);
  116. StrLCopy(s2, buf, Length(s));
  117. s3 := StrPas(s2);
  118. FreeMem(s2, Length(s) + 1);
  119. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  120. end;
  121. Inc(buf, Length(s));
  122. end;
  123. function TXMLReader.CheckFor(s: PChar): Boolean;
  124. begin
  125. if buf[0] = #0 then begin
  126. Result := False;
  127. exit;
  128. end;
  129. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  130. Inc(buf, StrLen(s));
  131. Result := True;
  132. end else
  133. Result := False;
  134. end;
  135. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  136. begin
  137. Result := '';
  138. while buf[0] in ValidChars do begin
  139. Result := Result + buf[0];
  140. Inc(buf);
  141. end;
  142. end;
  143. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  144. var
  145. LastNodeBeforeDoc: TDOMNode;
  146. begin
  147. buf := ABuf;
  148. BufStart := ABuf;
  149. Filename := AFilename;
  150. doc := TXMLDocument.Create;
  151. ExpectProlog;
  152. LastNodeBeforeDoc := doc.LastChild;
  153. ExpectElement(doc);
  154. ParseMisc(doc);
  155. {
  156. if buf[0] <> #0 then begin
  157. WriteLn('=== Unparsed: ===');
  158. //WriteLn(buf);
  159. WriteLn(StrLen(buf), ' chars');
  160. end;
  161. }
  162. end;
  163. function TXMLReader.GetName(var s: String): Boolean; // [5]
  164. begin
  165. s := '';
  166. if not (buf[0] in (Letter + ['_', ':'])) then begin
  167. Result := False;
  168. exit;
  169. end;
  170. s := buf[0];
  171. Inc(buf);
  172. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  173. Result := True;
  174. end;
  175. function TXMLReader.ExpectName: String; // [5]
  176. begin
  177. if not (buf[0] in (Letter + ['_', ':'])) then
  178. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  179. Result := buf[0];
  180. Inc(buf);
  181. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  182. end;
  183. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  184. var
  185. strdel: array[0..1] of Char;
  186. s: String;
  187. begin
  188. if (buf[0] <> '''') and (buf[0] <> '"') then
  189. RaiseExc('Expected quotation marks');
  190. strdel[0] := buf[0];
  191. strdel[1] := #0;
  192. Inc(buf);
  193. s := '';
  194. while not CheckFor(strdel) do
  195. if not ParseReference(attr) then begin
  196. s := s + buf[0];
  197. Inc(buf);
  198. end else begin
  199. if s <> '' then begin
  200. attr.AppendChild(doc.CreateTextNode(s));
  201. s := '';
  202. end;
  203. end;
  204. if s <> '' then
  205. attr.AppendChild(doc.CreateTextNode(s));
  206. end;
  207. function TXMLReader.ExpectPubidLiteral: String;
  208. begin
  209. Result := '';
  210. if CheckFor('''') then begin
  211. GetString(PubidChars - ['''']);
  212. ExpectString('''');
  213. end else if CheckFor('"') then begin
  214. GetString(PubidChars - ['"']);
  215. ExpectString('"');
  216. end else
  217. RaiseExc('Expected quotation marks');
  218. end;
  219. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  220. var
  221. comment: String;
  222. begin
  223. if CheckFor('<!--') then begin
  224. comment := '';
  225. while (buf[0] <> #0) and (buf[1] <> #0) and
  226. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  227. comment := comment + buf[0];
  228. Inc(buf);
  229. end;
  230. AOwner.AppendChild(doc.CreateComment(comment));
  231. ExpectString('-->');
  232. Result := True;
  233. end else
  234. Result := False;
  235. end;
  236. function TXMLReader.ParsePI: Boolean; // [16]
  237. var
  238. checkbuf: array[0..3] of char;
  239. begin
  240. if CheckFor('<?') then begin
  241. StrLCopy(checkbuf, buf, 3);
  242. if UpCase(StrPas(checkbuf)) = 'XML' then
  243. RaiseExc('"<?xml" processing instruction not allowed here');
  244. ExpectName;
  245. if SkipWhitespace then
  246. while (buf[0] <> #0) and (buf[1] <> #0) and not
  247. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  248. ExpectString('?>');
  249. Result := True;
  250. end else
  251. Result := False;
  252. end;
  253. procedure TXMLReader.ExpectProlog; // [22]
  254. procedure ParseVersionNum;
  255. begin
  256. doc.XMLVersion :=
  257. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  258. end;
  259. begin
  260. if CheckFor('<?xml') then begin
  261. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  262. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  263. SkipWhitespace;
  264. ExpectString('version');
  265. ParseEq;
  266. if buf[0] = '''' then begin
  267. Inc(buf);
  268. ParseVersionNum;
  269. ExpectString('''');
  270. end else if buf[0] = '"' then begin
  271. Inc(buf);
  272. ParseVersionNum;
  273. ExpectString('"');
  274. end else
  275. RaiseExc('Expected single or double quotation mark');
  276. // EncodingDecl?
  277. ParseEncodingDecl;
  278. // SDDecl?
  279. SkipWhitespace;
  280. if CheckFor('standalone') then begin
  281. ExpectEq;
  282. if buf[0] = '''' then begin
  283. Inc(buf);
  284. if not (CheckFor('yes''') or CheckFor('no''')) then
  285. RaiseExc('Expected ''yes'' or ''no''');
  286. end else if buf[0] = '''' then begin
  287. Inc(buf);
  288. if not (CheckFor('yes"') or CheckFor('no"')) then
  289. RaiseExc('Expected "yes" or "no"');
  290. end;
  291. SkipWhitespace;
  292. end;
  293. ExpectString('?>');
  294. end;
  295. // Check for "Misc*"
  296. ParseMisc(doc);
  297. // Check for "(doctypedecl Misc*)?" [28]
  298. if CheckFor('<!DOCTYPE') then begin
  299. SkipWhitespace;
  300. ExpectName;
  301. SkipWhitespace;
  302. ParseExternalID;
  303. SkipWhitespace;
  304. if CheckFor('[') then begin
  305. repeat
  306. SkipWhitespace;
  307. until not (ParseMarkupDecl or ParsePEReference);
  308. ExpectString(']');
  309. SkipWhitespace;
  310. end;
  311. ExpectString('>');
  312. ParseMisc(doc);
  313. end;
  314. end;
  315. function TXMLReader.ParseEq: Boolean; // [25]
  316. var
  317. savedbuf: PChar;
  318. begin
  319. savedbuf := buf;
  320. SkipWhitespace;
  321. if buf[0] = '=' then begin
  322. Inc(buf);
  323. SkipWhitespace;
  324. Result := True;
  325. end else begin
  326. buf := savedbuf;
  327. Result := False;
  328. end;
  329. end;
  330. procedure TXMLReader.ExpectEq;
  331. begin
  332. if not ParseEq then
  333. RaiseExc('Expected "="');
  334. end;
  335. // Parse "Misc*":
  336. // Misc ::= Comment | PI | S
  337. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  338. begin
  339. repeat
  340. SkipWhitespace;
  341. until not (ParseComment(AOwner) or ParsePI);
  342. end;
  343. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  344. function ParseElementDecl: Boolean; // [45]
  345. procedure ExpectChoiceOrSeq; // [49], [50]
  346. procedure ExpectCP; // [48]
  347. begin
  348. if CheckFor('(') then
  349. ExpectChoiceOrSeq
  350. else
  351. ExpectName;
  352. if CheckFor('?') then
  353. else if CheckFor('*') then
  354. else if CheckFor('+') then;
  355. end;
  356. var
  357. delimiter: Char;
  358. begin
  359. SkipWhitespace;
  360. ExpectCP;
  361. SkipWhitespace;
  362. delimiter := #0;
  363. while not CheckFor(')') do begin
  364. if delimiter = #0 then begin
  365. if (buf[0] = '|') or (buf[0] = ',') then
  366. delimiter := buf[0]
  367. else
  368. RaiseExc('Expected "|" or ","');
  369. Inc(buf);
  370. end else
  371. ExpectString(delimiter);
  372. SkipWhitespace;
  373. ExpectCP;
  374. end;
  375. end;
  376. begin
  377. if CheckFor('<!ELEMENT') then begin
  378. ExpectWhitespace;
  379. ExpectName;
  380. ExpectWhitespace;
  381. // Get contentspec [46]
  382. if CheckFor('EMPTY') then
  383. else if CheckFor('ANY') then
  384. else if CheckFor('(') then begin
  385. SkipWhitespace;
  386. if CheckFor('#PCDATA') then begin
  387. // Parse Mixed section [51]
  388. SkipWhitespace;
  389. if not CheckFor(')') then
  390. repeat
  391. ExpectString('|');
  392. SkipWhitespace;
  393. ExpectName;
  394. until CheckFor(')*');
  395. end else begin
  396. // Parse Children section [47]
  397. ExpectChoiceOrSeq;
  398. if CheckFor('?') then
  399. else if CheckFor('*') then
  400. else if CheckFor('+') then;
  401. end;
  402. end else
  403. RaiseExc('Invalid content specification');
  404. SkipWhitespace;
  405. ExpectString('>');
  406. Result := True;
  407. end else
  408. Result := False;
  409. end;
  410. function ParseAttlistDecl: Boolean; // [52]
  411. var
  412. attr: TDOMAttr;
  413. begin
  414. if CheckFor('<!ATTLIST') then begin
  415. ExpectWhitespace;
  416. ExpectName;
  417. SkipWhitespace;
  418. while not CheckFor('>') do begin
  419. ExpectName;
  420. ExpectWhitespace;
  421. // Get AttType [54], [55], [56]
  422. if CheckFor('CDATA') then
  423. else if CheckFor('ID') then
  424. else if CheckFor('IDREF') then
  425. else if CheckFor('IDREFS') then
  426. else if CheckFor('ENTITTY') then
  427. else if CheckFor('ENTITIES') then
  428. else if CheckFor('NMTOKEN') then
  429. else if CheckFor('NMTOKENS') then
  430. else if CheckFor('NOTATION') then begin // [57], [58]
  431. ExpectWhitespace;
  432. ExpectString('(');
  433. SkipWhitespace;
  434. ExpectName;
  435. SkipWhitespace;
  436. while not CheckFor(')') do begin
  437. ExpectString('|');
  438. SkipWhitespace;
  439. ExpectName;
  440. SkipWhitespace;
  441. end;
  442. end else if CheckFor('(') then begin // [59]
  443. SkipWhitespace;
  444. GetString(Nmtoken);
  445. SkipWhitespace;
  446. while not CheckFor(')') do begin
  447. ExpectString('|');
  448. SkipWhitespace;
  449. GetString(Nmtoken);
  450. SkipWhitespace;
  451. end;
  452. end else
  453. RaiseExc('Invalid tokenized type');
  454. ExpectWhitespace;
  455. // Get DefaultDecl [60]
  456. if CheckFor('#REQUIRED') then
  457. else if CheckFor('#IMPLIED') then
  458. else begin
  459. if CheckFor('#FIXED') then
  460. SkipWhitespace;
  461. attr := doc.CreateAttribute('');
  462. ExpectAttValue(attr);
  463. end;
  464. SkipWhitespace;
  465. end;
  466. Result := True;
  467. end else
  468. Result := False;
  469. end;
  470. function ParseEntityDecl: Boolean; // [70]
  471. var
  472. NewEntity: TDOMEntity;
  473. function ParseEntityValue: Boolean; // [9]
  474. var
  475. strdel: array[0..1] of Char;
  476. begin
  477. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  478. Result := False;
  479. exit;
  480. end;
  481. strdel[0] := buf[0];
  482. strdel[1] := #0;
  483. Inc(buf);
  484. while not CheckFor(strdel) do
  485. if ParsePEReference then
  486. else if ParseReference(NewEntity) then
  487. else begin
  488. Inc(buf); // Normal haracter
  489. end;
  490. Result := True;
  491. end;
  492. begin
  493. if CheckFor('<!ENTITY') then begin
  494. ExpectWhitespace;
  495. if CheckFor('%') then begin // [72]
  496. ExpectWhitespace;
  497. NewEntity := doc.CreateEntity(ExpectName);
  498. ExpectWhitespace;
  499. // Get PEDef [74]
  500. if ParseEntityValue then
  501. else if ParseExternalID then
  502. else
  503. RaiseExc('Expected entity value or external ID');
  504. end else begin // [71]
  505. NewEntity := doc.CreateEntity(ExpectName);
  506. ExpectWhitespace;
  507. // Get EntityDef [73]
  508. if ParseEntityValue then
  509. else begin
  510. ExpectExternalID;
  511. // Get NDataDecl [76]
  512. ExpectWhitespace;
  513. ExpectString('NDATA');
  514. ExpectWhitespace;
  515. ExpectName;
  516. end;
  517. end;
  518. SkipWhitespace;
  519. ExpectString('>');
  520. Result := True;
  521. end else
  522. Result := False;
  523. end;
  524. function ParseNotationDecl: Boolean; // [82]
  525. begin
  526. if CheckFor('<!NOTATION') then begin
  527. ExpectWhitespace;
  528. ExpectName;
  529. ExpectWhitespace;
  530. if ParseExternalID then
  531. else if CheckFor('PUBLIC') then begin // [83]
  532. ExpectWhitespace;
  533. ExpectPubidLiteral;
  534. end else
  535. RaiseExc('Expected external or public ID');
  536. SkipWhitespace;
  537. ExpectString('>');
  538. Result := True;
  539. end else
  540. Result := False;
  541. end;
  542. begin
  543. Result := False;
  544. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  545. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  546. Result := True;
  547. end;
  548. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  549. begin
  550. buf := ABuf;
  551. BufStart := ABuf;
  552. Filename := AFilename;
  553. doc := TXMLDocument.Create;
  554. ParseMarkupDecl;
  555. {
  556. if buf[0] <> #0 then begin
  557. WriteLn('=== Unparsed: ===');
  558. //WriteLn(buf);
  559. WriteLn(StrLen(buf), ' chars');
  560. end;
  561. }
  562. end;
  563. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  564. var
  565. NewElem: TDOMElement;
  566. function ParseCharData: Boolean; // [14]
  567. var
  568. s: String;
  569. i: Integer;
  570. begin
  571. s := '';
  572. while not (buf[0] in [#0, '<', '&']) do begin
  573. s := s + buf[0];
  574. Inc(buf);
  575. end;
  576. if s <> '' then begin
  577. // Strip whitespace from end of s
  578. i := Length(s);
  579. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  580. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  581. Result := True;
  582. end else
  583. Result := False;
  584. end;
  585. function ParseCDSect: Boolean; // [18]
  586. var
  587. cdata: String;
  588. begin
  589. if CheckFor('<![CDATA[') then begin
  590. cdata := '';
  591. while not CheckFor(']]>') do begin
  592. cdata := cdata + buf[0];
  593. Inc(buf);
  594. end;
  595. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  596. Result := True;
  597. end else
  598. Result := False;
  599. end;
  600. var
  601. IsEmpty: Boolean;
  602. name: String;
  603. oldpos: PChar;
  604. attr: TDOMAttr;
  605. begin
  606. oldpos := buf;
  607. if CheckFor('<') then begin
  608. if not GetName(name) then begin
  609. buf := oldpos;
  610. Result := False;
  611. exit;
  612. end;
  613. NewElem := doc.CreateElement(name);
  614. AOwner.AppendChild(NewElem);
  615. SkipWhitespace;
  616. IsEmpty := False;
  617. while True do begin
  618. if CheckFor('/>') then begin
  619. IsEmpty := True;
  620. break;
  621. end;
  622. if CheckFor('>') then break;
  623. // Get Attribute [41]
  624. attr := doc.CreateAttribute(ExpectName);
  625. NewElem.Attributes.SetNamedItem(attr);
  626. ExpectEq;
  627. ExpectAttValue(attr);
  628. SkipWhitespace;
  629. end;
  630. if not IsEmpty then begin
  631. // Get content
  632. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  633. ParseComment(NewElem) or ParseElement(NewElem) or
  634. ParseReference(NewElem) do;
  635. // Get ETag [42]
  636. ExpectString('</');
  637. ExpectName;
  638. SkipWhitespace;
  639. ExpectString('>');
  640. end;
  641. Result := True;
  642. end else
  643. Result := False;
  644. end;
  645. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  646. begin
  647. if not ParseElement(AOwner) then
  648. RaiseExc('Expected element');
  649. end;
  650. function TXMLReader.ParsePEReference: Boolean; // [69]
  651. begin
  652. if CheckFor('%') then begin
  653. ExpectName;
  654. ExpectString(';');
  655. Result := True;
  656. end else
  657. Result := False;
  658. end;
  659. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  660. begin
  661. if not CheckFor('&') then begin
  662. Result := False;
  663. exit;
  664. end;
  665. if CheckFor('#') then begin // Test for CharRef [66]
  666. if CheckFor('x') then begin
  667. // *** there must be at leat one digit
  668. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  669. end else
  670. // *** there must be at leat one digit
  671. while buf[0] in ['0'..'9'] do Inc(buf);
  672. end else
  673. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  674. ExpectString(';');
  675. Result := True;
  676. end;
  677. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  678. begin
  679. if not ParseReference(AOwner) then
  680. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  681. end;
  682. function TXMLReader.ParseExternalID: Boolean; // [75]
  683. function GetSystemLiteral: String;
  684. begin
  685. if buf[0] = '''' then begin
  686. Inc(buf);
  687. Result := '';
  688. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  689. Result := Result + buf[0];
  690. Inc(buf);
  691. end;
  692. ExpectString('''');
  693. end else if buf[0] = '"' then begin
  694. Inc(buf);
  695. Result := '';
  696. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  697. Result := Result + buf[0];
  698. Inc(buf);
  699. end;
  700. ExpectString('"');
  701. end;
  702. end;
  703. begin
  704. if CheckFor('SYSTEM') then begin
  705. ExpectWhitespace;
  706. GetSystemLiteral;
  707. Result := True;
  708. end else if CheckFor('PUBLIC') then begin
  709. ExpectWhitespace;
  710. ExpectPubidLiteral;
  711. ExpectWhitespace;
  712. GetSystemLiteral;
  713. Result := True;
  714. end else
  715. Result := False;
  716. end;
  717. procedure TXMLReader.ExpectExternalID;
  718. begin
  719. if not ParseExternalID then
  720. RaiseExc('Expected external ID');
  721. end;
  722. function TXMLReader.ParseEncodingDecl: String; // [80]
  723. function ParseEncName: String;
  724. begin
  725. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  726. RaiseExc('Expected character (A-Z, a-z)');
  727. Result := buf[0];
  728. Inc(buf);
  729. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  730. end;
  731. begin
  732. Result := '';
  733. SkipWhitespace;
  734. if CheckFor('encoding') then begin
  735. ExpectEq;
  736. if buf[0] = '''' then begin
  737. Inc(buf);
  738. Result := ParseEncName;
  739. ExpectString('''');
  740. end else if buf[0] = '"' then begin
  741. Inc(buf);
  742. Result := ParseEncName;
  743. ExpectString('"');
  744. end;
  745. end;
  746. end;
  747. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  748. var
  749. reader: TXMLReader;
  750. buf: PChar;
  751. BufSize: LongInt;
  752. begin
  753. ADoc := nil;
  754. BufSize := FileSize(f) + 1;
  755. if BufSize <= 1 then exit;
  756. GetMem(buf, BufSize);
  757. BlockRead(f, buf^, BufSize - 1);
  758. buf[BufSize - 1] := #0;
  759. reader := TXMLReader.Create;
  760. reader.ProcessXML(buf, Filerec(f).name);
  761. FreeMem(buf, BufSize);
  762. ADoc := reader.doc;
  763. reader.Free;
  764. end;
  765. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  766. const AFilename: String);
  767. var
  768. reader: TXMLReader;
  769. buf: PChar;
  770. begin
  771. ADoc := nil;
  772. if f.Size = 0 then exit;
  773. GetMem(buf, f.Size + 1);
  774. f.Read(buf^, f.Size);
  775. buf[f.Size] := #0;
  776. reader := TXMLReader.Create;
  777. reader.ProcessXML(buf, AFilename);
  778. FreeMem(buf, f.Size + 1);
  779. ADoc := reader.doc;
  780. reader.Free;
  781. end;
  782. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  783. begin
  784. ReadXMLFile(ADoc, f, '<Stream>');
  785. end;
  786. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  787. var
  788. stream: TFileStream;
  789. begin
  790. ADoc := nil;
  791. stream := TFileStream.Create(AFilename, fmOpenRead);
  792. try
  793. ReadXMLFile(ADoc, stream, AFilename);
  794. finally
  795. stream.Free;
  796. end;
  797. end;
  798. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  799. var
  800. reader: TXMLReader;
  801. buf: PChar;
  802. BufSize: LongInt;
  803. begin
  804. ADoc := nil;
  805. BufSize := FileSize(f) + 1;
  806. if BufSize <= 1 then exit;
  807. GetMem(buf, BufSize + 1);
  808. BlockRead(f, buf^, BufSize - 1);
  809. buf[BufSize - 1] := #0;
  810. reader := TXMLReader.Create;
  811. reader.ProcessDTD(buf, Filerec(f).name);
  812. FreeMem(buf, BufSize);
  813. ADoc := reader.doc;
  814. reader.Free;
  815. end;
  816. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  817. const AFilename: String);
  818. var
  819. reader: TXMLReader;
  820. buf: PChar;
  821. begin
  822. ADoc := nil;
  823. if f.Size = 0 then exit;
  824. GetMem(buf, f.Size + 1);
  825. f.Read(buf^, f.Size);
  826. buf[f.Size] := #0;
  827. reader := TXMLReader.Create;
  828. reader.ProcessDTD(buf, AFilename);
  829. FreeMem(buf, f.Size + 1);
  830. ADoc := reader.doc;
  831. reader.Free;
  832. end;
  833. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  834. begin
  835. ReadDTDFile(ADoc, f, '<Stream>');
  836. end;
  837. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  838. var
  839. stream: TFileStream;
  840. begin
  841. ADoc := nil;
  842. stream := TFileStream.Create(AFilename, fmOpenRead);
  843. try
  844. ReadDTDFile(ADoc, stream, AFilename);
  845. finally
  846. stream.Free;
  847. end;
  848. end;
  849. end.
  850. {
  851. $Log$
  852. Revision 1.8 1999-08-10 15:39:59 michael
  853. * restored previous setting
  854. Revision 1.6 1999/07/27 13:01:59 peter
  855. * remove filerec.inc, it was missing from sysutils! You shouldn't need
  856. to compile with -Irtl/inc !!
  857. Revision 1.5 1999/07/25 16:24:14 michael
  858. + Fixes from Sebastiam Guenther - more error-proof
  859. Revision 1.4 1999/07/11 20:20:12 michael
  860. + Fixes from Sebastian Guenther
  861. Revision 1.3 1999/07/09 21:05:51 michael
  862. + fixes from Guenther Sebastian
  863. Revision 1.2 1999/07/09 10:42:50 michael
  864. * Removed debug statements
  865. Revision 1.1 1999/07/09 08:35:09 michael
  866. + Initial implementation by Sebastian Guenther
  867. }
  868. --------------ECFEA19D0E6E5FF5CDAF6681--)