xmlread.pp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. Copyright (c) 1999-2000 by Sebastian Guenther
  5. XML reading routines.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit XMLRead;
  15. interface
  16. uses SysUtils, Classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  25. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  26. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  27. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  28. const AFilename: String);
  29. // =======================================================
  30. implementation
  31. const
  32. Letter = ['A'..'Z', 'a'..'z'];
  33. Digit = ['0'..'9'];
  34. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  35. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  36. '#', '@', '$', '_', '%'];
  37. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  38. type
  39. TXMLReaderDocument = class(TXMLDocument)
  40. public
  41. procedure SetDocType(ADocType: TDOMDocumentType);
  42. end;
  43. TXMLReaderDocumentType = class(TDOMDocumentType)
  44. public
  45. constructor Create(ADocument: TXMLReaderDocument);
  46. property Name: DOMString read FNodeName write FNodeName;
  47. end;
  48. TSetOfChar = set of Char;
  49. TXMLReader = class
  50. protected
  51. buf, BufStart: PChar;
  52. Filename: String;
  53. procedure RaiseExc(descr: String);
  54. function SkipWhitespace: Boolean;
  55. procedure ExpectWhitespace;
  56. procedure ExpectString(s: String);
  57. function CheckFor(s: PChar): Boolean;
  58. function GetString(ValidChars: TSetOfChar): String;
  59. function GetName(var s: String): Boolean;
  60. function ExpectName: String; // [5]
  61. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  62. function ExpectPubidLiteral: String; // [12]
  63. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  64. function ParsePI: Boolean; // [16]
  65. procedure ExpectProlog; // [22]
  66. function ParseEq: Boolean; // [25]
  67. procedure ExpectEq;
  68. procedure ParseMisc(AOwner: TDOMNode); // [27]
  69. function ParseMarkupDecl: Boolean; // [29]
  70. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  71. procedure ExpectElement(AOwner: TDOMNode);
  72. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  73. procedure ExpectReference(AOwner: TDOMNode);
  74. function ParsePEReference: Boolean; // [69]
  75. function ParseExternalID: Boolean; // [75]
  76. procedure ExpectExternalID;
  77. function ParseEncodingDecl: String; // [80]
  78. public
  79. doc: TXMLReaderDocument;
  80. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  81. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  82. end;
  83. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  84. begin
  85. FDocType := ADocType;
  86. end;
  87. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  88. begin
  89. inherited Create(ADocument);
  90. end;
  91. procedure TXMLReader.RaiseExc(descr: String);
  92. var
  93. apos: PChar;
  94. x, y: Integer;
  95. begin
  96. // find out the line in which the error occured
  97. apos := BufStart;
  98. x := 1;
  99. y := 1;
  100. while apos < buf do begin
  101. if apos[0] = #10 then begin
  102. Inc(y);
  103. x := 1;
  104. end else
  105. Inc(x);
  106. Inc(apos);
  107. end;
  108. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  109. IntToStr(x) + '): ' + descr);
  110. end;
  111. function TXMLReader.SkipWhitespace: Boolean;
  112. begin
  113. Result := False;
  114. while buf[0] in [#9, #10, #13, ' '] do begin
  115. Inc(buf);
  116. Result := True;
  117. end;
  118. end;
  119. procedure TXMLReader.ExpectWhitespace;
  120. begin
  121. if not SkipWhitespace then
  122. RaiseExc('Expected whitespace');
  123. end;
  124. procedure TXMLReader.ExpectString(s: String);
  125. var
  126. i: Integer;
  127. s2: PChar;
  128. s3: String;
  129. begin
  130. for i := 1 to Length(s) do
  131. if buf[i - 1] <> s[i] then begin
  132. GetMem(s2, Length(s) + 1);
  133. StrLCopy(s2, buf, Length(s));
  134. s3 := StrPas(s2);
  135. FreeMem(s2, Length(s) + 1);
  136. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  137. end;
  138. Inc(buf, Length(s));
  139. end;
  140. function TXMLReader.CheckFor(s: PChar): Boolean;
  141. begin
  142. if buf[0] = #0 then begin
  143. Result := False;
  144. exit;
  145. end;
  146. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  147. Inc(buf, StrLen(s));
  148. Result := True;
  149. end else
  150. Result := False;
  151. end;
  152. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  153. begin
  154. SetLength(Result, 0);
  155. while buf[0] in ValidChars do begin
  156. Result := Result + buf[0];
  157. Inc(buf);
  158. end;
  159. end;
  160. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  161. var
  162. LastNodeBeforeDoc: TDOMNode;
  163. begin
  164. buf := ABuf;
  165. BufStart := ABuf;
  166. Filename := AFilename;
  167. doc := TXMLReaderDocument.Create;
  168. ExpectProlog;
  169. LastNodeBeforeDoc := doc.LastChild;
  170. ExpectElement(doc);
  171. ParseMisc(doc);
  172. if buf[0] <> #0 then
  173. RaiseExc('Text after end of document element found');
  174. {
  175. if buf[0] <> #0 then begin
  176. WriteLn('=== Unparsed: ===');
  177. //WriteLn(buf);
  178. WriteLn(StrLen(buf), ' chars');
  179. end;
  180. }
  181. end;
  182. function TXMLReader.GetName(var s: String): Boolean; // [5]
  183. begin
  184. SetLength(s, 0);
  185. if not (buf[0] in (Letter + ['_', ':'])) then begin
  186. Result := False;
  187. exit;
  188. end;
  189. s := buf[0];
  190. Inc(buf);
  191. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  192. Result := True;
  193. end;
  194. function TXMLReader.ExpectName: String; // [5]
  195. begin
  196. if not (buf[0] in (Letter + ['_', ':'])) then
  197. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  198. Result := buf[0];
  199. Inc(buf);
  200. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  201. end;
  202. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  203. var
  204. strdel: array[0..1] of Char;
  205. s: String;
  206. begin
  207. if (buf[0] <> '''') and (buf[0] <> '"') then
  208. RaiseExc('Expected quotation marks');
  209. strdel[0] := buf[0];
  210. strdel[1] := #0;
  211. Inc(buf);
  212. SetLength(s, 0);
  213. while not CheckFor(strdel) do
  214. if not ParseReference(attr) then begin
  215. s := s + buf[0];
  216. Inc(buf);
  217. end else begin
  218. if Length(s) > 0 then begin
  219. attr.AppendChild(doc.CreateTextNode(s));
  220. SetLength(s, 0);
  221. end;
  222. end;
  223. if Length(s) > 0 then
  224. //attr.AppendChild(doc.CreateTextNode(s));
  225. attr.NodeValue := s;
  226. end;
  227. function TXMLReader.ExpectPubidLiteral: String;
  228. begin
  229. SetLength(Result, 0);
  230. if CheckFor('''') then begin
  231. GetString(PubidChars - ['''']);
  232. ExpectString('''');
  233. end else if CheckFor('"') then begin
  234. GetString(PubidChars - ['"']);
  235. ExpectString('"');
  236. end else
  237. RaiseExc('Expected quotation marks');
  238. end;
  239. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  240. var
  241. comment: String;
  242. begin
  243. if CheckFor('<!--') then begin
  244. SetLength(comment, 0);
  245. while (buf[0] <> #0) and (buf[1] <> #0) and
  246. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  247. comment := comment + buf[0];
  248. Inc(buf);
  249. end;
  250. AOwner.AppendChild(doc.CreateComment(comment));
  251. ExpectString('-->');
  252. Result := True;
  253. end else
  254. Result := False;
  255. end;
  256. function TXMLReader.ParsePI: Boolean; // [16]
  257. var
  258. checkbuf: array[0..3] of char;
  259. begin
  260. if CheckFor('<?') then begin
  261. StrLCopy(checkbuf, buf, 3);
  262. if UpCase(StrPas(checkbuf)) = 'XML' then
  263. RaiseExc('"<?xml" processing instruction not allowed here');
  264. ExpectName;
  265. if SkipWhitespace then
  266. while (buf[0] <> #0) and (buf[1] <> #0) and not
  267. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  268. ExpectString('?>');
  269. Result := True;
  270. end else
  271. Result := False;
  272. end;
  273. procedure TXMLReader.ExpectProlog; // [22]
  274. procedure ParseVersionNum;
  275. begin
  276. doc.XMLVersion :=
  277. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  278. end;
  279. procedure ParseDoctypeDecls;
  280. begin
  281. repeat
  282. SkipWhitespace;
  283. until not (ParseMarkupDecl or ParsePEReference);
  284. ExpectString(']');
  285. end;
  286. var
  287. DocType: TXMLReaderDocumentType;
  288. begin
  289. if CheckFor('<?xml') then
  290. begin
  291. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  292. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  293. SkipWhitespace;
  294. ExpectString('version');
  295. ParseEq;
  296. if buf[0] = '''' then
  297. begin
  298. Inc(buf);
  299. ParseVersionNum;
  300. ExpectString('''');
  301. end else if buf[0] = '"' then
  302. begin
  303. Inc(buf);
  304. ParseVersionNum;
  305. ExpectString('"');
  306. end else
  307. RaiseExc('Expected single or double quotation mark');
  308. // EncodingDecl?
  309. ParseEncodingDecl;
  310. // SDDecl?
  311. SkipWhitespace;
  312. if CheckFor('standalone') then
  313. begin
  314. ExpectEq;
  315. if buf[0] = '''' then
  316. begin
  317. Inc(buf);
  318. if not (CheckFor('yes''') or CheckFor('no''')) then
  319. RaiseExc('Expected ''yes'' or ''no''');
  320. end else if buf[0] = '''' then
  321. begin
  322. Inc(buf);
  323. if not (CheckFor('yes"') or CheckFor('no"')) then
  324. RaiseExc('Expected "yes" or "no"');
  325. end;
  326. SkipWhitespace;
  327. end;
  328. ExpectString('?>');
  329. end;
  330. // Check for "Misc*"
  331. ParseMisc(doc);
  332. // Check for "(doctypedecl Misc*)?" [28]
  333. if CheckFor('<!DOCTYPE') then
  334. begin
  335. DocType := TXMLReaderDocumentType.Create(doc);
  336. doc.SetDocType(DocType);
  337. SkipWhitespace;
  338. DocType.Name := ExpectName;
  339. SkipWhitespace;
  340. if CheckFor('[') then
  341. begin
  342. ParseDoctypeDecls;
  343. SkipWhitespace;
  344. ExpectString('>');
  345. end else if not CheckFor('>') then
  346. begin
  347. ParseExternalID;
  348. SkipWhitespace;
  349. if CheckFor('[') then
  350. begin
  351. ParseDoctypeDecls;
  352. SkipWhitespace;
  353. end;
  354. ExpectString('>');
  355. end;
  356. ParseMisc(doc);
  357. end;
  358. end;
  359. function TXMLReader.ParseEq: Boolean; // [25]
  360. var
  361. savedbuf: PChar;
  362. begin
  363. savedbuf := buf;
  364. SkipWhitespace;
  365. if buf[0] = '=' then begin
  366. Inc(buf);
  367. SkipWhitespace;
  368. Result := True;
  369. end else begin
  370. buf := savedbuf;
  371. Result := False;
  372. end;
  373. end;
  374. procedure TXMLReader.ExpectEq;
  375. begin
  376. if not ParseEq then
  377. RaiseExc('Expected "="');
  378. end;
  379. // Parse "Misc*":
  380. // Misc ::= Comment | PI | S
  381. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  382. begin
  383. repeat
  384. SkipWhitespace;
  385. until not (ParseComment(AOwner) or ParsePI);
  386. end;
  387. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  388. function ParseElementDecl: Boolean; // [45]
  389. procedure ExpectChoiceOrSeq; // [49], [50]
  390. procedure ExpectCP; // [48]
  391. begin
  392. if CheckFor('(') then
  393. ExpectChoiceOrSeq
  394. else
  395. ExpectName;
  396. if CheckFor('?') then
  397. else if CheckFor('*') then
  398. else if CheckFor('+') then;
  399. end;
  400. var
  401. delimiter: Char;
  402. begin
  403. SkipWhitespace;
  404. ExpectCP;
  405. SkipWhitespace;
  406. delimiter := #0;
  407. while not CheckFor(')') do begin
  408. if delimiter = #0 then begin
  409. if (buf[0] = '|') or (buf[0] = ',') then
  410. delimiter := buf[0]
  411. else
  412. RaiseExc('Expected "|" or ","');
  413. Inc(buf);
  414. end else
  415. ExpectString(delimiter);
  416. SkipWhitespace;
  417. ExpectCP;
  418. end;
  419. end;
  420. begin
  421. if CheckFor('<!ELEMENT') then begin
  422. ExpectWhitespace;
  423. ExpectName;
  424. ExpectWhitespace;
  425. // Get contentspec [46]
  426. if CheckFor('EMPTY') then
  427. else if CheckFor('ANY') then
  428. else if CheckFor('(') then begin
  429. SkipWhitespace;
  430. if CheckFor('#PCDATA') then begin
  431. // Parse Mixed section [51]
  432. SkipWhitespace;
  433. if not CheckFor(')') then
  434. repeat
  435. ExpectString('|');
  436. SkipWhitespace;
  437. ExpectName;
  438. until CheckFor(')*');
  439. end else begin
  440. // Parse Children section [47]
  441. ExpectChoiceOrSeq;
  442. if CheckFor('?') then
  443. else if CheckFor('*') then
  444. else if CheckFor('+') then;
  445. end;
  446. end else
  447. RaiseExc('Invalid content specification');
  448. SkipWhitespace;
  449. ExpectString('>');
  450. Result := True;
  451. end else
  452. Result := False;
  453. end;
  454. function ParseAttlistDecl: Boolean; // [52]
  455. var
  456. attr: TDOMAttr;
  457. begin
  458. if CheckFor('<!ATTLIST') then begin
  459. ExpectWhitespace;
  460. ExpectName;
  461. SkipWhitespace;
  462. while not CheckFor('>') do begin
  463. ExpectName;
  464. ExpectWhitespace;
  465. // Get AttType [54], [55], [56]
  466. if CheckFor('CDATA') then
  467. else if CheckFor('ID') then
  468. else if CheckFor('IDREF') then
  469. else if CheckFor('IDREFS') then
  470. else if CheckFor('ENTITTY') then
  471. else if CheckFor('ENTITIES') then
  472. else if CheckFor('NMTOKEN') then
  473. else if CheckFor('NMTOKENS') then
  474. else if CheckFor('NOTATION') then begin // [57], [58]
  475. ExpectWhitespace;
  476. ExpectString('(');
  477. SkipWhitespace;
  478. ExpectName;
  479. SkipWhitespace;
  480. while not CheckFor(')') do begin
  481. ExpectString('|');
  482. SkipWhitespace;
  483. ExpectName;
  484. SkipWhitespace;
  485. end;
  486. end else if CheckFor('(') then begin // [59]
  487. SkipWhitespace;
  488. GetString(Nmtoken);
  489. SkipWhitespace;
  490. while not CheckFor(')') do begin
  491. ExpectString('|');
  492. SkipWhitespace;
  493. GetString(Nmtoken);
  494. SkipWhitespace;
  495. end;
  496. end else
  497. RaiseExc('Invalid tokenized type');
  498. ExpectWhitespace;
  499. // Get DefaultDecl [60]
  500. if CheckFor('#REQUIRED') then
  501. else if CheckFor('#IMPLIED') then
  502. else begin
  503. if CheckFor('#FIXED') then
  504. SkipWhitespace;
  505. attr := doc.CreateAttribute('');
  506. ExpectAttValue(attr);
  507. end;
  508. SkipWhitespace;
  509. end;
  510. Result := True;
  511. end else
  512. Result := False;
  513. end;
  514. function ParseEntityDecl: Boolean; // [70]
  515. var
  516. NewEntity: TDOMEntity;
  517. function ParseEntityValue: Boolean; // [9]
  518. var
  519. strdel: array[0..1] of Char;
  520. begin
  521. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  522. Result := False;
  523. exit;
  524. end;
  525. strdel[0] := buf[0];
  526. strdel[1] := #0;
  527. Inc(buf);
  528. while not CheckFor(strdel) do
  529. if ParsePEReference then
  530. else if ParseReference(NewEntity) then
  531. else begin
  532. Inc(buf); // Normal haracter
  533. end;
  534. Result := True;
  535. end;
  536. begin
  537. if CheckFor('<!ENTITY') then begin
  538. ExpectWhitespace;
  539. if CheckFor('%') then begin // [72]
  540. ExpectWhitespace;
  541. NewEntity := doc.CreateEntity(ExpectName);
  542. ExpectWhitespace;
  543. // Get PEDef [74]
  544. if ParseEntityValue then
  545. else if ParseExternalID then
  546. else
  547. RaiseExc('Expected entity value or external ID');
  548. end else begin // [71]
  549. NewEntity := doc.CreateEntity(ExpectName);
  550. ExpectWhitespace;
  551. // Get EntityDef [73]
  552. if ParseEntityValue then
  553. else begin
  554. ExpectExternalID;
  555. // Get NDataDecl [76]
  556. ExpectWhitespace;
  557. ExpectString('NDATA');
  558. ExpectWhitespace;
  559. ExpectName;
  560. end;
  561. end;
  562. SkipWhitespace;
  563. ExpectString('>');
  564. Result := True;
  565. end else
  566. Result := False;
  567. end;
  568. function ParseNotationDecl: Boolean; // [82]
  569. begin
  570. if CheckFor('<!NOTATION') then begin
  571. ExpectWhitespace;
  572. ExpectName;
  573. ExpectWhitespace;
  574. if ParseExternalID then
  575. else if CheckFor('PUBLIC') then begin // [83]
  576. ExpectWhitespace;
  577. ExpectPubidLiteral;
  578. end else
  579. RaiseExc('Expected external or public ID');
  580. SkipWhitespace;
  581. ExpectString('>');
  582. Result := True;
  583. end else
  584. Result := False;
  585. end;
  586. begin
  587. Result := False;
  588. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  589. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  590. Result := True;
  591. end;
  592. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  593. begin
  594. buf := ABuf;
  595. BufStart := ABuf;
  596. Filename := AFilename;
  597. doc := TXMLReaderDocument.Create;
  598. ParseMarkupDecl;
  599. {
  600. if buf[0] <> #0 then begin
  601. WriteLn('=== Unparsed: ===');
  602. //WriteLn(buf);
  603. WriteLn(StrLen(buf), ' chars');
  604. end;
  605. }
  606. end;
  607. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  608. var
  609. NewElem: TDOMElement;
  610. function ParseCharData: Boolean; // [14]
  611. var
  612. s: String;
  613. i: Integer;
  614. begin
  615. SetLength(s, 0);
  616. while not (buf[0] in [#0, '<', '&']) do begin
  617. s := s + buf[0];
  618. Inc(buf);
  619. end;
  620. if Length(s) > 0 then begin
  621. // Strip whitespace from end of s
  622. i := Length(s);
  623. while (i > 0) and (s[i] in [#10, #13, ' ']) do Dec(i);
  624. NewElem.AppendChild(doc.CreateTextNode(Copy(s, 1, i)));
  625. Result := True;
  626. end else
  627. Result := False;
  628. end;
  629. function ParseCDSect: Boolean; // [18]
  630. var
  631. cdata: String;
  632. begin
  633. if CheckFor('<![CDATA[') then begin
  634. SetLength(cdata, 0);
  635. while not CheckFor(']]>') do begin
  636. cdata := cdata + buf[0];
  637. Inc(buf);
  638. end;
  639. NewElem.AppendChild(doc.CreateCDATASection(cdata));
  640. Result := True;
  641. end else
  642. Result := False;
  643. end;
  644. var
  645. IsEmpty: Boolean;
  646. name: String;
  647. oldpos: PChar;
  648. attr: TDOMAttr;
  649. begin
  650. oldpos := buf;
  651. if CheckFor('<') then begin
  652. if not GetName(name) then begin
  653. buf := oldpos;
  654. Result := False;
  655. exit;
  656. end;
  657. NewElem := doc.CreateElement(name);
  658. AOwner.AppendChild(NewElem);
  659. SkipWhitespace;
  660. IsEmpty := False;
  661. while True do begin
  662. if CheckFor('/>') then begin
  663. IsEmpty := True;
  664. break;
  665. end;
  666. if CheckFor('>') then break;
  667. // Get Attribute [41]
  668. attr := doc.CreateAttribute(ExpectName);
  669. NewElem.Attributes.SetNamedItem(attr);
  670. ExpectEq;
  671. ExpectAttValue(attr);
  672. SkipWhitespace;
  673. end;
  674. if not IsEmpty then begin
  675. // Get content
  676. while SkipWhitespace or ParseCharData or ParseCDSect or ParsePI or
  677. ParseComment(NewElem) or ParseElement(NewElem) or
  678. ParseReference(NewElem) do;
  679. // Get ETag [42]
  680. ExpectString('</');
  681. ExpectName;
  682. SkipWhitespace;
  683. ExpectString('>');
  684. end;
  685. Result := True;
  686. end else
  687. Result := False;
  688. end;
  689. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  690. begin
  691. if not ParseElement(AOwner) then
  692. RaiseExc('Expected element');
  693. end;
  694. function TXMLReader.ParsePEReference: Boolean; // [69]
  695. begin
  696. if CheckFor('%') then begin
  697. ExpectName;
  698. ExpectString(';');
  699. Result := True;
  700. end else
  701. Result := False;
  702. end;
  703. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  704. begin
  705. if not CheckFor('&') then begin
  706. Result := False;
  707. exit;
  708. end;
  709. if CheckFor('#') then begin // Test for CharRef [66]
  710. if CheckFor('x') then begin
  711. // !!!: there must be at least one digit
  712. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  713. end else
  714. // !!!: there must be at least one digit
  715. while buf[0] in ['0'..'9'] do Inc(buf);
  716. end else
  717. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  718. ExpectString(';');
  719. Result := True;
  720. end;
  721. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  722. begin
  723. if not ParseReference(AOwner) then
  724. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  725. end;
  726. function TXMLReader.ParseExternalID: Boolean; // [75]
  727. function GetSystemLiteral: String;
  728. begin
  729. SetLength(Result, 0);
  730. if buf[0] = '''' then begin
  731. Inc(buf);
  732. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  733. Result := Result + buf[0];
  734. Inc(buf);
  735. end;
  736. ExpectString('''');
  737. end else if buf[0] = '"' then begin
  738. Inc(buf);
  739. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  740. Result := Result + buf[0];
  741. Inc(buf);
  742. end;
  743. ExpectString('"');
  744. end;
  745. end;
  746. begin
  747. if CheckFor('SYSTEM') then begin
  748. ExpectWhitespace;
  749. GetSystemLiteral;
  750. Result := True;
  751. end else if CheckFor('PUBLIC') then begin
  752. ExpectWhitespace;
  753. ExpectPubidLiteral;
  754. ExpectWhitespace;
  755. GetSystemLiteral;
  756. Result := True;
  757. end else
  758. Result := False;
  759. end;
  760. procedure TXMLReader.ExpectExternalID;
  761. begin
  762. if not ParseExternalID then
  763. RaiseExc('Expected external ID');
  764. end;
  765. function TXMLReader.ParseEncodingDecl: String; // [80]
  766. function ParseEncName: String;
  767. begin
  768. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  769. RaiseExc('Expected character (A-Z, a-z)');
  770. Result := buf[0];
  771. Inc(buf);
  772. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  773. end;
  774. begin
  775. SetLength(Result, 0);
  776. SkipWhitespace;
  777. if CheckFor('encoding') then begin
  778. ExpectEq;
  779. if buf[0] = '''' then begin
  780. Inc(buf);
  781. Result := ParseEncName;
  782. ExpectString('''');
  783. end else if buf[0] = '"' then begin
  784. Inc(buf);
  785. Result := ParseEncName;
  786. ExpectString('"');
  787. end;
  788. end;
  789. end;
  790. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  791. var
  792. reader: TXMLReader;
  793. buf: PChar;
  794. BufSize: LongInt;
  795. begin
  796. ADoc := nil;
  797. BufSize := FileSize(f) + 1;
  798. if BufSize <= 1 then exit;
  799. GetMem(buf, BufSize);
  800. BlockRead(f, buf^, BufSize - 1);
  801. buf[BufSize - 1] := #0;
  802. reader := TXMLReader.Create;
  803. reader.ProcessXML(buf, Filerec(f).name);
  804. FreeMem(buf, BufSize);
  805. ADoc := reader.doc;
  806. reader.Free;
  807. end;
  808. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  809. const AFilename: String);
  810. var
  811. reader: TXMLReader;
  812. buf: PChar;
  813. begin
  814. ADoc := nil;
  815. if f.Size = 0 then exit;
  816. GetMem(buf, f.Size + 1);
  817. f.Read(buf^, f.Size);
  818. buf[f.Size] := #0;
  819. reader := TXMLReader.Create;
  820. reader.ProcessXML(buf, AFilename);
  821. FreeMem(buf, f.Size + 1);
  822. ADoc := reader.doc;
  823. reader.Free;
  824. end;
  825. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  826. begin
  827. ReadXMLFile(ADoc, f, '<Stream>');
  828. end;
  829. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  830. var
  831. stream: TFileStream;
  832. begin
  833. ADoc := nil;
  834. stream := TFileStream.Create(AFilename, fmOpenRead);
  835. try
  836. ReadXMLFile(ADoc, stream, AFilename);
  837. finally
  838. stream.Free;
  839. end;
  840. end;
  841. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  842. var
  843. reader: TXMLReader;
  844. buf: PChar;
  845. BufSize: LongInt;
  846. begin
  847. ADoc := nil;
  848. BufSize := FileSize(f) + 1;
  849. if BufSize <= 1 then exit;
  850. GetMem(buf, BufSize + 1);
  851. BlockRead(f, buf^, BufSize - 1);
  852. buf[BufSize - 1] := #0;
  853. reader := TXMLReader.Create;
  854. reader.ProcessDTD(buf, Filerec(f).name);
  855. FreeMem(buf, BufSize);
  856. ADoc := reader.doc;
  857. reader.Free;
  858. end;
  859. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  860. const AFilename: String);
  861. var
  862. reader: TXMLReader;
  863. buf: PChar;
  864. begin
  865. ADoc := nil;
  866. if f.Size = 0 then exit;
  867. GetMem(buf, f.Size + 1);
  868. f.Read(buf^, f.Size);
  869. buf[f.Size] := #0;
  870. reader := TXMLReader.Create;
  871. reader.ProcessDTD(buf, AFilename);
  872. FreeMem(buf, f.Size + 1);
  873. ADoc := reader.doc;
  874. reader.Free;
  875. end;
  876. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  877. begin
  878. ReadDTDFile(ADoc, f, '<Stream>');
  879. end;
  880. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  881. var
  882. stream: TFileStream;
  883. begin
  884. ADoc := nil;
  885. stream := TFileStream.Create(AFilename, fmOpenRead);
  886. try
  887. ReadDTDFile(ADoc, stream, AFilename);
  888. finally
  889. stream.Free;
  890. end;
  891. end;
  892. end.
  893. {
  894. $Log$
  895. Revision 1.16 2000-04-20 14:15:45 sg
  896. * Minor bugfixes
  897. * Started support for DOM level 2
  898. Revision 1.15 2000/02/13 10:03:31 sg
  899. * Hopefully final fix for TDOMDocument.DocumentElement:
  900. - Reading this property always delivers the first element in the document
  901. - Removed SetDocumentElement. Use "AppendChild" or one of the other
  902. generic methods for TDOMNode instead.
  903. Revision 1.14 2000/01/30 22:19:13 sg
  904. * Made some optimizations and cosmetic changes
  905. }