xmlread.pp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. XML reading routines.
  5. Copyright (c) 1999-2002 by Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$MODE objfpc}
  13. {$H+}
  14. unit XMLRead;
  15. interface
  16. uses SysUtils, Classes, DOM;
  17. type
  18. EXMLReadError = class(Exception);
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  22. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  23. const AFilename: String);
  24. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  25. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  26. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  27. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  28. const AFilename: String);
  29. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  30. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  31. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  32. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  33. const AFilename: String);
  34. // =======================================================
  35. implementation
  36. const
  37. Letter = ['A'..'Z', 'a'..'z'];
  38. Digit = ['0'..'9'];
  39. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  40. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  41. '#', '@', '$', '_', '%'];
  42. WhitespaceChars: set of Char = [#9, #10, #13, ' '];
  43. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  44. type
  45. TXMLReaderDocument = class(TXMLDocument)
  46. public
  47. procedure SetDocType(ADocType: TDOMDocumentType);
  48. end;
  49. TXMLReaderDocumentType = class(TDOMDocumentType)
  50. public
  51. constructor Create(ADocument: TXMLReaderDocument);
  52. property Name: DOMString read FNodeName write FNodeName;
  53. end;
  54. TSetOfChar = set of Char;
  55. TXMLReader = class
  56. protected
  57. buf, BufStart: PChar;
  58. Filename: String;
  59. procedure RaiseExc(descr: String);
  60. function SkipWhitespace: Boolean;
  61. procedure ExpectWhitespace;
  62. procedure ExpectString(s: String);
  63. function CheckFor(s: PChar): Boolean;
  64. function GetString(ValidChars: TSetOfChar): String;
  65. function GetName(var s: String): Boolean;
  66. function ExpectName: String; // [5]
  67. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  68. function ExpectPubidLiteral: String; // [12]
  69. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  70. function ParsePI: Boolean; // [16]
  71. procedure ExpectProlog; // [22]
  72. function ParseEq: Boolean; // [25]
  73. procedure ExpectEq;
  74. procedure ParseMisc(AOwner: TDOMNode); // [27]
  75. function ParseMarkupDecl: Boolean; // [29]
  76. function ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  77. function ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  78. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  79. procedure ExpectElement(AOwner: TDOMNode);
  80. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  81. procedure ExpectReference(AOwner: TDOMNode);
  82. function ParsePEReference: Boolean; // [69]
  83. function ParseExternalID: Boolean; // [75]
  84. procedure ExpectExternalID;
  85. function ParseEncodingDecl: String; // [80]
  86. procedure ResolveEntities(RootNode: TDOMNode);
  87. public
  88. doc: TDOMDocument;
  89. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  90. procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; AFilename: String);
  91. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  92. end;
  93. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  94. begin
  95. FDocType := ADocType;
  96. end;
  97. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  98. begin
  99. inherited Create(ADocument);
  100. end;
  101. procedure TXMLReader.RaiseExc(descr: String);
  102. var
  103. apos: PChar;
  104. x, y: Integer;
  105. begin
  106. // find out the line in which the error occured
  107. apos := BufStart;
  108. x := 1;
  109. y := 1;
  110. while apos < buf do begin
  111. if apos[0] = #10 then begin
  112. Inc(y);
  113. x := 1;
  114. end else
  115. Inc(x);
  116. Inc(apos);
  117. end;
  118. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  119. IntToStr(x) + '): ' + descr);
  120. end;
  121. function TXMLReader.SkipWhitespace: Boolean;
  122. begin
  123. Result := False;
  124. while buf[0] in WhitespaceChars do
  125. begin
  126. Inc(buf);
  127. Result := True;
  128. end;
  129. end;
  130. procedure TXMLReader.ExpectWhitespace;
  131. begin
  132. if not SkipWhitespace then
  133. RaiseExc('Expected whitespace');
  134. end;
  135. procedure TXMLReader.ExpectString(s: String);
  136. var
  137. i: Integer;
  138. s2: PChar;
  139. s3: String;
  140. begin
  141. for i := 1 to Length(s) do
  142. if buf[i - 1] <> s[i] then begin
  143. GetMem(s2, Length(s) + 1);
  144. StrLCopy(s2, buf, Length(s));
  145. s3 := StrPas(s2);
  146. FreeMem(s2, Length(s) + 1);
  147. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  148. end;
  149. Inc(buf, Length(s));
  150. end;
  151. function TXMLReader.CheckFor(s: PChar): Boolean;
  152. begin
  153. if buf[0] = #0 then begin
  154. Result := False;
  155. exit;
  156. end;
  157. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  158. Inc(buf, StrLen(s));
  159. Result := True;
  160. end else
  161. Result := False;
  162. end;
  163. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  164. begin
  165. SetLength(Result, 0);
  166. while buf[0] in ValidChars do begin
  167. Result := Result + buf[0];
  168. Inc(buf);
  169. end;
  170. end;
  171. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  172. var
  173. LastNodeBeforeDoc: TDOMNode;
  174. begin
  175. buf := ABuf;
  176. BufStart := ABuf;
  177. Filename := AFilename;
  178. doc := TXMLReaderDocument.Create;
  179. ExpectProlog;
  180. LastNodeBeforeDoc := doc.LastChild;
  181. ExpectElement(doc);
  182. ParseMisc(doc);
  183. if buf[0] <> #0 then
  184. RaiseExc('Text after end of document element found');
  185. {
  186. if buf[0] <> #0 then begin
  187. WriteLn('=== Unparsed: ===');
  188. //WriteLn(buf);
  189. WriteLn(StrLen(buf), ' chars');
  190. end;
  191. }
  192. end;
  193. procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar;
  194. AFilename: String);
  195. begin
  196. buf := ABuf;
  197. BufStart := ABuf;
  198. Filename := AFilename;
  199. SkipWhitespace;
  200. while ParseCharData(AOwner) or ParseCDSect(AOwner) or ParsePI or
  201. ParseComment(AOwner) or ParseElement(AOwner) or
  202. ParseReference(AOwner) do
  203. SkipWhitespace;
  204. end;
  205. function TXMLReader.GetName(var s: String): Boolean; // [5]
  206. begin
  207. SetLength(s, 0);
  208. if not (buf[0] in (Letter + ['_', ':'])) then begin
  209. Result := False;
  210. exit;
  211. end;
  212. s := buf[0];
  213. Inc(buf);
  214. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  215. Result := True;
  216. end;
  217. function TXMLReader.ExpectName: String; // [5]
  218. begin
  219. if not (buf[0] in (Letter + ['_', ':'])) then
  220. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  221. Result := buf[0];
  222. Inc(buf);
  223. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  224. end;
  225. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  226. var
  227. s: String;
  228. procedure FlushStringBuffer;
  229. begin
  230. if Length(s) > 0 then
  231. begin
  232. attr.AppendChild(doc.CreateTextNode(s));
  233. SetLength(s, 0);
  234. end;
  235. end;
  236. var
  237. StrDel: array[0..1] of Char; // String delimiter
  238. begin
  239. if (buf[0] <> '''') and (buf[0] <> '"') then
  240. RaiseExc('Expected quotation marks');
  241. StrDel[0] := buf[0];
  242. StrDel[1] := #0;
  243. Inc(buf);
  244. SetLength(s, 0);
  245. while not CheckFor(StrDel) do
  246. if buf[0] = '&' then
  247. begin
  248. FlushStringBuffer;
  249. ParseReference(attr);
  250. end else
  251. begin
  252. s := s + buf[0];
  253. Inc(buf);
  254. end;
  255. FlushStringBuffer;
  256. ResolveEntities(Attr);
  257. end;
  258. function TXMLReader.ExpectPubidLiteral: String;
  259. begin
  260. SetLength(Result, 0);
  261. if CheckFor('''') then begin
  262. GetString(PubidChars - ['''']);
  263. ExpectString('''');
  264. end else if CheckFor('"') then begin
  265. GetString(PubidChars - ['"']);
  266. ExpectString('"');
  267. end else
  268. RaiseExc('Expected quotation marks');
  269. end;
  270. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  271. var
  272. comment: String;
  273. begin
  274. if CheckFor('<!--') then begin
  275. SetLength(comment, 0);
  276. while (buf[0] <> #0) and (buf[1] <> #0) and
  277. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  278. comment := comment + buf[0];
  279. Inc(buf);
  280. end;
  281. AOwner.AppendChild(doc.CreateComment(comment));
  282. ExpectString('-->');
  283. Result := True;
  284. end else
  285. Result := False;
  286. end;
  287. function TXMLReader.ParsePI: Boolean; // [16]
  288. var
  289. checkbuf: array[0..3] of char;
  290. begin
  291. if CheckFor('<?') then begin
  292. StrLCopy(checkbuf, buf, 3);
  293. if UpCase(StrPas(checkbuf)) = 'XML' then
  294. RaiseExc('"<?xml" processing instruction not allowed here');
  295. ExpectName;
  296. if SkipWhitespace then
  297. while (buf[0] <> #0) and (buf[1] <> #0) and not
  298. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  299. ExpectString('?>');
  300. Result := True;
  301. end else
  302. Result := False;
  303. end;
  304. procedure TXMLReader.ExpectProlog; // [22]
  305. procedure ParseVersionNum;
  306. begin
  307. if doc.InheritsFrom(TXMLDocument) then
  308. TXMLDocument(doc).XMLVersion :=
  309. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  310. end;
  311. procedure ParseDoctypeDecls;
  312. begin
  313. repeat
  314. SkipWhitespace;
  315. until not (ParseMarkupDecl or ParsePEReference);
  316. ExpectString(']');
  317. end;
  318. var
  319. DocType: TXMLReaderDocumentType;
  320. begin
  321. if CheckFor('<?xml') then
  322. begin
  323. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  324. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  325. SkipWhitespace;
  326. ExpectString('version');
  327. ParseEq;
  328. if buf[0] = '''' then
  329. begin
  330. Inc(buf);
  331. ParseVersionNum;
  332. ExpectString('''');
  333. end else if buf[0] = '"' then
  334. begin
  335. Inc(buf);
  336. ParseVersionNum;
  337. ExpectString('"');
  338. end else
  339. RaiseExc('Expected single or double quotation mark');
  340. // EncodingDecl?
  341. ParseEncodingDecl;
  342. // SDDecl?
  343. SkipWhitespace;
  344. if CheckFor('standalone') then
  345. begin
  346. ExpectEq;
  347. if buf[0] = '''' then
  348. begin
  349. Inc(buf);
  350. if not (CheckFor('yes''') or CheckFor('no''')) then
  351. RaiseExc('Expected ''yes'' or ''no''');
  352. end else if buf[0] = '''' then
  353. begin
  354. Inc(buf);
  355. if not (CheckFor('yes"') or CheckFor('no"')) then
  356. RaiseExc('Expected "yes" or "no"');
  357. end;
  358. SkipWhitespace;
  359. end;
  360. ExpectString('?>');
  361. end;
  362. // Check for "Misc*"
  363. ParseMisc(doc);
  364. // Check for "(doctypedecl Misc*)?" [28]
  365. if CheckFor('<!DOCTYPE') then
  366. begin
  367. DocType := TXMLReaderDocumentType.Create(doc as TXMLReaderDocument);
  368. if doc.InheritsFrom(TXMLReaderDocument) then
  369. TXMLReaderDocument(doc).SetDocType(DocType);
  370. SkipWhitespace;
  371. DocType.Name := ExpectName;
  372. SkipWhitespace;
  373. if CheckFor('[') then
  374. begin
  375. ParseDoctypeDecls;
  376. SkipWhitespace;
  377. ExpectString('>');
  378. end else if not CheckFor('>') then
  379. begin
  380. ParseExternalID;
  381. SkipWhitespace;
  382. if CheckFor('[') then
  383. begin
  384. ParseDoctypeDecls;
  385. SkipWhitespace;
  386. end;
  387. ExpectString('>');
  388. end;
  389. ParseMisc(doc);
  390. end;
  391. end;
  392. function TXMLReader.ParseEq: Boolean; // [25]
  393. var
  394. savedbuf: PChar;
  395. begin
  396. savedbuf := buf;
  397. SkipWhitespace;
  398. if buf[0] = '=' then begin
  399. Inc(buf);
  400. SkipWhitespace;
  401. Result := True;
  402. end else begin
  403. buf := savedbuf;
  404. Result := False;
  405. end;
  406. end;
  407. procedure TXMLReader.ExpectEq;
  408. begin
  409. if not ParseEq then
  410. RaiseExc('Expected "="');
  411. end;
  412. // Parse "Misc*":
  413. // Misc ::= Comment | PI | S
  414. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  415. begin
  416. repeat
  417. SkipWhitespace;
  418. until not (ParseComment(AOwner) or ParsePI);
  419. end;
  420. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  421. function ParseElementDecl: Boolean; // [45]
  422. procedure ExpectChoiceOrSeq; // [49], [50]
  423. procedure ExpectCP; // [48]
  424. begin
  425. if CheckFor('(') then
  426. ExpectChoiceOrSeq
  427. else
  428. ExpectName;
  429. if CheckFor('?') then
  430. else if CheckFor('*') then
  431. else if CheckFor('+') then;
  432. end;
  433. var
  434. delimiter: Char;
  435. begin
  436. SkipWhitespace;
  437. ExpectCP;
  438. SkipWhitespace;
  439. delimiter := #0;
  440. while not CheckFor(')') do begin
  441. if delimiter = #0 then begin
  442. if (buf[0] = '|') or (buf[0] = ',') then
  443. delimiter := buf[0]
  444. else
  445. RaiseExc('Expected "|" or ","');
  446. Inc(buf);
  447. end else
  448. ExpectString(delimiter);
  449. SkipWhitespace;
  450. ExpectCP;
  451. end;
  452. end;
  453. begin
  454. if CheckFor('<!ELEMENT') then begin
  455. ExpectWhitespace;
  456. ExpectName;
  457. ExpectWhitespace;
  458. // Get contentspec [46]
  459. if CheckFor('EMPTY') then
  460. else if CheckFor('ANY') then
  461. else if CheckFor('(') then begin
  462. SkipWhitespace;
  463. if CheckFor('#PCDATA') then begin
  464. // Parse Mixed section [51]
  465. SkipWhitespace;
  466. if not CheckFor(')') then
  467. repeat
  468. ExpectString('|');
  469. SkipWhitespace;
  470. ExpectName;
  471. until CheckFor(')*');
  472. end else begin
  473. // Parse Children section [47]
  474. ExpectChoiceOrSeq;
  475. if CheckFor('?') then
  476. else if CheckFor('*') then
  477. else if CheckFor('+') then;
  478. end;
  479. end else
  480. RaiseExc('Invalid content specification');
  481. SkipWhitespace;
  482. ExpectString('>');
  483. Result := True;
  484. end else
  485. Result := False;
  486. end;
  487. function ParseAttlistDecl: Boolean; // [52]
  488. var
  489. attr: TDOMAttr;
  490. begin
  491. if CheckFor('<!ATTLIST') then begin
  492. ExpectWhitespace;
  493. ExpectName;
  494. SkipWhitespace;
  495. while not CheckFor('>') do begin
  496. ExpectName;
  497. ExpectWhitespace;
  498. // Get AttType [54], [55], [56]
  499. if CheckFor('CDATA') then
  500. else if CheckFor('ID') then
  501. else if CheckFor('IDREF') then
  502. else if CheckFor('IDREFS') then
  503. else if CheckFor('ENTITTY') then
  504. else if CheckFor('ENTITIES') then
  505. else if CheckFor('NMTOKEN') then
  506. else if CheckFor('NMTOKENS') then
  507. else if CheckFor('NOTATION') then begin // [57], [58]
  508. ExpectWhitespace;
  509. ExpectString('(');
  510. SkipWhitespace;
  511. ExpectName;
  512. SkipWhitespace;
  513. while not CheckFor(')') do begin
  514. ExpectString('|');
  515. SkipWhitespace;
  516. ExpectName;
  517. SkipWhitespace;
  518. end;
  519. end else if CheckFor('(') then begin // [59]
  520. SkipWhitespace;
  521. GetString(Nmtoken);
  522. SkipWhitespace;
  523. while not CheckFor(')') do begin
  524. ExpectString('|');
  525. SkipWhitespace;
  526. GetString(Nmtoken);
  527. SkipWhitespace;
  528. end;
  529. end else
  530. RaiseExc('Invalid tokenized type');
  531. ExpectWhitespace;
  532. // Get DefaultDecl [60]
  533. if CheckFor('#REQUIRED') then
  534. else if CheckFor('#IMPLIED') then
  535. else begin
  536. if CheckFor('#FIXED') then
  537. SkipWhitespace;
  538. attr := doc.CreateAttribute('');
  539. ExpectAttValue(attr);
  540. end;
  541. SkipWhitespace;
  542. end;
  543. Result := True;
  544. end else
  545. Result := False;
  546. end;
  547. function ParseEntityDecl: Boolean; // [70]
  548. var
  549. NewEntity: TDOMEntity;
  550. function ParseEntityValue: Boolean; // [9]
  551. var
  552. strdel: array[0..1] of Char;
  553. begin
  554. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  555. Result := False;
  556. exit;
  557. end;
  558. strdel[0] := buf[0];
  559. strdel[1] := #0;
  560. Inc(buf);
  561. while not CheckFor(strdel) do
  562. if ParsePEReference then
  563. else if ParseReference(NewEntity) then
  564. else begin
  565. Inc(buf); // Normal haracter
  566. end;
  567. Result := True;
  568. end;
  569. begin
  570. if CheckFor('<!ENTITY') then begin
  571. ExpectWhitespace;
  572. if CheckFor('%') then begin // [72]
  573. ExpectWhitespace;
  574. NewEntity := doc.CreateEntity(ExpectName);
  575. ExpectWhitespace;
  576. // Get PEDef [74]
  577. if ParseEntityValue then
  578. else if ParseExternalID then
  579. else
  580. RaiseExc('Expected entity value or external ID');
  581. end else begin // [71]
  582. NewEntity := doc.CreateEntity(ExpectName);
  583. ExpectWhitespace;
  584. // Get EntityDef [73]
  585. if ParseEntityValue then
  586. else begin
  587. ExpectExternalID;
  588. // Get NDataDecl [76]
  589. ExpectWhitespace;
  590. ExpectString('NDATA');
  591. ExpectWhitespace;
  592. ExpectName;
  593. end;
  594. end;
  595. SkipWhitespace;
  596. ExpectString('>');
  597. Result := True;
  598. end else
  599. Result := False;
  600. end;
  601. function ParseNotationDecl: Boolean; // [82]
  602. begin
  603. if CheckFor('<!NOTATION') then begin
  604. ExpectWhitespace;
  605. ExpectName;
  606. ExpectWhitespace;
  607. if ParseExternalID then
  608. else if CheckFor('PUBLIC') then begin // [83]
  609. ExpectWhitespace;
  610. ExpectPubidLiteral;
  611. end else
  612. RaiseExc('Expected external or public ID');
  613. SkipWhitespace;
  614. ExpectString('>');
  615. Result := True;
  616. end else
  617. Result := False;
  618. end;
  619. begin
  620. Result := False;
  621. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  622. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  623. Result := True;
  624. end;
  625. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  626. begin
  627. buf := ABuf;
  628. BufStart := ABuf;
  629. Filename := AFilename;
  630. doc := TXMLReaderDocument.Create;
  631. ParseMarkupDecl;
  632. {
  633. if buf[0] <> #0 then begin
  634. WriteLn('=== Unparsed: ===');
  635. //WriteLn(buf);
  636. WriteLn(StrLen(buf), ' chars');
  637. end;
  638. }
  639. end;
  640. function TXMLReader.ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  641. var
  642. s: String;
  643. i: Integer;
  644. begin
  645. SetLength(s, 0);
  646. while not (buf[0] in [#0, '<', '&']) do
  647. begin
  648. s := s + buf[0];
  649. Inc(buf);
  650. end;
  651. if Length(s) > 0 then
  652. begin
  653. // Check if s has non-whitespace content
  654. i := Length(s);
  655. while (i > 0) and (s[i] in WhitespaceChars) do
  656. Dec(i);
  657. if i > 0 then
  658. AOwner.AppendChild(doc.CreateTextNode(s));
  659. Result := True;
  660. end else
  661. Result := False;
  662. end;
  663. function TXMLReader.ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  664. var
  665. cdata: String;
  666. begin
  667. if CheckFor('<![CDATA[') then
  668. begin
  669. SetLength(cdata, 0);
  670. while not CheckFor(']]>') do
  671. begin
  672. cdata := cdata + buf[0];
  673. Inc(buf);
  674. end;
  675. AOwner.AppendChild(doc.CreateCDATASection(cdata));
  676. Result := True;
  677. end else
  678. Result := False;
  679. end;
  680. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  681. var
  682. NewElem: TDOMElement;
  683. var
  684. IsEmpty: Boolean;
  685. name: String;
  686. oldpos: PChar;
  687. attr: TDOMAttr;
  688. begin
  689. oldpos := buf;
  690. if CheckFor('<') then
  691. begin
  692. if not GetName(name) then
  693. begin
  694. buf := oldpos;
  695. Result := False;
  696. exit;
  697. end;
  698. NewElem := doc.CreateElement(name);
  699. AOwner.AppendChild(NewElem);
  700. SkipWhitespace;
  701. IsEmpty := False;
  702. while True do
  703. begin
  704. if CheckFor('/>') then
  705. begin
  706. IsEmpty := True;
  707. break;
  708. end;
  709. if CheckFor('>') then
  710. break;
  711. // Get Attribute [41]
  712. attr := doc.CreateAttribute(ExpectName);
  713. NewElem.Attributes.SetNamedItem(attr);
  714. ExpectEq;
  715. ExpectAttValue(attr);
  716. SkipWhitespace;
  717. end;
  718. if not IsEmpty then
  719. begin
  720. // Get content
  721. SkipWhitespace;
  722. while ParseCharData(NewElem) or ParseCDSect(NewElem) or ParsePI or
  723. ParseComment(NewElem) or ParseElement(NewElem) or
  724. ParseReference(NewElem) do;
  725. // Get ETag [42]
  726. ExpectString('</');
  727. if ExpectName <> name then
  728. RaiseExc('Unmatching element end tag (expected "</' + name + '>")');
  729. SkipWhitespace;
  730. ExpectString('>');
  731. end;
  732. ResolveEntities(NewElem);
  733. Result := True;
  734. end else
  735. Result := False;
  736. end;
  737. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  738. begin
  739. if not ParseElement(AOwner) then
  740. RaiseExc('Expected element');
  741. end;
  742. function TXMLReader.ParsePEReference: Boolean; // [69]
  743. begin
  744. if CheckFor('%') then begin
  745. ExpectName;
  746. ExpectString(';');
  747. Result := True;
  748. end else
  749. Result := False;
  750. end;
  751. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  752. begin
  753. if not CheckFor('&') then begin
  754. Result := False;
  755. exit;
  756. end;
  757. if CheckFor('#') then begin // Test for CharRef [66]
  758. if CheckFor('x') then begin
  759. // !!!: there must be at least one digit
  760. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  761. end else
  762. // !!!: there must be at least one digit
  763. while buf[0] in ['0'..'9'] do Inc(buf);
  764. end else
  765. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  766. ExpectString(';');
  767. Result := True;
  768. end;
  769. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  770. begin
  771. if not ParseReference(AOwner) then
  772. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  773. end;
  774. function TXMLReader.ParseExternalID: Boolean; // [75]
  775. function GetSystemLiteral: String;
  776. begin
  777. SetLength(Result, 0);
  778. if buf[0] = '''' then begin
  779. Inc(buf);
  780. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  781. Result := Result + buf[0];
  782. Inc(buf);
  783. end;
  784. ExpectString('''');
  785. end else if buf[0] = '"' then begin
  786. Inc(buf);
  787. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  788. Result := Result + buf[0];
  789. Inc(buf);
  790. end;
  791. ExpectString('"');
  792. end;
  793. end;
  794. begin
  795. if CheckFor('SYSTEM') then begin
  796. ExpectWhitespace;
  797. GetSystemLiteral;
  798. Result := True;
  799. end else if CheckFor('PUBLIC') then begin
  800. ExpectWhitespace;
  801. ExpectPubidLiteral;
  802. ExpectWhitespace;
  803. GetSystemLiteral;
  804. Result := True;
  805. end else
  806. Result := False;
  807. end;
  808. procedure TXMLReader.ExpectExternalID;
  809. begin
  810. if not ParseExternalID then
  811. RaiseExc('Expected external ID');
  812. end;
  813. function TXMLReader.ParseEncodingDecl: String; // [80]
  814. function ParseEncName: String;
  815. begin
  816. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  817. RaiseExc('Expected character (A-Z, a-z)');
  818. Result := buf[0];
  819. Inc(buf);
  820. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  821. end;
  822. begin
  823. SetLength(Result, 0);
  824. SkipWhitespace;
  825. if CheckFor('encoding') then begin
  826. ExpectEq;
  827. if buf[0] = '''' then begin
  828. Inc(buf);
  829. Result := ParseEncName;
  830. ExpectString('''');
  831. end else if buf[0] = '"' then begin
  832. Inc(buf);
  833. Result := ParseEncName;
  834. ExpectString('"');
  835. end;
  836. end;
  837. end;
  838. { Currently, this method will only resolve the entities which are
  839. predefined in XML: }
  840. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  841. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: String);
  842. var
  843. PrevSibling, NextSibling: TDOMNode;
  844. begin
  845. PrevSibling := EntityNode.PreviousSibling;
  846. NextSibling := EntityNode.NextSibling;
  847. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  848. begin
  849. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  850. RootNode.RemoveChild(EntityNode);
  851. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  852. begin
  853. TDOMCharacterData(PrevSibling).AppendData(
  854. TDOMCharacterData(NextSibling).Data);
  855. RootNode.RemoveChild(NextSibling);
  856. end
  857. end else
  858. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  859. begin
  860. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  861. RootNode.RemoveChild(EntityNode);
  862. end else
  863. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  864. end;
  865. var
  866. Node, NextSibling: TDOMNode;
  867. begin
  868. Node := RootNode.FirstChild;
  869. while Assigned(Node) do
  870. begin
  871. NextSibling := Node.NextSibling;
  872. if Node.NodeType = ENTITY_REFERENCE_NODE then
  873. if Node.NodeName = 'amp' then
  874. ReplaceEntityRef(Node, '&')
  875. else if Node.NodeName = 'apos' then
  876. ReplaceEntityRef(Node, '''')
  877. else if Node.NodeName = 'gt' then
  878. ReplaceEntityRef(Node, '>')
  879. else if Node.NodeName = 'lt' then
  880. ReplaceEntityRef(Node, '<')
  881. else if Node.NodeName = 'quot' then
  882. ReplaceEntityRef(Node, '"');
  883. Node := NextSibling;
  884. end;
  885. end;
  886. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  887. var
  888. Reader: TXMLReader;
  889. buf: PChar;
  890. BufSize: LongInt;
  891. begin
  892. ADoc := nil;
  893. BufSize := FileSize(f) + 1;
  894. if BufSize <= 1 then
  895. exit;
  896. GetMem(buf, BufSize);
  897. BlockRead(f, buf^, BufSize - 1);
  898. buf[BufSize - 1] := #0;
  899. Reader := TXMLReader.Create;
  900. Reader.ProcessXML(buf, Filerec(f).name);
  901. FreeMem(buf, BufSize);
  902. ADoc := TXMLDocument(Reader.doc);
  903. Reader.Free;
  904. end;
  905. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  906. const AFilename: String);
  907. var
  908. Reader: TXMLReader;
  909. buf: PChar;
  910. begin
  911. ADoc := nil;
  912. if f.Size = 0 then
  913. exit;
  914. GetMem(buf, f.Size + 1);
  915. f.Read(buf^, f.Size);
  916. buf[f.Size] := #0;
  917. Reader := TXMLReader.Create;
  918. Reader.ProcessXML(buf, AFilename);
  919. FreeMem(buf, f.Size + 1);
  920. ADoc := TXMLDocument(Reader.doc);
  921. Reader.Free;
  922. end;
  923. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  924. begin
  925. ReadXMLFile(ADoc, f, '<Stream>');
  926. end;
  927. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  928. var
  929. Stream: TFileStream;
  930. begin
  931. ADoc := nil;
  932. Stream := TFileStream.Create(AFilename, fmOpenRead);
  933. try
  934. ReadXMLFile(ADoc, Stream, AFilename);
  935. finally
  936. Stream.Free;
  937. end;
  938. end;
  939. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  940. var
  941. Reader: TXMLReader;
  942. buf: PChar;
  943. BufSize: LongInt;
  944. begin
  945. BufSize := FileSize(f) + 1;
  946. if BufSize <= 1 then
  947. exit;
  948. GetMem(buf, BufSize);
  949. BlockRead(f, buf^, BufSize - 1);
  950. buf[BufSize - 1] := #0;
  951. Reader := TXMLReader.Create;
  952. Reader.Doc := AParentNode.OwnerDocument;
  953. Reader.ProcessFragment(AParentNode, buf, Filerec(f).name);
  954. FreeMem(buf, BufSize);
  955. Reader.Free;
  956. end;
  957. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  958. const AFilename: String);
  959. var
  960. Reader: TXMLReader;
  961. buf: PChar;
  962. begin
  963. if f.Size = 0 then
  964. exit;
  965. GetMem(buf, f.Size + 1);
  966. f.Read(buf^, f.Size);
  967. buf[f.Size] := #0;
  968. Reader := TXMLReader.Create;
  969. Reader.Doc := AParentNode.OwnerDocument;
  970. Reader.ProcessFragment(AParentNode, buf, AFilename);
  971. FreeMem(buf, f.Size + 1);
  972. Reader.Free;
  973. end;
  974. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  975. begin
  976. ReadXMLFragment(AParentNode, f, '<Stream>');
  977. end;
  978. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  979. var
  980. Stream: TFileStream;
  981. begin
  982. Stream := TFileStream.Create(AFilename, fmOpenRead);
  983. try
  984. ReadXMLFragment(AParentNode, Stream, AFilename);
  985. finally
  986. Stream.Free;
  987. end;
  988. end;
  989. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  990. var
  991. Reader: TXMLReader;
  992. buf: PChar;
  993. BufSize: LongInt;
  994. begin
  995. ADoc := nil;
  996. BufSize := FileSize(f) + 1;
  997. if BufSize <= 1 then exit;
  998. GetMem(buf, BufSize + 1);
  999. BlockRead(f, buf^, BufSize - 1);
  1000. buf[BufSize - 1] := #0;
  1001. Reader := TXMLReader.Create;
  1002. Reader.ProcessDTD(buf, Filerec(f).name);
  1003. FreeMem(buf, BufSize);
  1004. ADoc := TXMLDocument(Reader.doc);
  1005. Reader.Free;
  1006. end;
  1007. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  1008. const AFilename: String);
  1009. var
  1010. Reader: TXMLReader;
  1011. buf: PChar;
  1012. begin
  1013. ADoc := nil;
  1014. if f.Size = 0 then exit;
  1015. GetMem(buf, f.Size + 1);
  1016. f.Read(buf^, f.Size);
  1017. buf[f.Size] := #0;
  1018. Reader := TXMLReader.Create;
  1019. Reader.ProcessDTD(buf, AFilename);
  1020. FreeMem(buf, f.Size + 1);
  1021. ADoc := TXMLDocument(Reader.doc);
  1022. Reader.Free;
  1023. end;
  1024. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  1025. begin
  1026. ReadDTDFile(ADoc, f, '<Stream>');
  1027. end;
  1028. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  1029. var
  1030. stream: TFileStream;
  1031. begin
  1032. ADoc := nil;
  1033. stream := TFileStream.Create(AFilename, fmOpenRead);
  1034. try
  1035. ReadDTDFile(ADoc, stream, AFilename);
  1036. finally
  1037. stream.Free;
  1038. end;
  1039. end;
  1040. end.
  1041. {
  1042. $Log$
  1043. Revision 1.7 2002-09-21 19:22:38 sg
  1044. * Added procedures to process XML fragments only (e.g. for merging them
  1045. into an existing DOM document)
  1046. Revision 1.6 2002/09/07 15:15:29 peter
  1047. * old logs removed and tabs fixed
  1048. }