xmlread.pp 29 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240
  1. {
  2. $Id$
  3. This file is part of the Free Component Library
  4. XML reading routines.
  5. Copyright (c) 1999-2003 by Sebastian Guenther, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLRead;
  13. interface
  14. uses SysUtils, Classes, DOM;
  15. type
  16. EXMLReadError = class(Exception);
  17. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  18. overload;
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File); overload;
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream); overload;
  21. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  22. const AFilename: String); overload;
  23. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  24. overload;
  25. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File); overload;
  26. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
  27. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  28. const AFilename: String); overload;
  29. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  30. overload;
  31. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File); overload;
  32. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream); overload;
  33. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  34. const AFilename: String); overload;
  35. // =======================================================
  36. implementation
  37. const
  38. Letter = ['A'..'Z', 'a'..'z'];
  39. Digit = ['0'..'9'];
  40. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  41. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  42. '#', '@', '$', '_', '%'];
  43. WhitespaceChars: set of Char = [#9, #10, #13, ' '];
  44. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  45. type
  46. TXMLReaderDocument = class(TXMLDocument)
  47. public
  48. procedure SetDocType(ADocType: TDOMDocumentType);
  49. end;
  50. TXMLReaderDocumentType = class(TDOMDocumentType)
  51. public
  52. constructor Create(ADocument: TXMLReaderDocument);
  53. property Name: DOMString read FNodeName write FNodeName;
  54. end;
  55. TSetOfChar = set of Char;
  56. TXMLReader = class
  57. protected
  58. buf, BufStart: PChar;
  59. Filename: String;
  60. procedure RaiseExc(descr: String);
  61. function SkipWhitespace: Boolean;
  62. procedure ExpectWhitespace;
  63. procedure ExpectString(s: String);
  64. function CheckFor(s: PChar): Boolean;
  65. function GetString(ValidChars: TSetOfChar): String;
  66. function GetName(var s: String): Boolean;
  67. function ExpectName: String; // [5]
  68. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  69. function ExpectPubidLiteral: String; // [12]
  70. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  71. function ParsePI: Boolean; // [16]
  72. procedure ExpectProlog; // [22]
  73. function ParseEq: Boolean; // [25]
  74. procedure ExpectEq;
  75. procedure ParseMisc(AOwner: TDOMNode); // [27]
  76. function ParseMarkupDecl: Boolean; // [29]
  77. function ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  78. function ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  79. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  80. procedure ExpectElement(AOwner: TDOMNode);
  81. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  82. procedure ExpectReference(AOwner: TDOMNode);
  83. function ParsePEReference: Boolean; // [69]
  84. function ParseExternalID: Boolean; // [75]
  85. procedure ExpectExternalID;
  86. function ParseEncodingDecl: String; // [80]
  87. procedure ResolveEntities(RootNode: TDOMNode);
  88. public
  89. doc: TDOMDocument;
  90. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  91. procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; AFilename: String);
  92. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  93. end;
  94. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  95. begin
  96. FDocType := ADocType;
  97. end;
  98. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  99. begin
  100. inherited Create(ADocument);
  101. end;
  102. procedure TXMLReader.RaiseExc(descr: String);
  103. var
  104. apos: PChar;
  105. x, y: Integer;
  106. begin
  107. // find out the line in which the error occured
  108. apos := BufStart;
  109. x := 1;
  110. y := 1;
  111. while apos < buf do begin
  112. if apos[0] = #10 then begin
  113. Inc(y);
  114. x := 1;
  115. end else
  116. Inc(x);
  117. Inc(apos);
  118. end;
  119. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  120. IntToStr(x) + '): ' + descr);
  121. end;
  122. function TXMLReader.SkipWhitespace: Boolean;
  123. begin
  124. Result := False;
  125. while buf[0] in WhitespaceChars do
  126. begin
  127. Inc(buf);
  128. Result := True;
  129. end;
  130. end;
  131. procedure TXMLReader.ExpectWhitespace;
  132. begin
  133. if not SkipWhitespace then
  134. RaiseExc('Expected whitespace');
  135. end;
  136. procedure TXMLReader.ExpectString(s: String);
  137. var
  138. i: Integer;
  139. s2: PChar;
  140. s3: String;
  141. begin
  142. for i := 1 to Length(s) do
  143. if buf[i - 1] <> s[i] then begin
  144. GetMem(s2, Length(s) + 1);
  145. StrLCopy(s2, buf, Length(s));
  146. s3 := StrPas(s2);
  147. FreeMem(s2);
  148. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  149. end;
  150. Inc(buf, Length(s));
  151. end;
  152. function TXMLReader.CheckFor(s: PChar): Boolean;
  153. begin
  154. if buf[0] = #0 then begin
  155. Result := False;
  156. exit;
  157. end;
  158. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  159. Inc(buf, StrLen(s));
  160. Result := True;
  161. end else
  162. Result := False;
  163. end;
  164. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  165. begin
  166. SetLength(Result, 0);
  167. while buf[0] in ValidChars do begin
  168. Result := Result + buf[0];
  169. Inc(buf);
  170. end;
  171. end;
  172. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  173. begin
  174. buf := ABuf;
  175. BufStart := ABuf;
  176. Filename := AFilename;
  177. doc := TXMLReaderDocument.Create;
  178. ExpectProlog;
  179. ExpectElement(doc);
  180. ParseMisc(doc);
  181. if buf[0] <> #0 then
  182. RaiseExc('Text after end of document element found');
  183. end;
  184. procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar;
  185. AFilename: String);
  186. begin
  187. buf := ABuf;
  188. BufStart := ABuf;
  189. Filename := AFilename;
  190. SkipWhitespace;
  191. while ParseCharData(AOwner) or ParseCDSect(AOwner) or ParsePI or
  192. ParseComment(AOwner) or ParseElement(AOwner) or
  193. ParseReference(AOwner) do
  194. SkipWhitespace;
  195. end;
  196. function TXMLReader.GetName(var s: String): Boolean; // [5]
  197. begin
  198. SetLength(s, 0);
  199. if not (buf[0] in (Letter + ['_', ':'])) then begin
  200. Result := False;
  201. exit;
  202. end;
  203. s := buf[0];
  204. Inc(buf);
  205. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  206. Result := True;
  207. end;
  208. function TXMLReader.ExpectName: String; // [5]
  209. begin
  210. if not (buf[0] in (Letter + ['_', ':'])) then
  211. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  212. Result := buf[0];
  213. Inc(buf);
  214. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  215. end;
  216. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  217. var
  218. s: String;
  219. procedure FlushStringBuffer;
  220. begin
  221. if Length(s) > 0 then
  222. begin
  223. attr.AppendChild(doc.CreateTextNode(s));
  224. SetLength(s, 0);
  225. end;
  226. end;
  227. var
  228. StrDel: array[0..1] of Char; // String delimiter
  229. begin
  230. if (buf[0] <> '''') and (buf[0] <> '"') then
  231. RaiseExc('Expected quotation marks');
  232. StrDel[0] := buf[0];
  233. StrDel[1] := #0;
  234. Inc(buf);
  235. SetLength(s, 0);
  236. while not CheckFor(StrDel) do
  237. if buf[0] = '&' then
  238. begin
  239. FlushStringBuffer;
  240. ParseReference(attr);
  241. end else
  242. begin
  243. s := s + buf[0];
  244. Inc(buf);
  245. end;
  246. FlushStringBuffer;
  247. ResolveEntities(Attr);
  248. end;
  249. function TXMLReader.ExpectPubidLiteral: String;
  250. begin
  251. SetLength(Result, 0);
  252. if CheckFor('''') then begin
  253. GetString(PubidChars - ['''']);
  254. ExpectString('''');
  255. end else if CheckFor('"') then begin
  256. GetString(PubidChars - ['"']);
  257. ExpectString('"');
  258. end else
  259. RaiseExc('Expected quotation marks');
  260. end;
  261. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  262. var
  263. comment: String;
  264. begin
  265. if CheckFor('<!--') then begin
  266. SetLength(comment, 0);
  267. while (buf[0] <> #0) and (buf[1] <> #0) and
  268. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  269. comment := comment + buf[0];
  270. Inc(buf);
  271. end;
  272. AOwner.AppendChild(doc.CreateComment(comment));
  273. ExpectString('-->');
  274. Result := True;
  275. end else
  276. Result := False;
  277. end;
  278. function TXMLReader.ParsePI: Boolean; // [16]
  279. var
  280. checkbuf: array[0..3] of char;
  281. begin
  282. if CheckFor('<?') then begin
  283. StrLCopy(checkbuf, buf, 3);
  284. if UpperCase(StrPas(checkbuf)) = 'XML' then
  285. RaiseExc('"<?xml" processing instruction not allowed here');
  286. ExpectName;
  287. if SkipWhitespace then
  288. while (buf[0] <> #0) and (buf[1] <> #0) and not
  289. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  290. ExpectString('?>');
  291. Result := True;
  292. end else
  293. Result := False;
  294. end;
  295. procedure TXMLReader.ExpectProlog; // [22]
  296. procedure ParseVersionNum;
  297. begin
  298. if doc.InheritsFrom(TXMLDocument) then
  299. TXMLDocument(doc).XMLVersion :=
  300. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  301. end;
  302. procedure ParseDoctypeDecls;
  303. begin
  304. repeat
  305. SkipWhitespace;
  306. until not (ParseMarkupDecl or ParsePEReference);
  307. ExpectString(']');
  308. end;
  309. var
  310. DocType: TXMLReaderDocumentType;
  311. begin
  312. if CheckFor('<?xml') then
  313. begin
  314. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  315. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  316. SkipWhitespace;
  317. ExpectString('version');
  318. ParseEq;
  319. if buf[0] = '''' then
  320. begin
  321. Inc(buf);
  322. ParseVersionNum;
  323. ExpectString('''');
  324. end else if buf[0] = '"' then
  325. begin
  326. Inc(buf);
  327. ParseVersionNum;
  328. ExpectString('"');
  329. end else
  330. RaiseExc('Expected single or double quotation mark');
  331. // EncodingDecl?
  332. ParseEncodingDecl;
  333. // SDDecl?
  334. SkipWhitespace;
  335. if CheckFor('standalone') then
  336. begin
  337. ExpectEq;
  338. if buf[0] = '''' then
  339. begin
  340. Inc(buf);
  341. if not (CheckFor('yes''') or CheckFor('no''')) then
  342. RaiseExc('Expected ''yes'' or ''no''');
  343. end else if buf[0] = '''' then
  344. begin
  345. Inc(buf);
  346. if not (CheckFor('yes"') or CheckFor('no"')) then
  347. RaiseExc('Expected "yes" or "no"');
  348. end;
  349. SkipWhitespace;
  350. end;
  351. ExpectString('?>');
  352. end;
  353. // Check for "Misc*"
  354. ParseMisc(doc);
  355. // Check for "(doctypedecl Misc*)?" [28]
  356. if CheckFor('<!DOCTYPE') then
  357. begin
  358. DocType := TXMLReaderDocumentType.Create(doc as TXMLReaderDocument);
  359. if doc.InheritsFrom(TXMLReaderDocument) then
  360. TXMLReaderDocument(doc).SetDocType(DocType);
  361. SkipWhitespace;
  362. DocType.Name := ExpectName;
  363. SkipWhitespace;
  364. if CheckFor('[') then
  365. begin
  366. ParseDoctypeDecls;
  367. SkipWhitespace;
  368. ExpectString('>');
  369. end else if not CheckFor('>') then
  370. begin
  371. ParseExternalID;
  372. SkipWhitespace;
  373. if CheckFor('[') then
  374. begin
  375. ParseDoctypeDecls;
  376. SkipWhitespace;
  377. end;
  378. ExpectString('>');
  379. end;
  380. ParseMisc(doc);
  381. end;
  382. end;
  383. function TXMLReader.ParseEq: Boolean; // [25]
  384. var
  385. savedbuf: PChar;
  386. begin
  387. savedbuf := buf;
  388. SkipWhitespace;
  389. if buf[0] = '=' then begin
  390. Inc(buf);
  391. SkipWhitespace;
  392. Result := True;
  393. end else begin
  394. buf := savedbuf;
  395. Result := False;
  396. end;
  397. end;
  398. procedure TXMLReader.ExpectEq;
  399. begin
  400. if not ParseEq then
  401. RaiseExc('Expected "="');
  402. end;
  403. // Parse "Misc*":
  404. // Misc ::= Comment | PI | S
  405. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  406. begin
  407. repeat
  408. SkipWhitespace;
  409. until not (ParseComment(AOwner) or ParsePI);
  410. end;
  411. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  412. function ParseElementDecl: Boolean; // [45]
  413. procedure ExpectChoiceOrSeq; // [49], [50]
  414. procedure ExpectCP; // [48]
  415. begin
  416. if CheckFor('(') then
  417. ExpectChoiceOrSeq
  418. else
  419. ExpectName;
  420. if CheckFor('?') then
  421. else if CheckFor('*') then
  422. else if CheckFor('+') then;
  423. end;
  424. var
  425. delimiter: Char;
  426. begin
  427. SkipWhitespace;
  428. ExpectCP;
  429. SkipWhitespace;
  430. delimiter := #0;
  431. while not CheckFor(')') do begin
  432. if delimiter = #0 then begin
  433. if (buf[0] = '|') or (buf[0] = ',') then
  434. delimiter := buf[0]
  435. else
  436. RaiseExc('Expected "|" or ","');
  437. Inc(buf);
  438. end else
  439. ExpectString(delimiter);
  440. SkipWhitespace;
  441. ExpectCP;
  442. end;
  443. end;
  444. begin
  445. if CheckFor('<!ELEMENT') then begin
  446. ExpectWhitespace;
  447. ExpectName;
  448. ExpectWhitespace;
  449. // Get contentspec [46]
  450. if CheckFor('EMPTY') then
  451. else if CheckFor('ANY') then
  452. else if CheckFor('(') then begin
  453. SkipWhitespace;
  454. if CheckFor('#PCDATA') then begin
  455. // Parse Mixed section [51]
  456. SkipWhitespace;
  457. if not CheckFor(')') then
  458. repeat
  459. ExpectString('|');
  460. SkipWhitespace;
  461. ExpectName;
  462. until CheckFor(')*');
  463. end else begin
  464. // Parse Children section [47]
  465. ExpectChoiceOrSeq;
  466. if CheckFor('?') then
  467. else if CheckFor('*') then
  468. else if CheckFor('+') then;
  469. end;
  470. end else
  471. RaiseExc('Invalid content specification');
  472. SkipWhitespace;
  473. ExpectString('>');
  474. Result := True;
  475. end else
  476. Result := False;
  477. end;
  478. function ParseAttlistDecl: Boolean; // [52]
  479. var
  480. attr: TDOMAttr;
  481. begin
  482. if CheckFor('<!ATTLIST') then begin
  483. ExpectWhitespace;
  484. ExpectName;
  485. SkipWhitespace;
  486. while not CheckFor('>') do begin
  487. ExpectName;
  488. ExpectWhitespace;
  489. // Get AttType [54], [55], [56]
  490. if CheckFor('CDATA') then
  491. else if CheckFor('ID') then
  492. else if CheckFor('IDREF') then
  493. else if CheckFor('IDREFS') then
  494. else if CheckFor('ENTITTY') then
  495. else if CheckFor('ENTITIES') then
  496. else if CheckFor('NMTOKEN') then
  497. else if CheckFor('NMTOKENS') then
  498. else if CheckFor('NOTATION') then begin // [57], [58]
  499. ExpectWhitespace;
  500. ExpectString('(');
  501. SkipWhitespace;
  502. ExpectName;
  503. SkipWhitespace;
  504. while not CheckFor(')') do begin
  505. ExpectString('|');
  506. SkipWhitespace;
  507. ExpectName;
  508. SkipWhitespace;
  509. end;
  510. end else if CheckFor('(') then begin // [59]
  511. SkipWhitespace;
  512. GetString(Nmtoken);
  513. SkipWhitespace;
  514. while not CheckFor(')') do begin
  515. ExpectString('|');
  516. SkipWhitespace;
  517. GetString(Nmtoken);
  518. SkipWhitespace;
  519. end;
  520. end else
  521. RaiseExc('Invalid tokenized type');
  522. ExpectWhitespace;
  523. // Get DefaultDecl [60]
  524. if CheckFor('#REQUIRED') then
  525. else if CheckFor('#IMPLIED') then
  526. else begin
  527. if CheckFor('#FIXED') then
  528. SkipWhitespace;
  529. attr := doc.CreateAttribute('');
  530. ExpectAttValue(attr);
  531. end;
  532. SkipWhitespace;
  533. end;
  534. Result := True;
  535. end else
  536. Result := False;
  537. end;
  538. function ParseEntityDecl: Boolean; // [70]
  539. var
  540. NewEntity: TDOMEntity;
  541. function ParseEntityValue: Boolean; // [9]
  542. var
  543. strdel: array[0..1] of Char;
  544. begin
  545. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  546. Result := False;
  547. exit;
  548. end;
  549. strdel[0] := buf[0];
  550. strdel[1] := #0;
  551. Inc(buf);
  552. while not CheckFor(strdel) do
  553. if ParsePEReference then
  554. else if ParseReference(NewEntity) then
  555. else begin
  556. Inc(buf); // Normal haracter
  557. end;
  558. Result := True;
  559. end;
  560. begin
  561. if CheckFor('<!ENTITY') then begin
  562. ExpectWhitespace;
  563. if CheckFor('%') then begin // [72]
  564. ExpectWhitespace;
  565. NewEntity := doc.CreateEntity(ExpectName);
  566. ExpectWhitespace;
  567. // Get PEDef [74]
  568. if ParseEntityValue then
  569. else if ParseExternalID then
  570. else
  571. RaiseExc('Expected entity value or external ID');
  572. end else begin // [71]
  573. NewEntity := doc.CreateEntity(ExpectName);
  574. ExpectWhitespace;
  575. // Get EntityDef [73]
  576. if ParseEntityValue then
  577. else begin
  578. ExpectExternalID;
  579. // Get NDataDecl [76]
  580. ExpectWhitespace;
  581. ExpectString('NDATA');
  582. ExpectWhitespace;
  583. ExpectName;
  584. end;
  585. end;
  586. SkipWhitespace;
  587. ExpectString('>');
  588. Result := True;
  589. end else
  590. Result := False;
  591. end;
  592. function ParseNotationDecl: Boolean; // [82]
  593. begin
  594. if CheckFor('<!NOTATION') then begin
  595. ExpectWhitespace;
  596. ExpectName;
  597. ExpectWhitespace;
  598. if ParseExternalID then
  599. else if CheckFor('PUBLIC') then begin // [83]
  600. ExpectWhitespace;
  601. ExpectPubidLiteral;
  602. end else
  603. RaiseExc('Expected external or public ID');
  604. SkipWhitespace;
  605. ExpectString('>');
  606. Result := True;
  607. end else
  608. Result := False;
  609. end;
  610. begin
  611. Result := False;
  612. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  613. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  614. Result := True;
  615. end;
  616. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  617. begin
  618. buf := ABuf;
  619. BufStart := ABuf;
  620. Filename := AFilename;
  621. doc := TXMLReaderDocument.Create;
  622. ParseMarkupDecl;
  623. {
  624. if buf[0] <> #0 then begin
  625. WriteLn('=== Unparsed: ===');
  626. //WriteLn(buf);
  627. WriteLn(StrLen(buf), ' chars');
  628. end;
  629. }
  630. end;
  631. function TXMLReader.ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  632. var
  633. s: String;
  634. i: Integer;
  635. begin
  636. SetLength(s, 0);
  637. while not (buf[0] in [#0, '<', '&']) do
  638. begin
  639. s := s + buf[0];
  640. Inc(buf);
  641. end;
  642. if Length(s) > 0 then
  643. begin
  644. // Check if s has non-whitespace content
  645. i := Length(s);
  646. while (i > 0) and (s[i] in WhitespaceChars) do
  647. Dec(i);
  648. if i > 0 then
  649. AOwner.AppendChild(doc.CreateTextNode(s));
  650. Result := True;
  651. end else
  652. Result := False;
  653. end;
  654. function TXMLReader.ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  655. var
  656. cdata: String;
  657. begin
  658. if CheckFor('<![CDATA[') then
  659. begin
  660. SetLength(cdata, 0);
  661. while not CheckFor(']]>') do
  662. begin
  663. cdata := cdata + buf[0];
  664. Inc(buf);
  665. end;
  666. AOwner.AppendChild(doc.CreateCDATASection(cdata));
  667. Result := True;
  668. end else
  669. Result := False;
  670. end;
  671. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  672. var
  673. NewElem: TDOMElement;
  674. var
  675. IsEmpty: Boolean;
  676. name: String;
  677. oldpos: PChar;
  678. attr: TDOMAttr;
  679. begin
  680. oldpos := buf;
  681. if CheckFor('<') then
  682. begin
  683. if not GetName(name) then
  684. begin
  685. buf := oldpos;
  686. Result := False;
  687. exit;
  688. end;
  689. NewElem := doc.CreateElement(name);
  690. AOwner.AppendChild(NewElem);
  691. SkipWhitespace;
  692. IsEmpty := False;
  693. while True do
  694. begin
  695. if CheckFor('/>') then
  696. begin
  697. IsEmpty := True;
  698. break;
  699. end;
  700. if CheckFor('>') then
  701. break;
  702. // Get Attribute [41]
  703. attr := doc.CreateAttribute(ExpectName);
  704. NewElem.Attributes.SetNamedItem(attr);
  705. ExpectEq;
  706. ExpectAttValue(attr);
  707. SkipWhitespace;
  708. end;
  709. if not IsEmpty then
  710. begin
  711. // Get content
  712. SkipWhitespace;
  713. while ParseCharData(NewElem) or ParseCDSect(NewElem) or ParsePI or
  714. ParseComment(NewElem) or ParseElement(NewElem) or
  715. ParseReference(NewElem) do;
  716. // Get ETag [42]
  717. ExpectString('</');
  718. if ExpectName <> name then
  719. RaiseExc('Unmatching element end tag (expected "</' + name + '>")');
  720. SkipWhitespace;
  721. ExpectString('>');
  722. end;
  723. ResolveEntities(NewElem);
  724. Result := True;
  725. end else
  726. Result := False;
  727. end;
  728. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  729. begin
  730. if not ParseElement(AOwner) then
  731. RaiseExc('Expected element');
  732. end;
  733. function TXMLReader.ParsePEReference: Boolean; // [69]
  734. begin
  735. if CheckFor('%') then begin
  736. ExpectName;
  737. ExpectString(';');
  738. Result := True;
  739. end else
  740. Result := False;
  741. end;
  742. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  743. begin
  744. if not CheckFor('&') then begin
  745. Result := False;
  746. exit;
  747. end;
  748. if CheckFor('#') then begin // Test for CharRef [66]
  749. if CheckFor('x') then begin
  750. // !!!: there must be at least one digit
  751. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  752. end else
  753. // !!!: there must be at least one digit
  754. while buf[0] in ['0'..'9'] do Inc(buf);
  755. end else
  756. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  757. ExpectString(';');
  758. Result := True;
  759. end;
  760. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  761. begin
  762. if not ParseReference(AOwner) then
  763. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  764. end;
  765. function TXMLReader.ParseExternalID: Boolean; // [75]
  766. function GetSystemLiteral: String;
  767. begin
  768. SetLength(Result, 0);
  769. if buf[0] = '''' then begin
  770. Inc(buf);
  771. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  772. Result := Result + buf[0];
  773. Inc(buf);
  774. end;
  775. ExpectString('''');
  776. end else if buf[0] = '"' then begin
  777. Inc(buf);
  778. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  779. Result := Result + buf[0];
  780. Inc(buf);
  781. end;
  782. ExpectString('"');
  783. end;
  784. end;
  785. begin
  786. if CheckFor('SYSTEM') then begin
  787. ExpectWhitespace;
  788. GetSystemLiteral;
  789. Result := True;
  790. end else if CheckFor('PUBLIC') then begin
  791. ExpectWhitespace;
  792. ExpectPubidLiteral;
  793. ExpectWhitespace;
  794. GetSystemLiteral;
  795. Result := True;
  796. end else
  797. Result := False;
  798. end;
  799. procedure TXMLReader.ExpectExternalID;
  800. begin
  801. if not ParseExternalID then
  802. RaiseExc('Expected external ID');
  803. end;
  804. function TXMLReader.ParseEncodingDecl: String; // [80]
  805. function ParseEncName: String;
  806. begin
  807. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  808. RaiseExc('Expected character (A-Z, a-z)');
  809. Result := buf[0];
  810. Inc(buf);
  811. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  812. end;
  813. begin
  814. SetLength(Result, 0);
  815. SkipWhitespace;
  816. if CheckFor('encoding') then begin
  817. ExpectEq;
  818. if buf[0] = '''' then begin
  819. Inc(buf);
  820. Result := ParseEncName;
  821. ExpectString('''');
  822. end else if buf[0] = '"' then begin
  823. Inc(buf);
  824. Result := ParseEncName;
  825. ExpectString('"');
  826. end;
  827. end;
  828. end;
  829. { Currently, this method will only resolve the entities which are
  830. predefined in XML: }
  831. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  832. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: String);
  833. var
  834. PrevSibling, NextSibling: TDOMNode;
  835. begin
  836. PrevSibling := EntityNode.PreviousSibling;
  837. NextSibling := EntityNode.NextSibling;
  838. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  839. begin
  840. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  841. RootNode.RemoveChild(EntityNode);
  842. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  843. begin
  844. TDOMCharacterData(PrevSibling).AppendData(
  845. TDOMCharacterData(NextSibling).Data);
  846. RootNode.RemoveChild(NextSibling);
  847. end
  848. end else
  849. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  850. begin
  851. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  852. RootNode.RemoveChild(EntityNode);
  853. end else
  854. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  855. end;
  856. var
  857. Node, NextSibling: TDOMNode;
  858. begin
  859. Node := RootNode.FirstChild;
  860. while Assigned(Node) do
  861. begin
  862. NextSibling := Node.NextSibling;
  863. if Node.NodeType = ENTITY_REFERENCE_NODE then
  864. if Node.NodeName = 'amp' then
  865. ReplaceEntityRef(Node, '&')
  866. else if Node.NodeName = 'apos' then
  867. ReplaceEntityRef(Node, '''')
  868. else if Node.NodeName = 'gt' then
  869. ReplaceEntityRef(Node, '>')
  870. else if Node.NodeName = 'lt' then
  871. ReplaceEntityRef(Node, '<')
  872. else if Node.NodeName = 'quot' then
  873. ReplaceEntityRef(Node, '"');
  874. Node := NextSibling;
  875. end;
  876. end;
  877. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  878. var
  879. Reader: TXMLReader;
  880. buf: PChar;
  881. BufSize: LongInt;
  882. begin
  883. ADoc := nil;
  884. BufSize := FileSize(f) + 1;
  885. if BufSize <= 1 then
  886. exit;
  887. GetMem(buf, BufSize);
  888. try
  889. BlockRead(f, buf^, BufSize - 1);
  890. buf[BufSize - 1] := #0;
  891. Reader := TXMLReader.Create;
  892. try
  893. Reader.ProcessXML(buf, TFileRec(f).name);
  894. ADoc := TXMLDocument(Reader.doc);
  895. finally
  896. Reader.Free;
  897. end;
  898. finally
  899. FreeMem(buf);
  900. end;
  901. end;
  902. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  903. const AFilename: String);
  904. var
  905. Reader: TXMLReader;
  906. buf: PChar;
  907. begin
  908. ADoc := nil;
  909. if f.Size = 0 then
  910. exit;
  911. GetMem(buf, f.Size + 1);
  912. try
  913. f.Read(buf^, f.Size);
  914. buf[f.Size] := #0;
  915. Reader := TXMLReader.Create;
  916. try
  917. Reader.ProcessXML(buf, AFilename);
  918. ADoc := TXMLDocument(Reader.doc);
  919. finally
  920. Reader.Free;
  921. end;
  922. finally
  923. FreeMem(buf);
  924. end;
  925. end;
  926. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  927. begin
  928. ReadXMLFile(ADoc, f, '<Stream>');
  929. end;
  930. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  931. var
  932. Stream: TStream;
  933. begin
  934. ADoc := nil;
  935. Stream := TFileStream.Create(AFilename, fmOpenRead);
  936. try
  937. ReadXMLFile(ADoc, Stream, AFilename);
  938. finally
  939. Stream.Free;
  940. end;
  941. end;
  942. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  943. var
  944. Reader: TXMLReader;
  945. buf: PChar;
  946. BufSize: LongInt;
  947. begin
  948. BufSize := FileSize(f) + 1;
  949. if BufSize <= 1 then
  950. exit;
  951. GetMem(buf, BufSize);
  952. try
  953. BlockRead(f, buf^, BufSize - 1);
  954. buf[BufSize - 1] := #0;
  955. Reader := TXMLReader.Create;
  956. try
  957. Reader.Doc := AParentNode.OwnerDocument;
  958. Reader.ProcessFragment(AParentNode, buf, TFileRec(f).name);
  959. finally
  960. Reader.Free;
  961. end;
  962. finally
  963. FreeMem(buf);
  964. end;
  965. end;
  966. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  967. const AFilename: String);
  968. var
  969. Reader: TXMLReader;
  970. buf: PChar;
  971. begin
  972. if f.Size = 0 then
  973. exit;
  974. GetMem(buf, f.Size + 1);
  975. try
  976. f.Read(buf^, f.Size);
  977. buf[f.Size] := #0;
  978. Reader := TXMLReader.Create;
  979. Reader.Doc := AParentNode.OwnerDocument;
  980. try
  981. Reader.ProcessFragment(AParentNode, buf, AFilename);
  982. finally
  983. Reader.Free;
  984. end;
  985. finally
  986. FreeMem(buf);
  987. end;
  988. end;
  989. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  990. begin
  991. ReadXMLFragment(AParentNode, f, '<Stream>');
  992. end;
  993. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  994. var
  995. Stream: TStream;
  996. begin
  997. Stream := TFileStream.Create(AFilename, fmOpenRead);
  998. try
  999. ReadXMLFragment(AParentNode, Stream, AFilename);
  1000. finally
  1001. Stream.Free;
  1002. end;
  1003. end;
  1004. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  1005. var
  1006. Reader: TXMLReader;
  1007. buf: PChar;
  1008. BufSize: LongInt;
  1009. begin
  1010. ADoc := nil;
  1011. BufSize := FileSize(f) + 1;
  1012. if BufSize <= 1 then
  1013. exit;
  1014. GetMem(buf, BufSize);
  1015. try
  1016. BlockRead(f, buf^, BufSize - 1);
  1017. buf[BufSize - 1] := #0;
  1018. Reader := TXMLReader.Create;
  1019. try
  1020. Reader.ProcessDTD(buf, TFileRec(f).name);
  1021. ADoc := TXMLDocument(Reader.doc);
  1022. finally
  1023. Reader.Free;
  1024. end;
  1025. finally
  1026. FreeMem(buf);
  1027. end;
  1028. end;
  1029. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  1030. const AFilename: String);
  1031. var
  1032. Reader: TXMLReader;
  1033. buf: PChar;
  1034. begin
  1035. ADoc := nil;
  1036. if f.Size = 0 then
  1037. exit;
  1038. GetMem(buf, f.Size + 1);
  1039. try
  1040. f.Read(buf^, f.Size);
  1041. buf[f.Size] := #0;
  1042. Reader := TXMLReader.Create;
  1043. try
  1044. Reader.ProcessDTD(buf, AFilename);
  1045. ADoc := TXMLDocument(Reader.doc);
  1046. finally
  1047. Reader.Free;
  1048. end;
  1049. finally
  1050. FreeMem(buf);
  1051. end;
  1052. end;
  1053. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  1054. begin
  1055. ReadDTDFile(ADoc, f, '<Stream>');
  1056. end;
  1057. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  1058. var
  1059. Stream: TStream;
  1060. begin
  1061. ADoc := nil;
  1062. Stream := TFileStream.Create(AFilename, fmOpenRead);
  1063. try
  1064. ReadDTDFile(ADoc, Stream, AFilename);
  1065. finally
  1066. Stream.Free;
  1067. end;
  1068. end;
  1069. end.
  1070. {
  1071. $Log$
  1072. Revision 1.8 2003-01-15 21:59:55 sg
  1073. * the units DOM, XMLRead and XMLWrite now compile with Delphi without
  1074. modifications as well
  1075. Revision 1.7 2002/09/21 19:22:38 sg
  1076. * Added procedures to process XML fragments only (e.g. for merging them
  1077. into an existing DOM document)
  1078. Revision 1.6 2002/09/07 15:15:29 peter
  1079. * old logs removed and tabs fixed
  1080. }