XMLRead.pas 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2003 by Sebastian Guenther, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit XMLRead;
  12. interface
  13. uses SysUtils, Classes, DOM;
  14. type
  15. EXMLReadError = class(Exception);
  16. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  17. overload;
  18. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File); overload;
  19. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream); overload;
  20. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  21. const AFilename: String); overload;
  22. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  23. overload;
  24. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File); overload;
  25. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
  26. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  27. const AFilename: String); overload;
  28. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  29. overload;
  30. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File); overload;
  31. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream); overload;
  32. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  33. const AFilename: String); overload;
  34. // =======================================================
  35. implementation
  36. const
  37. Letter = ['A'..'Z', 'a'..'z'];
  38. Digit = ['0'..'9'];
  39. PubidChars: set of Char = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  40. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  41. '#', '@', '$', '_', '%'];
  42. WhitespaceChars: set of Char = [#9, #10, #13, ' '];
  43. NmToken: set of Char = Letter + Digit + ['.', '-', '_', ':'];
  44. type
  45. TXMLReaderDocument = class(TXMLDocument)
  46. public
  47. procedure SetDocType(ADocType: TDOMDocumentType);
  48. end;
  49. TXMLReaderDocumentType = class(TDOMDocumentType)
  50. public
  51. constructor Create(ADocument: TXMLReaderDocument);
  52. property Name: DOMString read FNodeName write FNodeName;
  53. end;
  54. TSetOfChar = set of Char;
  55. TXMLReader = class
  56. protected
  57. buf, BufStart: PChar;
  58. Filename: String;
  59. procedure RaiseExc(descr: String);
  60. function SkipWhitespace: Boolean;
  61. procedure ExpectWhitespace;
  62. procedure ExpectString(s: String);
  63. function CheckFor(s: PChar): Boolean;
  64. function GetString(ValidChars: TSetOfChar): String;
  65. function GetName(var s: String): Boolean;
  66. function ExpectName: String; // [5]
  67. procedure ExpectAttValue(attr: TDOMAttr); // [10]
  68. function ExpectPubidLiteral: String; // [12]
  69. function ParseComment(AOwner: TDOMNode): Boolean; // [15]
  70. function ParsePI: Boolean; // [16]
  71. procedure ExpectProlog; // [22]
  72. function ParseEq: Boolean; // [25]
  73. procedure ExpectEq;
  74. procedure ParseMisc(AOwner: TDOMNode); // [27]
  75. function ParseMarkupDecl: Boolean; // [29]
  76. function ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  77. function ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  78. function ParseElement(AOwner: TDOMNode): Boolean; // [39]
  79. procedure ExpectElement(AOwner: TDOMNode);
  80. function ParseReference(AOwner: TDOMNode): Boolean; // [67]
  81. procedure ExpectReference(AOwner: TDOMNode);
  82. function ParsePEReference: Boolean; // [69]
  83. function ParseExternalID: Boolean; // [75]
  84. procedure ExpectExternalID;
  85. function ParseEncodingDecl: String; // [80]
  86. procedure ResolveEntities(RootNode: TDOMNode);
  87. public
  88. doc: TDOMDocument;
  89. procedure ProcessXML(ABuf: PChar; AFilename: String); // [1]
  90. procedure ProcessFragment(AOwner: TDOMNode; ABuf: PChar; AFilename: String);
  91. procedure ProcessDTD(ABuf: PChar; AFilename: String); // ([29])
  92. end;
  93. procedure TXMLReaderDocument.SetDocType(ADocType: TDOMDocumentType);
  94. begin
  95. FDocType := ADocType;
  96. end;
  97. constructor TXMLReaderDocumentType.Create(ADocument: TXMLReaderDocument);
  98. begin
  99. inherited Create(ADocument);
  100. end;
  101. procedure TXMLReader.RaiseExc(descr: String);
  102. var
  103. apos: PChar;
  104. x, y: Integer;
  105. begin
  106. // find out the line in which the error occured
  107. apos := BufStart;
  108. x := 1;
  109. y := 1;
  110. while apos < buf do begin
  111. if apos[0] = #10 then begin
  112. Inc(y);
  113. x := 1;
  114. end else
  115. Inc(x);
  116. Inc(apos);
  117. end;
  118. raise EXMLReadError.Create('In ' + Filename + ' (line ' + IntToStr(y) + ' pos ' +
  119. IntToStr(x) + '): ' + descr);
  120. end;
  121. function TXMLReader.SkipWhitespace: Boolean;
  122. begin
  123. Result := False;
  124. while buf[0] in WhitespaceChars do
  125. begin
  126. Inc(buf);
  127. Result := True;
  128. end;
  129. end;
  130. procedure TXMLReader.ExpectWhitespace;
  131. begin
  132. if not SkipWhitespace then
  133. RaiseExc('Expected whitespace');
  134. end;
  135. procedure TXMLReader.ExpectString(s: String);
  136. var
  137. i: Integer;
  138. s2: PChar;
  139. s3: String;
  140. begin
  141. for i := 1 to Length(s) do
  142. if buf[i - 1] <> s[i] then begin
  143. GetMem(s2, Length(s) + 1);
  144. StrLCopy(s2, buf, Length(s));
  145. s3 := StrPas(s2);
  146. FreeMem(s2);
  147. RaiseExc('Expected "' + s + '", found "' + s3 + '"');
  148. end;
  149. Inc(buf, Length(s));
  150. end;
  151. function TXMLReader.CheckFor(s: PChar): Boolean;
  152. begin
  153. if buf[0] = #0 then begin
  154. Result := False;
  155. exit;
  156. end;
  157. if StrLComp(buf, s, StrLen(s)) = 0 then begin
  158. Inc(buf, StrLen(s));
  159. Result := True;
  160. end else
  161. Result := False;
  162. end;
  163. function TXMLReader.GetString(ValidChars: TSetOfChar): String;
  164. begin
  165. SetLength(Result, 0);
  166. while buf[0] in ValidChars do begin
  167. Result := Result + buf[0];
  168. Inc(buf);
  169. end;
  170. end;
  171. {$IFDEF FPC}
  172. {$IFNDEF VER1_0}
  173. {$DEFINE UsesFPCWidestrings}
  174. {$ENDIF}
  175. {$ENDIF}
  176. {$IFDEF UsesFPCWidestrings}
  177. procedure SimpleWide2AnsiMove(source:pwidechar;dest:pchar;len:sizeint);
  178. var
  179. i : sizeint;
  180. begin
  181. for i:=1 to len do
  182. begin
  183. if word(source^)<256 then
  184. dest^:=char(word(source^))
  185. else
  186. dest^:='?';
  187. inc(dest);
  188. inc(source);
  189. end;
  190. end;
  191. procedure SimpleAnsi2WideMove(source:pchar;dest:pwidechar;len:sizeint);
  192. var
  193. i : sizeint;
  194. begin
  195. for i:=1 to len do
  196. begin
  197. dest^:=widechar(byte(source^));
  198. inc(dest);
  199. inc(source);
  200. end;
  201. end;
  202. const
  203. WideStringManager: TWideStringManager = (
  204. Wide2AnsiMove: @SimpleWide2AnsiMove;
  205. Ansi2WideMove: @SimpleAnsi2WideMove
  206. );
  207. {$ENDIF}
  208. procedure TXMLReader.ProcessXML(ABuf: PChar; AFilename: String); // [1]
  209. {$IFDEF UsesFPCWidestrings}
  210. var
  211. OldWideStringManager: TWideStringManager;
  212. {$ENDIF}
  213. begin
  214. buf := ABuf;
  215. BufStart := ABuf;
  216. Filename := AFilename;
  217. {$IFDEF UsesFPCWidestrings}
  218. SetWideStringManager(WideStringManager, OldWideStringManager);
  219. try
  220. {$ENDIF}
  221. doc := TXMLReaderDocument.Create;
  222. ExpectProlog;
  223. ExpectElement(doc);
  224. ParseMisc(doc);
  225. {$IFDEF UsesFPCWidestrings}
  226. finally
  227. SetWideStringManager(OldWideStringManager);
  228. end;
  229. {$ENDIF}
  230. if buf[0] <> #0 then
  231. RaiseExc('Text after end of document element found');
  232. end;
  233. procedure TXMLReader.ProcessFragment(AOwner: TDOMNode; ABuf: PChar;
  234. AFilename: String);
  235. {$IFDEF UsesFPCWidestrings}
  236. var
  237. OldWideStringManager: TWideStringManager;
  238. {$ENDIF}
  239. begin
  240. buf := ABuf;
  241. BufStart := ABuf;
  242. Filename := AFilename;
  243. {$IFDEF UsesFPCWidestrings}
  244. SetWideStringManager(WideStringManager, OldWideStringManager);
  245. try
  246. {$ENDIF}
  247. SkipWhitespace;
  248. while ParseCharData(AOwner) or ParseCDSect(AOwner) or ParsePI or
  249. ParseComment(AOwner) or ParseElement(AOwner) or
  250. ParseReference(AOwner) do
  251. SkipWhitespace;
  252. {$IFDEF UsesFPCWidestrings}
  253. finally
  254. SetWideStringManager(OldWideStringManager);
  255. end;
  256. {$ENDIF}
  257. end;
  258. function TXMLReader.GetName(var s: String): Boolean; // [5]
  259. begin
  260. SetLength(s, 0);
  261. if not (buf[0] in (Letter + ['_', ':'])) then begin
  262. Result := False;
  263. exit;
  264. end;
  265. s := buf[0];
  266. Inc(buf);
  267. s := s + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  268. Result := True;
  269. end;
  270. function TXMLReader.ExpectName: String; // [5]
  271. begin
  272. if not (buf[0] in (Letter + ['_', ':'])) then
  273. RaiseExc('Expected letter, "_" or ":" for name, found "' + buf[0] + '"');
  274. Result := buf[0];
  275. Inc(buf);
  276. Result := Result + GetString(Letter + ['0'..'9', '.', '-', '_', ':']);
  277. end;
  278. procedure TXMLReader.ExpectAttValue(attr: TDOMAttr); // [10]
  279. var
  280. s: String;
  281. procedure FlushStringBuffer;
  282. begin
  283. if Length(s) > 0 then
  284. begin
  285. attr.AppendChild(doc.CreateTextNode(s));
  286. SetLength(s, 0);
  287. end;
  288. end;
  289. var
  290. StrDel: array[0..1] of Char; // String delimiter
  291. begin
  292. if (buf[0] <> '''') and (buf[0] <> '"') then
  293. RaiseExc('Expected quotation marks');
  294. StrDel[0] := buf[0];
  295. StrDel[1] := #0;
  296. Inc(buf);
  297. SetLength(s, 0);
  298. while not CheckFor(StrDel) do
  299. if buf[0] = '&' then
  300. begin
  301. FlushStringBuffer;
  302. ParseReference(attr);
  303. end else
  304. begin
  305. s := s + buf[0];
  306. Inc(buf);
  307. end;
  308. FlushStringBuffer;
  309. ResolveEntities(Attr);
  310. end;
  311. function TXMLReader.ExpectPubidLiteral: String;
  312. begin
  313. SetLength(Result, 0);
  314. if CheckFor('''') then begin
  315. GetString(PubidChars - ['''']);
  316. ExpectString('''');
  317. end else if CheckFor('"') then begin
  318. GetString(PubidChars - ['"']);
  319. ExpectString('"');
  320. end else
  321. RaiseExc('Expected quotation marks');
  322. end;
  323. function TXMLReader.ParseComment(AOwner: TDOMNode): Boolean; // [15]
  324. var
  325. comment: String;
  326. begin
  327. if CheckFor('<!--') then begin
  328. SetLength(comment, 0);
  329. while (buf[0] <> #0) and (buf[1] <> #0) and
  330. ((buf[0] <> '-') or (buf[1] <> '-')) do begin
  331. comment := comment + buf[0];
  332. Inc(buf);
  333. end;
  334. AOwner.AppendChild(doc.CreateComment(comment));
  335. ExpectString('-->');
  336. Result := True;
  337. end else
  338. Result := False;
  339. end;
  340. function TXMLReader.ParsePI: Boolean; // [16]
  341. var
  342. checkbuf: array[0..3] of char;
  343. begin
  344. if CheckFor('<?') then begin
  345. { StrLCopy(checkbuf, buf, 4);
  346. if UpperCase(StrPas(checkbuf)) = 'XML ' then
  347. RaiseExc('"<?xml" processing instruction not allowed here');
  348. } ExpectName;
  349. if SkipWhitespace then
  350. while (buf[0] <> #0) and (buf[1] <> #0) and not
  351. ((buf[0] = '?') and (buf[1] = '>')) do Inc(buf);
  352. ExpectString('?>');
  353. Result := True;
  354. end else
  355. Result := False;
  356. end;
  357. procedure TXMLReader.ExpectProlog; // [22]
  358. procedure ParseVersionNum;
  359. begin
  360. if doc.InheritsFrom(TXMLDocument) then
  361. TXMLDocument(doc).XMLVersion :=
  362. GetString(['a'..'z', 'A'..'Z', '0'..'9', '_', '.', ':', '-']);
  363. end;
  364. procedure ParseDoctypeDecls;
  365. begin
  366. repeat
  367. SkipWhitespace;
  368. until not (ParseMarkupDecl or ParsePEReference);
  369. ExpectString(']');
  370. end;
  371. var
  372. DocType: TXMLReaderDocumentType;
  373. begin
  374. if CheckFor('<?xml') then
  375. begin
  376. // '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  377. // VersionInfo: S 'version' Eq (' VersionNum ' | " VersionNum ")
  378. SkipWhitespace;
  379. ExpectString('version');
  380. ParseEq;
  381. if buf[0] = '''' then
  382. begin
  383. Inc(buf);
  384. ParseVersionNum;
  385. ExpectString('''');
  386. end else if buf[0] = '"' then
  387. begin
  388. Inc(buf);
  389. ParseVersionNum;
  390. ExpectString('"');
  391. end else
  392. RaiseExc('Expected single or double quotation mark');
  393. // EncodingDecl?
  394. ParseEncodingDecl;
  395. // SDDecl?
  396. SkipWhitespace;
  397. if CheckFor('standalone') then
  398. begin
  399. ExpectEq;
  400. if buf[0] = '''' then
  401. begin
  402. Inc(buf);
  403. if not (CheckFor('yes''') or CheckFor('no''')) then
  404. RaiseExc('Expected ''yes'' or ''no''');
  405. end else if buf[0] = '''' then
  406. begin
  407. Inc(buf);
  408. if not (CheckFor('yes"') or CheckFor('no"')) then
  409. RaiseExc('Expected "yes" or "no"');
  410. end;
  411. SkipWhitespace;
  412. end;
  413. ExpectString('?>');
  414. end;
  415. // Check for "Misc*"
  416. ParseMisc(doc);
  417. // Check for "(doctypedecl Misc*)?" [28]
  418. if CheckFor('<!DOCTYPE') then
  419. begin
  420. DocType := TXMLReaderDocumentType.Create(doc as TXMLReaderDocument);
  421. if doc.InheritsFrom(TXMLReaderDocument) then
  422. TXMLReaderDocument(doc).SetDocType(DocType);
  423. SkipWhitespace;
  424. DocType.Name := ExpectName;
  425. SkipWhitespace;
  426. if CheckFor('[') then
  427. begin
  428. ParseDoctypeDecls;
  429. SkipWhitespace;
  430. ExpectString('>');
  431. end else if not CheckFor('>') then
  432. begin
  433. ParseExternalID;
  434. SkipWhitespace;
  435. if CheckFor('[') then
  436. begin
  437. ParseDoctypeDecls;
  438. SkipWhitespace;
  439. end;
  440. ExpectString('>');
  441. end;
  442. ParseMisc(doc);
  443. end;
  444. end;
  445. function TXMLReader.ParseEq: Boolean; // [25]
  446. var
  447. savedbuf: PChar;
  448. begin
  449. savedbuf := buf;
  450. SkipWhitespace;
  451. if buf[0] = '=' then begin
  452. Inc(buf);
  453. SkipWhitespace;
  454. Result := True;
  455. end else begin
  456. buf := savedbuf;
  457. Result := False;
  458. end;
  459. end;
  460. procedure TXMLReader.ExpectEq;
  461. begin
  462. if not ParseEq then
  463. RaiseExc('Expected "="');
  464. end;
  465. // Parse "Misc*":
  466. // Misc ::= Comment | PI | S
  467. procedure TXMLReader.ParseMisc(AOwner: TDOMNode); // [27]
  468. begin
  469. repeat
  470. SkipWhitespace;
  471. until not (ParseComment(AOwner) or ParsePI);
  472. end;
  473. function TXMLReader.ParseMarkupDecl: Boolean; // [29]
  474. function ParseElementDecl: Boolean; // [45]
  475. procedure ExpectChoiceOrSeq; // [49], [50]
  476. procedure ExpectCP; // [48]
  477. begin
  478. if CheckFor('(') then
  479. ExpectChoiceOrSeq
  480. else
  481. ExpectName;
  482. if CheckFor('?') then
  483. else if CheckFor('*') then
  484. else if CheckFor('+') then;
  485. end;
  486. var
  487. delimiter: Char;
  488. begin
  489. SkipWhitespace;
  490. ExpectCP;
  491. SkipWhitespace;
  492. delimiter := #0;
  493. while not CheckFor(')') do begin
  494. if delimiter = #0 then begin
  495. if (buf[0] = '|') or (buf[0] = ',') then
  496. delimiter := buf[0]
  497. else
  498. RaiseExc('Expected "|" or ","');
  499. Inc(buf);
  500. end else
  501. ExpectString(delimiter);
  502. SkipWhitespace;
  503. ExpectCP;
  504. end;
  505. end;
  506. begin
  507. if CheckFor('<!ELEMENT') then begin
  508. ExpectWhitespace;
  509. ExpectName;
  510. ExpectWhitespace;
  511. // Get contentspec [46]
  512. if CheckFor('EMPTY') then
  513. else if CheckFor('ANY') then
  514. else if CheckFor('(') then begin
  515. SkipWhitespace;
  516. if CheckFor('#PCDATA') then begin
  517. // Parse Mixed section [51]
  518. SkipWhitespace;
  519. if not CheckFor(')') then
  520. repeat
  521. ExpectString('|');
  522. SkipWhitespace;
  523. ExpectName;
  524. until CheckFor(')*');
  525. end else begin
  526. // Parse Children section [47]
  527. ExpectChoiceOrSeq;
  528. if CheckFor('?') then
  529. else if CheckFor('*') then
  530. else if CheckFor('+') then;
  531. end;
  532. end else
  533. RaiseExc('Invalid content specification');
  534. SkipWhitespace;
  535. ExpectString('>');
  536. Result := True;
  537. end else
  538. Result := False;
  539. end;
  540. function ParseAttlistDecl: Boolean; // [52]
  541. var
  542. attr: TDOMAttr;
  543. begin
  544. if CheckFor('<!ATTLIST') then begin
  545. ExpectWhitespace;
  546. ExpectName;
  547. SkipWhitespace;
  548. while not CheckFor('>') do begin
  549. ExpectName;
  550. ExpectWhitespace;
  551. // Get AttType [54], [55], [56]
  552. if CheckFor('CDATA') then
  553. else if CheckFor('ID') then
  554. else if CheckFor('IDREF') then
  555. else if CheckFor('IDREFS') then
  556. else if CheckFor('ENTITTY') then
  557. else if CheckFor('ENTITIES') then
  558. else if CheckFor('NMTOKEN') then
  559. else if CheckFor('NMTOKENS') then
  560. else if CheckFor('NOTATION') then begin // [57], [58]
  561. ExpectWhitespace;
  562. ExpectString('(');
  563. SkipWhitespace;
  564. ExpectName;
  565. SkipWhitespace;
  566. while not CheckFor(')') do begin
  567. ExpectString('|');
  568. SkipWhitespace;
  569. ExpectName;
  570. SkipWhitespace;
  571. end;
  572. end else if CheckFor('(') then begin // [59]
  573. SkipWhitespace;
  574. GetString(Nmtoken);
  575. SkipWhitespace;
  576. while not CheckFor(')') do begin
  577. ExpectString('|');
  578. SkipWhitespace;
  579. GetString(Nmtoken);
  580. SkipWhitespace;
  581. end;
  582. end else
  583. RaiseExc('Invalid tokenized type');
  584. ExpectWhitespace;
  585. // Get DefaultDecl [60]
  586. if CheckFor('#REQUIRED') then
  587. else if CheckFor('#IMPLIED') then
  588. else begin
  589. if CheckFor('#FIXED') then
  590. SkipWhitespace;
  591. attr := doc.CreateAttribute('');
  592. ExpectAttValue(attr);
  593. end;
  594. SkipWhitespace;
  595. end;
  596. Result := True;
  597. end else
  598. Result := False;
  599. end;
  600. function ParseEntityDecl: Boolean; // [70]
  601. var
  602. NewEntity: TDOMEntity;
  603. function ParseEntityValue: Boolean; // [9]
  604. var
  605. strdel: array[0..1] of Char;
  606. begin
  607. if (buf[0] <> '''') and (buf[0] <> '"') then begin
  608. Result := False;
  609. exit;
  610. end;
  611. strdel[0] := buf[0];
  612. strdel[1] := #0;
  613. Inc(buf);
  614. while not CheckFor(strdel) do
  615. if ParsePEReference then
  616. else if ParseReference(NewEntity) then
  617. else begin
  618. Inc(buf); // Normal haracter
  619. end;
  620. Result := True;
  621. end;
  622. begin
  623. if CheckFor('<!ENTITY') then begin
  624. ExpectWhitespace;
  625. if CheckFor('%') then begin // [72]
  626. ExpectWhitespace;
  627. NewEntity := doc.CreateEntity(ExpectName);
  628. ExpectWhitespace;
  629. // Get PEDef [74]
  630. if ParseEntityValue then
  631. else if ParseExternalID then
  632. else
  633. RaiseExc('Expected entity value or external ID');
  634. end else begin // [71]
  635. NewEntity := doc.CreateEntity(ExpectName);
  636. ExpectWhitespace;
  637. // Get EntityDef [73]
  638. if ParseEntityValue then
  639. else begin
  640. ExpectExternalID;
  641. // Get NDataDecl [76]
  642. ExpectWhitespace;
  643. ExpectString('NDATA');
  644. ExpectWhitespace;
  645. ExpectName;
  646. end;
  647. end;
  648. SkipWhitespace;
  649. ExpectString('>');
  650. Result := True;
  651. end else
  652. Result := False;
  653. end;
  654. function ParseNotationDecl: Boolean; // [82]
  655. begin
  656. if CheckFor('<!NOTATION') then begin
  657. ExpectWhitespace;
  658. ExpectName;
  659. ExpectWhitespace;
  660. if ParseExternalID then
  661. else if CheckFor('PUBLIC') then begin // [83]
  662. ExpectWhitespace;
  663. ExpectPubidLiteral;
  664. end else
  665. RaiseExc('Expected external or public ID');
  666. SkipWhitespace;
  667. ExpectString('>');
  668. Result := True;
  669. end else
  670. Result := False;
  671. end;
  672. begin
  673. Result := False;
  674. while ParseElementDecl or ParseAttlistDecl or ParseEntityDecl or
  675. ParseNotationDecl or ParsePI or ParseComment(doc) or SkipWhitespace do
  676. Result := True;
  677. end;
  678. procedure TXMLReader.ProcessDTD(ABuf: PChar; AFilename: String);
  679. begin
  680. buf := ABuf;
  681. BufStart := ABuf;
  682. Filename := AFilename;
  683. doc := TXMLReaderDocument.Create;
  684. ParseMarkupDecl;
  685. {
  686. if buf[0] <> #0 then begin
  687. WriteLn('=== Unparsed: ===');
  688. //WriteLn(buf);
  689. WriteLn(StrLen(buf), ' chars');
  690. end;
  691. }
  692. end;
  693. function TXMLReader.ParseCharData(AOwner: TDOMNode): Boolean; // [14]
  694. var
  695. s: String;
  696. i: Integer;
  697. begin
  698. SetLength(s, 0);
  699. while not (buf[0] in [#0, '<', '&']) do
  700. begin
  701. s := s + buf[0];
  702. Inc(buf);
  703. end;
  704. if Length(s) > 0 then
  705. begin
  706. // Check if s has non-whitespace content
  707. i := Length(s);
  708. while (i > 0) and (s[i] in WhitespaceChars) do
  709. Dec(i);
  710. if i > 0 then
  711. AOwner.AppendChild(doc.CreateTextNode(s));
  712. Result := True;
  713. end else
  714. Result := False;
  715. end;
  716. function TXMLReader.ParseCDSect(AOwner: TDOMNode): Boolean; // [18]
  717. var
  718. cdata: String;
  719. begin
  720. if CheckFor('<![CDATA[') then
  721. begin
  722. SetLength(cdata, 0);
  723. while not CheckFor(']]>') do
  724. begin
  725. cdata := cdata + buf[0];
  726. Inc(buf);
  727. end;
  728. AOwner.AppendChild(doc.CreateCDATASection(cdata));
  729. Result := True;
  730. end else
  731. Result := False;
  732. end;
  733. function TXMLReader.ParseElement(AOwner: TDOMNode): Boolean; // [39] [40] [44]
  734. var
  735. NewElem: TDOMElement;
  736. var
  737. IsEmpty: Boolean;
  738. name: String;
  739. oldpos: PChar;
  740. attr: TDOMAttr;
  741. begin
  742. oldpos := buf;
  743. if CheckFor('<') then
  744. begin
  745. if not GetName(name) then
  746. begin
  747. buf := oldpos;
  748. Result := False;
  749. exit;
  750. end;
  751. NewElem := doc.CreateElement(name);
  752. AOwner.AppendChild(NewElem);
  753. SkipWhitespace;
  754. IsEmpty := False;
  755. while True do
  756. begin
  757. if CheckFor('/>') then
  758. begin
  759. IsEmpty := True;
  760. break;
  761. end;
  762. if CheckFor('>') then
  763. break;
  764. // Get Attribute [41]
  765. attr := doc.CreateAttribute(ExpectName);
  766. NewElem.Attributes.SetNamedItem(attr);
  767. ExpectEq;
  768. ExpectAttValue(attr);
  769. SkipWhitespace;
  770. end;
  771. if not IsEmpty then
  772. begin
  773. // Get content
  774. SkipWhitespace;
  775. while ParseCharData(NewElem) or ParseCDSect(NewElem) or ParsePI or
  776. ParseComment(NewElem) or ParseElement(NewElem) or
  777. ParseReference(NewElem) do;
  778. // Get ETag [42]
  779. ExpectString('</');
  780. if ExpectName <> name then
  781. RaiseExc('Unmatching element end tag (expected "</' + name + '>")');
  782. SkipWhitespace;
  783. ExpectString('>');
  784. end;
  785. ResolveEntities(NewElem);
  786. Result := True;
  787. end else
  788. Result := False;
  789. end;
  790. procedure TXMLReader.ExpectElement(AOwner: TDOMNode);
  791. begin
  792. if not ParseElement(AOwner) then
  793. RaiseExc('Expected element');
  794. end;
  795. function TXMLReader.ParsePEReference: Boolean; // [69]
  796. begin
  797. if CheckFor('%') then begin
  798. ExpectName;
  799. ExpectString(';');
  800. Result := True;
  801. end else
  802. Result := False;
  803. end;
  804. function TXMLReader.ParseReference(AOwner: TDOMNode): Boolean; // [67] [68]
  805. begin
  806. if not CheckFor('&') then begin
  807. Result := False;
  808. exit;
  809. end;
  810. if CheckFor('#') then begin // Test for CharRef [66]
  811. if CheckFor('x') then begin
  812. // !!!: there must be at least one digit
  813. while buf[0] in ['0'..'9', 'a'..'f', 'A'..'F'] do Inc(buf);
  814. end else
  815. // !!!: there must be at least one digit
  816. while buf[0] in ['0'..'9'] do Inc(buf);
  817. end else
  818. AOwner.AppendChild(doc.CreateEntityReference(ExpectName));
  819. ExpectString(';');
  820. Result := True;
  821. end;
  822. procedure TXMLReader.ExpectReference(AOwner: TDOMNode);
  823. begin
  824. if not ParseReference(AOwner) then
  825. RaiseExc('Expected reference ("&Name;" or "%Name;")');
  826. end;
  827. function TXMLReader.ParseExternalID: Boolean; // [75]
  828. function GetSystemLiteral: String;
  829. begin
  830. SetLength(Result, 0);
  831. if buf[0] = '''' then begin
  832. Inc(buf);
  833. while (buf[0] <> '''') and (buf[0] <> #0) do begin
  834. Result := Result + buf[0];
  835. Inc(buf);
  836. end;
  837. ExpectString('''');
  838. end else if buf[0] = '"' then begin
  839. Inc(buf);
  840. while (buf[0] <> '"') and (buf[0] <> #0) do begin
  841. Result := Result + buf[0];
  842. Inc(buf);
  843. end;
  844. ExpectString('"');
  845. end;
  846. end;
  847. begin
  848. if CheckFor('SYSTEM') then begin
  849. ExpectWhitespace;
  850. GetSystemLiteral;
  851. Result := True;
  852. end else if CheckFor('PUBLIC') then begin
  853. ExpectWhitespace;
  854. ExpectPubidLiteral;
  855. ExpectWhitespace;
  856. GetSystemLiteral;
  857. Result := True;
  858. end else
  859. Result := False;
  860. end;
  861. procedure TXMLReader.ExpectExternalID;
  862. begin
  863. if not ParseExternalID then
  864. RaiseExc('Expected external ID');
  865. end;
  866. function TXMLReader.ParseEncodingDecl: String; // [80]
  867. function ParseEncName: String;
  868. begin
  869. if not (buf[0] in ['A'..'Z', 'a'..'z']) then
  870. RaiseExc('Expected character (A-Z, a-z)');
  871. Result := buf[0];
  872. Inc(buf);
  873. Result := Result + GetString(['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']);
  874. end;
  875. begin
  876. SetLength(Result, 0);
  877. SkipWhitespace;
  878. if CheckFor('encoding') then begin
  879. ExpectEq;
  880. if buf[0] = '''' then begin
  881. Inc(buf);
  882. Result := ParseEncName;
  883. ExpectString('''');
  884. end else if buf[0] = '"' then begin
  885. Inc(buf);
  886. Result := ParseEncName;
  887. ExpectString('"');
  888. end;
  889. end;
  890. end;
  891. { Currently, this method will only resolve the entities which are
  892. predefined in XML: }
  893. procedure TXMLReader.ResolveEntities(RootNode: TDOMNode);
  894. procedure ReplaceEntityRef(EntityNode: TDOMNode; const Replacement: String);
  895. var
  896. PrevSibling, NextSibling: TDOMNode;
  897. begin
  898. PrevSibling := EntityNode.PreviousSibling;
  899. NextSibling := EntityNode.NextSibling;
  900. if Assigned(PrevSibling) and (PrevSibling.NodeType = TEXT_NODE) then
  901. begin
  902. TDOMCharacterData(PrevSibling).AppendData(Replacement);
  903. RootNode.RemoveChild(EntityNode);
  904. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  905. begin
  906. TDOMCharacterData(PrevSibling).AppendData(
  907. TDOMCharacterData(NextSibling).Data);
  908. RootNode.RemoveChild(NextSibling);
  909. end
  910. end else
  911. if Assigned(NextSibling) and (NextSibling.NodeType = TEXT_NODE) then
  912. begin
  913. TDOMCharacterData(NextSibling).InsertData(0, Replacement);
  914. RootNode.RemoveChild(EntityNode);
  915. end else
  916. RootNode.ReplaceChild(Doc.CreateTextNode(Replacement), EntityNode);
  917. end;
  918. var
  919. Node, NextSibling: TDOMNode;
  920. begin
  921. Node := RootNode.FirstChild;
  922. while Assigned(Node) do
  923. begin
  924. NextSibling := Node.NextSibling;
  925. if Node.NodeType = ENTITY_REFERENCE_NODE then
  926. if Node.NodeName = 'amp' then
  927. ReplaceEntityRef(Node, '&')
  928. else if Node.NodeName = 'apos' then
  929. ReplaceEntityRef(Node, '''')
  930. else if Node.NodeName = 'gt' then
  931. ReplaceEntityRef(Node, '>')
  932. else if Node.NodeName = 'lt' then
  933. ReplaceEntityRef(Node, '<')
  934. else if Node.NodeName = 'quot' then
  935. ReplaceEntityRef(Node, '"');
  936. Node := NextSibling;
  937. end;
  938. end;
  939. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: File);
  940. var
  941. Reader: TXMLReader;
  942. buf: PChar;
  943. BufSize: LongInt;
  944. begin
  945. ADoc := nil;
  946. BufSize := FileSize(f) + 1;
  947. if BufSize <= 1 then
  948. exit;
  949. GetMem(buf, BufSize);
  950. try
  951. BlockRead(f, buf^, BufSize - 1);
  952. buf[BufSize - 1] := #0;
  953. Reader := TXMLReader.Create;
  954. try
  955. Reader.ProcessXML(buf, TFileRec(f).name);
  956. ADoc := TXMLDocument(Reader.doc);
  957. finally
  958. Reader.Free;
  959. end;
  960. finally
  961. FreeMem(buf);
  962. end;
  963. end;
  964. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream;
  965. const AFilename: String);
  966. var
  967. Reader: TXMLReader;
  968. buf: PChar;
  969. begin
  970. ADoc := nil;
  971. if f.Size = 0 then
  972. exit;
  973. GetMem(buf, f.Size + 1);
  974. try
  975. f.Read(buf^, f.Size);
  976. buf[f.Size] := #0;
  977. Reader := TXMLReader.Create;
  978. try
  979. Reader.ProcessXML(buf, AFilename);
  980. ADoc := TXMLDocument(Reader.doc);
  981. finally
  982. Reader.Free;
  983. end;
  984. finally
  985. FreeMem(buf);
  986. end;
  987. end;
  988. procedure ReadXMLFile(var ADoc: TXMLDocument; var f: TStream);
  989. begin
  990. ReadXMLFile(ADoc, f, '<Stream>');
  991. end;
  992. procedure ReadXMLFile(var ADoc: TXMLDocument; const AFilename: String);
  993. var
  994. Stream: TStream;
  995. begin
  996. ADoc := nil;
  997. Stream := TFileStream.Create(AFilename, fmOpenRead);
  998. try
  999. ReadXMLFile(ADoc, Stream, AFilename);
  1000. finally
  1001. Stream.Free;
  1002. end;
  1003. end;
  1004. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: File);
  1005. var
  1006. Reader: TXMLReader;
  1007. buf: PChar;
  1008. BufSize: LongInt;
  1009. begin
  1010. BufSize := FileSize(f) + 1;
  1011. if BufSize <= 1 then
  1012. exit;
  1013. GetMem(buf, BufSize);
  1014. try
  1015. BlockRead(f, buf^, BufSize - 1);
  1016. buf[BufSize - 1] := #0;
  1017. Reader := TXMLReader.Create;
  1018. try
  1019. Reader.Doc := AParentNode.OwnerDocument;
  1020. Reader.ProcessFragment(AParentNode, buf, TFileRec(f).name);
  1021. finally
  1022. Reader.Free;
  1023. end;
  1024. finally
  1025. FreeMem(buf);
  1026. end;
  1027. end;
  1028. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
  1029. const AFilename: String);
  1030. var
  1031. Reader: TXMLReader;
  1032. buf: PChar;
  1033. begin
  1034. if f.Size = 0 then
  1035. exit;
  1036. GetMem(buf, f.Size + 1);
  1037. try
  1038. f.Read(buf^, f.Size);
  1039. buf[f.Size] := #0;
  1040. Reader := TXMLReader.Create;
  1041. Reader.Doc := AParentNode.OwnerDocument;
  1042. try
  1043. Reader.ProcessFragment(AParentNode, buf, AFilename);
  1044. finally
  1045. Reader.Free;
  1046. end;
  1047. finally
  1048. FreeMem(buf);
  1049. end;
  1050. end;
  1051. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
  1052. begin
  1053. ReadXMLFragment(AParentNode, f, '<Stream>');
  1054. end;
  1055. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  1056. var
  1057. Stream: TStream;
  1058. begin
  1059. Stream := TFileStream.Create(AFilename, fmOpenRead);
  1060. try
  1061. ReadXMLFragment(AParentNode, Stream, AFilename);
  1062. finally
  1063. Stream.Free;
  1064. end;
  1065. end;
  1066. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: File);
  1067. var
  1068. Reader: TXMLReader;
  1069. buf: PChar;
  1070. BufSize: LongInt;
  1071. begin
  1072. ADoc := nil;
  1073. BufSize := FileSize(f) + 1;
  1074. if BufSize <= 1 then
  1075. exit;
  1076. GetMem(buf, BufSize);
  1077. try
  1078. BlockRead(f, buf^, BufSize - 1);
  1079. buf[BufSize - 1] := #0;
  1080. Reader := TXMLReader.Create;
  1081. try
  1082. Reader.ProcessDTD(buf, TFileRec(f).name);
  1083. ADoc := TXMLDocument(Reader.doc);
  1084. finally
  1085. Reader.Free;
  1086. end;
  1087. finally
  1088. FreeMem(buf);
  1089. end;
  1090. end;
  1091. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream;
  1092. const AFilename: String);
  1093. var
  1094. Reader: TXMLReader;
  1095. buf: PChar;
  1096. begin
  1097. ADoc := nil;
  1098. if f.Size = 0 then
  1099. exit;
  1100. GetMem(buf, f.Size + 1);
  1101. try
  1102. f.Read(buf^, f.Size);
  1103. buf[f.Size] := #0;
  1104. Reader := TXMLReader.Create;
  1105. try
  1106. Reader.ProcessDTD(buf, AFilename);
  1107. ADoc := TXMLDocument(Reader.doc);
  1108. finally
  1109. Reader.Free;
  1110. end;
  1111. finally
  1112. FreeMem(buf);
  1113. end;
  1114. end;
  1115. procedure ReadDTDFile(var ADoc: TXMLDocument; var f: TStream);
  1116. begin
  1117. ReadDTDFile(ADoc, f, '<Stream>');
  1118. end;
  1119. procedure ReadDTDFile(var ADoc: TXMLDocument; const AFilename: String);
  1120. var
  1121. Stream: TStream;
  1122. begin
  1123. ADoc := nil;
  1124. Stream := TFileStream.Create(AFilename, fmOpenRead);
  1125. try
  1126. ReadDTDFile(ADoc, Stream, AFilename);
  1127. finally
  1128. Stream.Free;
  1129. end;
  1130. end;
  1131. end.
  1132. {
  1133. Revision 1.11 2004/05/02 20:17:52 peter
  1134. * use sizeint
  1135. Revision 1.10 2003/12/01 23:59:12 sg
  1136. * Added support for main branch to be able to read and write at least
  1137. ISO8859-1 encoded files correctly. A much improved solution will be
  1138. provided when the mainbranch RTL fully supports Unicode/WideStrings.
  1139. Revision 1.9 2003/11/04 20:00:46 michael
  1140. + Fixed processing instruction parsing. <?xml is not allowed but <?xml-XXX is
  1141. Revision 1.8 2003/01/15 21:59:55 sg
  1142. * the units DOM, XMLRead and XMLWrite now compile with Delphi without
  1143. modifications as well
  1144. Revision 1.7 2002/09/21 19:22:38 sg
  1145. * Added procedures to process XML fragments only (e.g. for merging them
  1146. into an existing DOM document)
  1147. Revision 1.6 2002/09/07 15:15:29 peter
  1148. * old logs removed and tabs fixed
  1149. }