xmlread.pp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. {$IFNDEF FPC_DOTTEDUNITS}
  13. unit XMLRead;
  14. {$ENDIF FPC_DOTTEDUNITS}
  15. {$ifdef fpc}
  16. {$MODE objfpc}{$H+}
  17. {$endif}
  18. interface
  19. {$IFDEF FPC_DOTTEDUNITS}
  20. uses
  21. System.SysUtils, System.Classes, Xml.Dom, Xml.Utils, Xml.Reader, Xml.TextReader;
  22. {$ELSE FPC_DOTTEDUNITS}
  23. uses
  24. SysUtils, Classes, DOM, xmlutils, xmlreader, xmltextreader;
  25. {$ENDIF FPC_DOTTEDUNITS}
  26. type
  27. TErrorSeverity = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.TErrorSeverity;
  28. EXMLReadError = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.EXMLReadError;
  29. TXMLInputSource = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.TXMLInputSource;
  30. const
  31. esWarning = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.esWarning;
  32. esError = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.esError;
  33. esFatal = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.esFatal;
  34. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  35. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
  36. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream); overload;
  37. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  38. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
  39. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
  40. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream); overload;
  41. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String); overload;
  42. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  43. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
  44. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream); overload;
  45. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  46. type
  47. TXMLErrorEvent = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.TXMLErrorEvent;
  48. TDOMParseOptions = {$IFDEF FPC_DOTTEDUNITS}Xml.Reader{$ELSE}xmlreader{$ENDIF}.TXMLReaderSettings;
  49. // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
  50. TXMLContextAction = (
  51. xaAppendAsChildren = 1,
  52. xaReplaceChildren,
  53. xaInsertBefore,
  54. xaInsertAfter,
  55. xaReplace);
  56. TDOMParser = class(TObject)
  57. private
  58. FOptions: TDOMParseOptions;
  59. function GetOnError: TXMLErrorEvent;
  60. procedure SetOnError(value: TXMLErrorEvent);
  61. public
  62. constructor Create;
  63. destructor Destroy; override;
  64. procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  65. procedure ParseUri(const URI: XMLString; out ADoc: TXMLDocument);
  66. function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
  67. Action: TXMLContextAction): TDOMNode;
  68. property Options: TDOMParseOptions read FOptions;
  69. property OnError: TXMLErrorEvent read GetOnError write SetOnError;
  70. end;
  71. TDecoder = {$IFDEF FPC_DOTTEDUNITS}Xml.TextReader{$ELSE}xmltextreader{$ENDIF}.TDecoder;
  72. TGetDecoderProc = {$IFDEF FPC_DOTTEDUNITS}Xml.TextReader{$ELSE}xmltextreader{$ENDIF}.TGetDecoderProc;
  73. procedure RegisterDecoder(Proc: TGetDecoderProc);
  74. // =======================================================
  75. implementation
  76. {$IFDEF FPC_DOTTEDUNITS}
  77. uses
  78. Fcl.UriParser, Xml.DtdModel;
  79. {$ELSE FPC_DOTTEDUNITS}
  80. uses
  81. UriParser, dtdmodel;
  82. {$ENDIF FPC_DOTTEDUNITS}
  83. type
  84. TLoader = object
  85. doc: TDOMDocument;
  86. reader: TXMLTextReader;
  87. function CreateCDATANode(currnode: PNodeData): TDOMNode;
  88. function CreatePINode(currnode: PNodeData): TDOMNode;
  89. procedure ParseContent(cursor: TDOMNode_WithChildren);
  90. procedure ProcessXML(ADoc: TDOMDocument; AReader: TXMLTextReader);
  91. procedure ProcessFragment(AOwner: TDOMNode; AReader: TXMLTextReader);
  92. procedure ProcessDTD(ADoc: TDOMDocument; AReader: TXMLTextReader);
  93. procedure ProcessEntity(Sender: TXMLTextReader; AEntity: TEntityDecl);
  94. end;
  95. procedure RegisterDecoder(Proc: TGetDecoderProc);
  96. begin
  97. {$IFDEF FPC_DOTTEDUNITS}Xml.TextReader{$ELSE}xmltextreader{$ENDIF}.RegisterDecoder(Proc);
  98. end;
  99. { TDOMParser }
  100. constructor TDOMParser.Create;
  101. begin
  102. FOptions := TDOMParseOptions.Create;
  103. end;
  104. destructor TDOMParser.Destroy;
  105. begin
  106. FOptions.Free;
  107. inherited Destroy;
  108. end;
  109. function TDOMParser.GetOnError: TXMLErrorEvent;
  110. begin
  111. result := Options.OnError;
  112. end;
  113. procedure TDOMParser.SetOnError(value: TXMLErrorEvent);
  114. begin
  115. Options.OnError := value;
  116. end;
  117. procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  118. var
  119. Reader: TXMLTextReader;
  120. ldr: TLoader;
  121. begin
  122. ADoc := TXMLDocument.Create;
  123. Options.NameTable := ADoc.Names;
  124. Reader := TXMLTextReader.Create(Src, Options);
  125. try
  126. ldr.ProcessXML(ADoc, Reader);
  127. finally
  128. Reader.Free;
  129. end;
  130. end;
  131. procedure TDOMParser.ParseUri(const URI: XMLString; out ADoc: TXMLDocument);
  132. var
  133. Reader: TXMLTextReader;
  134. ldr: TLoader;
  135. begin
  136. ADoc := TXMLDocument.Create;
  137. Options.NameTable := ADoc.Names;
  138. Reader := TXMLTextReader.Create(URI, Options);
  139. try
  140. ldr.ProcessXML(ADoc, Reader)
  141. finally
  142. Reader.Free;
  143. end;
  144. end;
  145. function TDOMParser.ParseWithContext(Src: TXMLInputSource;
  146. Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
  147. var
  148. Frag: TDOMDocumentFragment;
  149. node: TDOMNode;
  150. reader: TXMLTextReader;
  151. ldr: TLoader;
  152. doc: TDOMDocument;
  153. begin
  154. if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
  155. node := Context.ParentNode
  156. else
  157. node := Context;
  158. // TODO: replacing document isn't yet supported
  159. if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
  160. raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
  161. if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
  162. raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
  163. if Context.NodeType = DOCUMENT_NODE then
  164. doc := TDOMDocument(Context)
  165. else
  166. doc := Context.OwnerDocument;
  167. Options.NameTable := doc.Names;
  168. reader := TXMLTextReader.Create(Src, Options);
  169. try
  170. Frag := doc.CreateDocumentFragment;
  171. try
  172. ldr.ProcessFragment(Frag, reader);
  173. Result := Frag.FirstChild;
  174. case Action of
  175. xaAppendAsChildren: Context.AppendChild(Frag);
  176. xaReplaceChildren: begin
  177. Context.TextContent := ''; // removes children
  178. Context.ReplaceChild(Frag, Context.FirstChild);
  179. end;
  180. xaInsertBefore: node.InsertBefore(Frag, Context);
  181. xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
  182. xaReplace: node.ReplaceChild(Frag, Context);
  183. end;
  184. finally
  185. Frag.Free;
  186. end;
  187. finally
  188. reader.Free;
  189. end;
  190. end;
  191. procedure TLoader.ProcessXML(ADoc: TDOMDocument; AReader: TXMLTextReader);
  192. begin
  193. doc := ADoc;
  194. reader := AReader;
  195. reader.OnEntity := @ProcessEntity;
  196. doc.documentURI := reader.BaseURI;
  197. reader.FragmentMode := False;
  198. ParseContent(doc);
  199. doc.XMLStandalone := reader.Standalone;
  200. if reader.Validate then
  201. reader.ValidateIdRefs;
  202. doc.IDs := reader.IDMap;
  203. reader.IDMap := nil;
  204. end;
  205. procedure TLoader.ProcessFragment(AOwner: TDOMNode; AReader: TXMLTextReader);
  206. var
  207. DoctypeNode: TDOMDocumentType;
  208. begin
  209. doc := AOwner.OwnerDocument;
  210. reader := AReader;
  211. reader.OnEntity := @ProcessEntity;
  212. reader.FragmentMode := True;
  213. reader.XML11 := doc.XMLVersion = '1.1';
  214. DoctypeNode := doc.DocType;
  215. if Assigned(DoctypeNode) then
  216. reader.DtdSchemaInfo := DocTypeNode.Model.Reference;
  217. ParseContent(aOwner as TDOMNode_WithChildren);
  218. end;
  219. procedure TLoader.ProcessEntity(Sender: TXMLTextReader; AEntity: TEntityDecl);
  220. var
  221. DoctypeNode: TDOMDocumentType;
  222. Ent: TDOMEntity;
  223. src: TXMLCharSource;
  224. InnerReader: TXMLTextReader;
  225. InnerLoader: TLoader;
  226. begin
  227. DoctypeNode := TDOMDocument(doc).DocType;
  228. if DoctypeNode = nil then
  229. Exit;
  230. Ent := TDOMEntity(DocTypeNode.Entities.GetNamedItem(AEntity.FName));
  231. if Ent = nil then
  232. Exit;
  233. Sender.EntityToSource(AEntity, Src);
  234. if Src = nil then
  235. Exit;
  236. InnerReader := TXMLTextReader.Create(Src, Sender);
  237. try
  238. Ent.SetReadOnly(False);
  239. InnerLoader.ProcessFragment(Ent, InnerReader);
  240. AEntity.FResolved := True;
  241. finally
  242. InnerReader.Free;
  243. AEntity.FOnStack := False;
  244. Ent.SetReadOnly(True);
  245. end;
  246. end;
  247. procedure TLoader.ParseContent(cursor: TDOMNode_WithChildren);
  248. var
  249. element: TDOMElement;
  250. currnodeptr: PPNodeData;
  251. currnode: PNodeData;
  252. begin
  253. currnodeptr := (reader as IGetNodeDataPtr).CurrentNodePtr;
  254. if reader.ReadState = rsInitial then
  255. begin
  256. if not reader.Read then
  257. Exit;
  258. case cursor.NodeType of
  259. DOCUMENT_NODE, ENTITY_NODE:
  260. (cursor as TDOMNode_TopLevel).SetHeaderData(reader.XMLVersion,reader.XMLEncoding);
  261. end;
  262. end;
  263. with reader do
  264. repeat
  265. if Validate then
  266. ValidateCurrentNode;
  267. currnode := currnodeptr^;
  268. case currnode^.FNodeType of
  269. ntText:
  270. cursor.InternalAppend(doc.CreateTextNodeBuf(currnode^.FValueStart, currnode^.FValueLength, False));
  271. ntWhitespace, ntSignificantWhitespace:
  272. if PreserveWhitespace then
  273. cursor.InternalAppend(doc.CreateTextNodeBuf(currnode^.FValueStart, currnode^.FValueLength, currnode^.FNodeType = ntWhitespace));
  274. ntCDATA:
  275. cursor.InternalAppend(CreateCDATANode(currnode));
  276. ntProcessingInstruction:
  277. cursor.InternalAppend(CreatePINode(currnode));
  278. ntComment:
  279. if not IgnoreComments then
  280. cursor.InternalAppend(doc.CreateCommentBuf(currnode^.FValueStart, currnode^.FValueLength));
  281. ntElement:
  282. begin
  283. element := LoadElement(doc, currnode, reader.AttributeCount);
  284. cursor.InternalAppend(element);
  285. cursor := element;
  286. end;
  287. ntEndElement:
  288. cursor := TDOMNode_WithChildren(cursor.ParentNode);
  289. ntDocumentType:
  290. cursor.InternalAppend(TDOMDocumentType.Create(doc, DtdSchemaInfo));
  291. ntEntityReference:
  292. begin
  293. cursor.InternalAppend(doc.CreateEntityReference(currnode^.FQName^.Key));
  294. { Seeing an entity reference while expanding means that the entity
  295. fails to expand. }
  296. if not ExpandEntities then
  297. begin
  298. { Make reader iterate through contents of the reference,
  299. to ensure correct validation events and character counts. }
  300. ResolveEntity;
  301. while currnodeptr^^.FNodeType <> ntEndEntity do
  302. Read;
  303. end;
  304. end;
  305. end;
  306. until not Read;
  307. end;
  308. function TLoader.CreatePINode(currnode: PNodeData): TDOMNode;
  309. var
  310. s: DOMString;
  311. begin
  312. SetString(s, currnode^.FValueStart, currnode^.FValueLength);
  313. result := Doc.CreateProcessingInstruction(currnode^.FQName^.Key, s);
  314. end;
  315. function TLoader.CreateCDATANode(currnode: PNodeData): TDOMNode;
  316. var
  317. s: XMLString;
  318. begin
  319. SetString(s, currnode^.FValueStart, currnode^.FValueLength);
  320. result := doc.CreateCDATASection(s);
  321. end;
  322. procedure TLoader.ProcessDTD(ADoc: TDOMDocument; AReader: TXMLTextReader);
  323. begin
  324. AReader.DtdSchemaInfo := TDTDModel.Create(AReader.NameTable);
  325. // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
  326. doc.AppendChild(TDOMDocumentType.Create(doc, AReader.DtdSchemaInfo));
  327. AReader.ParseDTD;
  328. end;
  329. { plain calls }
  330. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
  331. var
  332. Reader: TXMLTextReader;
  333. ldr: TLoader;
  334. begin
  335. ADoc := TXMLDocument.Create;
  336. Reader := TXMLTextReader.Create(f, ADoc.Names);
  337. try
  338. ldr.ProcessXML(ADoc,Reader);
  339. finally
  340. Reader.Free;
  341. end;
  342. end;
  343. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  344. var
  345. Reader: TXMLTextReader;
  346. ldr: TLoader;
  347. begin
  348. ADoc := TXMLDocument.Create;
  349. Reader := TXMLTextReader.Create(f, ABaseURI, ADoc.Names);
  350. try
  351. ldr.ProcessXML(ADoc, Reader);
  352. finally
  353. Reader.Free;
  354. end;
  355. end;
  356. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream);
  357. begin
  358. ReadXMLFile(ADoc, f, 'stream:');
  359. end;
  360. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
  361. var
  362. FileStream: TStream;
  363. begin
  364. ADoc := nil;
  365. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  366. try
  367. ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
  368. finally
  369. FileStream.Free;
  370. end;
  371. end;
  372. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
  373. var
  374. Reader: TXMLTextReader;
  375. ldr: TLoader;
  376. begin
  377. Reader := TXMLTextReader.Create(f, AParentNode.OwnerDocument.Names);
  378. try
  379. ldr.ProcessFragment(AParentNode, Reader);
  380. finally
  381. Reader.Free;
  382. end;
  383. end;
  384. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String);
  385. var
  386. Reader: TXMLTextReader;
  387. ldr: TLoader;
  388. begin
  389. Reader := TXMLTextReader.Create(f, ABaseURI, AParentNode.OwnerDocument.Names);
  390. try
  391. ldr.ProcessFragment(AParentNode, Reader);
  392. finally
  393. Reader.Free;
  394. end;
  395. end;
  396. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream);
  397. begin
  398. ReadXMLFragment(AParentNode, f, 'stream:');
  399. end;
  400. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  401. var
  402. Stream: TStream;
  403. begin
  404. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  405. try
  406. ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
  407. finally
  408. Stream.Free;
  409. end;
  410. end;
  411. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
  412. var
  413. Reader: TXMLTextReader;
  414. ldr: TLoader;
  415. begin
  416. ADoc := TXMLDocument.Create;
  417. Reader := TXMLTextReader.Create(f, ADoc.Names);
  418. try
  419. ldr.ProcessDTD(ADoc,Reader);
  420. finally
  421. Reader.Free;
  422. end;
  423. end;
  424. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  425. var
  426. Reader: TXMLTextReader;
  427. ldr: TLoader;
  428. begin
  429. ADoc := TXMLDocument.Create;
  430. Reader := TXMLTextReader.Create(f, ABaseURI, ADoc.Names);
  431. try
  432. ldr.ProcessDTD(ADoc,Reader);
  433. finally
  434. Reader.Free;
  435. end;
  436. end;
  437. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream);
  438. begin
  439. ReadDTDFile(ADoc, f, 'stream:');
  440. end;
  441. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
  442. var
  443. Stream: TStream;
  444. begin
  445. ADoc := nil;
  446. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  447. try
  448. ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
  449. finally
  450. Stream.Free;
  451. end;
  452. end;
  453. end.