xmlreader.pp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. {
  2. This file is part of the Free Component Library
  3. TXMLReader - base class for streamed XML reading.
  4. Copyright (c) 2011 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit XmlReader;
  12. {$mode objfpc}{$H+}
  13. interface
  14. uses
  15. Classes, SysUtils, xmlutils;
  16. type
  17. TErrorSeverity = (esWarning, esError, esFatal);
  18. EXMLReadError = class(Exception)
  19. private
  20. FSeverity: TErrorSeverity;
  21. FErrorMessage: string;
  22. FLine: Integer;
  23. FLinePos: Integer;
  24. public
  25. constructor Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  26. const uri: string); overload;
  27. constructor Create(const AMsg: string); overload;
  28. property Severity: TErrorSeverity read FSeverity;
  29. property ErrorMessage: string read FErrorMessage;
  30. property Line: Integer read FLine;
  31. property LinePos: Integer read FLinePos;
  32. end;
  33. TXMLErrorEvent = procedure(e: EXMLReadError) of object;
  34. TXMLReadState = (
  35. rsInitial,
  36. rsInteractive,
  37. rsError,
  38. rsEndOfFile,
  39. rsClosed
  40. );
  41. TXMLInputSource = class(TObject)
  42. private
  43. FStream: TStream;
  44. FStringData: string;
  45. FBaseURI: XMLString;
  46. FSystemID: XMLString;
  47. FPublicID: XMLString;
  48. // FEncoding: string;
  49. public
  50. constructor Create(AStream: TStream); overload;
  51. constructor Create(const AStringData: string); overload;
  52. property Stream: TStream read FStream;
  53. property StringData: string read FStringData;
  54. property BaseURI: XMLString read FBaseURI write FBaseURI;
  55. property SystemID: XMLString read FSystemID write FSystemID;
  56. property PublicID: XMLString read FPublicID write FPublicID;
  57. // property Encoding: string read FEncoding write FEncoding;
  58. end;
  59. TConformanceLevel = (clAuto, clFragment, clDocument);
  60. TXMLReaderSettings = class(TObject)
  61. private
  62. FNameTable: THashTable;
  63. FValidate: Boolean;
  64. FPreserveWhitespace: Boolean;
  65. FExpandEntities: Boolean;
  66. FIgnoreComments: Boolean;
  67. FCDSectionsAsText: Boolean;
  68. FNamespaces: Boolean;
  69. FDisallowDoctype: Boolean;
  70. FCanonical: Boolean;
  71. FMaxChars: Cardinal;
  72. FOnError: TXMLErrorEvent;
  73. FConformance: TConformanceLevel;
  74. function GetCanonical: Boolean;
  75. procedure SetCanonical(aValue: Boolean);
  76. public
  77. property NameTable: THashTable read FNameTable write FNameTable;
  78. property Validate: Boolean read FValidate write FValidate;
  79. property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
  80. property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
  81. property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
  82. property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
  83. property Namespaces: Boolean read FNamespaces write FNamespaces;
  84. property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
  85. property MaxChars: Cardinal read FMaxChars write FMaxChars;
  86. property CanonicalForm: Boolean read GetCanonical write SetCanonical;
  87. property OnError: TXMLErrorEvent read FOnError write FOnError;
  88. property ConformanceLevel: TConformanceLevel read FConformance write FConformance;
  89. end;
  90. TXMLReader = class(TObject)
  91. protected
  92. FReadState: TXMLReadState;
  93. FReadStringBuf: TWideCharBuf;
  94. protected
  95. function GetEOF: Boolean; virtual;
  96. function GetNameTable: THashTable; virtual; abstract;
  97. function GetDepth: Integer; virtual; abstract;
  98. function GetNodeType: TXMLNodeType; virtual; abstract;
  99. function GetValue: XMLString; virtual; abstract;
  100. function GetName: XMLString; virtual; abstract;
  101. function GetLocalName: XMLString; virtual; abstract;
  102. function GetPrefix: XMLString; virtual; abstract;
  103. function GetNamespaceUri: XMLString; virtual; abstract;
  104. function GetBaseUri: XMLString; virtual; abstract;
  105. function GetHasValue: Boolean; virtual; abstract;
  106. function GetAttributeCount: Integer; virtual; abstract;
  107. function GetIsDefault: Boolean; virtual; abstract;
  108. public
  109. destructor Destroy; override;
  110. function Read: Boolean; virtual; abstract;
  111. procedure Close; virtual; abstract;
  112. function MoveToFirstAttribute: Boolean; virtual; abstract;
  113. function MoveToNextAttribute: Boolean; virtual; abstract;
  114. function MoveToElement: Boolean; virtual; abstract;
  115. function ReadAttributeValue: Boolean; virtual; abstract;
  116. function MoveToContent: TXMLNodeType; virtual;
  117. procedure ResolveEntity; virtual; abstract;
  118. function ReadElementString: XMLString; overload;
  119. function ReadElementString(const aName: XMLString): XMLString; overload;
  120. function ReadElementString(const aLocalName, aNamespace: XMLString): XMLString; overload;
  121. procedure ReadEndElement; virtual;
  122. procedure ReadStartElement; overload;
  123. procedure ReadStartElement(const aName: XMLString); overload;
  124. procedure ReadStartElement(const aLocalName, aNamespace: XMLString); overload;
  125. function ReadString: XMLString; virtual;
  126. procedure Skip; virtual;
  127. function LookupNamespace(const APrefix: XMLString): XMLString; virtual; abstract;
  128. function GetAttribute(i: Integer): XMLString; virtual; abstract;
  129. function GetAttribute(const Name: XMLString): XMLString; virtual; abstract;
  130. function GetAttribute(const localName, nsUri: XMLString): XMLString; virtual; abstract;
  131. property NameTable: THashTable read GetNameTable;
  132. property nodeType: TXMLNodeType read GetNodeType;
  133. property ReadState: TXMLReadState read FReadState;
  134. property Depth: Integer read GetDepth;
  135. property EOF: Boolean read GetEOF;
  136. property Name: XMLString read GetName;
  137. property LocalName: XMLString read GetLocalName;
  138. property Prefix: XMLString read GetPrefix;
  139. property namespaceUri: XMLString read GetNamespaceUri;
  140. property Value: XMLString read GetValue;
  141. property HasValue: Boolean read GetHasValue;
  142. property AttributeCount: Integer read GetAttributeCount;
  143. property BaseUri: XMLString read GetBaseUri;
  144. property IsDefault: Boolean read GetIsDefault;
  145. end;
  146. implementation
  147. const
  148. ContentNodeTypes = [ntText, ntCDATA, ntElement, ntEndElement,
  149. ntEntityReference, ntEndEntity];
  150. { EXMLReadError }
  151. constructor EXMLReadError.Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  152. const uri: string);
  153. begin
  154. inherited CreateFmt('In ''%s'' (line %d pos %d): %s',[uri, ALine, ALinePos, AMsg]);
  155. FSeverity := sev;
  156. FErrorMessage := AMsg;
  157. FLine := ALine;
  158. FLinePos := ALinePos;
  159. end;
  160. constructor EXMLReadError.Create(const AMsg: string);
  161. begin
  162. inherited Create(AMsg);
  163. FErrorMessage := AMsg;
  164. FSeverity := esFatal;
  165. end;
  166. { TXMLInputSource }
  167. constructor TXMLInputSource.Create(AStream: TStream);
  168. begin
  169. inherited Create;
  170. FStream := AStream;
  171. end;
  172. constructor TXMLInputSource.Create(const AStringData: string);
  173. begin
  174. inherited Create;
  175. FStringData := AStringData;
  176. end;
  177. { TXMLReaderSettings }
  178. function TXMLReaderSettings.GetCanonical: Boolean;
  179. begin
  180. Result := FCanonical and FExpandEntities and FCDSectionsAsText and
  181. { (not normalizeCharacters) and } FNamespaces and
  182. { namespaceDeclarations and } FPreserveWhitespace;
  183. end;
  184. procedure TXMLReaderSettings.SetCanonical(aValue: Boolean);
  185. begin
  186. FCanonical := aValue;
  187. if aValue then
  188. begin
  189. FExpandEntities := True;
  190. FCDSectionsAsText := True;
  191. FNamespaces := True;
  192. FPreserveWhitespace := True;
  193. { normalizeCharacters := False; }
  194. { namespaceDeclarations := True; }
  195. { wellFormed := True; }
  196. end;
  197. end;
  198. { TXMLReader }
  199. destructor TXMLReader.Destroy;
  200. begin
  201. if Assigned(FReadStringBuf.Buffer) then
  202. FreeMem(FReadStringBuf.Buffer);
  203. inherited Destroy;
  204. end;
  205. function TXMLReader.GetEOF: Boolean;
  206. begin
  207. result := (FReadState=rsEndOfFile);
  208. end;
  209. function TXMLReader.MoveToContent: TXMLNodeType;
  210. begin
  211. if ReadState > rsInteractive then
  212. begin
  213. result := ntNone;
  214. exit;
  215. end;
  216. if nodeType = ntAttribute then
  217. MoveToElement;
  218. repeat
  219. result := nodeType;
  220. if result in ContentNodeTypes then
  221. exit;
  222. until not Read;
  223. result := ntNone;
  224. end;
  225. function TXMLReader.ReadElementString: XMLString;
  226. begin
  227. ReadStartElement;
  228. result := ReadString;
  229. if NodeType <> ntEndElement then
  230. raise EXMLReadError.Create('Expecting end of element');
  231. Read;
  232. end;
  233. function TXMLReader.ReadElementString(const aName: XMLString): XMLString;
  234. begin
  235. ReadStartElement(aName);
  236. result := ReadString;
  237. if NodeType <> ntEndElement then
  238. raise EXMLReadError.Create('Expecting end of element');
  239. Read;
  240. end;
  241. function TXMLReader.ReadElementString(const aLocalName, aNamespace: XMLString): XMLString;
  242. begin
  243. ReadStartElement(aLocalName, aNamespace);
  244. result := ReadString;
  245. if NodeType <> ntEndElement then
  246. raise EXMLReadError.Create('Expecting end of element');
  247. Read;
  248. end;
  249. procedure TXMLReader.ReadEndElement;
  250. begin
  251. if MoveToContent <> ntEndElement then
  252. raise EXMLReadError.Create('Expecting end of element');
  253. Read;
  254. end;
  255. procedure TXMLReader.ReadStartElement;
  256. begin
  257. if MoveToContent <> ntElement then
  258. raise EXMLReadError.Create('Invalid node type');
  259. Read;
  260. end;
  261. procedure TXMLReader.ReadStartElement(const aName: XMLString);
  262. begin
  263. if MoveToContent <> ntElement then
  264. raise EXMLReadError.Create('Invalid node type') ;
  265. if Name <> aName then
  266. raise EXMLReadError.CreateFmt('Element ''%s'' was not found',[aName]);
  267. Read;
  268. end;
  269. procedure TXMLReader.ReadStartElement(const aLocalName, aNamespace: XMLString);
  270. begin
  271. if MoveToContent <> ntElement then
  272. raise EXMLReadError.Create('Invalid node type');
  273. if (localName <> aLocalName) or (NamespaceURI <> aNamespace) then
  274. raise EXMLReadError.CreateFmt('Element ''%s'' with namespace ''%s'' was not found',
  275. [aLocalName, aNamespace]);
  276. Read;
  277. end;
  278. function TXMLReader.ReadString: XMLString;
  279. begin
  280. result := '';
  281. MoveToElement;
  282. if FReadStringBuf.Buffer = nil then
  283. BufAllocate(FReadStringBuf, 512);
  284. FReadStringBuf.Length := 0;
  285. if NodeType = ntElement then
  286. repeat
  287. Read;
  288. if NodeType in [ntText, ntCDATA, ntWhitespace, ntSignificantWhitespace] then
  289. BufAppendString(FReadStringBuf, Value)
  290. else
  291. Break;
  292. until False
  293. else
  294. while NodeType in [ntText,ntCDATA,ntWhitespace,ntSignificantWhitespace] do
  295. begin
  296. BufAppendString(FReadStringBuf, Value);
  297. Read;
  298. end;
  299. SetString(result, FReadStringBuf.Buffer, FReadStringBuf.Length);
  300. FReadStringBuf.Length := 0;
  301. end;
  302. procedure TXMLReader.Skip;
  303. var
  304. i: Integer;
  305. begin
  306. if ReadState <> rsInteractive then
  307. exit;
  308. MoveToElement;
  309. if (NodeType <> ntElement) then
  310. begin
  311. Read;
  312. exit;
  313. end;
  314. i := Depth;
  315. while Read and (i < Depth) do {loop};
  316. if NodeType = ntEndElement then
  317. Read;
  318. end;
  319. end.