xmlreader.pp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. {
  2. This file is part of the Free Component Library
  3. TXMLReader - base class for streamed XML reading.
  4. Copyright (c) 2011 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit XmlReader;
  12. {$mode objfpc}{$H+}
  13. interface
  14. uses
  15. Classes, SysUtils, xmlutils;
  16. type
  17. TErrorSeverity = (esWarning, esError, esFatal);
  18. EXMLReadError = class(Exception)
  19. private
  20. FSeverity: TErrorSeverity;
  21. FErrorMessage: string;
  22. FLine: Integer;
  23. FLinePos: Integer;
  24. public
  25. constructor Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  26. const uri: string); overload;
  27. constructor Create(const AMsg: string); overload;
  28. property Severity: TErrorSeverity read FSeverity;
  29. property ErrorMessage: string read FErrorMessage;
  30. property Line: Integer read FLine;
  31. property LinePos: Integer read FLinePos;
  32. end;
  33. TXMLErrorEvent = procedure(e: EXMLReadError) of object;
  34. TXMLReadState = (
  35. rsInitial,
  36. rsInteractive,
  37. rsError,
  38. rsEndOfFile,
  39. rsClosed
  40. );
  41. { TXMLInputSource }
  42. TInputSourceType = (istStream,istAnsi,istUnicode,istSystemID);
  43. TXMLInputSource = class(TObject)
  44. private
  45. FInputSourceType: TInputSourceType;
  46. FStream: TStream;
  47. FAnsiStringData : AnsiString;
  48. FUnicodeStringData : UnicodeString;
  49. FBaseURI: XMLString;
  50. FSystemID: XMLString;
  51. FPublicID: XMLString;
  52. function GetStringData: String;
  53. procedure SetSystemID(AValue: XMLString);
  54. // FEncoding: string;
  55. public
  56. constructor Create(AStream: TStream); overload;
  57. constructor Create(const AStringData: AnsiString); overload;
  58. constructor Create(const AStringData: UnicodeString); overload;
  59. property Stream: TStream read FStream;
  60. property AnsiStringData: AnsiString read FAnsiStringData;
  61. property UnicodeStringData: UnicodeString read FUnicodeStringData;
  62. // property StringData: String read GetStringData; deprecated 'Use AnsiStringData or UnicodeStringData';
  63. property BaseURI: XMLString read FBaseURI write FBaseURI;
  64. property SystemID: XMLString read FSystemID write SetSystemID;
  65. property PublicID: XMLString read FPublicID write FPublicID;
  66. Property InputSourceType : TInputSourceType Read FInputSourceType;
  67. // property Encoding: string read FEncoding write FEncoding;
  68. end;
  69. TConformanceLevel = (clAuto, clFragment, clDocument);
  70. TXMLReaderSettings = class(TObject)
  71. private
  72. FNameTable: THashTable;
  73. FValidate: Boolean;
  74. FPreserveWhitespace: Boolean;
  75. FExpandEntities: Boolean;
  76. FIgnoreComments: Boolean;
  77. FCDSectionsAsText: Boolean;
  78. FNamespaces: Boolean;
  79. FDisallowDoctype: Boolean;
  80. FCanonical: Boolean;
  81. FMaxChars: Cardinal;
  82. FOnError: TXMLErrorEvent;
  83. FConformance: TConformanceLevel;
  84. function GetCanonical: Boolean;
  85. procedure SetCanonical(aValue: Boolean);
  86. public
  87. property NameTable: THashTable read FNameTable write FNameTable;
  88. property Validate: Boolean read FValidate write FValidate;
  89. property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
  90. property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
  91. property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
  92. property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
  93. property Namespaces: Boolean read FNamespaces write FNamespaces;
  94. property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
  95. property MaxChars: Cardinal read FMaxChars write FMaxChars;
  96. property CanonicalForm: Boolean read GetCanonical write SetCanonical;
  97. property OnError: TXMLErrorEvent read FOnError write FOnError;
  98. property ConformanceLevel: TConformanceLevel read FConformance write FConformance;
  99. end;
  100. TXMLReader = class(TObject)
  101. protected
  102. FReadState: TXMLReadState;
  103. FReadStringBuf: TWideCharBuf;
  104. protected
  105. function GetEOF: Boolean; virtual;
  106. function GetNameTable: THashTable; virtual; abstract;
  107. function GetDepth: Integer; virtual; abstract;
  108. function GetNodeType: TXMLNodeType; virtual; abstract;
  109. function GetValue: XMLString; virtual; abstract;
  110. function GetName: XMLString; virtual; abstract;
  111. function GetLocalName: XMLString; virtual; abstract;
  112. function GetPrefix: XMLString; virtual; abstract;
  113. function GetNamespaceUri: XMLString; virtual; abstract;
  114. function GetBaseUri: XMLString; virtual; abstract;
  115. function GetHasValue: Boolean; virtual; abstract;
  116. function GetAttributeCount: Integer; virtual; abstract;
  117. function GetIsDefault: Boolean; virtual; abstract;
  118. public
  119. destructor Destroy; override;
  120. function Read: Boolean; virtual; abstract;
  121. procedure Close; virtual; abstract;
  122. function MoveToFirstAttribute: Boolean; virtual; abstract;
  123. function MoveToNextAttribute: Boolean; virtual; abstract;
  124. function MoveToElement: Boolean; virtual; abstract;
  125. function ReadAttributeValue: Boolean; virtual; abstract;
  126. function MoveToContent: TXMLNodeType; virtual;
  127. procedure ResolveEntity; virtual; abstract;
  128. function ReadElementString: XMLString; overload;
  129. function ReadElementString(const aName: XMLString): XMLString; overload;
  130. function ReadElementString(const aLocalName, aNamespace: XMLString): XMLString; overload;
  131. procedure ReadEndElement; virtual;
  132. procedure ReadStartElement; overload;
  133. procedure ReadStartElement(const aName: XMLString); overload;
  134. procedure ReadStartElement(const aLocalName, aNamespace: XMLString); overload;
  135. function ReadString: XMLString; virtual;
  136. procedure Skip; virtual;
  137. function LookupNamespace(const APrefix: XMLString): XMLString; virtual; abstract;
  138. function GetAttribute(i: Integer): XMLString; virtual; abstract;
  139. function GetAttribute(const Name: XMLString): XMLString; virtual; abstract;
  140. function GetAttribute(const localName, nsUri: XMLString): XMLString; virtual; abstract;
  141. property NameTable: THashTable read GetNameTable;
  142. property nodeType: TXMLNodeType read GetNodeType;
  143. property ReadState: TXMLReadState read FReadState;
  144. property Depth: Integer read GetDepth;
  145. property EOF: Boolean read GetEOF;
  146. property Name: XMLString read GetName;
  147. property LocalName: XMLString read GetLocalName;
  148. property Prefix: XMLString read GetPrefix;
  149. property namespaceUri: XMLString read GetNamespaceUri;
  150. property Value: XMLString read GetValue;
  151. property HasValue: Boolean read GetHasValue;
  152. property AttributeCount: Integer read GetAttributeCount;
  153. property BaseUri: XMLString read GetBaseUri;
  154. property IsDefault: Boolean read GetIsDefault;
  155. end;
  156. implementation
  157. const
  158. ContentNodeTypes = [ntText, ntCDATA, ntElement, ntEndElement,
  159. ntEntityReference, ntEndEntity];
  160. { EXMLReadError }
  161. constructor EXMLReadError.Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  162. const uri: string);
  163. begin
  164. inherited CreateFmt('In ''%s'' (line %d pos %d): %s',[uri, ALine, ALinePos, AMsg]);
  165. FSeverity := sev;
  166. FErrorMessage := AMsg;
  167. FLine := ALine;
  168. FLinePos := ALinePos;
  169. end;
  170. constructor EXMLReadError.Create(const AMsg: string);
  171. begin
  172. inherited Create(AMsg);
  173. FErrorMessage := AMsg;
  174. FSeverity := esFatal;
  175. end;
  176. { TXMLInputSource }
  177. function TXMLInputSource.GetStringData: String;
  178. begin
  179. {$IF SIZEOF(CHAR)=2}
  180. Result:=UnicodeStringData
  181. {$ELSE}
  182. Result:=AnsiStringData
  183. {$ENDIF}
  184. end;
  185. procedure TXMLInputSource.SetSystemID(AValue: XMLString);
  186. begin
  187. if FSystemID=AValue then Exit;
  188. FSystemID:=AValue;
  189. FInputSourceType:=istSystemID;
  190. end;
  191. constructor TXMLInputSource.Create(AStream: TStream);
  192. begin
  193. inherited Create;
  194. FStream := AStream;
  195. FInputSourceType:=istStream;
  196. end;
  197. constructor TXMLInputSource.Create(const AStringData: AnsiString);
  198. begin
  199. inherited Create;
  200. FAnsiStringData:=aStringData;
  201. FInputSourceType:=istAnsi;
  202. end;
  203. constructor TXMLInputSource.Create(const AStringData: UnicodeString);
  204. begin
  205. inherited Create;
  206. FUnicodeStringData:=aStringData;
  207. FInputSourceType:=istUnicode;
  208. end;
  209. { TXMLReaderSettings }
  210. function TXMLReaderSettings.GetCanonical: Boolean;
  211. begin
  212. Result := FCanonical and FExpandEntities and FCDSectionsAsText and
  213. { (not normalizeCharacters) and } FNamespaces and
  214. { namespaceDeclarations and } FPreserveWhitespace;
  215. end;
  216. procedure TXMLReaderSettings.SetCanonical(aValue: Boolean);
  217. begin
  218. FCanonical := aValue;
  219. if aValue then
  220. begin
  221. FExpandEntities := True;
  222. FCDSectionsAsText := True;
  223. FNamespaces := True;
  224. FPreserveWhitespace := True;
  225. { normalizeCharacters := False; }
  226. { namespaceDeclarations := True; }
  227. { wellFormed := True; }
  228. end;
  229. end;
  230. { TXMLReader }
  231. destructor TXMLReader.Destroy;
  232. begin
  233. if Assigned(FReadStringBuf.Buffer) then
  234. FreeMem(FReadStringBuf.Buffer);
  235. inherited Destroy;
  236. end;
  237. function TXMLReader.GetEOF: Boolean;
  238. begin
  239. result := (FReadState=rsEndOfFile);
  240. end;
  241. function TXMLReader.MoveToContent: TXMLNodeType;
  242. begin
  243. if ReadState > rsInteractive then
  244. begin
  245. result := ntNone;
  246. exit;
  247. end;
  248. if nodeType = ntAttribute then
  249. MoveToElement;
  250. repeat
  251. result := nodeType;
  252. if result in ContentNodeTypes then
  253. exit;
  254. until not Read;
  255. result := ntNone;
  256. end;
  257. function TXMLReader.ReadElementString: XMLString;
  258. begin
  259. ReadStartElement;
  260. result := ReadString;
  261. if NodeType <> ntEndElement then
  262. raise EXMLReadError.Create('Expecting end of element');
  263. Read;
  264. end;
  265. function TXMLReader.ReadElementString(const aName: XMLString): XMLString;
  266. begin
  267. ReadStartElement(aName);
  268. result := ReadString;
  269. if NodeType <> ntEndElement then
  270. raise EXMLReadError.Create('Expecting end of element');
  271. Read;
  272. end;
  273. function TXMLReader.ReadElementString(const aLocalName, aNamespace: XMLString): XMLString;
  274. begin
  275. ReadStartElement(aLocalName, aNamespace);
  276. result := ReadString;
  277. if NodeType <> ntEndElement then
  278. raise EXMLReadError.Create('Expecting end of element');
  279. Read;
  280. end;
  281. procedure TXMLReader.ReadEndElement;
  282. begin
  283. if MoveToContent <> ntEndElement then
  284. raise EXMLReadError.Create('Expecting end of element');
  285. Read;
  286. end;
  287. procedure TXMLReader.ReadStartElement;
  288. begin
  289. if MoveToContent <> ntElement then
  290. raise EXMLReadError.Create('Invalid node type');
  291. Read;
  292. end;
  293. procedure TXMLReader.ReadStartElement(const aName: XMLString);
  294. begin
  295. if MoveToContent <> ntElement then
  296. raise EXMLReadError.Create('Invalid node type') ;
  297. if Name <> aName then
  298. raise EXMLReadError.CreateFmt('Element ''%s'' was not found',[aName]);
  299. Read;
  300. end;
  301. procedure TXMLReader.ReadStartElement(const aLocalName, aNamespace: XMLString);
  302. begin
  303. if MoveToContent <> ntElement then
  304. raise EXMLReadError.Create('Invalid node type');
  305. if (localName <> aLocalName) or (NamespaceURI <> aNamespace) then
  306. raise EXMLReadError.CreateFmt('Element ''%s'' with namespace ''%s'' was not found',
  307. [aLocalName, aNamespace]);
  308. Read;
  309. end;
  310. function TXMLReader.ReadString: XMLString;
  311. begin
  312. result := '';
  313. MoveToElement;
  314. if FReadStringBuf.Buffer = nil then
  315. BufAllocate(FReadStringBuf, 512);
  316. FReadStringBuf.Length := 0;
  317. if NodeType = ntElement then
  318. repeat
  319. Read;
  320. if NodeType in [ntText, ntCDATA, ntWhitespace, ntSignificantWhitespace] then
  321. BufAppendString(FReadStringBuf, Value)
  322. else
  323. Break;
  324. until False
  325. else
  326. while NodeType in [ntText,ntCDATA,ntWhitespace,ntSignificantWhitespace] do
  327. begin
  328. BufAppendString(FReadStringBuf, Value);
  329. Read;
  330. end;
  331. SetString(result, FReadStringBuf.Buffer, FReadStringBuf.Length);
  332. FReadStringBuf.Length := 0;
  333. end;
  334. procedure TXMLReader.Skip;
  335. var
  336. i: Integer;
  337. begin
  338. if ReadState <> rsInteractive then
  339. exit;
  340. MoveToElement;
  341. if (NodeType <> ntElement) then
  342. begin
  343. Read;
  344. exit;
  345. end;
  346. i := Depth;
  347. while Read and (i < Depth) do {loop};
  348. if NodeType = ntEndElement then
  349. Read;
  350. end;
  351. end.