xmlreader.pp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. {
  2. This file is part of the Free Component Library
  3. TXMLReader - base class for streamed XML reading.
  4. Copyright (c) 2011 by Sergei Gorelkin, [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$IFNDEF FPC_DOTTEDUNITS}
  12. unit XmlReader;
  13. {$ENDIF FPC_DOTTEDUNITS}
  14. {$mode objfpc}{$H+}
  15. interface
  16. {$IFDEF FPC_DOTTEDUNITS}
  17. uses
  18. System.Classes, System.SysUtils, Xml.Utils;
  19. {$ELSE FPC_DOTTEDUNITS}
  20. uses
  21. Classes, SysUtils, xmlutils;
  22. {$ENDIF FPC_DOTTEDUNITS}
  23. type
  24. TErrorSeverity = (esWarning, esError, esFatal);
  25. EXMLReadError = class(Exception)
  26. private
  27. FSeverity: TErrorSeverity;
  28. FErrorMessage: string;
  29. FLine: Integer;
  30. FLinePos: Integer;
  31. public
  32. constructor Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  33. const uri: string); overload;
  34. constructor Create(const AMsg: string); overload;
  35. property Severity: TErrorSeverity read FSeverity;
  36. property ErrorMessage: string read FErrorMessage;
  37. property Line: Integer read FLine;
  38. property LinePos: Integer read FLinePos;
  39. end;
  40. TXMLErrorEvent = procedure(e: EXMLReadError) of object;
  41. TXMLReadState = (
  42. rsInitial,
  43. rsInteractive,
  44. rsError,
  45. rsEndOfFile,
  46. rsClosed
  47. );
  48. { TXMLInputSource }
  49. TInputSourceType = (istStream,istAnsi,istUnicode,istSystemID);
  50. TXMLInputSource = class(TObject)
  51. private
  52. FInputSourceType: TInputSourceType;
  53. FStream: TStream;
  54. FAnsiStringData : AnsiString;
  55. FUnicodeStringData : UnicodeString;
  56. FBaseURI: XMLString;
  57. FSystemID: XMLString;
  58. FPublicID: XMLString;
  59. function GetStringData: String;
  60. procedure SetSystemID(AValue: XMLString);
  61. // FEncoding: string;
  62. public
  63. constructor Create(AStream: TStream); overload;
  64. constructor Create(const AStringData: AnsiString); overload;
  65. constructor Create(const AStringData: UnicodeString); overload;
  66. property Stream: TStream read FStream;
  67. property AnsiStringData: AnsiString read FAnsiStringData;
  68. property UnicodeStringData: UnicodeString read FUnicodeStringData;
  69. // property StringData: String read GetStringData; deprecated 'Use AnsiStringData or UnicodeStringData';
  70. property BaseURI: XMLString read FBaseURI write FBaseURI;
  71. property SystemID: XMLString read FSystemID write SetSystemID;
  72. property PublicID: XMLString read FPublicID write FPublicID;
  73. Property InputSourceType : TInputSourceType Read FInputSourceType;
  74. // property Encoding: string read FEncoding write FEncoding;
  75. end;
  76. TConformanceLevel = (clAuto, clFragment, clDocument);
  77. TXMLReaderSettings = class(TObject)
  78. private
  79. FNameTable: THashTable;
  80. FValidate: Boolean;
  81. FPreserveWhitespace: Boolean;
  82. FExpandEntities: Boolean;
  83. FIgnoreComments: Boolean;
  84. FCDSectionsAsText: Boolean;
  85. FNamespaces: Boolean;
  86. FDisallowDoctype: Boolean;
  87. FCanonical: Boolean;
  88. FMaxChars: Cardinal;
  89. FOnError: TXMLErrorEvent;
  90. FConformance: TConformanceLevel;
  91. function GetCanonical: Boolean;
  92. procedure SetCanonical(aValue: Boolean);
  93. public
  94. property NameTable: THashTable read FNameTable write FNameTable;
  95. property Validate: Boolean read FValidate write FValidate;
  96. property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
  97. property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
  98. property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
  99. property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
  100. property Namespaces: Boolean read FNamespaces write FNamespaces;
  101. property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
  102. property MaxChars: Cardinal read FMaxChars write FMaxChars;
  103. property CanonicalForm: Boolean read GetCanonical write SetCanonical;
  104. property OnError: TXMLErrorEvent read FOnError write FOnError;
  105. property ConformanceLevel: TConformanceLevel read FConformance write FConformance;
  106. end;
  107. TXMLReader = class(TObject)
  108. protected
  109. FReadState: TXMLReadState;
  110. FReadStringBuf: TWideCharBuf;
  111. protected
  112. function GetEOF: Boolean; virtual;
  113. function GetNameTable: THashTable; virtual; abstract;
  114. function GetDepth: Integer; virtual; abstract;
  115. function GetNodeType: TXMLNodeType; virtual; abstract;
  116. function GetValue: XMLString; virtual; abstract;
  117. function GetName: XMLString; virtual; abstract;
  118. function GetLocalName: XMLString; virtual; abstract;
  119. function GetPrefix: XMLString; virtual; abstract;
  120. function GetNamespaceUri: XMLString; virtual; abstract;
  121. function GetBaseUri: XMLString; virtual; abstract;
  122. function GetHasValue: Boolean; virtual; abstract;
  123. function GetAttributeCount: Integer; virtual; abstract;
  124. function GetIsDefault: Boolean; virtual; abstract;
  125. public
  126. destructor Destroy; override;
  127. function Read: Boolean; virtual; abstract;
  128. procedure Close; virtual; abstract;
  129. function MoveToFirstAttribute: Boolean; virtual; abstract;
  130. function MoveToNextAttribute: Boolean; virtual; abstract;
  131. function MoveToElement: Boolean; virtual; abstract;
  132. function ReadAttributeValue: Boolean; virtual; abstract;
  133. function MoveToContent: TXMLNodeType; virtual;
  134. procedure ResolveEntity; virtual; abstract;
  135. function ReadElementString: XMLString; overload;
  136. function ReadElementString(const aName: XMLString): XMLString; overload;
  137. function ReadElementString(const aLocalName, aNamespace: XMLString): XMLString; overload;
  138. procedure ReadEndElement; virtual;
  139. procedure ReadStartElement; overload;
  140. procedure ReadStartElement(const aName: XMLString); overload;
  141. procedure ReadStartElement(const aLocalName, aNamespace: XMLString); overload;
  142. function ReadString: XMLString; virtual;
  143. procedure Skip; virtual;
  144. function LookupNamespace(const APrefix: XMLString): XMLString; virtual; abstract;
  145. function GetAttribute(i: Integer): XMLString; virtual; abstract;
  146. function GetAttribute(const Name: XMLString): XMLString; virtual; abstract;
  147. function GetAttribute(const localName, nsUri: XMLString): XMLString; virtual; abstract;
  148. property NameTable: THashTable read GetNameTable;
  149. property nodeType: TXMLNodeType read GetNodeType;
  150. property ReadState: TXMLReadState read FReadState;
  151. property Depth: Integer read GetDepth;
  152. property EOF: Boolean read GetEOF;
  153. property Name: XMLString read GetName;
  154. property LocalName: XMLString read GetLocalName;
  155. property Prefix: XMLString read GetPrefix;
  156. property namespaceUri: XMLString read GetNamespaceUri;
  157. property Value: XMLString read GetValue;
  158. property HasValue: Boolean read GetHasValue;
  159. property AttributeCount: Integer read GetAttributeCount;
  160. property BaseUri: XMLString read GetBaseUri;
  161. property IsDefault: Boolean read GetIsDefault;
  162. end;
  163. implementation
  164. const
  165. ContentNodeTypes = [ntText, ntCDATA, ntElement, ntEndElement,
  166. ntEntityReference, ntEndEntity];
  167. { EXMLReadError }
  168. constructor EXMLReadError.Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
  169. const uri: string);
  170. begin
  171. inherited CreateFmt('In ''%s'' (line %d pos %d): %s',[uri, ALine, ALinePos, AMsg]);
  172. FSeverity := sev;
  173. FErrorMessage := AMsg;
  174. FLine := ALine;
  175. FLinePos := ALinePos;
  176. end;
  177. constructor EXMLReadError.Create(const AMsg: string);
  178. begin
  179. inherited Create(AMsg);
  180. FErrorMessage := AMsg;
  181. FSeverity := esFatal;
  182. end;
  183. { TXMLInputSource }
  184. function TXMLInputSource.GetStringData: String;
  185. begin
  186. {$IF SIZEOF(CHAR)=2}
  187. Result:=UnicodeStringData
  188. {$ELSE}
  189. Result:=AnsiStringData
  190. {$ENDIF}
  191. end;
  192. procedure TXMLInputSource.SetSystemID(AValue: XMLString);
  193. begin
  194. if FSystemID=AValue then Exit;
  195. FSystemID:=AValue;
  196. FInputSourceType:=istSystemID;
  197. end;
  198. constructor TXMLInputSource.Create(AStream: TStream);
  199. begin
  200. inherited Create;
  201. FStream := AStream;
  202. FInputSourceType:=istStream;
  203. end;
  204. constructor TXMLInputSource.Create(const AStringData: AnsiString);
  205. begin
  206. inherited Create;
  207. FAnsiStringData:=aStringData;
  208. FInputSourceType:=istAnsi;
  209. end;
  210. constructor TXMLInputSource.Create(const AStringData: UnicodeString);
  211. begin
  212. inherited Create;
  213. FUnicodeStringData:=aStringData;
  214. FInputSourceType:=istUnicode;
  215. end;
  216. { TXMLReaderSettings }
  217. function TXMLReaderSettings.GetCanonical: Boolean;
  218. begin
  219. Result := FCanonical and FExpandEntities and FCDSectionsAsText and
  220. { (not normalizeCharacters) and } FNamespaces and
  221. { namespaceDeclarations and } FPreserveWhitespace;
  222. end;
  223. procedure TXMLReaderSettings.SetCanonical(aValue: Boolean);
  224. begin
  225. FCanonical := aValue;
  226. if aValue then
  227. begin
  228. FExpandEntities := True;
  229. FCDSectionsAsText := True;
  230. FNamespaces := True;
  231. FPreserveWhitespace := True;
  232. { normalizeCharacters := False; }
  233. { namespaceDeclarations := True; }
  234. { wellFormed := True; }
  235. end;
  236. end;
  237. { TXMLReader }
  238. destructor TXMLReader.Destroy;
  239. begin
  240. if Assigned(FReadStringBuf.Buffer) then
  241. FreeMem(FReadStringBuf.Buffer);
  242. inherited Destroy;
  243. end;
  244. function TXMLReader.GetEOF: Boolean;
  245. begin
  246. result := (FReadState=rsEndOfFile);
  247. end;
  248. function TXMLReader.MoveToContent: TXMLNodeType;
  249. begin
  250. if ReadState > rsInteractive then
  251. begin
  252. result := ntNone;
  253. exit;
  254. end;
  255. if nodeType = ntAttribute then
  256. MoveToElement;
  257. repeat
  258. result := nodeType;
  259. if result in ContentNodeTypes then
  260. exit;
  261. until not Read;
  262. result := ntNone;
  263. end;
  264. function TXMLReader.ReadElementString: XMLString;
  265. begin
  266. ReadStartElement;
  267. result := ReadString;
  268. if NodeType <> ntEndElement then
  269. raise EXMLReadError.Create('Expecting end of element');
  270. Read;
  271. end;
  272. function TXMLReader.ReadElementString(const aName: XMLString): XMLString;
  273. begin
  274. ReadStartElement(aName);
  275. result := ReadString;
  276. if NodeType <> ntEndElement then
  277. raise EXMLReadError.Create('Expecting end of element');
  278. Read;
  279. end;
  280. function TXMLReader.ReadElementString(const aLocalName, aNamespace: XMLString): XMLString;
  281. begin
  282. ReadStartElement(aLocalName, aNamespace);
  283. result := ReadString;
  284. if NodeType <> ntEndElement then
  285. raise EXMLReadError.Create('Expecting end of element');
  286. Read;
  287. end;
  288. procedure TXMLReader.ReadEndElement;
  289. begin
  290. if MoveToContent <> ntEndElement then
  291. raise EXMLReadError.Create('Expecting end of element');
  292. Read;
  293. end;
  294. procedure TXMLReader.ReadStartElement;
  295. begin
  296. if MoveToContent <> ntElement then
  297. raise EXMLReadError.Create('Invalid node type');
  298. Read;
  299. end;
  300. procedure TXMLReader.ReadStartElement(const aName: XMLString);
  301. begin
  302. if MoveToContent <> ntElement then
  303. raise EXMLReadError.Create('Invalid node type') ;
  304. if Name <> aName then
  305. raise EXMLReadError.CreateFmt('Element ''%s'' was not found',[aName]);
  306. Read;
  307. end;
  308. procedure TXMLReader.ReadStartElement(const aLocalName, aNamespace: XMLString);
  309. begin
  310. if MoveToContent <> ntElement then
  311. raise EXMLReadError.Create('Invalid node type');
  312. if (localName <> aLocalName) or (NamespaceURI <> aNamespace) then
  313. raise EXMLReadError.CreateFmt('Element ''%s'' with namespace ''%s'' was not found',
  314. [aLocalName, aNamespace]);
  315. Read;
  316. end;
  317. function TXMLReader.ReadString: XMLString;
  318. begin
  319. result := '';
  320. MoveToElement;
  321. if FReadStringBuf.Buffer = nil then
  322. BufAllocate(FReadStringBuf, 512);
  323. FReadStringBuf.Length := 0;
  324. if NodeType = ntElement then
  325. repeat
  326. Read;
  327. if NodeType in [ntText, ntCDATA, ntWhitespace, ntSignificantWhitespace] then
  328. BufAppendString(FReadStringBuf, Value)
  329. else
  330. Break;
  331. until False
  332. else
  333. while NodeType in [ntText,ntCDATA,ntWhitespace,ntSignificantWhitespace] do
  334. begin
  335. BufAppendString(FReadStringBuf, Value);
  336. Read;
  337. end;
  338. SetString(result, FReadStringBuf.Buffer, FReadStringBuf.Length);
  339. FReadStringBuf.Length := 0;
  340. end;
  341. procedure TXMLReader.Skip;
  342. var
  343. i: Integer;
  344. begin
  345. if ReadState <> rsInteractive then
  346. exit;
  347. MoveToElement;
  348. if (NodeType <> ntElement) then
  349. begin
  350. Read;
  351. exit;
  352. end;
  353. i := Depth;
  354. while Read and (i < Depth) do {loop};
  355. if NodeType = ntEndElement then
  356. Read;
  357. end;
  358. end.