123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410 |
- {
- This file is part of the Free Component Library
- TXMLReader - base class for streamed XML reading.
- Copyright (c) 2011 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$IFNDEF FPC_DOTTEDUNITS}
- unit XmlReader;
- {$ENDIF FPC_DOTTEDUNITS}
- {$mode objfpc}{$H+}
- interface
- {$IFDEF FPC_DOTTEDUNITS}
- uses
- System.Classes, System.SysUtils, Xml.Utils;
- {$ELSE FPC_DOTTEDUNITS}
- uses
- Classes, SysUtils, xmlutils;
- {$ENDIF FPC_DOTTEDUNITS}
- type
- TErrorSeverity = (esWarning, esError, esFatal);
- EXMLReadError = class(Exception)
- private
- FSeverity: TErrorSeverity;
- FErrorMessage: string;
- FLine: Integer;
- FLinePos: Integer;
- public
- constructor Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
- const uri: string); overload;
- constructor Create(const AMsg: string); overload;
- property Severity: TErrorSeverity read FSeverity;
- property ErrorMessage: string read FErrorMessage;
- property Line: Integer read FLine;
- property LinePos: Integer read FLinePos;
- end;
- TXMLErrorEvent = procedure(e: EXMLReadError) of object;
- TXMLReadState = (
- rsInitial,
- rsInteractive,
- rsError,
- rsEndOfFile,
- rsClosed
- );
- { TXMLInputSource }
- TInputSourceType = (istStream,istAnsi,istUnicode,istSystemID);
- TXMLInputSource = class(TObject)
- private
- FInputSourceType: TInputSourceType;
- FStream: TStream;
- FAnsiStringData : AnsiString;
- FUnicodeStringData : UnicodeString;
- FBaseURI: XMLString;
- FSystemID: XMLString;
- FPublicID: XMLString;
- function GetStringData: String;
- procedure SetSystemID(AValue: XMLString);
- // FEncoding: string;
- public
- constructor Create(AStream: TStream); overload;
- constructor Create(const AStringData: AnsiString); overload;
- constructor Create(const AStringData: UnicodeString); overload;
- property Stream: TStream read FStream;
- property AnsiStringData: AnsiString read FAnsiStringData;
- property UnicodeStringData: UnicodeString read FUnicodeStringData;
- // property StringData: String read GetStringData; deprecated 'Use AnsiStringData or UnicodeStringData';
- property BaseURI: XMLString read FBaseURI write FBaseURI;
- property SystemID: XMLString read FSystemID write SetSystemID;
- property PublicID: XMLString read FPublicID write FPublicID;
- Property InputSourceType : TInputSourceType Read FInputSourceType;
- // property Encoding: string read FEncoding write FEncoding;
- end;
- TConformanceLevel = (clAuto, clFragment, clDocument);
- TXMLReaderSettings = class(TObject)
- private
- FNameTable: THashTable;
- FValidate: Boolean;
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- FOnError: TXMLErrorEvent;
- FConformance: TConformanceLevel;
- function GetCanonical: Boolean;
- procedure SetCanonical(aValue: Boolean);
- public
- property NameTable: THashTable read FNameTable write FNameTable;
- property Validate: Boolean read FValidate write FValidate;
- property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
- property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
- property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
- property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
- property Namespaces: Boolean read FNamespaces write FNamespaces;
- property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
- property MaxChars: Cardinal read FMaxChars write FMaxChars;
- property CanonicalForm: Boolean read GetCanonical write SetCanonical;
- property OnError: TXMLErrorEvent read FOnError write FOnError;
- property ConformanceLevel: TConformanceLevel read FConformance write FConformance;
- end;
- TXMLReader = class(TObject)
- protected
- FReadState: TXMLReadState;
- FReadStringBuf: TWideCharBuf;
- protected
- function GetEOF: Boolean; virtual;
- function GetNameTable: THashTable; virtual; abstract;
- function GetDepth: Integer; virtual; abstract;
- function GetNodeType: TXMLNodeType; virtual; abstract;
- function GetValue: XMLString; virtual; abstract;
- function GetName: XMLString; virtual; abstract;
- function GetLocalName: XMLString; virtual; abstract;
- function GetPrefix: XMLString; virtual; abstract;
- function GetNamespaceUri: XMLString; virtual; abstract;
- function GetBaseUri: XMLString; virtual; abstract;
- function GetHasValue: Boolean; virtual; abstract;
- function GetAttributeCount: Integer; virtual; abstract;
- function GetIsDefault: Boolean; virtual; abstract;
- public
- destructor Destroy; override;
- function Read: Boolean; virtual; abstract;
- procedure Close; virtual; abstract;
- function MoveToFirstAttribute: Boolean; virtual; abstract;
- function MoveToNextAttribute: Boolean; virtual; abstract;
- function MoveToElement: Boolean; virtual; abstract;
- function ReadAttributeValue: Boolean; virtual; abstract;
- function MoveToContent: TXMLNodeType; virtual;
- procedure ResolveEntity; virtual; abstract;
- function ReadElementString: XMLString; overload;
- function ReadElementString(const aName: XMLString): XMLString; overload;
- function ReadElementString(const aLocalName, aNamespace: XMLString): XMLString; overload;
- procedure ReadEndElement; virtual;
- procedure ReadStartElement; overload;
- procedure ReadStartElement(const aName: XMLString); overload;
- procedure ReadStartElement(const aLocalName, aNamespace: XMLString); overload;
- function ReadString: XMLString; virtual;
- procedure Skip; virtual;
- function LookupNamespace(const APrefix: XMLString): XMLString; virtual; abstract;
- function GetAttribute(i: Integer): XMLString; virtual; abstract;
- function GetAttribute(const Name: XMLString): XMLString; virtual; abstract;
- function GetAttribute(const localName, nsUri: XMLString): XMLString; virtual; abstract;
- property NameTable: THashTable read GetNameTable;
- property nodeType: TXMLNodeType read GetNodeType;
- property ReadState: TXMLReadState read FReadState;
- property Depth: Integer read GetDepth;
- property EOF: Boolean read GetEOF;
- property Name: XMLString read GetName;
- property LocalName: XMLString read GetLocalName;
- property Prefix: XMLString read GetPrefix;
- property namespaceUri: XMLString read GetNamespaceUri;
- property Value: XMLString read GetValue;
- property HasValue: Boolean read GetHasValue;
- property AttributeCount: Integer read GetAttributeCount;
- property BaseUri: XMLString read GetBaseUri;
- property IsDefault: Boolean read GetIsDefault;
- end;
- implementation
- const
- ContentNodeTypes = [ntText, ntCDATA, ntElement, ntEndElement,
- ntEntityReference, ntEndEntity];
- { EXMLReadError }
- constructor EXMLReadError.Create(sev: TErrorSeverity; const AMsg: string; ALine, ALinePos: Integer;
- const uri: string);
- begin
- inherited CreateFmt('In ''%s'' (line %d pos %d): %s',[uri, ALine, ALinePos, AMsg]);
- FSeverity := sev;
- FErrorMessage := AMsg;
- FLine := ALine;
- FLinePos := ALinePos;
- end;
- constructor EXMLReadError.Create(const AMsg: string);
- begin
- inherited Create(AMsg);
- FErrorMessage := AMsg;
- FSeverity := esFatal;
- end;
- { TXMLInputSource }
- function TXMLInputSource.GetStringData: String;
- begin
- {$IF SIZEOF(CHAR)=2}
- Result:=UnicodeStringData
- {$ELSE}
- Result:=AnsiStringData
- {$ENDIF}
- end;
- procedure TXMLInputSource.SetSystemID(AValue: XMLString);
- begin
- if FSystemID=AValue then Exit;
- FSystemID:=AValue;
- FInputSourceType:=istSystemID;
- end;
- constructor TXMLInputSource.Create(AStream: TStream);
- begin
- inherited Create;
- FStream := AStream;
- FInputSourceType:=istStream;
- end;
- constructor TXMLInputSource.Create(const AStringData: AnsiString);
- begin
- inherited Create;
- FAnsiStringData:=aStringData;
- FInputSourceType:=istAnsi;
- end;
- constructor TXMLInputSource.Create(const AStringData: UnicodeString);
- begin
- inherited Create;
- FUnicodeStringData:=aStringData;
- FInputSourceType:=istUnicode;
- end;
- { TXMLReaderSettings }
- function TXMLReaderSettings.GetCanonical: Boolean;
- begin
- Result := FCanonical and FExpandEntities and FCDSectionsAsText and
- { (not normalizeCharacters) and } FNamespaces and
- { namespaceDeclarations and } FPreserveWhitespace;
- end;
- procedure TXMLReaderSettings.SetCanonical(aValue: Boolean);
- begin
- FCanonical := aValue;
- if aValue then
- begin
- FExpandEntities := True;
- FCDSectionsAsText := True;
- FNamespaces := True;
- FPreserveWhitespace := True;
- { normalizeCharacters := False; }
- { namespaceDeclarations := True; }
- { wellFormed := True; }
- end;
- end;
- { TXMLReader }
- destructor TXMLReader.Destroy;
- begin
- if Assigned(FReadStringBuf.Buffer) then
- FreeMem(FReadStringBuf.Buffer);
- inherited Destroy;
- end;
- function TXMLReader.GetEOF: Boolean;
- begin
- result := (FReadState=rsEndOfFile);
- end;
- function TXMLReader.MoveToContent: TXMLNodeType;
- begin
- if ReadState > rsInteractive then
- begin
- result := ntNone;
- exit;
- end;
- if nodeType = ntAttribute then
- MoveToElement;
- repeat
- result := nodeType;
- if result in ContentNodeTypes then
- exit;
- until not Read;
- result := ntNone;
- end;
- function TXMLReader.ReadElementString: XMLString;
- begin
- ReadStartElement;
- result := ReadString;
- if NodeType <> ntEndElement then
- raise EXMLReadError.Create('Expecting end of element');
- Read;
- end;
- function TXMLReader.ReadElementString(const aName: XMLString): XMLString;
- begin
- ReadStartElement(aName);
- result := ReadString;
- if NodeType <> ntEndElement then
- raise EXMLReadError.Create('Expecting end of element');
- Read;
- end;
- function TXMLReader.ReadElementString(const aLocalName, aNamespace: XMLString): XMLString;
- begin
- ReadStartElement(aLocalName, aNamespace);
- result := ReadString;
- if NodeType <> ntEndElement then
- raise EXMLReadError.Create('Expecting end of element');
- Read;
- end;
- procedure TXMLReader.ReadEndElement;
- begin
- if MoveToContent <> ntEndElement then
- raise EXMLReadError.Create('Expecting end of element');
- Read;
- end;
- procedure TXMLReader.ReadStartElement;
- begin
- if MoveToContent <> ntElement then
- raise EXMLReadError.Create('Invalid node type');
- Read;
- end;
- procedure TXMLReader.ReadStartElement(const aName: XMLString);
- begin
- if MoveToContent <> ntElement then
- raise EXMLReadError.Create('Invalid node type') ;
- if Name <> aName then
- raise EXMLReadError.CreateFmt('Element ''%s'' was not found',[aName]);
- Read;
- end;
- procedure TXMLReader.ReadStartElement(const aLocalName, aNamespace: XMLString);
- begin
- if MoveToContent <> ntElement then
- raise EXMLReadError.Create('Invalid node type');
- if (localName <> aLocalName) or (NamespaceURI <> aNamespace) then
- raise EXMLReadError.CreateFmt('Element ''%s'' with namespace ''%s'' was not found',
- [aLocalName, aNamespace]);
- Read;
- end;
- function TXMLReader.ReadString: XMLString;
- begin
- result := '';
- MoveToElement;
- if FReadStringBuf.Buffer = nil then
- BufAllocate(FReadStringBuf, 512);
- FReadStringBuf.Length := 0;
- if NodeType = ntElement then
- repeat
- Read;
- if NodeType in [ntText, ntCDATA, ntWhitespace, ntSignificantWhitespace] then
- BufAppendString(FReadStringBuf, Value)
- else
- Break;
- until False
- else
- while NodeType in [ntText,ntCDATA,ntWhitespace,ntSignificantWhitespace] do
- begin
- BufAppendString(FReadStringBuf, Value);
- Read;
- end;
- SetString(result, FReadStringBuf.Buffer, FReadStringBuf.Length);
- FReadStringBuf.Length := 0;
- end;
- procedure TXMLReader.Skip;
- var
- i: Integer;
- begin
- if ReadState <> rsInteractive then
- exit;
- MoveToElement;
- if (NodeType <> ntElement) then
- begin
- Read;
- exit;
- end;
- i := Depth;
- while Read and (i < Depth) do {loop};
- if NodeType = ntEndElement then
- Read;
- end;
- end.
|