123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580 |
- {
- This file is part of the Free Component Library
- XML reading routines.
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- Modified in 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit XMLRead;
- {$ifdef fpc}
- {$MODE objfpc}{$H+}
- {$endif}
- interface
- uses
- SysUtils, Classes, DOM;
- type
- EXMLReadError = class(Exception);
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
- // =======================================================
- implementation
- uses
- UriParser;
- type
- TSetOfChar = set of Char;
- const
- Letter = ['A'..'Z', 'a'..'z'];
- Digit = ['0'..'9'];
- PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- type
- TDOMNotationEx = class(TDOMNotation);
- TDOMAttrEx = class(TDOMAttr);
- TXMLInputSource = class;
- TDOMElementDef = class;
- TDOMEntityEx = class(TDOMEntity)
- protected
- FInternal: Boolean;
- FResolved: Boolean;
- FOnStack: Boolean;
- FReplacementText: DOMString;
- end;
- // TODO: Do I need PEMap in DocType? Maybe move it to Reader itself?
- // (memory usage - they are not needed after parsing)
- TDOMDocumentTypeEx = class(TDOMDocumentType)
- private
- FHasPERefs: Boolean;
- FPEMap: TDOMNamedNodeMap;
- FElementDefs: TDOMNamedNodeMap;
- function GetPEMap: TDOMNamedNodeMap;
- function GetElementDefs: TDOMNamedNodeMap;
- protected
- property PEMap: TDOMNamedNodeMap read GetPEMap;
- property ElementDefs: TDOMNamedNodeMap read GetElementDefs;
- property HasPERefs: Boolean read FHasPERefs write FHasPERefs;
- public
- destructor Destroy; override;
- end;
- TXMLReader = class;
- TDecoder = class;
- TDecoderRef = class of TDecoder;
- TXMLInputSource = class
- private
- FBuf: PChar;
- FBufEnd: PChar;
- FEof: Boolean;
- FSurrogate: WideChar;
- FReader: TXMLReader;
- FParent: TXMLInputSource;
- FEntity: TObject; // weak reference
- FCursor: TObject; // weak reference
- FLine: Integer;
- FColumn: Integer;
- FSystemID: WideString;
- FPublicID: WideString;
- function GetSystemID: WideString;
- function GetPublicID: WideString;
- protected
- procedure FetchData; virtual;
- public
- constructor Create(const AData: WideString);
- function NextChar: WideChar; virtual;
- procedure Initialize; virtual;
- procedure SetEncoding(const AEncoding: string); virtual;
- property SystemID: WideString read GetSystemID write FSystemID;
- property PublicID: WideString read GetPublicID write FPublicID;
- end;
- TXMLDecodingSource = class(TXMLInputSource)
- private
- FDecoder: TDecoder;
- FSeenCR: Boolean;
- function InternalNextChar: WideChar;
- procedure DecodingError(const Msg: string); overload;
- procedure DecodingError(const Msg: string; const Args: array of const); overload;
- public
- destructor Destroy; override;
- function NextChar: WideChar; override;
- procedure SetEncoding(const AEncoding: string); override;
- procedure Initialize; override;
- end;
- TXMLStreamInputSource = class(TXMLDecodingSource)
- private
- FAllocated: PChar;
- FStream: TStream;
- FBufSize: Integer;
- FOwnStream: Boolean;
- public
- constructor Create(AStream: TStream; AOwnStream: Boolean);
- destructor Destroy; override;
- procedure FetchData; override;
- end;
- TXMLFileInputSource = class(TXMLDecodingSource)
- private
- FFile: ^Text;
- FString: string;
- public
- constructor Create(var AFile: Text);
- procedure FetchData; override;
- end;
- TDecoder = class
- private
- FSource: TXMLDecodingSource;
- public
- constructor Create(ASource: TXMLDecodingSource);
- function DecodeNext: WideChar; virtual; abstract;
- class function Supports(const AEncoding: string): Boolean; virtual; abstract;
- end;
- TISO8859_1Decoder = class(TDecoder)
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- TUCS2Decoder = class(TDecoder)
- private
- FSwapEndian: Boolean;
- FEncoding: string;
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- TUTF8Decoder = class(TDecoder)
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- PWideCharBuf = ^TWideCharBuf;
- TWideCharBuf = record
- Buffer: PWideChar;
- Length: Integer;
- MaxLength: Integer;
- end;
- TEntityResolveEvent = procedure(const PubID, SysID: WideString; var Source: TXMLInputSource) of object;
- TDeclType = (dtNone, dtXml, dtText);
- TXMLReader = class
- private
- FSource: TXMLInputSource;
- FCurChar: WideChar;
- FWhitespace: Boolean;
- FXML11: Boolean;
- FValue: TWideCharBuf;
- FName: TWideCharBuf;
- FCopyBuf: PWideCharBuf;
- FIntSubset: Boolean;
- FAllowedDecl: TDeclType;
- FDtdParsed: Boolean;
- FRecognizePE: Boolean;
- FStandalone: Boolean; // property of Doc ?
- FInvalid: Boolean;
- // TODO: This array must be stored globally, not per instance
- FNamePages: PByteArray;
- FForbiddenAscii: TSetOfChar;
- FDocType: TDOMDocumentTypeEx; // a shortcut
- FEntityLevel: Integer;
- FPreserveWhitespace: Boolean;
- FCreateEntityRefs: Boolean;
- procedure RaiseExpectedQmark;
- procedure GetChar;
- procedure GetCharRaw;
- procedure Unget(wc: WideChar);
- procedure Initialize(ASource: TXMLInputSource);
- procedure InitializeRoot(ASource: TXMLInputSource);
- procedure DoParseAttValue(Delim: WideChar);
- procedure DoParseFragment;
- procedure DoParseExtSubset(ASource: TXMLInputSource);
- function ContextPush(AEntity: TDOMEntityEx): Boolean;
- function ContextPop: Boolean;
- procedure XML11_BuildTables;
- function XML11_CheckName: Boolean;
- protected
- FCursor: TDOMNode;
- procedure RaiseExc(const descr: String); overload;
- procedure RaiseExc(const descr: string; const args: array of const); overload;
- procedure RaiseExc(Expected: WideChar); overload;
- function SkipWhitespace: Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- procedure ExpectChar(wc: WideChar);
- function CheckForChar(c: WideChar): Boolean;
- procedure SkipString(const ValidChars: TSetOfChar);
- function GetString(const ValidChars: TSetOfChar): WideString;
- procedure RaiseNameNotFound;
- function CheckName: Boolean;
- function CheckNmToken: Boolean;
- function ExpectName: WideString; // [5]
- procedure SkipName;
- function SkipQuotedLiteral: Boolean;
- procedure ExpectAttValue; // [10]
- procedure SkipPubidLiteral; // [12]
- procedure SkipSystemLiteral(out Literal: WideString; Required: Boolean);
- procedure ParseComment; // [15]
- procedure ParsePI; // [16]
- procedure ParseCDSect; // [18]
- procedure ParseXmlOrTextDecl(TextDecl: Boolean);
- function ParseEq: Boolean; // [25]
- procedure ExpectEq;
- procedure ParseMisc; // [27]
- procedure ParseDoctypeDecl; // [28]
- procedure ParseMarkupDecl; // [29]
- procedure ParseElement; // [39]
- procedure ParseContent; // [43]
- function ResolvePredefined(const RefName: WideString): WideChar;
- procedure IncludeEntity(AEntity: TDOMEntityEx; InAttr: Boolean);
- procedure StartPE;
- function ParseCharRef: Boolean; // [66]
- function ParseReference: TDOMEntityEx; // [67]
- function ParsePEReference: Boolean; // [69]
- function ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- procedure ProcessTextAndRefs;
- procedure AssertPENesting(CurrentLevel: Integer);
- procedure ParseEntityDecl;
- procedure ParseEntityDeclValue(Delim: WideChar);
- procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq;
- procedure ParseMixedOrChildren;
- procedure ParseElementDecl;
- procedure ParseNotationDecl;
- function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
- procedure ProcessDefaultAttributes(Element: TDOMElement);
- procedure ValidationError(const Msg: string; const args: array of const);
- public
- doc: TDOMDocument;
- constructor Create;
- destructor Destroy; override;
- procedure ProcessXML(ASource: TXMLInputSource); // [1]
- procedure ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
- procedure ProcessDTD(ASource: TXMLInputSource); // ([29])
- end;
- // AttDef/ElementDef support
- TAttrDataType = (
- DT_CDATA,
- DT_ID,
- DT_IDREF,
- DT_IDREFS,
- DT_ENTITY,
- DT_ENTITIES,
- DT_NMTOKEN,
- DT_NMTOKENS,
- DT_NOTATION
- );
- TAttrDefault = (
- AD_IMPLIED,
- AD_DEFAULT,
- AD_REQUIRED,
- AD_FIXED
- );
- TDOMAttrDef = class(TDOMAttr)
- protected
- FDataType: TAttrDataType;
- FDefault: TAttrDefault;
- // FEnumeration: TWideStringList? array of WideStrings?
- end;
- TDOMElementDef = class(TDOMElement);
- {$i names.inc}
- // TODO: List of registered/supported decoders
- function FindDecoder(const Encoding: string): TDecoderRef;
- begin
- if TISO8859_1Decoder.Supports(Encoding) then
- Result := TISO8859_1Decoder
- else
- Result := nil;
- end;
- procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
- begin
- ABuffer.MaxLength := ALength;
- ABuffer.Length := 0;
- ABuffer.Buffer:=AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
- end;
- procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
- var
- OldLength : integer;
- begin
- if ABuffer.Length >= ABuffer.MaxLength then
- begin
- OldLength := ABuffer.MaxLength;
- ABuffer.MaxLength := ABuffer.MaxLength * 2;
- ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * SizeOf(WideChar));
- FillChar(ABuffer.Buffer[OldLength],(ABuffer.MaxLength-OldLength) * SizeOf(WideChar),0);
- end;
- ABuffer.Buffer[ABuffer.Length] := wc;
- Inc(ABuffer.Length);
- end;
- function IsValidEncName(const s: WideString): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- if (s = '') or (s[1] > #255) or not (char(s[1]) in ['A'..'Z', 'a'..'z']) then
- Exit;
- for I := 2 to Length(s) do
- if (s[I] > #255) or not (char(s[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
- Exit;
- Result := True;
- end;
- { TDOMDocumentTypeEx }
- destructor TDOMDocumentTypeEx.Destroy;
- begin
- FPEMap.Free;
- FElementDefs.Free;
- inherited Destroy;
- end;
- function TDOMDocumentTypeEx.GetElementDefs: TDOMNamedNodeMap;
- begin
- if FElementDefs = nil then
- FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE);
- Result := FElementDefs;
- end;
- function TDOMDocumentTypeEx.GetPEMap: TDOMNamedNodeMap;
- begin
- if FPEMap = nil then
- FPEMap := TDOMNamedNodeMap.Create(Self, ENTITY_NODE);
- Result := FPEMap;
- end;
- // TODO: These classes still cannot be considered as the final solution...
- { TXMLInputSource }
- constructor TXMLInputSource.Create(const AData: WideString);
- begin
- inherited Create;
- FBuf := PChar(PWideChar(AData));
- FBufEnd := FBuf + Length(AData) * sizeof(WideChar);
- end;
- procedure TXMLInputSource.Initialize;
- begin
- FLine := 1;
- FColumn := 0;
- end;
- function TXMLInputSource.NextChar: WideChar;
- begin
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
- end
- else if FBufEnd <= FBuf then
- begin
- Result := #0;
- FEof := True;
- end
- else
- begin
- Result := PWideChar(FBuf)^;
- Inc(FBuf, sizeof(WideChar));
- end;
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string);
- begin
- FReader.RaiseExc(Msg);
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string;
- const Args: array of const);
- begin
- FReader.RaiseExc(Msg, Args);
- end;
- procedure TXMLInputSource.FetchData;
- begin
- FEof := True;
- end;
- procedure TXMLInputSource.SetEncoding(const AEncoding: string);
- begin
- // do nothing
- end;
- function TXMLInputSource.GetPublicID: WideString;
- begin
- if FPublicID <> '' then
- Result := FPublicID
- else if Assigned(FParent) then
- Result := FParent.PublicID
- else
- Result := '';
- end;
- function TXMLInputSource.GetSystemID: WideString;
- begin
- if FSystemID <> '' then
- Result := FSystemID
- else if Assigned(FParent) then
- Result := FParent.SystemID
- else
- Result := '';
- end;
- { TXMLDecodingSource }
- destructor TXMLDecodingSource.Destroy;
- begin
- FDecoder.Free;
- inherited Destroy;
- end;
- function TXMLDecodingSource.InternalNextChar: WideChar;
- begin
- // TODO: find a place for it, finally...
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
- Exit;
- end;
- if FBufEnd <= FBuf then
- FetchData;
- if not FEof then
- Result := FDecoder.DecodeNext
- else
- Result := #0;
- end;
- function TXMLDecodingSource.NextChar: WideChar;
- begin
- Result := InternalNextChar;
- if FSeenCR then
- begin
- if (Result = #10) or ((Result = #$85) and FReader.FXML11) then
- Result := InternalNextChar;
- FSeenCR := False;
- end;
- case Result of
- #13: begin
- FSeenCR := True;
- Result := #10;
- end;
- #$85, #$2028:
- if FReader.FXML11 then
- Result := #10;
- end;
- if (Result < #256) and (char(Result) in FReader.FForbiddenAscii) or
- ((ord(Result) or 1) = $FFFF) then
- DecodingError('Invalid character');
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
- end;
- procedure TXMLDecodingSource.Initialize;
- begin
- inherited;
- if FBufEnd-FBuf > 1 then
- repeat
- if (FBuf[0] = #$FE) and (FBuf[1] = #$FF) then // BE
- begin
- FDecoder := TUCS2Decoder.Create(Self);
- TUCS2Decoder(FDecoder).FEncoding := 'UTF-16BE';
- {$IFNDEF ENDIAN_BIG}
- TUCS2Decoder(FDecoder).FSwapEndian := True;
- {$ENDIF}
- Exit;
- end
- else if (FBuf[0] = #$FF) and (FBuf[1] = #$FE) then // LE
- begin
- FDecoder := TUCS2Decoder.Create(Self);
- TUCS2Decoder(FDecoder).FEncoding := 'UTF-16LE';
- {$IFDEF ENDIAN_BIG}
- TUCS2Decoder(FDecoder).FSwapEndian := True;
- {$ENDIF}
- Exit;
- end
- else
- Break;
- until False;
- FDecoder := TUTF8Decoder.Create(Self);
- end;
- procedure TXMLDecodingSource.SetEncoding(const AEncoding: string);
- var
- NewDecoder: TDecoderRef;
- begin
- if FDecoder.Supports(AEncoding) then // no change needed
- Exit;
- // hardcoded stuff - special case of UCS2
- if FDecoder is TUCS2Decoder then
- begin
- // check for 'UTF-16LE' or 'UTF-16BE'
- if SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding) then
- Exit
- else
- DecodingError('Current encoding cannot be switched to ''%s''', [AEncoding]);
- end;
- NewDecoder := FindDecoder(AEncoding);
- if Assigned(NewDecoder) then
- begin
- FDecoder.Free;
- FDecoder := NewDecoder.Create(Self);
- end
- else
- DecodingError('Encoding ''%s'' is not supported', [AEncoding]);
- end;
- { TXMLStreamInputSource }
- constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
- begin
- FStream := AStream;
- FBufSize := 4096;
- GetMem(FAllocated, FBufSize+8);
- FBuf := FAllocated+8;
- FBufEnd := FBuf;
- FOwnStream := AOwnStream;
- FetchData;
- end;
- destructor TXMLStreamInputSource.Destroy;
- begin
- FreeMem(FAllocated);
- if FOwnStream then
- FStream.Free;
- inherited Destroy;
- end;
- procedure TXMLStreamInputSource.FetchData;
- var
- Remainder, BytesRead: Integer;
- OldBuf: PChar;
- begin
- Assert(FBufEnd - FBuf < 8);
- OldBuf := FBuf;
- Remainder := FBufEnd - FBuf;
- FBuf := FAllocated+8-Remainder;
- Move(OldBuf^, FBuf^, Remainder);
- BytesRead := FStream.Read(FAllocated[8], FBufSize);
- if BytesRead = 0 then
- FEof := True;
- FBufEnd := FAllocated + 8 + BytesRead;
- end;
- { TXMLFileInputSource }
- constructor TXMLFileInputSource.Create(var AFile: Text);
- begin
- FFile := @AFile;
- ReadLn(FFile^, FString);
- FBuf := PChar(FString);
- FBufEnd := FBuf + Length(FString);
- end;
- procedure TXMLFileInputSource.FetchData;
- begin
- FEof := Eof(FFile^);
- if not FEof then
- begin
- ReadLn(FFile^, FString);
- FString := FString + #10; // bad solution...
- FBuf := PChar(FString);
- FBufEnd := FBuf + Length(FString);
- end;
- end;
- { TDecoder }
- constructor TDecoder.Create(ASource: TXMLDecodingSource);
- begin
- inherited Create;
- FSource := ASource;
- end;
- { TISO8859_1Decoder}
- function TISO8859_1Decoder.DecodeNext: WideChar;
- begin
- with FSource do
- begin
- Result := WideChar(FBuf[0]);
- Inc(FBuf);
- end;
- end;
- class function TISO8859_1Decoder.Supports(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'ISO-8859-1') or
- SameText(AEncoding, 'ISO_8859-1') or
- SameText(AEncoding, 'latin1') or
- SameText(AEncoding, 'iso-ir-100') or
- SameText(AEncoding, 'l1') or
- SameText(AEncoding, 'IBM819') or
- SameText(AEncoding, 'CP819') or
- SameText(AEncoding, 'csISOLatin1') or
- // This one is not in character-sets.txt, but used in most FPC documentation...
- SameText(AEncoding, 'ISO8859-1');
- end;
- { TUCS2Decoder }
- function TUCS2Decoder.DecodeNext: WideChar;
- begin
- with FSource do
- begin
- Result := PWideChar(FBuf)^;
- Inc(FBuf, sizeof(WideChar));
- end;
- if FSwapEndian then
- Result := WideChar(Swap(Word(Result)));
- end;
- class function TUCS2Decoder.Supports(const AEncoding: string): Boolean;
- begin
- // generic aliases for both LE and BE
- Result := SameText(AEncoding, 'UTF-16') or
- SameText(AEncoding, 'unicode');
- end;
- { TUTF8Decoder }
- function TUTF8Decoder.DecodeNext: WideChar;
- const
- MaxCode: array[0..3] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
- var
- Value: Cardinal;
- I, bc: Integer;
- begin
- with FSource do
- begin
- Result := WideChar(FBuf[0]);
- Inc(FBuf);
- if Result < #$80 then
- Exit;
- if Byte(Result) and $40 = 0 then
- DecodingError('Invalid UTF8 sequence start byte');
- bc := 1;
- if Byte(Result) and $20 <> 0 then
- begin
- Inc(bc);
- if Byte(Result) and $10 <> 0 then
- begin
- Inc(bc);
- if Byte(Result) and $8 <> 0 then
- DecodingError('UCS4 character out of supported range');
- end;
- end;
- // DONE: (?) check that bc bytes available
- if FBufEnd-FBuf < bc then
- FetchData;
- Value := Byte(Result);
- I := bc; // note: I is never zero
- while bc > 0 do
- begin
- if ord(FBuf[0]) and $C0 <> $80 then
- DecodingError('Invalid byte in UTF8 sequence');
- Value := (Value shl 6) or (Cardinal(FBuf[0]) and $3F);
- Inc(FBuf);
- Dec(bc);
- end;
- Value := Value and MaxCode[I];
- // RFC2279 check
- if Value <= MaxCode[I-1] then
- DecodingError('Invalid UTF8 sequence');
- case Value of
- 0..$D7FF, $E000..$FFFF:
- begin
- Result := WideChar(Value);
- Exit;
- end;
- $10000..$10FFFF:
- begin
- Result := WideChar($D7C0 + (Value shr 10));
- FSurrogate := WideChar($DC00 xor (Value and $3FF));
- Exit;
- end;
- end;
- DecodingError('UCS4 character out of supported range');
- end;
- end;
- class function TUTF8Decoder.Supports(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'UTF-8');
- end;
- { TXMLReader }
- function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
- var
- AbsSysID: WideString;
- Filename: string;
- Stream: TStream;
- begin
- Result := False;
- if ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID) then
- begin
- Source := nil;
- // TODO: alternative resolvers
- if URIToFilename(AbsSysID, Filename) then
- begin
- try
- Stream := TFileStream.Create(Filename, fmOpenRead + fmShareDenyWrite);
- Source := TXMLStreamInputSource.Create(Stream, True);
- Source.SystemID := AbsSysID;
- Source.PublicID := PublicID;
- Result := True;
- except
- on E: Exception do
- ValidationError('%s', [E.Message]);
- end;
- end;
- end;
- end;
- procedure TXMLReader.InitializeRoot(ASource: TXMLInputSource);
- begin
- Initialize(ASource);
- GetChar;
- // TODO: presence of BOM must prevent UTF-8 encoding from being changed
- CheckForChar(#$FEFF); // skip BOM, if one is present
- end;
- procedure TXMLReader.Initialize(ASource: TXMLInputSource);
- begin
- FSource := ASource;
- FSource.FReader := Self;
- FSource.Initialize;
- end;
- procedure TXMLReader.GetCharRaw;
- begin
- FCurChar := FSource.NextChar;
- FWhitespace := (FCurChar = #32) or (FCurChar = #10) or
- (FCurChar = #9) or (FCurChar = #13);
- // Used for handling the internal DTD subset
- if Assigned(FCopyBuf) and (FSource.FParent = nil) then
- BufAppend(FCopyBuf^, FCurChar);
- end;
- procedure TXMLReader.GetChar;
- begin
- GetCharRaw;
- if not FRecognizePE then
- Exit;
- if (FCurChar = #0) and ContextPop then
- begin
- Unget(FCurChar);
- FCurChar := #32;
- FWhitespace := True;
- end
- else if FCurChar = '%' then
- begin
- FCurChar := FSource.NextChar;
- if not CheckName then
- begin
- Unget(FCurChar);
- FCurChar := '%';
- Exit;
- end;
- if FCurChar = ';' then // "%pe1;%pe2" - must not recognize pe2 immediately!
- GetCharRaw
- else
- RaiseExc(WideChar(';'));
- StartPE;
- FCurChar := #32;
- FWhitespace := True;
- end;
- end;
- procedure TXMLReader.Unget(wc: WideChar);
- begin
- FSource.FSurrogate := wc;
- end;
- procedure TXMLReader.RaiseExpectedQmark;
- begin
- RaiseExc('Expected single or double quote');
- end;
- procedure TXMLReader.RaiseExc(Expected: WideChar);
- begin
- // FIX: don't output what is found - anything may be found, including exploits...
- RaiseExc('Expected "%1s"', [string(Expected)]);
- end;
- procedure TXMLReader.RaiseExc(const descr: String);
- begin
- raise EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, FSource.FLine, FSource.FColumn, descr]);
- end;
- procedure TXMLReader.RaiseExc(const descr: string; const args: array of const);
- begin
- RaiseExc(Format(descr, args));
- end;
- function TXMLReader.SkipWhitespace: Boolean;
- begin
- Result := False;
- while FWhitespace do
- begin
- GetChar;
- Result := True;
- end;
- end;
- procedure TXMLReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- RaiseExc('Expected whitespace');
- end;
- procedure TXMLReader.ExpectChar(wc: WideChar);
- begin
- if FCurChar = wc then
- GetChar
- else
- RaiseExc(wc);
- end;
- procedure TXMLReader.ExpectString(const s: String);
- var
- I: Integer;
- begin
- for I := 1 to Length(s) do
- begin
- if FCurChar <> WideChar(s[i]) then
- RaiseExc('Expected "%s"', [s]);
- GetChar;
- end;
- end;
- function TXMLReader.CheckForChar(c: WideChar): Boolean;
- begin
- Result := (FCurChar = c);
- if Result then
- GetChar;
- end;
- procedure TXMLReader.SkipString(const ValidChars: TSetOfChar);
- begin
- FValue.Length := 0;
- while (ord(FCurChar) < 256) and (char(FCurChar) in ValidChars) do
- begin
- BufAppend(FValue, FCurChar);
- GetChar;
- end;
- end;
- function TXMLReader.GetString(const ValidChars: TSetOfChar): WideString;
- begin
- SkipString(ValidChars);
- SetString(Result, FValue.Buffer, FValue.Length);
- end;
- constructor TXMLReader.Create;
- begin
- inherited Create;
- // Naming bitmap: Point to static data for XML 1.0,
- // and allocate buffer in XML11_BuildTables when necessary.
- FNamePages := @NamePages;
- BufAllocate(FName, 128);
- BufAllocate(FValue, 512);
- FForbiddenAscii := [#1..#8, #11..#12, #14..#31];
- // TODO: put under user control
- FPreserveWhitespace := True;
- FCreateEntityRefs := True;
- end;
- destructor TXMLReader.Destroy;
- begin
- if FXML11 then
- FreeMem(FNamePages);
- FreeMem(FName.Buffer);
- FreeMem(FValue.Buffer);
- while ContextPop do; // clean input stack
- FSource.Free;
- inherited Destroy;
- end;
- procedure TXMLReader.XML11_BuildTables;
- var
- I: Integer;
- begin
- if not FXML11 then
- GetMem(FNamePages, 512);
- FXML11 := True;
- for I := 0 to 255 do
- FNamePages^[I] := ord(Byte(I) in Xml11HighPages);
- FNamePages^[0] := 2;
- FNamePages^[3] := $2c;
- FNamePages^[$20] := $2a;
- FNamePages^[$21] := $2b;
- FNamePages^[$2f] := $29;
- FNamePages^[$30] := $2d;
- FNamePages^[$fd] := $28;
- Move(FNamePages^, FNamePages^[256], 256);
- FNamePages^[$100] := $19;
- FNamePages^[$103] := $2E;
- FNamePages^[$120] := $2F;
- FForbiddenAscii := [#1..#8, #11..#12, #14..#31, #$7F..#$84, #$86..#$9F];
- end;
- procedure TXMLReader.ProcessXML(ASource: TXMLInputSource);
- begin
- doc := TXMLDocument.Create;
- FCursor := doc;
- InitializeRoot(ASource);
- FAllowedDecl := dtXml;
- ParseMisc;
- FDtdParsed := True;
- if FDocType = nil then
- ValidationError('Missing DTD', []);
- if CheckName then
- ParseElement
- else
- RaiseExc('Expected element');
- ParseMisc;
- if Assigned(FDocType) and (doc.DocumentElement.TagName <> FDocType.Name) then
- ValidationError('DTD name does not match root element', []);
- if FCurChar <> #0 then
- RaiseExc('Text after end of document element found');
- end;
- procedure TXMLReader.ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
- begin
- doc := AOwner.OwnerDocument;
- FCursor := AOwner;
- InitializeRoot(ASource);
- FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
- FAllowedDecl := dtText;
- DoParseFragment;
- end;
- // XML 1.1 allowed range $10000..$EFFFF is [D800..DB7F] followed by [DC00..DFFF]
- function TXMLReader.XML11_CheckName: Boolean;
- begin
- if (FCurChar >= #$D800) and (FCurChar <= #$DB7F) then
- begin
- BufAppend(FName, FCurChar);
- GetCharRaw;
- Result := (FCurChar >= #$DC00) and (FCurChar <= #$DFFF);
- end
- else
- Result := False;
- end;
- function TXMLReader.CheckName: Boolean;
- begin
- FName.Length := 0;
- Result := (Byte(FCurChar) in NamingBitmap[FNamePages^[hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName);
- if Result then
- repeat
- BufAppend(FName, FCurChar);
- GetChar;
- until not ((Byte(FCurChar) in NamingBitmap[FNamePages^[$100+hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName));
- end;
- function TXMLReader.CheckNmToken: Boolean;
- begin
- FName.Length := 0;
- Result := False;
- while (Byte(FCurChar) in NamingBitmap[FNamePages^[$100+hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName) do
- begin
- BufAppend(FName, FCurChar);
- GetChar;
- Result := True;
- end;
- end;
- procedure TXMLReader.RaiseNameNotFound;
- begin
- RaiseExc('Name starts with invalid character');
- end;
- function TXMLReader.ExpectName: WideString;
- begin
- if not CheckName then
- RaiseNameNotFound;
- SetString(Result, FName.Buffer, FName.Length);
- end;
- procedure TXMLReader.SkipName;
- begin
- if not CheckName then
- RaiseNameNotFound;
- end;
- function TXMLReader.ResolvePredefined(const RefName: WideString): WideChar;
- begin
- if RefName = 'amp' then
- Result := '&'
- else if RefName = 'apos' then
- Result := ''''
- else if RefName = 'gt' then
- Result := '>'
- else if RefName = 'lt' then
- Result := '<'
- else if RefName = 'quot' then
- Result := '"'
- else
- Result := #0;
- end;
- function TXMLReader.ParseCharRef: Boolean; // [66]
- var
- Value: Integer;
- begin
- Result := FCurChar = '#';
- if Result then
- begin
- GetCharRaw;
- Value := 0;
- if CheckForChar('x') then
- repeat
- case FCurChar of
- '0'..'9': Value := Value * 16 + Ord(FCurChar) - Ord('0');
- 'a'..'f': Value := Value * 16 + Ord(FCurChar) - (Ord('a') - 10);
- 'A'..'F': Value := Value * 16 + Ord(FCurChar) - (Ord('A') - 10);
- else
- Break;
- end;
- GetCharRaw;
- until False
- else
- repeat
- case FCurChar of
- '0'..'9': Value := Value * 10 + Ord(FCurChar) - Ord('0');
- else
- Break;
- end;
- GetCharRaw;
- until False;
- ExpectChar(';');
- case Value of
- $01..$08, $0B..$0C, $0E..$1F:
- if FXML11 then
- BufAppend(FValue, WideChar(Value))
- else
- RaiseExc('Invalid character reference');
- $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
- BufAppend(FValue, WideChar(Value));
- $10000..$10FFFF:
- begin
- BufAppend(FValue, WideChar($D7C0 + (Value shr 10)));
- BufAppend(FValue, WideChar($DC00 xor (Value and $3FF)));
- end;
- else
- RaiseExc('Invalid character reference');
- end;
- end;
- end;
- procedure TXMLReader.DoParseAttValue(Delim: WideChar);
- var
- RefNode: TDOMEntityEx;
- begin
- FValue.Length := 0;
- while (FCurChar <> Delim) and (FCurChar <> #0) do
- begin
- if FCurChar = '<' then
- RaiseExc('Literal "<" in attribute value')
- else if FCurChar <> '&' then
- begin
- if FWhitespace then
- FCurChar := #32;
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end
- else
- begin
- GetCharRaw; // skip '&'
- if ParseCharRef then
- Continue;
- RefNode := ParseReference;
- if Assigned(RefNode) then
- begin
- if FValue.Length > 0 then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- if RefNode.SystemID <> '' then
- RaiseExc('External entity reference is not allowed in attribute value');
- IncludeEntity(RefNode, True);
- end;
- end;
- end; // while
- if FValue.Length > 0 then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- end;
- procedure TXMLReader.DoParseFragment;
- begin
- ParseContent;
- if FCurChar <> #0 then
- RaiseExc('Closing tag not allowed here');
- end;
- function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
- var
- Src: TXMLInputSource;
- begin
- if AEntity.SystemID <> '' then
- begin
- Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, Src);
- if not Result then
- Exit;
- {
- TODO: need different handling of TextDecl in external PEs
- it cannot be parsed if PE is referenced INSIDE declaration
- But - is such case ever met in the wild ?? E.g. MSXML fails such things...
- }
- FAllowedDecl := dtText;
- end
- else
- Src := TXMLInputSource.Create(AEntity.FReplacementText);
- AEntity.FOnStack := True;
- Src.FEntity := AEntity;
- Src.FParent := FSource;
- Src.FCursor := FCursor;
- Unget(FCurChar); // remember FCurChar in previous context
- Inc(FEntityLevel);
- Initialize(Src);
- Result := True;
- end;
- function TXMLReader.ContextPop: Boolean;
- var
- Src: TXMLInputSource;
- begin
- Result := Assigned(FSource.FParent);
- if Result then
- begin
- Src := FSource.FParent;
- if Assigned(FSource.FEntity) then
- TDOMEntityEx(FSource.FEntity).FOnStack := False;
- FCursor := TDOMNode(FSource.FCursor);
- FSource.Free;
- FSource := Src;
- Dec(FEntityLevel);
- GetChar; // re-classify - case of "%pe1;%pe2;"
- end;
- end;
- procedure TXMLReader.IncludeEntity(AEntity: TDOMEntityEx; InAttr: Boolean);
- var
- Node, Child: TDOMNode;
- begin
- if not AEntity.FResolved then
- begin
- if AEntity.FOnStack then
- RaiseExc('Entity ''%s'' recursively references itself', [AEntity.NodeName]);
- if ContextPush(AEntity) then
- begin
- GetCharRaw;
- CheckForChar(#$FEFF);
- FCursor := AEntity; // build child node tree for the entity
- try
- if InAttr then
- DoParseAttValue(#0)
- else
- DoParseFragment;
- AEntity.FResolved := True;
- finally
- ContextPop; // FCursor restored
- FValue.Length := 0;
- end;
- end;
- end;
- Node := FCursor;
- if FCreateEntityRefs or (not AEntity.FResolved) then
- begin
- Node := doc.CreateEntityReference(AEntity.NodeName);
- FCursor.AppendChild(Node);
- end;
- Child := AEntity.FirstChild; // clone the entity node tree
- while Assigned(Child) do
- begin
- Node.AppendChild(Child.CloneNode(True));
- Child := Child.NextSibling;
- end;
- end;
- procedure TXMLReader.StartPE;
- var
- PEName: WideString;
- PEnt: TDOMEntityEx;
- begin
- SetString(PEName, FName.Buffer, FName.Length);
- PEnt := FDocType.PEMap.GetNamedItem(PEName) as TDOMEntityEx;
- if PEnt = nil then // TODO -cVC: Referencing undefined PE
- begin // (These are classified as 'optional errors'...)
- // ValidationError('Undefined parameter entity referenced: %s', [PEName]);
- Exit;
- end;
- if PEnt.FOnStack then
- RaiseExc('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
- ContextPush(PEnt);
- end;
- function TXMLReader.ParseReference: TDOMEntityEx;
- var
- RefName: WideString;
- Predef: WideChar;
- begin
- Result := nil;
- RefName := ExpectName;
- ExpectChar(';');
- Predef := ResolvePredefined(RefName);
- if Predef <> #0 then
- BufAppend(FValue, Predef)
- else
- begin
- if Assigned(FDocType) then
- Result := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
- if Result = nil then
- begin
- if FStandalone or (FDocType = nil) or not (FDocType.HasPERefs or (FDocType.SystemID <> '')) then
- RaiseExc('Undefined entity ''%s'' referenced', [RefName])
- else
- ValidationError('Undefined entity ''%s'' referenced', [RefName]);
- end
- else
- begin
- if FStandalone and (not Result.FInternal) then
- RaiseExc('Standalone constraint violation');
- if Result.NotationName <> '' then
- RaiseExc('Reference to unparsed entity ''%s''', [RefName]);
- end;
- end;
- end;
- procedure TXMLReader.ProcessTextAndRefs;
- var
- nonWs: Boolean;
- last: WideChar;
- RefNode: TDOMEntityEx;
- begin
- FValue.Length := 0;
- nonWs := False;
- FAllowedDecl := dtNone;
- while (FCurChar <> '<') and (FCurChar <> #0) do
- begin
- if FCurChar <> '&' then
- begin
- if not FWhitespace then
- nonWs := True;
- BufAppend(FValue, FCurChar);
- if FCurChar = '>' then
- with FValue do
- if (Length >= 3) and
- (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
- RaiseExc('Literal '']]>'' is not allowed in text');
- GetCharRaw;
- end
- else
- begin
- GetCharRaw; // skip '&'
- if ParseCharRef then
- begin
- last := FValue.Buffer[FValue.Length-1];
- if (last <> #9) and (last <> #10) and (last <> #13) and (last <> #32) then
- nonWs := True;
- Continue;
- end;
- nonWs := True;
- RefNode := ParseReference;
- if Assigned(RefNode) then
- begin
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- nonWs := False;
- end;
- IncludeEntity(RefNode, False);
- end;
- end;
- end; // while
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- end;
- procedure TXMLReader.ExpectAttValue; // [10]
- var
- Delim: WideChar;
- begin
- if (FCurChar <> '''') and (FCurChar <> '"') then
- RaiseExpectedQmark;
- Delim := FCurChar;
- GetCharRaw; // skip quote
- DoParseAttValue(Delim);
- GetChar; // NOTE: not GetCharRaw - when parsing AttDef in DTD,
- // immediately following PERef must be recognized
- end;
- function TXMLReader.SkipQuotedLiteral: Boolean;
- var
- Delim: WideChar;
- begin
- Result := (FCurChar = '''') or (FCurChar = '"');
- if Result then
- begin
- Delim := FCurChar;
- GetCharRaw; // skip quote
- FValue.Length := 0;
- while (FCurChar <> Delim) and (FCurChar <> #0) do
- begin
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end;
- ExpectChar(Delim); // <-- to check the EOF only
- end;
- end;
- procedure TXMLReader.SkipPubidLiteral; // [12]
- var
- I: Integer;
- begin
- if SkipQuotedLiteral then
- begin
- for I := 0 to FValue.Length-1 do
- if (FValue.Buffer[I] > #255) or not (Char(FValue.Buffer[I]) in PubidChars) then
- RaiseExc('Illegal Public ID literal')
- end
- else
- RaiseExpectedQMark;
- end;
- procedure TXMLReader.SkipSystemLiteral(out Literal: WideString; Required: Boolean);
- begin
- if SkipQuotedLiteral then
- SetString(Literal, FValue.Buffer, FValue.Length)
- else if Required then
- RaiseExpectedQMark;
- end;
- procedure TXMLReader.ParseComment; // [15]
- begin
- ExpectString('--');
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 2) and (Buffer[Length-1] = '-') and
- (Buffer[Length-2] = '-') then
- begin
- Dec(Length, 2);
- if Assigned(FCursor) then
- FCursor.AppendChild(doc.CreateCommentBuf(Buffer, Length));
- ExpectChar('>');
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated comment');
- end;
- procedure TXMLReader.ParsePI; // [16]
- var
- Name, Value: WideString;
- begin
- GetCharRaw; // skip '?'
- Name := ExpectName;
- with FName do
- if (Length = 3) and
- ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
- ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
- ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
- begin
- if Name <> 'xml' then
- RaiseExc('''xml'' is a reserved word; it must be lowercase');
- if FAllowedDecl <> dtNone then
- begin
- ParseXmlOrTextDecl(FAllowedDecl = dtText);
- FAllowedDecl := dtNone;
- Exit;
- end
- else
- RaiseExc('XML declaration not allowed here');
- end;
- if FCurChar <> '?' then
- ExpectWhitespace;
- FAllowedDecl := dtNone;
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 2) and (Buffer[Length-1] = '>') and
- (Buffer[Length-2] = '?') then
- begin
- Dec(Length, 2);
- SetString(Value, Buffer, Length);
- if Assigned(FCursor) then
- FCursor.AppendChild(Doc.CreateProcessingInstruction(Name, Value));
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated processing instruction');
- end;
- // here we come from ParsePI, 'xml' is already consumed
- procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
- var
- TmpStr: WideString;
- IsXML11: Boolean;
- begin
- ExpectWhitespace;
- // VersionInfo: optional in TextDecl, required in XmlDecl
- if (not TextDecl) or (FCurChar = 'v') then
- begin
- ExpectString('version'); // [24]
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- IsXML11 := False;
- if TmpStr = '1.1' then // Checking for bad chars is implied
- IsXML11 := True
- else if TmpStr <> '1.0' then
- RaiseExc('Illegal version number');
- if not TextDecl then
- begin
- if doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).XMLVersion := TmpStr;
- if IsXML11 then
- XML11_BuildTables;
- end
- else // parsing external entity
- if IsXML11 and not FXML11 then
- RaiseExc('XML 1.0 document cannot invoke XML 1.1 entities');
- if FCurChar <> '?' then
- ExpectWhitespace;
- end;
- // EncodingDecl: required in TextDecl, optional in XmlDecl
- if TextDecl or (FCurChar = 'e') then // [80]
- begin
- ExpectString('encoding');
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- if not IsValidEncName(TmpStr) then
- RaiseExc('Illegal encoding name');
- FSource.SetEncoding(TmpStr); // <-- Wide2Ansi conversion here
- // getting here means that specified encoding is supported
- // TODO: maybe assign the 'preferred' encoding name?
- if not TextDecl and doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).Encoding := TmpStr;
- if FCurChar <> '?' then
- ExpectWhitespace;
- end;
- // SDDecl: forbidden in TextDecl, optional in XmlDecl
- if (not TextDecl) and (FCurChar = 's') then
- begin
- ExpectString('standalone');
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- if TmpStr = 'yes' then
- FStandalone := True
- else if TmpStr <> 'no' then
- RaiseExc('Only "yes" or "no" are permitted as values of "standalone"');
- SkipWhitespace;
- end;
- ExpectString('?>');
- end;
- procedure TXMLReader.ParseDoctypeDecl; // [28]
- var
- IntSubset: TWideCharBuf;
- Src, OldSrc: TXMLInputSource;
- begin
- FAllowedDecl := dtNone;
- if FDtdParsed then
- RaiseExc('Markup declaration not allowed here');
- ExpectString('DOCTYPE'); // gives possibly incorrect error message
- ExpectWhitespace;
- FDocType := TDOMDocumentTypeEx.Create(doc);
- FDtdParsed := True;
- { To comply with certain output tests, we must insert PIs coming from internal
- subset before DocType node. This looks very synthetic, but let it be...
- Moreover, this code actually duplicates such PIs }
- try
- FDocType.FName := ExpectName;
- ExpectWhitespace;
- ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
- SkipWhitespace;
- if FCurChar = '[' then
- begin
- BufAllocate(IntSubset, 256);
- FCopyBuf := @IntSubset;
- GetChar; // cause very first char after '[' to be appended
- try
- FIntSubset := True;
- ParseMarkupDecl;
- if IntSubset.Length > 0 then // sanity check - must at least contain ']'
- SetString(FDocType.FInternalSubset, IntSubset.Buffer, IntSubset.Length-1);
- ExpectChar(']');
- finally
- FIntSubset := False;
- FCopyBuf := nil;
- FreeMem(IntSubset.Buffer);
- end;
- SkipWhitespace;
- end;
- ExpectChar('>');
- if FDocType.SystemID <> '' then
- begin
- if ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then
- begin
- OldSrc := FSource;
- Unget(FCurChar);
- FCursor := nil;
- try
- DoParseExtSubset(Src);
- finally
- while ContextPop do; // Cleanup after possible exceptions
- FSource.Free;
- FSource := OldSrc;
- GetChar;
- FCursor := Doc;
- end;
- end;
- end;
- finally
- doc.AppendChild(FDocType);
- end;
- end;
- procedure TXMLReader.ParseMisc;
- begin
- repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
- if not CheckForChar('<') then
- Break;
- if CheckForChar('!') then
- begin
- FAllowedDecl := dtNone;
- if FCurChar = '-' then
- ParseComment
- else
- ParseDoctypeDecl;
- end
- else
- if FCurChar = '?' then
- ParsePI
- else
- Break;
- until FCurChar = #0;
- FAllowedDecl := dtNone;
- end;
- function TXMLReader.ParseEq: Boolean; // [25]
- begin
- while FWhitespace do GetCharRaw;
- Result := FCurChar = '=';
- if Result then
- begin
- GetCharRaw;
- while FWhitespace do GetCharRaw;
- end;
- end;
- procedure TXMLReader.ExpectEq;
- begin
- if not ParseEq then
- RaiseExc('Expected "="');
- end;
- { DTD stuff }
- procedure TXMLReader.AssertPENesting(CurrentLevel: Integer);
- begin
- if CurrentLevel <> FEntityLevel then
- ValidationError('Parameter entities must be properly nested', []);
- end;
- // content model
- type
- TElementContentType = (
- ctEmpty,
- ctAny,
- ctMixed,
- ctName,
- ctChoice,
- ctSeq
- );
- TElementContentQuant = (
- cqNone,
- cqOpt,
- cqReq,
- cqPlus
- );
- {
- TElementContent = record
- ContentType: TElementContentType;
- ContentQuant: TElementContentQuant;
- Name: WideString;
- Children: array of TElementContent;
- end;
- }
- procedure TXMLReader.ExpectChoiceOrSeq(); // [49], [50]
- var
- Delim: WideChar;
- PELevel: Integer;
- begin
- Delim := #0;
- repeat
- SkipWhitespace;
- if FCurChar = '(' then
- begin
- PELevel := FEntityLevel;
- GetChar;
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
- GetChar;
- end
- else
- SkipName;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- SkipWhitespace;
- if FCurChar = ')' then
- Break;
- if Delim = #0 then
- begin
- if (FCurChar = '|') or (FCurChar = ',') then
- Delim := FCurChar
- else
- RaiseExc('Expected "|" or ","');
- end
- else
- if FCurChar <> Delim then
- RaiseExc(Delim);
- GetChar; // skip delimiter
- until False;
- end;
- procedure TXMLReader.ParseMixedOrChildren;
- var
- PELevel: Integer;
- NeedAsterisk: Boolean;
- begin
- PELevel := FEntityLevel;
- GetChar; // starting bracket
- SkipWhitespace;
- if CheckForChar('#') then // Mixed section [51]
- begin
- ExpectString('PCDATA');
- SkipWhitespace;
- NeedAsterisk := False;
- while FCurChar <> ')' do
- begin
- ExpectChar('|');
- NeedAsterisk := True;
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- end;
- AssertPENesting(PELevel);
- GetChar;
- if NeedAsterisk then
- ExpectChar('*')
- else
- CheckForChar('*');
- end
- else // Parse Children section [47]
- begin
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
- GetChar;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- end;
- end;
- procedure TXMLReader.ParseElementDecl; // [45]
- begin
- SkipName;
- ExpectWhitespace;
- // Get contentspec [46]
- if FCurChar = 'E' then
- ExpectString('EMPTY')
- else if FCurChar = 'A' then
- ExpectString('ANY')
- else if FCurChar = '(' then
- ParseMixedOrChildren
- else
- RaiseExc('Invalid content specification');
- end;
- procedure TXMLReader.ParseNotationDecl; // [82]
- var
- Notation: TDOMNotationEx;
- begin
- Notation := TDOMNotationEx(TDOMNotation.Create(Doc));
- try
- Notation.FName := ExpectName;
- ExpectWhitespace;
- if not ParseExternalID(Notation.FSystemID, Notation.FPublicID, True) then
- RaiseExc('Expected external or public ID');
- except
- Notation.Free;
- raise;
- end;
- if FDocType.Notations.GetNamedItem(Notation.FName) = nil then
- FDocType.Notations.SetNamedItem(Notation)
- else
- begin
- ValidationError('Duplicate notation declaration: %s', [Notation.FName]);
- Notation.Free;
- end;
- end;
- procedure TXMLReader.ParseAttlistDecl; // [52]
- var
- SaveCurNode: TDOMNode;
- ValueRequired: Boolean;
- Token: WideString;
- ElDef: TDOMElementDef;
- AttDef: TDOMAttrDef;
- begin
- Token := ExpectName;
- ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Token));
- if ElDef = nil then
- begin
- // TODO -cVC: must distinguish ElementDef created here from one explicitly declared
- ElDef := TDOMElementDef.Create(doc);
- ElDef.FNodeName := Token;
- FDocType.ElementDefs.SetNamedItem(ElDef);
- end;
- SkipWhitespace;
- while FCurChar <> '>' do
- begin
- SkipWhitespace; { !!! }
- AttDef := TDOMAttrDef.Create(doc);
- try
- AttDef.FName := ExpectName;
- ExpectWhitespace;
- Token := GetString(['A'..'Z']); // Get AttType [54], [55], [56]
- if Token = 'CDATA' then
- AttDef.FDataType := DT_CDATA
- else if Token = 'ID' then
- AttDef.FDataType := DT_ID
- else if Token = 'IDREF' then
- AttDef.FDataType := DT_IDREF
- else if Token = 'IDREFS' then
- AttDef.FDataType := DT_IDREFS
- else if Token = 'ENTITY' then
- AttDef.FDataType := DT_ENTITY
- else if Token = 'ENTITIES' then
- AttDef.FDataType := DT_ENTITIES
- else if Token = 'NMTOKEN' then
- AttDef.FDataType := DT_NMTOKEN
- else if Token = 'NMTOKENS' then
- AttDef.FDataType := DT_NMTOKENS
- else if Token = 'NOTATION' then // [57], [58]
- begin
- AttDef.FDataType := DT_NOTATION;
- ExpectWhitespace;
- ExpectChar('(');
- repeat
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- end
- else
- if CheckForChar('(') then // [59]
- begin
- AttDef.FDataType := DT_NMTOKEN;
- repeat
- SkipWhitespace;
- if not CheckNmToken then
- RaiseNameNotFound; // not completely correct error message
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- end else
- RaiseExc('Invalid tokenized type');
- ExpectWhitespace;
- // Get DefaultDecl [60]
- ValueRequired := False;
- if CheckForChar('#') then
- begin
- Token := GetString(['A'..'Z']);
- if Token = 'REQUIRED' then
- AttDef.FDefault := AD_REQUIRED
- else if Token = 'IMPLIED' then
- AttDef.FDefault := AD_IMPLIED
- else if Token = 'FIXED' then
- begin
- AttDef.FDefault := AD_FIXED;
- ExpectWhitespace;
- ValueRequired := True;
- end
- else
- RaiseExc('Illegal attribute default');
- end
- else
- begin
- AttDef.FDefault := AD_DEFAULT;
- ValueRequired := True;
- end;
- if ValueRequired then
- begin
- SaveCurNode := FCursor;
- FCursor := AttDef;
- // tricky moment, no tests for that
- { FRecognizePE := False; } // TODO: shall it really be disabled?
- try
- ExpectAttValue;
- finally
- FCursor := SaveCurNode;
- { FRecognizePE := not FIntSubset; }
- end;
- if AttDef.FDataType = DT_ID then
- ValidationError('Attributes of type ID must not have a default value',[]);
- end;
- // First declaration is binding, subsequent should be ignored
- if Assigned(ElDef.GetAttributeNode(AttDef.Name)) then
- AttDef.Free
- else
- ElDef.SetAttributeNode(AttDef);
- except
- AttDef.Free;
- raise;
- end;
- SkipWhitespace;
- end;
- end;
- procedure TXMLReader.ParseEntityDeclValue(Delim: WideChar); // [9]
- var
- I: Integer;
- Src: TXMLInputSource;
- begin
- Src := FSource;
- // "Included in literal": process until delimiter hit IN SAME context
- while not ((FSource = Src) and CheckForChar(Delim)) do
- if ParsePEReference then
- begin
- if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('PE references in internal subset not allowed inside declarations');
- StartPE;
- GetCharRaw;
- end
- else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass
- begin
- GetCharRaw;
- if not ParseCharRef then
- begin
- BufAppend(FValue, '&');
- ExpectName;
- ExpectChar(';');
- for I := 0 to FName.Length-1 do
- BufAppend(FValue, FName.Buffer[I]);
- BufAppend(FValue, ';');
- end;
- end
- else if FCurChar <> #0 then // Regular character
- begin
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end
- else if not ContextPop then // #0
- Break;
- end;
- procedure TXMLReader.ParseEntityDecl; // [70]
- var
- NDataAllowed: Boolean;
- Delim: WideChar;
- Entity: TDOMEntityEx;
- Map: TDOMNamedNodeMap;
- begin
- NDataAllowed := True;
- Map := FDocType.Entities;
- if CheckForChar('%') then // [72]
- begin
- ExpectWhitespace;
- NDataAllowed := False;
- Map := FDocType.PEMap;
- end;
- Entity := TDOMEntityEx.Create(Doc);
- try
- Entity.FInternal := FIntSubset and (FSource.FParent = nil);
- Entity.FName := ExpectName;
- ExpectWhitespace;
- if (FCurChar = '"') or (FCurChar = '''') then
- begin
- NDataAllowed := False;
- Delim := FCurChar;
- FRecognizePE := False; // PERef right after delimiter should not be recognized
- GetCharRaw; // at char level - we process it 'manually'
- FValue.Length := 0;
- ParseEntityDeclValue(Delim);
- FRecognizePE := not FIntSubset;
- SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
- end
- else
- if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
- RaiseExc('Expected entity value or external ID');
- if NDataAllowed then // [76]
- begin
- if FCurChar <> '>' then
- ExpectWhitespace;
- if FCurChar = 'N' then
- begin
- ExpectString('NDATA');
- ExpectWhitespace;
- SkipName;
- // TODO -cVC: Notation declared. Here or after all has been read?
- SetString(Entity.FNotationName, FName.Buffer, FName.Length);
- if FDocType.Notations.GetNamedItem(Entity.NotationName) = nil then
- ValidationError('Reference to undeclared notation ''%s''', [Entity.NotationName]);
- end;
- end;
- except
- Entity.Free;
- raise;
- end;
- // Repeated declarations of same entity are legal but must be ignored
- if Map.GetNamedItem(Entity.NodeName) = nil then
- Map.SetNamedItem(Entity)
- else
- Entity.Free;
- end;
- procedure TXMLReader.ParseMarkupDecl; // [29]
- var
- Token: WideString;
- IncludeLevel: Integer;
- IgnoreLevel: Integer;
- PELevel: Integer;
- begin
- IncludeLevel := 0;
- IgnoreLevel := 0;
- repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
- if ParsePEReference then // PERef between declarations should always be recognized
- begin
- FAllowedDecl := dtNone;
- if Assigned(FDocType) then
- FDocType.HasPERefs := True;
- StartPE;
- GetChar;
- Continue;
- end;
- if (FCurChar = #0) and ContextPop then
- Continue;
- if (FCurChar = ']') and (IncludeLevel > 0) then
- begin
- ExpectString(']]>');
- Dec(IncludeLevel);
- Continue;
- end;
- if FCurChar <> '<' then
- Break;
- PELevel := FEntityLevel;
- GetCharRaw;
- if CheckForChar('!') then
- begin
- FAllowedDecl := dtNone;
- if FCurChar = '-' then
- ParseComment
- else if FCurChar = '[' then
- begin
- if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('Conditional sections not allowed in internal subset');
- FRecognizePE := not FIntSubset;
- GetChar; // skip '['
- SkipWhitespace;
- Token := GetString(['A'..'Z']);
- SkipWhitespace;
- if Token = 'INCLUDE' then
- Inc(IncludeLevel)
- else if Token = 'IGNORE' then
- IgnoreLevel := 1
- else
- RaiseExc('Expected "INCLUDE" or "IGNORE"');
- AssertPENesting(PELevel);
- ExpectChar('[');
- if IgnoreLevel > 0 then
- repeat
- FRecognizePE := False; // PEs not recognized in IGNORE section
- if CheckForChar('<') and CheckForChar('!') and CheckForChar('[') then
- Inc(IgnoreLevel)
- else if CheckForChar(']') and CheckForChar(']') and CheckForChar('>') then
- Dec(IgnoreLevel)
- else GetChar;
- until (IgnoreLevel=0) or (FCurChar = #0);
- end
- else
- begin
- FRecognizePE := not FIntSubset;
- Token := GetString(['A'..'Z']);
- ExpectWhitespace;
- if Token = 'ELEMENT' then
- ParseElementDecl
- else if Token = 'ENTITY' then
- ParseEntityDecl
- else if Token = 'ATTLIST' then
- ParseAttlistDecl
- else if Token = 'NOTATION' then
- ParseNotationDecl
- else
- RaiseExc('Illegal markup declaration');
- SkipWhitespace;
- FRecognizePE := False; // ! Don't auto-pop context on last markup delimiter
- ExpectChar('>'); // This enables correct nesting check
- end;
- {
- MarkupDecl starting in PE and ending in root is a WFC [28a]
- MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
- }
- if PELevel > FEntityLevel then
- RaiseExc('Parameter entities must be properly nested')
- else
- AssertPENesting(PELevel);
- end
- else if FCurChar = '?' then
- ParsePI;
- until False;
- FRecognizePE := False;
- if (IncludeLevel > 0) or (IgnoreLevel > 0) then
- RaiseExc('Conditional section not closed');
- end;
- procedure TXMLReader.DoParseExtSubset(ASource: TXMLInputSource);
- begin
- InitializeRoot(ASource);
- FAllowedDecl := dtText;
- ParseMarkupDecl;
- if FCurChar <> #0 then
- RaiseExc('Illegal character in DTD');
- end;
- procedure TXMLReader.ProcessDTD(ASource: TXMLInputSource);
- begin
- doc := TXMLDocument.Create;
- FDocType := TDOMDocumentTypeEx.Create(doc);
- // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
- // TODO: what shall be FCursor? FDocType cannot - it does not accept child nodes
- doc.AppendChild(FDocType);
- DoParseExtSubset(ASource);
- end;
- procedure TXMLReader.ParseCDSect; // [18]
- var
- name: WideString;
- begin
- ExpectString('[CDATA[');
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 3) and (Buffer[Length-1] = '>') and
- (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
- begin
- Dec(Length, 3);
- SetString(name, Buffer, Length);
- FCursor.AppendChild(doc.CreateCDATASection(name));
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated CDATA section');
- end;
- procedure TXMLReader.ParseContent;
- begin
- repeat
- if FCurChar = '<' then
- begin
- GetCharRaw;
- if CheckName then
- ParseElement
- else if FCurChar = '!' then
- begin
- GetCharRaw;
- FAllowedDecl := dtNone;
- if FCurChar = '[' then
- ParseCDSect
- else if FCurChar = '-' then
- ParseComment
- else
- ParseDoctypeDecl; // actually will raise error
- end
- else if FCurChar = '?' then
- ParsePI
- else
- Exit;
- end
- else
- ProcessTextAndRefs;
- until FCurChar = #0;
- end;
- // Element name already in FNameBuffer
- procedure TXMLReader.ParseElement; // [39] [40] [44]
- var
- NewElem: TDOMElement;
- IsEmpty: Boolean;
- attr, OldAttr: TDOMNode;
- begin
- NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
- FCursor.AppendChild(NewElem);
- Assert(NewElem.ParentNode = FCursor, 'AppendChild did not set ParentNode');
- FCursor := NewElem;
- IsEmpty := False;
- while FCurChar <> '>' do
- begin
- if FCurChar = '/' then
- begin
- GetCharRaw;
- IsEmpty := True;
- FCursor := FCursor.ParentNode;
- Break;
- end;
- // Get Attribute [41]
- ExpectWhitespace;
- if not CheckName then // allow stuff like <element >, <element />
- Continue;
- attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
- // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
- OldAttr := NewElem.Attributes.SetNamedItem(Attr);
- if Assigned(OldAttr) then
- begin
- OldAttr.Free;
- RaiseExc('Duplicate attribute');
- end;
- ExpectEq;
- Assert(TDOMAttr(attr).OwnerElement = NewElem, 'DOMAttr.OwnerElement not set correctly');
- FCursor := attr;
- ExpectAttValue;
- FCursor := NewElem;
- end;
- ExpectChar('>');
- ProcessDefaultAttributes(NewElem);
- if not IsEmpty then
- begin
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipWhitespace;
- ParseContent;
- if FCurChar = '/' then // Get ETag [42]
- begin
- GetCharRaw;
- if ExpectName <> NewElem.TagName then
- RaiseExc('Unmatching element end tag (expected "</%s>")', [NewElem.TagName]);
- SkipWhitespace;
- ExpectChar('>');
- FCursor := FCursor.ParentNode;
- end
- else if FCurChar <> #0 then
- RaiseNameNotFound
- else // End of stream in content
- RaiseExc('Document element not closed');
- end;
- end;
- procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement);
- var
- I: Integer;
- ElDef: TDOMElementDef;
- AttDefs: TDOMNamedNodeMap;
- AttDef: TDOMAttrDef;
- Attr: TDOMAttrEx;
- Spec: Boolean;
- begin
- if Assigned(FDocType) then
- begin
- ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Element.TagName));
- if Assigned(ElDef) and ElDef.HasAttributes then
- begin
- AttDefs := ElDef.Attributes;
- for I := 0 to AttDefs.Length-1 do
- begin
- AttDef := AttDefs[I] as TDOMAttrDef;
- Spec := True;
- // no validity checking yet; just append default values
- Attr := TDOMAttrEx(Element.GetAttributeNode(AttDef.Name));
- if (AttDef.FDefault in [AD_DEFAULT, AD_FIXED]) and (Attr = nil) then
- begin
- Attr := TDOMAttrEx(AttDef.CloneNode(True));
- Element.SetAttributeNode(Attr);
- Spec := False;
- end;
- if Assigned(Attr) then
- begin
- Attr.FSpecified := Spec;
- Attr.FNormalize := (AttDef.FDataType <> DT_CDATA);
- end;
- end;
- end;
- end;
- end;
- function TXMLReader.ParsePEReference: Boolean; // [69]
- begin
- Result := CheckForChar('%');
- if Result then
- begin
- SkipName;
- ExpectChar(';');
- end;
- end;
- function TXMLReader.ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- begin
- if FCurChar = 'S' then
- begin
- ExpectString('SYSTEM');
- ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
- Result := True;
- end
- else if FCurChar = 'P' then
- begin
- ExpectString('PUBLIC');
- ExpectWhitespace;
- SkipPubidLiteral;
- SetString(PubID, FValue.Buffer, FValue.Length);
- if SysIdOptional then
- begin
- SkipWhitespace;
- SkipSystemLiteral(SysID, False);
- end
- else
- begin
- ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
- end;
- Result := True;
- end else
- Result := False;
- end;
- procedure TXMLReader.ValidationError(const Msg: string;
- const args: array of const);
- begin
- // TODO: just a stub now
- FInvalid := True;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(Src);
- ADoc := TXMLDocument(Reader.Doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream);
- begin
- ReadXMLFile(ADoc, f, 'stream:');
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- FileStream: TStream;
- begin
- ADoc := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
- finally
- FileStream.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
- begin
- ReadXMLFragment(AParentNode, f, 'stream:');
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
- var
- Stream: TStream;
- begin
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader.ProcessDTD(Src);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessDTD(Src);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
- begin
- ReadDTDFile(ADoc, f, 'stream:');
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- Stream: TStream;
- begin
- ADoc := nil;
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- end.
|