| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576 |
- {
- This file is part of the Free Component Library
- XML reading routines.
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- Modified in 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit XMLRead;
- {$ifdef fpc}
- {$MODE objfpc}{$H+}
- {$endif}
- interface
- uses
- SysUtils, Classes, DOM;
- type
- EXMLReadError = class(Exception);
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
- // =======================================================
- implementation
- uses
- UriParser;
- type
- TSetOfChar = set of Char;
- const
- Letter = ['A'..'Z', 'a'..'z'];
- Digit = ['0'..'9'];
- PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- type
- TDOMNotationEx = class(TDOMNotation);
- TDOMAttrEx = class(TDOMAttr);
- TXMLInputSource = class;
- TDOMElementDef = class;
- TDOMEntityEx = class(TDOMEntity)
- protected
- FInternal: Boolean;
- FResolved: Boolean;
- FOnStack: Boolean;
- FReplacementText: DOMString;
- end;
- // TODO: Do I need PEMap in DocType? Maybe move it to Reader itself?
- // (memory usage - they are not needed after parsing)
- TDOMDocumentTypeEx = class(TDOMDocumentType)
- private
- FHasPERefs: Boolean;
- FPEMap: TDOMNamedNodeMap;
- FElementDefs: TDOMNamedNodeMap;
- function GetPEMap: TDOMNamedNodeMap;
- function GetElementDefs: TDOMNamedNodeMap;
- protected
- property PEMap: TDOMNamedNodeMap read GetPEMap;
- property ElementDefs: TDOMNamedNodeMap read GetElementDefs;
- property HasPERefs: Boolean read FHasPERefs write FHasPERefs;
- public
- destructor Destroy; override;
- end;
- TXMLReader = class;
- TDecoder = class;
- TDecoderRef = class of TDecoder;
- TXMLInputSource = class
- private
- FBuf: PChar;
- FBufEnd: PChar;
- FEof: Boolean;
- FSurrogate: WideChar;
- FReader: TXMLReader;
- FParent: TXMLInputSource;
- FEntity: TObject; // weak reference
- FCursor: TObject; // weak reference
- FLine: Integer;
- FColumn: Integer;
- FSystemID: WideString;
- FPublicID: WideString;
- function GetSystemID: WideString;
- function GetPublicID: WideString;
- protected
- procedure FetchData; virtual;
- public
- constructor Create(const AData: WideString);
- function NextChar: WideChar; virtual;
- procedure Initialize; virtual;
- procedure SetEncoding(const AEncoding: string); virtual;
- property SystemID: WideString read GetSystemID write FSystemID;
- property PublicID: WideString read GetPublicID write FPublicID;
- end;
- TXMLDecodingSource = class(TXMLInputSource)
- private
- FDecoder: TDecoder;
- FSeenCR: Boolean;
- function InternalNextChar: WideChar;
- procedure DecodingError(const Msg: string); overload;
- procedure DecodingError(const Msg: string; const Args: array of const); overload;
- public
- destructor Destroy; override;
- function NextChar: WideChar; override;
- procedure SetEncoding(const AEncoding: string); override;
- procedure Initialize; override;
- end;
- TXMLStreamInputSource = class(TXMLDecodingSource)
- private
- FAllocated: PChar;
- FStream: TStream;
- FBufSize: Integer;
- FOwnStream: Boolean;
- public
- constructor Create(AStream: TStream; AOwnStream: Boolean);
- destructor Destroy; override;
- procedure FetchData; override;
- end;
- TXMLFileInputSource = class(TXMLDecodingSource)
- private
- FFile: ^Text;
- FString: string;
- public
- constructor Create(var AFile: Text);
- procedure FetchData; override;
- end;
- TDecoder = class
- private
- FSource: TXMLDecodingSource;
- public
- constructor Create(ASource: TXMLDecodingSource);
- function DecodeNext: WideChar; virtual; abstract;
- class function Supports(const AEncoding: string): Boolean; virtual; abstract;
- end;
- TISO8859_1Decoder = class(TDecoder)
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- TUCS2Decoder = class(TDecoder)
- private
- FSwapEndian: Boolean;
- FEncoding: string;
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- TUTF8Decoder = class(TDecoder)
- public
- function DecodeNext: WideChar; override;
- class function Supports(const AEncoding: string): Boolean; override;
- end;
- PWideCharBuf = ^TWideCharBuf;
- TWideCharBuf = record
- Buffer: PWideChar;
- Length: Integer;
- MaxLength: Integer;
- end;
- TEntityResolveEvent = procedure(const PubID, SysID: WideString; var Source: TXMLInputSource) of object;
- TDeclType = (dtNone, dtXml, dtText);
- TXMLReader = class
- private
- FSource: TXMLInputSource;
- FCurChar: WideChar;
- FWhitespace: Boolean;
- FXML11: Boolean;
- FValue: TWideCharBuf;
- FName: TWideCharBuf;
- FCopyBuf: PWideCharBuf;
- FIntSubset: Boolean;
- FAllowedDecl: TDeclType;
- FDtdParsed: Boolean;
- FRecognizePE: Boolean;
- FStandalone: Boolean; // property of Doc ?
- FInvalid: Boolean;
- // TODO: This array must be stored globally, not per instance
- FNamePages: PByteArray;
- FForbiddenAscii: TSetOfChar;
- FDocType: TDOMDocumentTypeEx; // a shortcut
- FEntityLevel: Integer;
- FPreserveWhitespace: Boolean;
- FCreateEntityRefs: Boolean;
- procedure RaiseExpectedQmark;
- procedure GetChar;
- procedure GetCharRaw;
- procedure Unget(wc: WideChar);
- procedure Initialize(ASource: TXMLInputSource);
- procedure InitializeRoot(ASource: TXMLInputSource);
- procedure DoParseAttValue(Delim: WideChar);
- procedure DoParseFragment;
- procedure DoParseExtSubset(ASource: TXMLInputSource);
- function ContextPush(AEntity: TDOMEntityEx): Boolean;
- function ContextPop: Boolean;
- procedure XML11_BuildTables;
- function XML11_CheckName: Boolean;
- protected
- FCursor: TDOMNode;
- procedure RaiseExc(const descr: String); overload;
- procedure RaiseExc(const descr: string; const args: array of const); overload;
- procedure RaiseExc(Expected: WideChar); overload;
- function SkipWhitespace: Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- procedure ExpectChar(wc: WideChar);
- function CheckForChar(c: WideChar): Boolean;
- procedure SkipString(const ValidChars: TSetOfChar);
- function GetString(const ValidChars: TSetOfChar): WideString;
- procedure RaiseNameNotFound;
- function CheckName: Boolean;
- function CheckNmToken: Boolean;
- function ExpectName: WideString; // [5]
- procedure SkipName;
- function SkipQuotedLiteral: Boolean;
- procedure ExpectAttValue; // [10]
- procedure SkipPubidLiteral; // [12]
- procedure SkipSystemLiteral(out Literal: WideString; Required: Boolean);
- procedure ParseComment; // [15]
- procedure ParsePI; // [16]
- procedure ParseCDSect; // [18]
- procedure ParseXmlOrTextDecl(TextDecl: Boolean);
- function ParseEq: Boolean; // [25]
- procedure ExpectEq;
- procedure ParseMisc; // [27]
- procedure ParseDoctypeDecl; // [28]
- procedure ParseMarkupDecl; // [29]
- procedure ParseElement; // [39]
- procedure ParseContent; // [43]
- function ResolvePredefined(const RefName: WideString): WideChar;
- procedure IncludeEntity(AEntity: TDOMEntityEx; InAttr: Boolean);
- procedure StartPE;
- function ParseCharRef: Boolean; // [66]
- function ParseReference: TDOMEntityEx; // [67]
- function ParsePEReference: Boolean; // [69]
- function ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- procedure ProcessTextAndRefs;
- procedure AssertPENesting(CurrentLevel: Integer);
- procedure ParseEntityDecl;
- procedure ParseEntityDeclValue(Delim: WideChar);
- procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq;
- procedure ParseMixedOrChildren;
- procedure ParseElementDecl;
- procedure ParseNotationDecl;
- function ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
- procedure ProcessDefaultAttributes(Element: TDOMElement);
- procedure ValidationError(const Msg: string; const args: array of const);
- public
- doc: TDOMDocument;
- constructor Create;
- destructor Destroy; override;
- procedure ProcessXML(ASource: TXMLInputSource); // [1]
- procedure ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
- procedure ProcessDTD(ASource: TXMLInputSource); // ([29])
- end;
- // AttDef/ElementDef support
- TAttrDataType = (
- DT_CDATA,
- DT_ID,
- DT_IDREF,
- DT_IDREFS,
- DT_ENTITY,
- DT_ENTITIES,
- DT_NMTOKEN,
- DT_NMTOKENS,
- DT_NOTATION
- );
- TAttrDefault = (
- AD_IMPLIED,
- AD_DEFAULT,
- AD_REQUIRED,
- AD_FIXED
- );
- TDOMAttrDef = class(TDOMAttr)
- protected
- FDataType: TAttrDataType;
- FDefault: TAttrDefault;
- // FEnumeration: TWideStringList? array of WideStrings?
- end;
- TDOMElementDef = class(TDOMElement);
-
- {$i names.inc}
- // TODO: List of registered/supported decoders
- function FindDecoder(const Encoding: string): TDecoderRef;
- begin
- if TISO8859_1Decoder.Supports(Encoding) then
- Result := TISO8859_1Decoder
- else
- Result := nil;
- end;
- procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
- begin
- ABuffer.MaxLength := ALength;
- ABuffer.Length := 0;
- GetMem(ABuffer.Buffer, ABuffer.MaxLength*SizeOf(WideChar));
- end;
- procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
- begin
- if ABuffer.Length >= ABuffer.MaxLength then
- begin
- ABuffer.MaxLength := ABuffer.MaxLength * 2;
- ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * SizeOf(WideChar));
- end;
- ABuffer.Buffer[ABuffer.Length] := wc;
- Inc(ABuffer.Length);
- end;
- function IsValidEncName(const s: WideString): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- if (s = '') or (s[1] > #255) or not (char(s[1]) in ['A'..'Z', 'a'..'z']) then
- Exit;
- for I := 2 to Length(s) do
- if (s[I] > #255) or not (char(s[I]) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
- Exit;
- Result := True;
- end;
- { TDOMDocumentTypeEx }
- destructor TDOMDocumentTypeEx.Destroy;
- begin
- FPEMap.Free;
- FElementDefs.Free;
- inherited Destroy;
- end;
- function TDOMDocumentTypeEx.GetElementDefs: TDOMNamedNodeMap;
- begin
- if FElementDefs = nil then
- FElementDefs := TDOMNamedNodeMap.Create(Self, ELEMENT_NODE);
- Result := FElementDefs;
- end;
- function TDOMDocumentTypeEx.GetPEMap: TDOMNamedNodeMap;
- begin
- if FPEMap = nil then
- FPEMap := TDOMNamedNodeMap.Create(Self, ENTITY_NODE);
- Result := FPEMap;
- end;
- // TODO: These classes still cannot be considered as the final solution...
- { TXMLInputSource }
- constructor TXMLInputSource.Create(const AData: WideString);
- begin
- inherited Create;
- FBuf := PChar(PWideChar(AData));
- FBufEnd := FBuf + Length(AData) * sizeof(WideChar);
- end;
- procedure TXMLInputSource.Initialize;
- begin
- FLine := 1;
- FColumn := 0;
- end;
- function TXMLInputSource.NextChar: WideChar;
- begin
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
- end
- else if FBufEnd <= FBuf then
- begin
- Result := #0;
- FEof := True;
- end
- else
- begin
- Result := PWideChar(FBuf)^;
- Inc(FBuf, sizeof(WideChar));
- end;
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string);
- begin
- FReader.RaiseExc(Msg);
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string;
- const Args: array of const);
- begin
- FReader.RaiseExc(Msg, Args);
- end;
- procedure TXMLInputSource.FetchData;
- begin
- FEof := True;
- end;
- procedure TXMLInputSource.SetEncoding(const AEncoding: string);
- begin
- // do nothing
- end;
- function TXMLInputSource.GetPublicID: WideString;
- begin
- if FPublicID <> '' then
- Result := FPublicID
- else if Assigned(FParent) then
- Result := FParent.PublicID
- else
- Result := '';
- end;
- function TXMLInputSource.GetSystemID: WideString;
- begin
- if FSystemID <> '' then
- Result := FSystemID
- else if Assigned(FParent) then
- Result := FParent.SystemID
- else
- Result := '';
- end;
- { TXMLDecodingSource }
- destructor TXMLDecodingSource.Destroy;
- begin
- FDecoder.Free;
- inherited Destroy;
- end;
- function TXMLDecodingSource.InternalNextChar: WideChar;
- begin
- // TODO: find a place for it, finally...
- if FSurrogate <> #0 then
- begin
- Result := FSurrogate;
- FSurrogate := #0;
- Exit;
- end;
- if FBufEnd <= FBuf then
- FetchData;
- if not FEof then
- Result := FDecoder.DecodeNext
- else
- Result := #0;
- end;
- function TXMLDecodingSource.NextChar: WideChar;
- begin
- Result := InternalNextChar;
- if FSeenCR then
- begin
- if (Result = #10) or ((Result = #$85) and FReader.FXML11) then
- Result := InternalNextChar;
- FSeenCR := False;
- end;
- case Result of
- #13: begin
- FSeenCR := True;
- Result := #10;
- end;
- #$85, #$2028:
- if FReader.FXML11 then
- Result := #10;
- end;
- if (Result < #256) and (char(Result) in FReader.FForbiddenAscii) or
- ((ord(Result) or 1) = $FFFF) then
- DecodingError('Invalid character');
- // TODO: Column counting - surrogate pair is a single char!
- if Result = #10 then
- begin
- Inc(FLine);
- FColumn := 0;
- end
- else
- Inc(FColumn);
- end;
- procedure TXMLDecodingSource.Initialize;
- begin
- inherited;
- if FBufEnd-FBuf > 1 then
- repeat
- if (FBuf[0] = #$FE) and (FBuf[1] = #$FF) then // BE
- begin
- FDecoder := TUCS2Decoder.Create(Self);
- TUCS2Decoder(FDecoder).FEncoding := 'UTF-16BE';
- {$IFNDEF ENDIAN_BIG}
- TUCS2Decoder(FDecoder).FSwapEndian := True;
- {$ENDIF}
- Exit;
- end
- else if (FBuf[0] = #$FF) and (FBuf[1] = #$FE) then // LE
- begin
- FDecoder := TUCS2Decoder.Create(Self);
- TUCS2Decoder(FDecoder).FEncoding := 'UTF-16LE';
- {$IFDEF ENDIAN_BIG}
- TUCS2Decoder(FDecoder).FSwapEndian := True;
- {$ENDIF}
- Exit;
- end
- else
- Break;
- until False;
- FDecoder := TUTF8Decoder.Create(Self);
- end;
- procedure TXMLDecodingSource.SetEncoding(const AEncoding: string);
- var
- NewDecoder: TDecoderRef;
- begin
- if FDecoder.Supports(AEncoding) then // no change needed
- Exit;
- // hardcoded stuff - special case of UCS2
- if FDecoder is TUCS2Decoder then
- begin
- // check for 'UTF-16LE' or 'UTF-16BE'
- if SameText(AEncoding, TUCS2Decoder(FDecoder).FEncoding) then
- Exit
- else
- DecodingError('Current encoding cannot be switched to ''%s''', [AEncoding]);
- end;
- NewDecoder := FindDecoder(AEncoding);
- if Assigned(NewDecoder) then
- begin
- FDecoder.Free;
- FDecoder := NewDecoder.Create(Self);
- end
- else
- DecodingError('Encoding ''%s'' is not supported', [AEncoding]);
- end;
- { TXMLStreamInputSource }
- constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
- begin
- FStream := AStream;
- FBufSize := 4096;
- GetMem(FAllocated, FBufSize+8);
- FBuf := FAllocated+8;
- FBufEnd := FBuf;
- FOwnStream := AOwnStream;
- FetchData;
- end;
- destructor TXMLStreamInputSource.Destroy;
- begin
- FreeMem(FAllocated);
- if FOwnStream then
- FStream.Free;
- inherited Destroy;
- end;
- procedure TXMLStreamInputSource.FetchData;
- var
- Remainder, BytesRead: Integer;
- OldBuf: PChar;
- begin
- Assert(FBufEnd - FBuf < 8);
- OldBuf := FBuf;
- Remainder := FBufEnd - FBuf;
- FBuf := FAllocated+8-Remainder;
- Move(OldBuf^, FBuf^, Remainder);
- BytesRead := FStream.Read(FAllocated[8], FBufSize);
- if BytesRead = 0 then
- FEof := True;
- FBufEnd := FAllocated + 8 + BytesRead;
- end;
- { TXMLFileInputSource }
- constructor TXMLFileInputSource.Create(var AFile: Text);
- begin
- FFile := @AFile;
- ReadLn(FFile^, FString);
- FBuf := PChar(FString);
- FBufEnd := FBuf + Length(FString);
- end;
- procedure TXMLFileInputSource.FetchData;
- begin
- FEof := Eof(FFile^);
- if not FEof then
- begin
- ReadLn(FFile^, FString);
- FString := FString + #10; // bad solution...
- FBuf := PChar(FString);
- FBufEnd := FBuf + Length(FString);
- end;
- end;
- { TDecoder }
- constructor TDecoder.Create(ASource: TXMLDecodingSource);
- begin
- inherited Create;
- FSource := ASource;
- end;
- { TISO8859_1Decoder}
- function TISO8859_1Decoder.DecodeNext: WideChar;
- begin
- with FSource do
- begin
- Result := WideChar(FBuf[0]);
- Inc(FBuf);
- end;
- end;
- class function TISO8859_1Decoder.Supports(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'ISO-8859-1') or
- SameText(AEncoding, 'ISO_8859-1') or
- SameText(AEncoding, 'latin1') or
- SameText(AEncoding, 'iso-ir-100') or
- SameText(AEncoding, 'l1') or
- SameText(AEncoding, 'IBM819') or
- SameText(AEncoding, 'CP819') or
- SameText(AEncoding, 'csISOLatin1') or
- // This one is not in character-sets.txt, but used in most FPC documentation...
- SameText(AEncoding, 'ISO8859-1');
- end;
- { TUCS2Decoder }
- function TUCS2Decoder.DecodeNext: WideChar;
- begin
- with FSource do
- begin
- Result := PWideChar(FBuf)^;
- Inc(FBuf, sizeof(WideChar));
- end;
- if FSwapEndian then
- Result := WideChar(Swap(Word(Result)));
- end;
- class function TUCS2Decoder.Supports(const AEncoding: string): Boolean;
- begin
- // generic aliases for both LE and BE
- Result := SameText(AEncoding, 'UTF-16') or
- SameText(AEncoding, 'unicode');
- end;
- { TUTF8Decoder }
- function TUTF8Decoder.DecodeNext: WideChar;
- const
- MaxCode: array[0..3] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
- var
- Value: Cardinal;
- I, bc: Integer;
- begin
- with FSource do
- begin
- Result := WideChar(FBuf[0]);
- Inc(FBuf);
- if Result < #$80 then
- Exit;
- if Byte(Result) and $40 = 0 then
- DecodingError('Invalid UTF8 sequence start byte');
- bc := 1;
- if Byte(Result) and $20 <> 0 then
- begin
- Inc(bc);
- if Byte(Result) and $10 <> 0 then
- begin
- Inc(bc);
- if Byte(Result) and $8 <> 0 then
- DecodingError('UCS4 character out of supported range');
- end;
- end;
- // DONE: (?) check that bc bytes available
- if FBufEnd-FBuf < bc then
- FetchData;
- Value := Byte(Result);
- I := bc; // note: I is never zero
- while bc > 0 do
- begin
- if ord(FBuf[0]) and $C0 <> $80 then
- DecodingError('Invalid byte in UTF8 sequence');
- Value := (Value shl 6) or (Cardinal(FBuf[0]) and $3F);
- Inc(FBuf);
- Dec(bc);
- end;
- Value := Value and MaxCode[I];
- // RFC2279 check
- if Value <= MaxCode[I-1] then
- DecodingError('Invalid UTF8 sequence');
- case Value of
- 0..$D7FF, $E000..$FFFF:
- begin
- Result := WideChar(Value);
- Exit;
- end;
- $10000..$10FFFF:
- begin
- Result := WideChar($D7C0 + (Value shr 10));
- FSurrogate := WideChar($DC00 xor (Value and $3FF));
- Exit;
- end;
- end;
- DecodingError('UCS4 character out of supported range');
- end;
- end;
- class function TUTF8Decoder.Supports(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'UTF-8');
- end;
- { TXMLReader }
- function TXMLReader.ResolveEntity(const SystemID, PublicID: WideString; out Source: TXMLInputSource): Boolean;
- var
- AbsSysID: WideString;
- Filename: string;
- Stream: TStream;
- begin
- Result := False;
- if ResolveRelativeURI(FSource.SystemID, SystemID, AbsSysID) then
- begin
- Source := nil;
- // TODO: alternative resolvers
- if URIToFilename(AbsSysID, Filename) then
- begin
- try
- Stream := TFileStream.Create(Filename, fmOpenRead + fmShareDenyWrite);
- Source := TXMLStreamInputSource.Create(Stream, True);
- Source.SystemID := AbsSysID;
- Source.PublicID := PublicID;
- Result := True;
- except
- on E: Exception do
- ValidationError('%s', [E.Message]);
- end;
- end;
- end;
- end;
- procedure TXMLReader.InitializeRoot(ASource: TXMLInputSource);
- begin
- Initialize(ASource);
- GetChar;
- // TODO: presence of BOM must prevent UTF-8 encoding from being changed
- CheckForChar(#$FEFF); // skip BOM, if one is present
- end;
- procedure TXMLReader.Initialize(ASource: TXMLInputSource);
- begin
- FSource := ASource;
- FSource.FReader := Self;
- FSource.Initialize;
- end;
- procedure TXMLReader.GetCharRaw;
- begin
- FCurChar := FSource.NextChar;
- FWhitespace := (FCurChar = #32) or (FCurChar = #10) or
- (FCurChar = #9) or (FCurChar = #13);
- // Used for handling the internal DTD subset
- if Assigned(FCopyBuf) and (FSource.FParent = nil) then
- BufAppend(FCopyBuf^, FCurChar);
- end;
- procedure TXMLReader.GetChar;
- begin
- GetCharRaw;
- if not FRecognizePE then
- Exit;
- if (FCurChar = #0) and ContextPop then
- begin
- Unget(FCurChar);
- FCurChar := #32;
- FWhitespace := True;
- end
- else if FCurChar = '%' then
- begin
- FCurChar := FSource.NextChar;
- if not CheckName then
- begin
- Unget(FCurChar);
- FCurChar := '%';
- Exit;
- end;
- if FCurChar = ';' then // "%pe1;%pe2" - must not recognize pe2 immediately!
- GetCharRaw
- else
- RaiseExc(WideChar(';'));
- StartPE;
- FCurChar := #32;
- FWhitespace := True;
- end;
- end;
- procedure TXMLReader.Unget(wc: WideChar);
- begin
- FSource.FSurrogate := wc;
- end;
- procedure TXMLReader.RaiseExpectedQmark;
- begin
- RaiseExc('Expected single or double quote');
- end;
- procedure TXMLReader.RaiseExc(Expected: WideChar);
- begin
- // FIX: don't output what is found - anything may be found, including exploits...
- RaiseExc('Expected "%1s"', [string(Expected)]);
- end;
- procedure TXMLReader.RaiseExc(const descr: String);
- begin
- raise EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [FSource.SystemID, FSource.FLine, FSource.FColumn, descr]);
- end;
- procedure TXMLReader.RaiseExc(const descr: string; const args: array of const);
- begin
- RaiseExc(Format(descr, args));
- end;
- function TXMLReader.SkipWhitespace: Boolean;
- begin
- Result := False;
- while FWhitespace do
- begin
- GetChar;
- Result := True;
- end;
- end;
- procedure TXMLReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- RaiseExc('Expected whitespace');
- end;
- procedure TXMLReader.ExpectChar(wc: WideChar);
- begin
- if FCurChar = wc then
- GetChar
- else
- RaiseExc(wc);
- end;
- procedure TXMLReader.ExpectString(const s: String);
- var
- I: Integer;
- begin
- for I := 1 to Length(s) do
- begin
- if FCurChar <> WideChar(s[i]) then
- RaiseExc('Expected "%s"', [s]);
- GetChar;
- end;
- end;
- function TXMLReader.CheckForChar(c: WideChar): Boolean;
- begin
- Result := (FCurChar = c);
- if Result then
- GetChar;
- end;
- procedure TXMLReader.SkipString(const ValidChars: TSetOfChar);
- begin
- FValue.Length := 0;
- while (ord(FCurChar) < 256) and (char(FCurChar) in ValidChars) do
- begin
- BufAppend(FValue, FCurChar);
- GetChar;
- end;
- end;
- function TXMLReader.GetString(const ValidChars: TSetOfChar): WideString;
- begin
- SkipString(ValidChars);
- SetString(Result, FValue.Buffer, FValue.Length);
- end;
- constructor TXMLReader.Create;
- begin
- inherited Create;
- // Naming bitmap: Point to static data for XML 1.0,
- // and allocate buffer in XML11_BuildTables when necessary.
- FNamePages := @NamePages;
- BufAllocate(FName, 128);
- BufAllocate(FValue, 512);
- FForbiddenAscii := [#1..#8, #11..#12, #14..#31];
- // TODO: put under user control
- FPreserveWhitespace := True;
- FCreateEntityRefs := True;
- end;
- destructor TXMLReader.Destroy;
- begin
- if FXML11 then
- FreeMem(FNamePages);
- FreeMem(FName.Buffer);
- FreeMem(FValue.Buffer);
- while ContextPop do; // clean input stack
- FSource.Free;
- inherited Destroy;
- end;
- procedure TXMLReader.XML11_BuildTables;
- var
- I: Integer;
- begin
- if not FXML11 then
- GetMem(FNamePages, 512);
- FXML11 := True;
- for I := 0 to 255 do
- FNamePages^[I] := ord(Byte(I) in Xml11HighPages);
- FNamePages^[0] := 2;
- FNamePages^[3] := $2c;
- FNamePages^[$20] := $2a;
- FNamePages^[$21] := $2b;
- FNamePages^[$2f] := $29;
- FNamePages^[$30] := $2d;
- FNamePages^[$fd] := $28;
- Move(FNamePages^, FNamePages^[256], 256);
- FNamePages^[$100] := $19;
- FNamePages^[$103] := $2E;
- FNamePages^[$120] := $2F;
- FForbiddenAscii := [#1..#8, #11..#12, #14..#31, #$7F..#$84, #$86..#$9F];
- end;
- procedure TXMLReader.ProcessXML(ASource: TXMLInputSource);
- begin
- doc := TXMLDocument.Create;
- FCursor := doc;
- InitializeRoot(ASource);
- FAllowedDecl := dtXml;
- ParseMisc;
- FDtdParsed := True;
- if FDocType = nil then
- ValidationError('Missing DTD', []);
- if CheckName then
- ParseElement
- else
- RaiseExc('Expected element');
- ParseMisc;
- if Assigned(FDocType) and (doc.DocumentElement.TagName <> FDocType.Name) then
- ValidationError('DTD name does not match root element', []);
- if FCurChar <> #0 then
- RaiseExc('Text after end of document element found');
- end;
- procedure TXMLReader.ProcessFragment(ASource: TXMLInputSource; AOwner: TDOMNode);
- begin
- doc := AOwner.OwnerDocument;
- FCursor := AOwner;
- InitializeRoot(ASource);
- FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
- FAllowedDecl := dtText;
- DoParseFragment;
- end;
- // XML 1.1 allowed range $10000..$EFFFF is [D800..DB7F] followed by [DC00..DFFF]
- function TXMLReader.XML11_CheckName: Boolean;
- begin
- if (FCurChar >= #$D800) and (FCurChar <= #$DB7F) then
- begin
- BufAppend(FName, FCurChar);
- GetCharRaw;
- Result := (FCurChar >= #$DC00) and (FCurChar <= #$DFFF);
- end
- else
- Result := False;
- end;
- function TXMLReader.CheckName: Boolean;
- begin
- FName.Length := 0;
- Result := (Byte(FCurChar) in NamingBitmap[FNamePages^[hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName);
- if Result then
- repeat
- BufAppend(FName, FCurChar);
- GetChar;
- until not ((Byte(FCurChar) in NamingBitmap[FNamePages^[$100+hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName));
- end;
- function TXMLReader.CheckNmToken: Boolean;
- begin
- FName.Length := 0;
- Result := False;
- while (Byte(FCurChar) in NamingBitmap[FNamePages^[$100+hi(Word(FCurChar))]]) or
- (FXML11 and XML11_CheckName) do
- begin
- BufAppend(FName, FCurChar);
- GetChar;
- Result := True;
- end;
- end;
- procedure TXMLReader.RaiseNameNotFound;
- begin
- RaiseExc('Name starts with invalid character');
- end;
- function TXMLReader.ExpectName: WideString;
- begin
- if not CheckName then
- RaiseNameNotFound;
- SetString(Result, FName.Buffer, FName.Length);
- end;
- procedure TXMLReader.SkipName;
- begin
- if not CheckName then
- RaiseNameNotFound;
- end;
- function TXMLReader.ResolvePredefined(const RefName: WideString): WideChar;
- begin
- if RefName = 'amp' then
- Result := '&'
- else if RefName = 'apos' then
- Result := ''''
- else if RefName = 'gt' then
- Result := '>'
- else if RefName = 'lt' then
- Result := '<'
- else if RefName = 'quot' then
- Result := '"'
- else
- Result := #0;
- end;
- function TXMLReader.ParseCharRef: Boolean; // [66]
- var
- Value: Integer;
- begin
- Result := FCurChar = '#';
- if Result then
- begin
- GetCharRaw;
- Value := 0;
- if CheckForChar('x') then
- repeat
- case FCurChar of
- '0'..'9': Value := Value * 16 + Ord(FCurChar) - Ord('0');
- 'a'..'f': Value := Value * 16 + Ord(FCurChar) - (Ord('a') - 10);
- 'A'..'F': Value := Value * 16 + Ord(FCurChar) - (Ord('A') - 10);
- else
- Break;
- end;
- GetCharRaw;
- until False
- else
- repeat
- case FCurChar of
- '0'..'9': Value := Value * 10 + Ord(FCurChar) - Ord('0');
- else
- Break;
- end;
- GetCharRaw;
- until False;
- ExpectChar(';');
- case Value of
- $01..$08, $0B..$0C, $0E..$1F:
- if FXML11 then
- BufAppend(FValue, WideChar(Value))
- else
- RaiseExc('Invalid character reference');
- $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
- BufAppend(FValue, WideChar(Value));
- $10000..$10FFFF:
- begin
- BufAppend(FValue, WideChar($D7C0 + (Value shr 10)));
- BufAppend(FValue, WideChar($DC00 xor (Value and $3FF)));
- end;
- else
- RaiseExc('Invalid character reference');
- end;
- end;
- end;
- procedure TXMLReader.DoParseAttValue(Delim: WideChar);
- var
- RefNode: TDOMEntityEx;
- begin
- FValue.Length := 0;
- while (FCurChar <> Delim) and (FCurChar <> #0) do
- begin
- if FCurChar = '<' then
- RaiseExc('Literal "<" in attribute value')
- else if FCurChar <> '&' then
- begin
- if FWhitespace then
- FCurChar := #32;
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end
- else
- begin
- GetCharRaw; // skip '&'
- if ParseCharRef then
- Continue;
- RefNode := ParseReference;
- if Assigned(RefNode) then
- begin
- if FValue.Length > 0 then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- if RefNode.SystemID <> '' then
- RaiseExc('External entity reference is not allowed in attribute value');
- IncludeEntity(RefNode, True);
- end;
- end;
- end; // while
- if FValue.Length > 0 then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- end;
- procedure TXMLReader.DoParseFragment;
- begin
- ParseContent;
- if FCurChar <> #0 then
- RaiseExc('Closing tag not allowed here');
- end;
- function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
- var
- Src: TXMLInputSource;
- begin
- if AEntity.SystemID <> '' then
- begin
- Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, Src);
- if not Result then
- Exit;
- {
- TODO: need different handling of TextDecl in external PEs
- it cannot be parsed if PE is referenced INSIDE declaration
- But - is such case ever met in the wild ?? E.g. MSXML fails such things...
- }
- FAllowedDecl := dtText;
- end
- else
- Src := TXMLInputSource.Create(AEntity.FReplacementText);
- AEntity.FOnStack := True;
- Src.FEntity := AEntity;
- Src.FParent := FSource;
- Src.FCursor := FCursor;
- Unget(FCurChar); // remember FCurChar in previous context
- Inc(FEntityLevel);
- Initialize(Src);
- Result := True;
- end;
- function TXMLReader.ContextPop: Boolean;
- var
- Src: TXMLInputSource;
- begin
- Result := Assigned(FSource.FParent);
- if Result then
- begin
- Src := FSource.FParent;
- if Assigned(FSource.FEntity) then
- TDOMEntityEx(FSource.FEntity).FOnStack := False;
- FCursor := TDOMNode(FSource.FCursor);
- FSource.Free;
- FSource := Src;
- Dec(FEntityLevel);
- GetChar; // re-classify - case of "%pe1;%pe2;"
- end;
- end;
- procedure TXMLReader.IncludeEntity(AEntity: TDOMEntityEx; InAttr: Boolean);
- var
- Node, Child: TDOMNode;
- begin
- if not AEntity.FResolved then
- begin
- if AEntity.FOnStack then
- RaiseExc('Entity ''%s'' recursively references itself', [AEntity.NodeName]);
- if ContextPush(AEntity) then
- begin
- GetCharRaw;
- CheckForChar(#$FEFF);
- FCursor := AEntity; // build child node tree for the entity
- try
- if InAttr then
- DoParseAttValue(#0)
- else
- DoParseFragment;
- AEntity.FResolved := True;
- finally
- ContextPop; // FCursor restored
- FValue.Length := 0;
- end;
- end;
- end;
- Node := FCursor;
- if FCreateEntityRefs or (not AEntity.FResolved) then
- begin
- Node := doc.CreateEntityReference(AEntity.NodeName);
- FCursor.AppendChild(Node);
- end;
- Child := AEntity.FirstChild; // clone the entity node tree
- while Assigned(Child) do
- begin
- Node.AppendChild(Child.CloneNode(True));
- Child := Child.NextSibling;
- end;
- end;
- procedure TXMLReader.StartPE;
- var
- PEName: WideString;
- PEnt: TDOMEntityEx;
- begin
- SetString(PEName, FName.Buffer, FName.Length);
- PEnt := FDocType.PEMap.GetNamedItem(PEName) as TDOMEntityEx;
- if PEnt = nil then // TODO -cVC: Referencing undefined PE
- begin // (These are classified as 'optional errors'...)
- // ValidationError('Undefined parameter entity referenced: %s', [PEName]);
- Exit;
- end;
- if PEnt.FOnStack then
- RaiseExc('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
- ContextPush(PEnt);
- end;
- function TXMLReader.ParseReference: TDOMEntityEx;
- var
- RefName: WideString;
- Predef: WideChar;
- begin
- Result := nil;
- RefName := ExpectName;
- ExpectChar(';');
- Predef := ResolvePredefined(RefName);
- if Predef <> #0 then
- BufAppend(FValue, Predef)
- else
- begin
- if Assigned(FDocType) then
- Result := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
- if Result = nil then
- begin
- if FStandalone or (FDocType = nil) or not (FDocType.HasPERefs or (FDocType.SystemID <> '')) then
- RaiseExc('Undefined entity ''%s'' referenced', [RefName])
- else
- ValidationError('Undefined entity ''%s'' referenced', [RefName]);
- end
- else
- begin
- if FStandalone and (not Result.FInternal) then
- RaiseExc('Standalone constraint violation');
- if Result.NotationName <> '' then
- RaiseExc('Reference to unparsed entity ''%s''', [RefName]);
- end;
- end;
- end;
- procedure TXMLReader.ProcessTextAndRefs;
- var
- nonWs: Boolean;
- last: WideChar;
- RefNode: TDOMEntityEx;
- begin
- FValue.Length := 0;
- nonWs := False;
- FAllowedDecl := dtNone;
- while (FCurChar <> '<') and (FCurChar <> #0) do
- begin
- if FCurChar <> '&' then
- begin
- if not FWhitespace then
- nonWs := True;
- BufAppend(FValue, FCurChar);
- if FCurChar = '>' then
- with FValue do
- if (Length >= 3) and
- (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
- RaiseExc('Literal '']]>'' is not allowed in text');
- GetCharRaw;
- end
- else
- begin
- GetCharRaw; // skip '&'
- if ParseCharRef then
- begin
- last := FValue.Buffer[FValue.Length-1];
- if (last <> #9) and (last <> #10) and (last <> #13) and (last <> #32) then
- nonWs := True;
- Continue;
- end;
- nonWs := True;
- RefNode := ParseReference;
- if Assigned(RefNode) then
- begin
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- nonWs := False;
- end;
- IncludeEntity(RefNode, False);
- end;
- end;
- end; // while
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- FCursor.AppendChild(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length));
- FValue.Length := 0;
- end;
- end;
- procedure TXMLReader.ExpectAttValue; // [10]
- var
- Delim: WideChar;
- begin
- if (FCurChar <> '''') and (FCurChar <> '"') then
- RaiseExpectedQmark;
- Delim := FCurChar;
- GetCharRaw; // skip quote
- DoParseAttValue(Delim);
- GetChar; // NOTE: not GetCharRaw - when parsing AttDef in DTD,
- // immediately following PERef must be recognized
- end;
- function TXMLReader.SkipQuotedLiteral: Boolean;
- var
- Delim: WideChar;
- begin
- Result := (FCurChar = '''') or (FCurChar = '"');
- if Result then
- begin
- Delim := FCurChar;
- GetCharRaw; // skip quote
- FValue.Length := 0;
- while (FCurChar <> Delim) and (FCurChar <> #0) do
- begin
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end;
- ExpectChar(Delim); // <-- to check the EOF only
- end;
- end;
- procedure TXMLReader.SkipPubidLiteral; // [12]
- var
- I: Integer;
- begin
- if SkipQuotedLiteral then
- begin
- for I := 0 to FValue.Length-1 do
- if (FValue.Buffer[I] > #255) or not (Char(FValue.Buffer[I]) in PubidChars) then
- RaiseExc('Illegal Public ID literal')
- end
- else
- RaiseExpectedQMark;
- end;
- procedure TXMLReader.SkipSystemLiteral(out Literal: WideString; Required: Boolean);
- begin
- if SkipQuotedLiteral then
- SetString(Literal, FValue.Buffer, FValue.Length)
- else if Required then
- RaiseExpectedQMark;
- end;
- procedure TXMLReader.ParseComment; // [15]
- begin
- ExpectString('--');
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 2) and (Buffer[Length-1] = '-') and
- (Buffer[Length-2] = '-') then
- begin
- Dec(Length, 2);
- if Assigned(FCursor) then
- FCursor.AppendChild(doc.CreateCommentBuf(Buffer, Length));
- ExpectChar('>');
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated comment');
- end;
- procedure TXMLReader.ParsePI; // [16]
- var
- Name, Value: WideString;
- begin
- GetCharRaw; // skip '?'
- Name := ExpectName;
- with FName do
- if (Length = 3) and
- ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
- ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
- ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
- begin
- if Name <> 'xml' then
- RaiseExc('''xml'' is a reserved word; it must be lowercase');
- if FAllowedDecl <> dtNone then
- begin
- ParseXmlOrTextDecl(FAllowedDecl = dtText);
- FAllowedDecl := dtNone;
- Exit;
- end
- else
- RaiseExc('XML declaration not allowed here');
- end;
- if FCurChar <> '?' then
- ExpectWhitespace;
- FAllowedDecl := dtNone;
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 2) and (Buffer[Length-1] = '>') and
- (Buffer[Length-2] = '?') then
- begin
- Dec(Length, 2);
- SetString(Value, Buffer, Length);
- if Assigned(FCursor) then
- FCursor.AppendChild(Doc.CreateProcessingInstruction(Name, Value));
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated processing instruction');
- end;
- // here we come from ParsePI, 'xml' is already consumed
- procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
- var
- TmpStr: WideString;
- IsXML11: Boolean;
- begin
- ExpectWhitespace;
- // VersionInfo: optional in TextDecl, required in XmlDecl
- if (not TextDecl) or (FCurChar = 'v') then
- begin
- ExpectString('version'); // [24]
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- IsXML11 := False;
- if TmpStr = '1.1' then // Checking for bad chars is implied
- IsXML11 := True
- else if TmpStr <> '1.0' then
- RaiseExc('Illegal version number');
- if not TextDecl then
- begin
- if doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).XMLVersion := TmpStr;
- if IsXML11 then
- XML11_BuildTables;
- end
- else // parsing external entity
- if IsXML11 and not FXML11 then
- RaiseExc('XML 1.0 document cannot invoke XML 1.1 entities');
- if FCurChar <> '?' then
- ExpectWhitespace;
- end;
- // EncodingDecl: required in TextDecl, optional in XmlDecl
- if TextDecl or (FCurChar = 'e') then // [80]
- begin
- ExpectString('encoding');
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- if not IsValidEncName(TmpStr) then
- RaiseExc('Illegal encoding name');
- FSource.SetEncoding(TmpStr); // <-- Wide2Ansi conversion here
- // getting here means that specified encoding is supported
- // TODO: maybe assign the 'preferred' encoding name?
- if not TextDecl and doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).Encoding := TmpStr;
- if FCurChar <> '?' then
- ExpectWhitespace;
- end;
- // SDDecl: forbidden in TextDecl, optional in XmlDecl
- if (not TextDecl) and (FCurChar = 's') then
- begin
- ExpectString('standalone');
- ExpectEq;
- SkipSystemLiteral(TmpStr, True);
- if TmpStr = 'yes' then
- FStandalone := True
- else if TmpStr <> 'no' then
- RaiseExc('Only "yes" or "no" are permitted as values of "standalone"');
- SkipWhitespace;
- end;
- ExpectString('?>');
- end;
- procedure TXMLReader.ParseDoctypeDecl; // [28]
- var
- IntSubset: TWideCharBuf;
- Src, OldSrc: TXMLInputSource;
- begin
- FAllowedDecl := dtNone;
- if FDtdParsed then
- RaiseExc('Markup declaration not allowed here');
- ExpectString('DOCTYPE'); // gives possibly incorrect error message
- ExpectWhitespace;
- FDocType := TDOMDocumentTypeEx.Create(doc);
- FDtdParsed := True;
- { To comply with certain output tests, we must insert PIs coming from internal
- subset before DocType node. This looks very synthetic, but let it be...
- Moreover, this code actually duplicates such PIs }
- try
- FDocType.FName := ExpectName;
- ExpectWhitespace;
- ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
- SkipWhitespace;
- if FCurChar = '[' then
- begin
- BufAllocate(IntSubset, 256);
- FCopyBuf := @IntSubset;
- GetChar; // cause very first char after '[' to be appended
- try
- FIntSubset := True;
- ParseMarkupDecl;
- if IntSubset.Length > 0 then // sanity check - must at least contain ']'
- SetString(FDocType.FInternalSubset, IntSubset.Buffer, IntSubset.Length-1);
- ExpectChar(']');
- finally
- FIntSubset := False;
- FCopyBuf := nil;
- FreeMem(IntSubset.Buffer);
- end;
- SkipWhitespace;
- end;
- ExpectChar('>');
- if FDocType.SystemID <> '' then
- begin
- if ResolveEntity(FDocType.SystemID, FDocType.PublicID, Src) then
- begin
- OldSrc := FSource;
- Unget(FCurChar);
- FCursor := nil;
- try
- DoParseExtSubset(Src);
- finally
- while ContextPop do; // Cleanup after possible exceptions
- FSource.Free;
- FSource := OldSrc;
- GetChar;
- FCursor := Doc;
- end;
- end;
- end;
- finally
- doc.AppendChild(FDocType);
- end;
- end;
- procedure TXMLReader.ParseMisc;
- begin
- repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
- if not CheckForChar('<') then
- Break;
- if CheckForChar('!') then
- begin
- FAllowedDecl := dtNone;
- if FCurChar = '-' then
- ParseComment
- else
- ParseDoctypeDecl;
- end
- else
- if FCurChar = '?' then
- ParsePI
- else
- Break;
- until FCurChar = #0;
- FAllowedDecl := dtNone;
- end;
- function TXMLReader.ParseEq: Boolean; // [25]
- begin
- while FWhitespace do GetCharRaw;
- Result := FCurChar = '=';
- if Result then
- begin
- GetCharRaw;
- while FWhitespace do GetCharRaw;
- end;
- end;
- procedure TXMLReader.ExpectEq;
- begin
- if not ParseEq then
- RaiseExc('Expected "="');
- end;
- { DTD stuff }
- procedure TXMLReader.AssertPENesting(CurrentLevel: Integer);
- begin
- if CurrentLevel <> FEntityLevel then
- ValidationError('Parameter entities must be properly nested', []);
- end;
- // content model
- type
- TElementContentType = (
- ctEmpty,
- ctAny,
- ctMixed,
- ctName,
- ctChoice,
- ctSeq
- );
- TElementContentQuant = (
- cqNone,
- cqOpt,
- cqReq,
- cqPlus
- );
- {
- TElementContent = record
- ContentType: TElementContentType;
- ContentQuant: TElementContentQuant;
- Name: WideString;
- Children: array of TElementContent;
- end;
- }
- procedure TXMLReader.ExpectChoiceOrSeq(); // [49], [50]
- var
- Delim: WideChar;
- PELevel: Integer;
- begin
- Delim := #0;
- repeat
- SkipWhitespace;
- if FCurChar = '(' then
- begin
- PELevel := FEntityLevel;
- GetChar;
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
- GetChar;
- end
- else
- SkipName;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- SkipWhitespace;
- if FCurChar = ')' then
- Break;
- if Delim = #0 then
- begin
- if (FCurChar = '|') or (FCurChar = ',') then
- Delim := FCurChar
- else
- RaiseExc('Expected "|" or ","');
- end
- else
- if FCurChar <> Delim then
- RaiseExc(Delim);
- GetChar; // skip delimiter
- until False;
- end;
- procedure TXMLReader.ParseMixedOrChildren;
- var
- PELevel: Integer;
- NeedAsterisk: Boolean;
- begin
- PELevel := FEntityLevel;
- GetChar; // starting bracket
- SkipWhitespace;
- if CheckForChar('#') then // Mixed section [51]
- begin
- ExpectString('PCDATA');
- SkipWhitespace;
- NeedAsterisk := False;
- while FCurChar <> ')' do
- begin
- ExpectChar('|');
- NeedAsterisk := True;
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- end;
- AssertPENesting(PELevel);
- GetChar;
- if NeedAsterisk then
- ExpectChar('*')
- else
- CheckForChar('*');
- end
- else // Parse Children section [47]
- begin
- ExpectChoiceOrSeq;
- AssertPENesting(PELevel);
- GetChar;
- if CheckForChar('?') then
- else if CheckForChar('*') then
- else if CheckForChar('+') then;
- end;
- end;
- procedure TXMLReader.ParseElementDecl; // [45]
- begin
- SkipName;
- ExpectWhitespace;
- // Get contentspec [46]
- if FCurChar = 'E' then
- ExpectString('EMPTY')
- else if FCurChar = 'A' then
- ExpectString('ANY')
- else if FCurChar = '(' then
- ParseMixedOrChildren
- else
- RaiseExc('Invalid content specification');
- end;
- procedure TXMLReader.ParseNotationDecl; // [82]
- var
- Notation: TDOMNotationEx;
- begin
- Notation := TDOMNotationEx(TDOMNotation.Create(Doc));
- try
- Notation.FName := ExpectName;
- ExpectWhitespace;
- if not ParseExternalID(Notation.FSystemID, Notation.FPublicID, True) then
- RaiseExc('Expected external or public ID');
- except
- Notation.Free;
- raise;
- end;
- if FDocType.Notations.GetNamedItem(Notation.FName) = nil then
- FDocType.Notations.SetNamedItem(Notation)
- else
- begin
- ValidationError('Duplicate notation declaration: %s', [Notation.FName]);
- Notation.Free;
- end;
- end;
- procedure TXMLReader.ParseAttlistDecl; // [52]
- var
- SaveCurNode: TDOMNode;
- ValueRequired: Boolean;
- Token: WideString;
- ElDef: TDOMElementDef;
- AttDef: TDOMAttrDef;
- begin
- Token := ExpectName;
- ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Token));
- if ElDef = nil then
- begin
- // TODO -cVC: must distinguish ElementDef created here from one explicitly declared
- ElDef := TDOMElementDef.Create(doc);
- ElDef.FNodeName := Token;
- FDocType.ElementDefs.SetNamedItem(ElDef);
- end;
- SkipWhitespace;
- while FCurChar <> '>' do
- begin
- SkipWhitespace; { !!! }
- AttDef := TDOMAttrDef.Create(doc);
- try
- AttDef.FName := ExpectName;
- ExpectWhitespace;
- Token := GetString(['A'..'Z']); // Get AttType [54], [55], [56]
- if Token = 'CDATA' then
- AttDef.FDataType := DT_CDATA
- else if Token = 'ID' then
- AttDef.FDataType := DT_ID
- else if Token = 'IDREF' then
- AttDef.FDataType := DT_IDREF
- else if Token = 'IDREFS' then
- AttDef.FDataType := DT_IDREFS
- else if Token = 'ENTITY' then
- AttDef.FDataType := DT_ENTITY
- else if Token = 'ENTITIES' then
- AttDef.FDataType := DT_ENTITIES
- else if Token = 'NMTOKEN' then
- AttDef.FDataType := DT_NMTOKEN
- else if Token = 'NMTOKENS' then
- AttDef.FDataType := DT_NMTOKENS
- else if Token = 'NOTATION' then // [57], [58]
- begin
- AttDef.FDataType := DT_NOTATION;
- ExpectWhitespace;
- ExpectChar('(');
- repeat
- SkipWhitespace;
- SkipName;
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- end
- else
- if CheckForChar('(') then // [59]
- begin
- AttDef.FDataType := DT_NMTOKEN;
- repeat
- SkipWhitespace;
- if not CheckNmToken then
- RaiseNameNotFound; // not completely correct error message
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- end else
- RaiseExc('Invalid tokenized type');
- ExpectWhitespace;
- // Get DefaultDecl [60]
- ValueRequired := False;
- if CheckForChar('#') then
- begin
- Token := GetString(['A'..'Z']);
- if Token = 'REQUIRED' then
- AttDef.FDefault := AD_REQUIRED
- else if Token = 'IMPLIED' then
- AttDef.FDefault := AD_IMPLIED
- else if Token = 'FIXED' then
- begin
- AttDef.FDefault := AD_FIXED;
- ExpectWhitespace;
- ValueRequired := True;
- end
- else
- RaiseExc('Illegal attribute default');
- end
- else
- begin
- AttDef.FDefault := AD_DEFAULT;
- ValueRequired := True;
- end;
-
- if ValueRequired then
- begin
- SaveCurNode := FCursor;
- FCursor := AttDef;
- // tricky moment, no tests for that
- { FRecognizePE := False; } // TODO: shall it really be disabled?
- try
- ExpectAttValue;
- finally
- FCursor := SaveCurNode;
- { FRecognizePE := not FIntSubset; }
- end;
- if AttDef.FDataType = DT_ID then
- ValidationError('Attributes of type ID must not have a default value',[]);
- end;
- // First declaration is binding, subsequent should be ignored
- if Assigned(ElDef.GetAttributeNode(AttDef.Name)) then
- AttDef.Free
- else
- ElDef.SetAttributeNode(AttDef);
- except
- AttDef.Free;
- raise;
- end;
- SkipWhitespace;
- end;
- end;
- procedure TXMLReader.ParseEntityDeclValue(Delim: WideChar); // [9]
- var
- I: Integer;
- Src: TXMLInputSource;
- begin
- Src := FSource;
- // "Included in literal": process until delimiter hit IN SAME context
- while not ((FSource = Src) and CheckForChar(Delim)) do
- if ParsePEReference then
- begin
- if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('PE references in internal subset not allowed inside declarations');
- StartPE;
- GetCharRaw;
- end
- else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass
- begin
- GetCharRaw;
- if not ParseCharRef then
- begin
- BufAppend(FValue, '&');
- ExpectName;
- ExpectChar(';');
- for I := 0 to FName.Length-1 do
- BufAppend(FValue, FName.Buffer[I]);
- BufAppend(FValue, ';');
- end;
- end
- else if FCurChar <> #0 then // Regular character
- begin
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- end
- else if not ContextPop then // #0
- Break;
- end;
- procedure TXMLReader.ParseEntityDecl; // [70]
- var
- NDataAllowed: Boolean;
- Delim: WideChar;
- Entity: TDOMEntityEx;
- Map: TDOMNamedNodeMap;
- begin
- NDataAllowed := True;
- Map := FDocType.Entities;
- if CheckForChar('%') then // [72]
- begin
- ExpectWhitespace;
- NDataAllowed := False;
- Map := FDocType.PEMap;
- end;
- Entity := TDOMEntityEx.Create(Doc);
- try
- Entity.FInternal := FIntSubset and (FSource.FParent = nil);
- Entity.FName := ExpectName;
- ExpectWhitespace;
- if (FCurChar = '"') or (FCurChar = '''') then
- begin
- NDataAllowed := False;
- Delim := FCurChar;
- FRecognizePE := False; // PERef right after delimiter should not be recognized
- GetCharRaw; // at char level - we process it 'manually'
- FValue.Length := 0;
- ParseEntityDeclValue(Delim);
- FRecognizePE := not FIntSubset;
- SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
- end
- else
- if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
- RaiseExc('Expected entity value or external ID');
- if NDataAllowed then // [76]
- begin
- if FCurChar <> '>' then
- ExpectWhitespace;
- if FCurChar = 'N' then
- begin
- ExpectString('NDATA');
- ExpectWhitespace;
- SkipName;
- // TODO -cVC: Notation declared. Here or after all has been read?
- SetString(Entity.FNotationName, FName.Buffer, FName.Length);
- if FDocType.Notations.GetNamedItem(Entity.NotationName) = nil then
- ValidationError('Reference to undeclared notation ''%s''', [Entity.NotationName]);
- end;
- end;
- except
- Entity.Free;
- raise;
- end;
- // Repeated declarations of same entity are legal but must be ignored
- if Map.GetNamedItem(Entity.NodeName) = nil then
- Map.SetNamedItem(Entity)
- else
- Entity.Free;
- end;
- procedure TXMLReader.ParseMarkupDecl; // [29]
- var
- Token: WideString;
- IncludeLevel: Integer;
- IgnoreLevel: Integer;
- PELevel: Integer;
- begin
- IncludeLevel := 0;
- IgnoreLevel := 0;
- repeat
- if SkipWhitespace then
- FAllowedDecl := dtNone;
- if ParsePEReference then // PERef between declarations should always be recognized
- begin
- FAllowedDecl := dtNone;
- if Assigned(FDocType) then
- FDocType.HasPERefs := True;
- StartPE;
- GetChar;
- Continue;
- end;
- if (FCurChar = #0) and ContextPop then
- Continue;
- if (FCurChar = ']') and (IncludeLevel > 0) then
- begin
- ExpectString(']]>');
- Dec(IncludeLevel);
- Continue;
- end;
- if FCurChar <> '<' then
- Break;
- PELevel := FEntityLevel;
- GetCharRaw;
- if CheckForChar('!') then
- begin
- FAllowedDecl := dtNone;
- if FCurChar = '-' then
- ParseComment
- else if FCurChar = '[' then
- begin
- if FIntSubset and (FSource.FParent = nil) then
- RaiseExc('Conditional sections not allowed in internal subset');
- FRecognizePE := not FIntSubset;
- GetChar; // skip '['
- SkipWhitespace;
- Token := GetString(['A'..'Z']);
- SkipWhitespace;
- if Token = 'INCLUDE' then
- Inc(IncludeLevel)
- else if Token = 'IGNORE' then
- IgnoreLevel := 1
- else
- RaiseExc('Expected "INCLUDE" or "IGNORE"');
- AssertPENesting(PELevel);
- ExpectChar('[');
- if IgnoreLevel > 0 then
- repeat
- FRecognizePE := False; // PEs not recognized in IGNORE section
- if CheckForChar('<') and CheckForChar('!') and CheckForChar('[') then
- Inc(IgnoreLevel)
- else if CheckForChar(']') and CheckForChar(']') and CheckForChar('>') then
- Dec(IgnoreLevel)
- else GetChar;
- until (IgnoreLevel=0) or (FCurChar = #0);
- end
- else
- begin
- FRecognizePE := not FIntSubset;
- Token := GetString(['A'..'Z']);
- ExpectWhitespace;
- if Token = 'ELEMENT' then
- ParseElementDecl
- else if Token = 'ENTITY' then
- ParseEntityDecl
- else if Token = 'ATTLIST' then
- ParseAttlistDecl
- else if Token = 'NOTATION' then
- ParseNotationDecl
- else
- RaiseExc('Illegal markup declaration');
- SkipWhitespace;
- FRecognizePE := False; // ! Don't auto-pop context on last markup delimiter
- ExpectChar('>'); // This enables correct nesting check
- end;
- {
- MarkupDecl starting in PE and ending in root is a WFC [28a]
- MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
- }
- if PELevel > FEntityLevel then
- RaiseExc('Parameter entities must be properly nested')
- else
- AssertPENesting(PELevel);
- end
- else if FCurChar = '?' then
- ParsePI;
- until False;
- FRecognizePE := False;
- if (IncludeLevel > 0) or (IgnoreLevel > 0) then
- RaiseExc('Conditional section not closed');
- end;
- procedure TXMLReader.DoParseExtSubset(ASource: TXMLInputSource);
- begin
- InitializeRoot(ASource);
- FAllowedDecl := dtText;
- ParseMarkupDecl;
- if FCurChar <> #0 then
- RaiseExc('Illegal character in DTD');
- end;
- procedure TXMLReader.ProcessDTD(ASource: TXMLInputSource);
- begin
- doc := TXMLDocument.Create;
- FDocType := TDOMDocumentTypeEx.Create(doc);
- // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
- // TODO: what shall be FCursor? FDocType cannot - it does not accept child nodes
- doc.AppendChild(FDocType);
- DoParseExtSubset(ASource);
- end;
- procedure TXMLReader.ParseCDSect; // [18]
- var
- name: WideString;
- begin
- ExpectString('[CDATA[');
- FValue.Length := 0;
- repeat
- BufAppend(FValue, FCurChar);
- GetCharRaw;
- with FValue do
- if (Length >= 3) and (Buffer[Length-1] = '>') and
- (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
- begin
- Dec(Length, 3);
- SetString(name, Buffer, Length);
- FCursor.AppendChild(doc.CreateCDATASection(name));
- Exit;
- end;
- until FCurChar = #0;
- RaiseExc('Unterminated CDATA section');
- end;
- procedure TXMLReader.ParseContent;
- begin
- repeat
- if FCurChar = '<' then
- begin
- GetCharRaw;
- if CheckName then
- ParseElement
- else if FCurChar = '!' then
- begin
- GetCharRaw;
- FAllowedDecl := dtNone;
- if FCurChar = '[' then
- ParseCDSect
- else if FCurChar = '-' then
- ParseComment
- else
- ParseDoctypeDecl; // actually will raise error
- end
- else if FCurChar = '?' then
- ParsePI
- else
- Exit;
- end
- else
- ProcessTextAndRefs;
- until FCurChar = #0;
- end;
- // Element name already in FNameBuffer
- procedure TXMLReader.ParseElement; // [39] [40] [44]
- var
- NewElem: TDOMElement;
- IsEmpty: Boolean;
- attr, OldAttr: TDOMNode;
- begin
- NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
- FCursor.AppendChild(NewElem);
- Assert(NewElem.ParentNode = FCursor, 'AppendChild did not set ParentNode');
- FCursor := NewElem;
- IsEmpty := False;
- while FCurChar <> '>' do
- begin
- if FCurChar = '/' then
- begin
- GetCharRaw;
- IsEmpty := True;
- FCursor := FCursor.ParentNode;
- Break;
- end;
- // Get Attribute [41]
- ExpectWhitespace;
- if not CheckName then // allow stuff like <element >, <element />
- Continue;
- attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
- // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
- OldAttr := NewElem.Attributes.SetNamedItem(Attr);
- if Assigned(OldAttr) then
- begin
- OldAttr.Free;
- RaiseExc('Duplicate attribute');
- end;
- ExpectEq;
- Assert(TDOMAttr(attr).OwnerElement = NewElem, 'DOMAttr.OwnerElement not set correctly');
- FCursor := attr;
- ExpectAttValue;
- FCursor := NewElem;
- end;
- ExpectChar('>');
- ProcessDefaultAttributes(NewElem);
- if not IsEmpty then
- begin
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipWhitespace;
- ParseContent;
- if FCurChar = '/' then // Get ETag [42]
- begin
- GetCharRaw;
- if ExpectName <> NewElem.TagName then
- RaiseExc('Unmatching element end tag (expected "</%s>")', [NewElem.TagName]);
- SkipWhitespace;
- ExpectChar('>');
- FCursor := FCursor.ParentNode;
- end
- else if FCurChar <> #0 then
- RaiseNameNotFound
- else // End of stream in content
- RaiseExc('Document element not closed');
- end;
- end;
- procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement);
- var
- I: Integer;
- ElDef: TDOMElementDef;
- AttDefs: TDOMNamedNodeMap;
- AttDef: TDOMAttrDef;
- Attr: TDOMAttrEx;
- Spec: Boolean;
- begin
- if Assigned(FDocType) then
- begin
- ElDef := TDOMElementDef(FDocType.ElementDefs.GetNamedItem(Element.TagName));
- if Assigned(ElDef) and ElDef.HasAttributes then
- begin
- AttDefs := ElDef.Attributes;
- for I := 0 to AttDefs.Length-1 do
- begin
- AttDef := AttDefs[I] as TDOMAttrDef;
- Spec := True;
- // no validity checking yet; just append default values
- Attr := TDOMAttrEx(Element.GetAttributeNode(AttDef.Name));
- if (AttDef.FDefault in [AD_DEFAULT, AD_FIXED]) and (Attr = nil) then
- begin
- Attr := TDOMAttrEx(AttDef.CloneNode(True));
- Element.SetAttributeNode(Attr);
- Spec := False;
- end;
- if Assigned(Attr) then
- begin
- Attr.FSpecified := Spec;
- Attr.FNormalize := (AttDef.FDataType <> DT_CDATA);
- end;
- end;
- end;
- end;
- end;
- function TXMLReader.ParsePEReference: Boolean; // [69]
- begin
- Result := CheckForChar('%');
- if Result then
- begin
- SkipName;
- ExpectChar(';');
- end;
- end;
- function TXMLReader.ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- begin
- if FCurChar = 'S' then
- begin
- ExpectString('SYSTEM');
- ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
- Result := True;
- end
- else if FCurChar = 'P' then
- begin
- ExpectString('PUBLIC');
- ExpectWhitespace;
- SkipPubidLiteral;
- SetString(PubID, FValue.Buffer, FValue.Length);
- if SysIdOptional then
- begin
- SkipWhitespace;
- SkipSystemLiteral(SysID, False);
- end
- else
- begin
- ExpectWhitespace;
- SkipSystemLiteral(SysID, True);
- end;
- Result := True;
- end else
- Result := False;
- end;
- procedure TXMLReader.ValidationError(const Msg: string;
- const args: array of const);
- begin
- // TODO: just a stub now
- FInvalid := True;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(Src);
- ADoc := TXMLDocument(Reader.Doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: TStream);
- begin
- ReadXMLFile(ADoc, f, 'stream:');
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- FileStream: TStream;
- begin
- ADoc := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
- finally
- FileStream.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
- begin
- ReadXMLFragment(AParentNode, f, 'stream:');
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
- var
- Stream: TStream;
- begin
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Src.SystemID := FilenameToURI(TTextRec(f).Name);
- Reader.ProcessDTD(Src);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLInputSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessDTD(Src);
- ADoc := TXMLDocument(Reader.doc);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
- begin
- ReadDTDFile(ADoc, f, 'stream:');
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- Stream: TStream;
- begin
- ADoc := nil;
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- end.
|