12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890 |
- {
- This file is part of the Free Component Library
- XML reading routines.
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- Modified in 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit XMLRead;
- {$ifdef fpc}
- {$MODE objfpc}{$H+}
- {$endif}
- interface
- uses
- SysUtils, Classes, DOM;
- type
- TErrorSeverity = (esWarning, esError, esFatal);
- EXMLReadError = class(Exception)
- private
- FSeverity: TErrorSeverity;
- FErrorMessage: string;
- FLine: Integer;
- FLinePos: Integer;
- public
- property Severity: TErrorSeverity read FSeverity;
- property ErrorMessage: string read FErrorMessage;
- property Line: Integer read FLine;
- property LinePos: Integer read FLinePos;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
- type
- TDOMParseOptions = class(TObject)
- private
- FValidate: Boolean;
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FResolveExternals: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- function GetCanonical: Boolean;
- procedure SetCanonical(aValue: Boolean);
- public
- property Validate: Boolean read FValidate write FValidate;
- property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
- property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
- property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
- property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
- property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
- property Namespaces: Boolean read FNamespaces write FNamespaces;
- property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
- property MaxChars: Cardinal read FMaxChars write FMaxChars;
- property CanonicalForm: Boolean read GetCanonical write SetCanonical;
- end;
- // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
- TXMLContextAction = (
- xaAppendAsChildren = 1,
- xaReplaceChildren,
- xaInsertBefore,
- xaInsertAfter,
- xaReplace);
- TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
- TXMLInputSource = class(TObject)
- private
- FStream: TStream;
- FStringData: string;
- FBaseURI: WideString;
- FSystemID: WideString;
- FPublicID: WideString;
- // FEncoding: string;
- public
- constructor Create(AStream: TStream); overload;
- constructor Create(const AStringData: string); overload;
- property Stream: TStream read FStream;
- property StringData: string read FStringData;
- property BaseURI: WideString read FBaseURI write FBaseURI;
- property SystemID: WideString read FSystemID write FSystemID;
- property PublicID: WideString read FPublicID write FPublicID;
- // property Encoding: string read FEncoding write FEncoding;
- end;
- TDOMParser = class(TObject)
- private
- FOptions: TDOMParseOptions;
- FOnError: TXMLErrorEvent;
- public
- constructor Create;
- destructor Destroy; override;
- procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
- procedure ParseUri(const URI: WideString; out ADoc: TXMLDocument);
- function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
- Action: TXMLContextAction): TDOMNode;
- property Options: TDOMParseOptions read FOptions;
- property OnError: TXMLErrorEvent read FOnError write FOnError;
- end;
- TDecoder = record
- Context: Pointer;
- Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- Cleanup: procedure(Context: Pointer); stdcall;
- end;
- TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- // =======================================================
- implementation
- uses
- UriParser, xmlutils, dtdmodel;
- const
- PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- type
- TDOMDocumentTypeEx = class(TDOMDocumentType);
- TDOMTopNodeEx = class(TDOMNode_TopLevel);
- TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
- TLocation = xmlutils.TLocation;
- TDOMEntityEx = class(TDOMEntity);
- TXMLReader = class;
- TXMLCharSource = class(TObject)
- private
- FBuf: PWideChar;
- FBufEnd: PWideChar;
- FReader: TXMLReader;
- FParent: TXMLCharSource;
- FEntity: TEntityDecl;
- FLineNo: Integer;
- LFPos: PWideChar;
- FXML11Rules: Boolean;
- FSystemID: WideString;
- FCharCount: Cardinal;
- FStartNesting: Integer;
- FXMLVersion: TXMLVersion;
- FXMLEncoding: WideString;
- function GetSystemID: WideString;
- protected
- function Reload: Boolean; virtual;
- public
- Kind: TXMLSourceKind;
- constructor Create(const AData: WideString);
- procedure NextChar;
- procedure NewLine; virtual;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; virtual;
- procedure Initialize; virtual;
- function SetEncoding(const AEncoding: string): Boolean; virtual;
- function Matches(const arg: WideString): Boolean;
- property SystemID: WideString read GetSystemID write FSystemID;
- end;
- TXMLDecodingSource = class(TXMLCharSource)
- private
- FCharBuf: PChar;
- FCharBufEnd: PChar;
- FBufStart: PWideChar;
- FDecoder: TDecoder;
- FHasBOM: Boolean;
- FFixedUCS2: string;
- FBufSize: Integer;
- procedure DecodingError(const Msg: string);
- protected
- function Reload: Boolean; override;
- procedure FetchData; virtual;
- public
- procedure AfterConstruction; override;
- destructor Destroy; override;
- function SetEncoding(const AEncoding: string): Boolean; override;
- procedure NewLine; override;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; override;
- procedure Initialize; override;
- end;
- TXMLStreamInputSource = class(TXMLDecodingSource)
- private
- FAllocated: PChar;
- FStream: TStream;
- FCapacity: Integer;
- FOwnStream: Boolean;
- FEof: Boolean;
- public
- constructor Create(AStream: TStream; AOwnStream: Boolean);
- destructor Destroy; override;
- procedure FetchData; override;
- end;
- TXMLFileInputSource = class(TXMLDecodingSource)
- private
- FFile: ^Text;
- FString: string;
- FTmp: string;
- public
- constructor Create(var AFile: Text);
- procedure FetchData; override;
- end;
- PForwardRef = ^TForwardRef;
- TForwardRef = record
- Value: WideString;
- Loc: TLocation;
- end;
- TElementValidator = object
- FElementDef: TElementDecl;
- FCurCP: TContentParticle;
- FFailed: Boolean;
- FSaViolation: Boolean;
- FContentType: TElementContentType; // =ctAny when FElementDef is nil
- function IsElementAllowed(Def: TElementDecl): Boolean;
- function Incomplete: Boolean;
- end;
- TNodeDataDynArray = array of TNodeData;
- TValidatorDynArray = array of TElementValidator;
- TXMLReadState = (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
- TCheckNameFlags = set of (cnOptional, cnToken);
- TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement,
- xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
- xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity);
- TLiteralType = (ltPlain, ltPubid, ltEntity);
- TXMLReader = class
- private
- FSource: TXMLCharSource;
- FNameTable: THashTable;
- FCtrl: TDOMParser;
- FXML11: Boolean;
- FState: TXMLReadState;
- FHavePERefs: Boolean;
- FInsideDecl: Boolean;
- FValue: TWideCharBuf;
- FEntityValue: TWideCharBuf;
- FName: TWideCharBuf;
- FTokenStart: TLocation;
- FStandalone: Boolean;
- FNamePages: PByteArray;
- FDocType: TDTDModel;
- FPEMap: THashTable;
- FForwardRefs: TFPList;
- FDTDStartPos: PWideChar;
- FIntSubset: TWideCharBuf;
- FAttrTag: Cardinal;
- FDTDProcessed: Boolean;
- FFragmentMode: Boolean;
- FNext: TXMLToken;
- FCurrEntity: TEntityDecl;
- FIDMap: THashTable;
- FNSHelper: TNSSupport;
- FNsAttHash: TDblHashArray;
- FStdPrefix_xml: PHashItem;
- FStdPrefix_xmlns: PHashItem;
- FStdUri_xml: PHashItem;
- FStdUri_xmlns: PHashItem;
- FColonPos: Integer;
- FValidate: Boolean; // parsing options, copy of FCtrl.Options
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FResolveExternals: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
- procedure Initialize(ASource: TXMLCharSource);
- procedure NSPrepare;
- procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
- function ContextPush(AEntity: TEntityDecl): Boolean;
- function ContextPop(Forced: Boolean = False): Boolean;
- procedure XML11_BuildTables;
- function ParseQuantity: TCPQuant;
- procedure StoreLocation(out Loc: TLocation);
- function ValidateAttrSyntax(AttrDef: TAttributeDef; const aValue: WideString): Boolean;
- procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
- procedure AddForwardRef(Buf: PWideChar; Length: Integer);
- procedure ClearForwardRefs;
- procedure ValidateIdRefs;
- procedure StandaloneError(LineOffs: Integer = 0);
- procedure CallErrorHandler(E: EXMLReadError);
- function FindOrCreateElDef: TElementDecl;
- function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
- procedure CheckMaxChars(ToAdd: Cardinal);
- function AllocNodeData(AIndex: Integer): PNodeData;
- function AllocAttributeData: PNodeData;
- function AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
- procedure CleanupAttribute(aNode: PNodeData);
- procedure CleanupAttributes;
- procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
- function AddId(aNodeData: PNodeData): Boolean;
- protected
- FNesting: Integer;
- FCurrNode: PNodeData;
- FAttrCount: Integer;
- FPrefixedAttrs: Integer;
- FSpecifiedAttrs: Integer;
- FNodeStack: TNodeDataDynArray;
- FValidatorNesting: Integer;
- FValidators: TValidatorDynArray;
- FAttrChunks: TFPList;
- FFreeAttrChunk: PNodeData;
- FAttrCleanupFlag: Boolean;
- procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
- procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const ErrPos: TLocation); overload;
- procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const args: array of const; const ErrPos: TLocation); overload;
- procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
- procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
- procedure FatalError(Expected: WideChar); overload;
- function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
- function SkipS(required: Boolean = False): Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- procedure ExpectChar(wc: WideChar);
- function CheckForChar(c: WideChar): Boolean;
- procedure RaiseNameNotFound;
- function CheckName(aFlags: TCheckNameFlags = []): Boolean;
- procedure CheckNCName;
- function ExpectName: WideString; // [5]
- function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
- Required: Boolean): Boolean;
- procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
- procedure ParseComment(discard: Boolean); // [15]
- procedure ParsePI; // [16]
- function CreatePINode: TDOMNode;
- procedure ParseXmlOrTextDecl(TextDecl: Boolean);
- procedure ExpectEq;
- procedure ParseDoctypeDecl; // [28]
- procedure ParseMarkupDecl; // [29]
- procedure ParseStartTag; // [39]
- procedure ParseEndTag; // [42]
- function DoStartElement: TDOMElement;
- procedure HandleEntityStart;
- procedure HandleEntityEnd;
- procedure ResolveEntity;
- procedure DoStartEntity;
- procedure ParseAttribute(ElDef: TElementDecl);
- procedure ParseContent(parent: TDOMNode_WithChildren); // [43]
- function Read: Boolean;
- function ResolvePredefined: Boolean;
- function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
- procedure LoadEntity(AEntity: TEntityDecl);
- function PrefetchEntity(AEntity: TEntityDecl): Boolean;
- procedure StartPE;
- function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- function ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- procedure BadPENesting(S: TErrorSeverity = esError);
- procedure ParseEntityDecl;
- procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
- procedure ParseElementDecl;
- procedure ParseNotationDecl;
- function ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
- procedure ProcessDefaultAttributes(ElDef: TElementDecl);
- procedure ProcessNamespaceAtts;
- function AddBinding(attrData: PNodeData): Boolean;
- procedure PushVC(aElDef: TElementDecl);
- procedure PopVC;
- procedure ValidateDTD;
- procedure ValidateCurrentNode;
- procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
- procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
- procedure DTDReloadHook;
- procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- function DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
- procedure DoNotationDecl(const aName, aPubID, aSysID: WideString);
- public
- doc: TDOMDocument;
- constructor Create; overload;
- constructor Create(AParser: TDOMParser); overload;
- destructor Destroy; override;
- procedure ProcessXML(ASource: TXMLCharSource); // [1]
- procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
- procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
- end;
- const
- NullLocation: TLocation = (Line: 0; LinePos: 0);
- { Decoders }
- var
- Decoders: array of TGetDecoderProc;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- var
- L: Integer;
- begin
- L := Length(Decoders);
- SetLength(Decoders, L+1);
- Decoders[L] := Proc;
- end;
- function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- for I := 0 to High(Decoders) do
- if Decoders[I](AEncoding, Decoder) then
- begin
- Result := True;
- Exit;
- end;
- end;
- function Is_8859_1(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'ISO-8859-1') or
- SameText(AEncoding, 'ISO_8859-1') or
- SameText(AEncoding, 'latin1') or
- SameText(AEncoding, 'iso-ir-100') or
- SameText(AEncoding, 'l1') or
- SameText(AEncoding, 'IBM819') or
- SameText(AEncoding, 'CP819') or
- SameText(AEncoding, 'csISOLatin1') or
- // This one is not in character-sets.txt, but was used in FPC documentation,
- // and still being used in fcl-registry package
- SameText(AEncoding, 'ISO8859-1');
- end;
- { TDOMParseOptions }
- function TDOMParseOptions.GetCanonical: Boolean;
- begin
- Result := FCanonical and FExpandEntities and FCDSectionsAsText and
- { (not normalizeCharacters) and } FNamespaces and
- { namespaceDeclarations and } FPreserveWhitespace;
- end;
- procedure TDOMParseOptions.SetCanonical(aValue: Boolean);
- begin
- FCanonical := aValue;
- if aValue then
- begin
- FExpandEntities := True;
- FCDSectionsAsText := True;
- FNamespaces := True;
- FPreserveWhitespace := True;
- { normalizeCharacters := False; }
- { namespaceDeclarations := True; }
- { wellFormed := True; }
- end;
- end;
- { TXMLInputSource }
- constructor TXMLInputSource.Create(AStream: TStream);
- begin
- inherited Create;
- FStream := AStream;
- end;
- constructor TXMLInputSource.Create(const AStringData: string);
- begin
- inherited Create;
- FStringData := AStringData;
- end;
- { TDOMParser }
- constructor TDOMParser.Create;
- begin
- FOptions := TDOMParseOptions.Create;
- end;
- destructor TDOMParser.Destroy;
- begin
- FOptions.Free;
- inherited Destroy;
- end;
- procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
- var
- InputSrc: TXMLCharSource;
- begin
- with TXMLReader.Create(Self) do
- try
- ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
- ProcessXML(InputSrc)
- finally
- ADoc := TXMLDocument(doc);
- Free;
- end;
- end;
- procedure TDOMParser.ParseUri(const URI: WideString; out ADoc: TXMLDocument);
- var
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- with TXMLReader.Create(Self) do
- try
- if ResolveResource(URI, '', '', Src) then
- ProcessXML(Src)
- else
- DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
- finally
- ADoc := TXMLDocument(doc);
- Free;
- end;
- end;
- function TDOMParser.ParseWithContext(Src: TXMLInputSource;
- Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
- var
- InputSrc: TXMLCharSource;
- Frag: TDOMDocumentFragment;
- node: TDOMNode;
- begin
- if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
- node := Context.ParentNode
- else
- node := Context;
- // TODO: replacing document isn't yet supported
- if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
- raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
- if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
- raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
- with TXMLReader.Create(Self) do
- try
- ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
- Frag := Context.OwnerDocument.CreateDocumentFragment;
- try
- ProcessFragment(InputSrc, Frag);
- Result := Frag.FirstChild;
- case Action of
- xaAppendAsChildren: Context.AppendChild(Frag);
- xaReplaceChildren: begin
- Context.TextContent := ''; // removes children
- Context.ReplaceChild(Frag, Context.FirstChild);
- end;
- xaInsertBefore: node.InsertBefore(Frag, Context);
- xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
- xaReplace: node.ReplaceChild(Frag, Context);
- end;
- finally
- Frag.Free;
- end;
- finally
- Free;
- end;
- end;
- { TXMLCharSource }
- constructor TXMLCharSource.Create(const AData: WideString);
- begin
- inherited Create;
- FLineNo := 1;
- FBuf := PWideChar(AData);
- FBufEnd := FBuf + Length(AData);
- LFPos := FBuf-1;
- FCharCount := Length(AData);
- end;
- procedure TXMLCharSource.Initialize;
- begin
- end;
- function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
- begin
- Result := True; // always succeed
- end;
- function TXMLCharSource.GetSystemID: WideString;
- begin
- if FSystemID <> '' then
- Result := FSystemID
- else if Assigned(FParent) then
- Result := FParent.SystemID
- else
- Result := '';
- end;
- function TXMLCharSource.Reload: Boolean;
- begin
- Result := False;
- end;
- procedure TXMLCharSource.NewLine;
- begin
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- begin
- old := FBuf;
- nonws := False;
- repeat
- if FBuf^ = #10 then
- NewLine;
- if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
- Break;
- if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := FBuf^;
- BufAppendChunk(ToFill, old, FBuf);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- function TXMLCharSource.Matches(const arg: WideString): Boolean;
- begin
- Result := False;
- if (FBufEnd >= FBuf + Length(arg)) or Reload then
- Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
- if Result then
- begin
- Inc(FBuf, Length(arg));
- if FBuf >= FBufEnd then
- Reload;
- end;
- end;
- { TXMLDecodingSource }
- procedure TXMLDecodingSource.AfterConstruction;
- begin
- inherited AfterConstruction;
- FBufStart := AllocMem(4096);
- FBuf := FBufStart;
- FBufEnd := FBuf;
- LFPos := FBuf-1;
- end;
- destructor TXMLDecodingSource.Destroy;
- begin
- FreeMem(FBufStart);
- if Assigned(FDecoder.Cleanup) then
- FDecoder.Cleanup(FDecoder.Context);
- inherited Destroy;
- end;
- procedure TXMLDecodingSource.FetchData;
- begin
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string);
- begin
- // count line endings to obtain correct error location
- while FBuf < FBufEnd do
- begin
- if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
- begin
- if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
- ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
- Inc(FBuf);
- LFPos := FBuf;
- Inc(FLineNo);
- end;
- Inc(FBuf);
- end;
- FReader.FatalError(Msg);
- end;
- function TXMLDecodingSource.Reload: Boolean;
- var
- Remainder: PtrInt;
- r, inLeft: Cardinal;
- rslt: Integer;
- begin
- if Kind = skInternalSubset then
- FReader.DTDReloadHook;
- Remainder := FBufEnd - FBuf;
- if Remainder > 0 then
- Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
- Dec(LFPos, FBuf-FBufStart);
- FBuf := FBufStart;
- FBufEnd := FBufStart + Remainder;
- repeat
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft < 4 then // may contain an incomplete char
- begin
- FetchData;
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft <= 0 then
- Break;
- end;
- r := FBufStart + FBufSize - FBufEnd;
- if r = 0 then
- Break;
- rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
- { Sanity checks: r and inLeft must not increase. }
- if inLeft + FCharBuf <= FCharBufEnd then
- FCharBuf := FCharBufEnd - inLeft
- else
- DecodingError('Decoder error: input byte count out of bounds');
- if r + FBufEnd <= FBufStart + FBufSize then
- FBufEnd := FBufStart + FBufSize - r
- else
- DecodingError('Decoder error: output char count out of bounds');
- if rslt = 0 then
- Break
- else if rslt < 0 then
- DecodingError('Invalid character in input stream')
- else
- FReader.CheckMaxChars(rslt);
- until False;
- FBufEnd^ := #0;
- Result := FBuf < FBufEnd;
- end;
- const
- XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
- procedure TXMLDecodingSource.Initialize;
- begin
- inherited;
- FLineNo := 1;
- FDecoder.Decode := @Decode_UTF8;
- FFixedUCS2 := '';
- if FCharBufEnd-FCharBuf > 1 then
- begin
- if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
- begin
- FFixedUCS2 := 'UTF-16BE';
- FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end
- else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
- begin
- FFixedUCS2 := 'UTF-16LE';
- FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end;
- end;
- FBufSize := 6; // possible BOM and '<?xml'
- Reload;
- if FBuf^ = #$FEFF then
- begin
- FHasBOM := True;
- Inc(FBuf);
- end;
- LFPos := FBuf-1;
- if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
- begin
- FBufSize := 3; // don't decode past XML declaration
- Inc(FBuf, Length(XmlSign));
- FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
- end;
- FBufSize := 2047;
- if FReader.FXML11 then
- FReader.XML11_BuildTables;
- end;
- function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
- var
- NewDecoder: TDecoder;
- begin
- Result := True;
- if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
- Exit;
- if FFixedUCS2 <> '' then
- begin
- Result := SameText(AEncoding, FFixedUCS2) or
- SameText(AEncoding, 'UTF-16') or
- SameText(AEncoding, 'unicode');
- Exit;
- end;
- // TODO: must fail when a byte-based stream is labeled as word-based.
- // see rmt-e2e-61, it now fails but for a completely different reason.
- FillChar(NewDecoder, sizeof(TDecoder), 0);
- if Is_8859_1(AEncoding) then
- FDecoder.Decode := @Decode_8859_1
- else if FindDecoder(AEncoding, NewDecoder) then
- FDecoder := NewDecoder
- else
- Result := False;
- end;
- procedure TXMLDecodingSource.NewLine;
- begin
- case FBuf^ of
- #10: ;
- #13: begin
- // Reload trashes the buffer, it should be consumed beforehand
- if (FBufEnd >= FBuf+2) or Reload then
- begin
- if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
- Inc(FBuf);
- end;
- FBuf^ := #10;
- end;
- #$85, #$2028: if FXML11Rules then
- FBuf^ := #10
- else
- Exit;
- else
- Exit;
- end;
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- { TXMLStreamInputSource }
- const
- Slack = 16;
- constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
- begin
- FStream := AStream;
- FCapacity := 4096;
- GetMem(FAllocated, FCapacity+Slack);
- FCharBuf := FAllocated+(Slack-4);
- FCharBufEnd := FCharBuf;
- FOwnStream := AOwnStream;
- FetchData;
- end;
- destructor TXMLStreamInputSource.Destroy;
- begin
- FreeMem(FAllocated);
- if FOwnStream then
- FStream.Free;
- inherited Destroy;
- end;
- procedure TXMLStreamInputSource.FetchData;
- var
- Remainder, BytesRead: Integer;
- OldBuf: PChar;
- begin
- Assert(FCharBufEnd - FCharBuf < Slack-4);
- if FEof then
- Exit;
- OldBuf := FCharBuf;
- Remainder := FCharBufEnd - FCharBuf;
- if Remainder < 0 then
- Remainder := 0;
- FCharBuf := FAllocated+Slack-4-Remainder;
- if Remainder > 0 then
- Move(OldBuf^, FCharBuf^, Remainder);
- BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
- if BytesRead < FCapacity then
- FEof := True;
- FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
- { Null-termination has been removed:
- 1) Built-in decoders don't need it because they respect the buffer length.
- 2) It was causing unaligned access errors on ARM CPUs.
- }
- //PWideChar(FCharBufEnd)^ := #0;
- end;
- { TXMLFileInputSource }
- constructor TXMLFileInputSource.Create(var AFile: Text);
- begin
- FFile := @AFile;
- SystemID := FilenameToURI(TTextRec(AFile).Name);
- FetchData;
- end;
- procedure TXMLFileInputSource.FetchData;
- var
- Remainder: Integer;
- begin
- if not Eof(FFile^) then
- begin
- Remainder := FCharBufEnd - FCharBuf;
- if Remainder > 0 then
- SetString(FTmp, FCharBuf, Remainder);
- ReadLn(FFile^, FString);
- FString := FString + #10; // bad solution...
- if Remainder > 0 then
- Insert(FTmp, FString, 1);
- FCharBuf := PChar(FString);
- FCharBufEnd := FCharBuf + Length(FString);
- end;
- end;
- { helper that closes handle upon destruction }
- type
- THandleOwnerStream = class(THandleStream)
- public
- destructor Destroy; override;
- end;
- destructor THandleOwnerStream.Destroy;
- begin
- if Handle >= 0 then FileClose(Handle);
- inherited Destroy;
- end;
- { TXMLReader }
- procedure TXMLReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- begin
- SrcOut := nil;
- if Assigned(SrcIn) then
- begin
- if Assigned(SrcIn.FStream) then
- SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
- else if SrcIn.FStringData <> '' then
- SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
- else if (SrcIn.SystemID <> '') then
- ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
- end;
- if (SrcOut = nil) and (FSource = nil) then
- DoErrorPos(esFatal, 'No input source specified', NullLocation);
- end;
- procedure TXMLReader.StoreLocation(out Loc: TLocation);
- begin
- Loc.Line := FSource.FLineNo;
- Loc.LinePos := FSource.FBuf-FSource.LFPos;
- end;
- function TXMLReader.ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
- var
- AbsSysID: WideString;
- Filename: string;
- Stream: TStream;
- fd: THandle;
- begin
- Source := nil;
- Result := False;
- if not ResolveRelativeURI(ABaseURI, ASystemID, AbsSysID) then
- Exit;
- { TODO: alternative resolvers
- These may be 'internal' resolvers or a handler set by application.
- Internal resolvers should probably produce a TStream
- ( so that internal classes need not be exported ).
- External resolver will produce TXMLInputSource that should be converted.
- External resolver must NOT be called for root entity.
- External resolver can return nil, in which case we do the default }
- if URIToFilename(AbsSysID, Filename) then
- begin
- fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
- if fd <> THandle(-1) then
- begin
- Stream := THandleOwnerStream.Create(fd);
- Source := TXMLStreamInputSource.Create(Stream, True);
- Source.SystemID := AbsSysID; // <- Revisit: Really need absolute sysID?
- end;
- end;
- Result := Assigned(Source);
- end;
- procedure TXMLReader.Initialize(ASource: TXMLCharSource);
- begin
- ASource.FParent := FSource;
- FSource := ASource;
- FSource.FReader := Self;
- FSource.FStartNesting := FNesting;
- FSource.Initialize;
- end;
- procedure TXMLReader.FatalError(Expected: WideChar);
- begin
- // FIX: don't output what is found - anything may be found, including exploits...
- FatalError('Expected "%1s"', [string(Expected)]);
- end;
- procedure TXMLReader.FatalError(const descr: String; LineOffs: Integer);
- begin
- DoError(esFatal, descr, LineOffs);
- end;
- procedure TXMLReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
- begin
- DoError(esFatal, Format(descr, args), LineOffs);
- end;
- procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
- begin
- if FValidate then
- DoError(esError, Format(Msg, Args), LineOffs);
- end;
- procedure TXMLReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
- var
- ws: WideString;
- begin
- SetString(ws, FName.Buffer, FName.Length);
- ValidationError(Msg, [ws], LineOffs);
- end;
- procedure TXMLReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
- var
- Loc: TLocation;
- begin
- StoreLocation(Loc);
- if LineOffs >= 0 then
- begin
- Dec(Loc.LinePos, LineOffs);
- DoErrorPos(Severity, descr, Loc);
- end
- else
- DoErrorPos(Severity, descr, FTokenStart);
- end;
- procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const args: array of const; const ErrPos: TLocation);
- begin
- DoErrorPos(Severity, Format(descr, args), ErrPos);
- end;
- procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
- var
- E: EXMLReadError;
- sysid: WideString;
- begin
- if Assigned(FSource) then
- begin
- sysid := FSource.FSystemID;
- if (sysid = '') and Assigned(FSource.FEntity) then
- sysid := FSource.FEntity.FURI;
- E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [sysid, ErrPos.Line, ErrPos.LinePos, descr]);
- end
- else
- E := EXMLReadError.Create(descr);
- E.FSeverity := Severity;
- E.FErrorMessage := descr;
- E.FLine := ErrPos.Line;
- E.FLinePos := ErrPos.LinePos;
- CallErrorHandler(E);
- // No 'finally'! If user handler raises exception, control should not get here
- // and the exception will be freed in CallErrorHandler (below)
- E.Free;
- end;
- procedure TXMLReader.CheckMaxChars(ToAdd: Cardinal);
- var
- src: TXMLCharSource;
- total: Cardinal;
- begin
- Inc(FSource.FCharCount, ToAdd);
- if FMaxChars = 0 then
- Exit;
- src := FSource;
- total := 0;
- repeat
- Inc(total, src.FCharCount);
- if total > FMaxChars then
- FatalError('Exceeded character count limit');
- src := src.FParent;
- until src = nil;
- end;
- procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
- begin
- try
- if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
- FCtrl.FOnError(E);
- if E.Severity = esFatal then
- raise E;
- except
- if ExceptObject <> E then
- E.Free;
- raise;
- end;
- end;
- function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
- begin
- Result := False;
- repeat
- Result := SkipS or Result;
- if FSource.FBuf^ = #0 then
- begin
- Result := True; // report whitespace upon exiting the PE
- if not ContextPop then
- Break;
- end
- else if FSource.FBuf^ = '%' then
- begin
- if (FState <> rsDTD) or ((FSource.Kind = skInternalSubset) and FInsideDecl) then
- Break;
- // This is the only case where look-ahead is needed
- if FSource.FBuf > FSource.FBufEnd-2 then
- FSource.Reload;
- if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[$100+hi(Word(FSource.FBuf[1]))]]) or
- (FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
- begin
- Inc(FSource.FBuf); // skip '%'
- CheckName;
- ExpectChar(';');
- StartPE;
- Result := True; // report whitespace upon entering the PE
- end
- else Break;
- end
- else
- Break;
- until False;
- end;
- procedure TXMLReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- FatalError('Expected whitespace');
- end;
- function TXMLReader.SkipS(Required: Boolean): Boolean;
- var
- p: PWideChar;
- begin
- Result := False;
- repeat
- p := FSource.FBuf;
- repeat
- if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
- begin
- FSource.FBuf := p;
- FSource.NewLine;
- p := FSource.FBuf;
- end
- else if (p^ <> #32) and (p^ <> #9) then
- Break;
- Inc(p);
- Result := True;
- until False;
- FSource.FBuf := p;
- until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
- if (not Result) and Required then
- FatalError('Expected whitespace');
- end;
- procedure TXMLReader.ExpectString(const s: String);
- var
- I: Integer;
- begin
- for I := 1 to Length(s) do
- begin
- if FSource.FBuf^ <> WideChar(ord(s[i])) then
- FatalError('Expected "%s"', [s], i-1);
- FSource.NextChar;
- end;
- end;
- function TXMLReader.CheckForChar(c: WideChar): Boolean;
- begin
- Result := (FSource.FBuf^ = c);
- if Result then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf >= FSource.FBufEnd then
- FSource.Reload;
- end;
- end;
- procedure TXMLReader.SkipQuote(out Delim: WideChar; required: Boolean);
- begin
- Delim := #0;
- if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
- begin
- Delim := FSource.FBuf^;
- FSource.NextChar; // skip quote
- StoreLocation(FTokenStart);
- end
- else if required then
- FatalError('Expected single or double quote');
- end;
- const
- PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
- constructor TXMLReader.Create;
- begin
- inherited Create;
- BufAllocate(FName, 128);
- BufAllocate(FValue, 512);
- FForwardRefs := TFPList.Create;
- FAttrChunks := TFPList.Create;
- // Set char rules to XML 1.0
- FNamePages := @NamePages;
- SetLength(FNodeStack, 16);
- SetLength(FValidators, 16);
- end;
- constructor TXMLReader.Create(AParser: TDOMParser);
- begin
- Create;
- FCtrl := AParser;
- if FCtrl = nil then
- Exit;
- FValidate := FCtrl.Options.Validate;
- FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
- FExpandEntities := FCtrl.Options.ExpandEntities;
- FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
- FIgnoreComments := FCtrl.Options.IgnoreComments;
- FResolveExternals := FCtrl.Options.ResolveExternals;
- FNamespaces := FCtrl.Options.Namespaces;
- FDisallowDoctype := FCtrl.Options.DisallowDoctype;
- FCanonical := FCtrl.Options.CanonicalForm;
- FMaxChars := FCtrl.Options.MaxChars;
- end;
- destructor TXMLReader.Destroy;
- var
- i: Integer;
- begin
- for i := FAttrChunks.Count-1 downto 0 do
- Dispose(PNodeData(FAttrChunks.List^[i]));
- if Assigned(FEntityValue.Buffer) then
- FreeMem(FEntityValue.Buffer);
- FreeMem(FName.Buffer);
- FreeMem(FValue.Buffer);
- if Assigned(FSource) then
- while ContextPop(True) do; // clean input stack
- FSource.Free;
- FPEMap.Free;
- ClearForwardRefs;
- FNsAttHash.Free;
- FNSHelper.Free;
- FDocType.Release;
- FIDMap.Free;
- FForwardRefs.Free;
- FAttrChunks.Free;
- inherited Destroy;
- end;
- procedure TXMLReader.XML11_BuildTables;
- begin
- FNamePages := Xml11NamePages;
- FXML11 := True;
- FSource.FXml11Rules := True;
- end;
- { Must be executed after doc has been set.
- After introducing own NameTable, merge this into constructor }
- procedure TXMLReader.NSPrepare;
- begin
- if FNamespaces then
- begin
- FNSHelper := TNSSupport.Create;
- FNsAttHash := TDblHashArray.Create;
- FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
- FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
- FStdUri_xmlns := FNameTable.FindOrAdd(PWideChar(stduri_xmlns), Length(stduri_xmlns));
- FStdUri_xml := FNameTable.FindOrAdd(PWideChar(stduri_xml), Length(stduri_xml));
- end;
- end;
- procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
- begin
- doc := TXMLDocument.Create;
- doc.documentURI := ASource.SystemID; // TODO: to be changed to URI or BaseURI
- FNameTable := doc.Names;
- FState := rsProlog;
- FNesting := 0;
- FValidatorNesting := 0;
- FCurrNode := @FNodeStack[0];
- FFragmentMode := False;
- NSPrepare;
- Initialize(ASource);
- if FSource.FXMLVersion <> xmlVersionUnknown then
- TDOMTopNodeEx(TDOMNode(doc)).FXMLVersion := FSource.FXMLVersion;
- TDOMTopNodeEx(TDOMNode(doc)).FXMLEncoding := FSource.FXMLEncoding;
- FNext := xtText;
- ParseContent(doc);
- if FState < rsRoot then
- FatalError('Root element is missing');
- if FValidate then
- ValidateIdRefs;
- doc.IDs := FIDMap;
- FIDMap := nil;
- end;
- procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
- var
- DoctypeNode: TDOMDocumentTypeEx;
- begin
- doc := AOwner.OwnerDocument;
- FNameTable := doc.Names;
- FState := rsRoot;
- FNesting := 0;
- FValidatorNesting := 0;
- FCurrNode := @FNodeStack[0];
- FFragmentMode := True;
- FXML11 := doc.XMLVersion = '1.1';
- NSPrepare;
- Initialize(ASource);
- { Get doctype from the owner's document, but only if it is not already assigned
- (It is set directly when parsing children of an Entity, see LoadEntity procedure) }
- if FDocType = nil then
- begin
- DoctypeNode := TDOMDocumentTypeEx(doc.DocType);
- if Assigned(DoctypeNode) then
- FDocType := DocTypeNode.FModel.Reference;
- end;
- if AOwner is TDOMEntity then
- begin
- TDOMTopNodeEx(AOwner).FXMLVersion := FSource.FXMLVersion;
- TDOMTopNodeEx(AOwner).FXMLEncoding := FSource.FXMLEncoding;
- end;
- FNext := xtText;
- ParseContent(aOwner as TDOMNode_WithChildren);
- end;
- function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
- var
- p: PWideChar;
- NameStartFlag: Boolean;
- begin
- p := FSource.FBuf;
- FName.Length := 0;
- FColonPos := -1;
- NameStartFlag := not (cnToken in aFlags);
- repeat
- if NameStartFlag then
- begin
- if (Byte(p^) in NamingBitmap[FNamePages^[hi(Word(p^))]]) or
- ((p^ = ':') and (not FNamespaces)) then
- Inc(p)
- else if FXML11 and ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p, 2)
- else
- begin
- // here we come either when first char of name is bad (it may be a colon),
- // or when a colon is not followed by a valid NameStartChar
- FSource.FBuf := p;
- Result := False;
- Break;
- end;
- NameStartFlag := False;
- end;
- if FXML11 then
- repeat
- if Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] then
- Inc(p)
- else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p,2)
- else
- Break;
- until False
- else
- while Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] do
- Inc(p);
- if p^ = ':' then
- begin
- if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
- begin
- Inc(p);
- if p^ <> #0 then Continue;
- end
- else if FColonPos = -1 then // this is the first colon, remember it
- begin
- FColonPos := p-FSource.FBuf+FName.Length;
- NameStartFlag := True;
- Inc(p);
- if p^ <> #0 then Continue;
- end;
- end;
- BufAppendChunk(FName, FSource.FBuf, p);
- Result := (FName.Length > 0);
- FSource.FBuf := p;
- if (p^ <> #0) or not FSource.Reload then
- Break;
- p := FSource.FBuf;
- until False;
- if not (Result or (cnOptional in aFlags)) then
- RaiseNameNotFound;
- end;
- procedure TXMLReader.CheckNCName;
- begin
- if FNamespaces and (FColonPos <> -1) then
- FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
- end;
- procedure TXMLReader.RaiseNameNotFound;
- begin
- if FColonPos <> -1 then
- FatalError('Bad QName syntax, local part is missing')
- else
- // Coming at no cost, this allows more user-friendly error messages
- with FSource do
- if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
- FatalError('Whitespace is not allowed here')
- else
- FatalError('Name starts with invalid character');
- end;
- function TXMLReader.ExpectName: WideString;
- begin
- CheckName;
- SetString(Result, FName.Buffer, FName.Length);
- end;
- function TXMLReader.ResolvePredefined: Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- with FName do
- begin
- if (Length = 2) and (Buffer[1] = 't') then
- begin
- if Buffer[0] = 'l' then
- wc := '<'
- else if Buffer[0] = 'g' then
- wc := '>'
- else Exit;
- end
- else if Buffer[0] = 'a' then
- begin
- if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
- wc := '&'
- else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
- (Buffer[3] = 's') then
- wc := ''''
- else Exit;
- end
- else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
- (Buffer[2] = 'o') and (Buffer[3] ='t') then
- wc := '"'
- else
- Exit;
- end; // with
- BufAppend(FValue, wc);
- Result := True;
- end;
- function TXMLReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- var
- Code: Integer;
- begin
- FSource.NextChar; // skip '&'
- Result := CheckForChar('#');
- if Result then
- begin
- Code := 0;
- if CheckForChar('x') then
- repeat
- case FSource.FBuf^ of
- '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
- 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
- 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
- else
- Break;
- end;
- FSource.NextChar;
- until Code > $10FFFF
- else
- repeat
- case FSource.FBuf^ of
- '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
- else
- Break;
- end;
- FSource.NextChar;
- until Code > $10FFFF;
- case Code of
- $01..$08, $0B..$0C, $0E..$1F:
- if FXML11 then
- BufAppend(ToFill, WideChar(Code))
- else
- FatalError('Invalid character reference');
- $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
- BufAppend(ToFill, WideChar(Code));
- $10000..$10FFFF:
- begin
- BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
- BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
- end;
- else
- FatalError('Invalid character reference');
- end;
- end
- else CheckName;
- ExpectChar(';');
- end;
- const
- AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
- GT_Delim: TSetOfChar = [#0, '>'];
- { Parse attribute literal, producing plain string value in AttrData.FValueStr.
- If entity references are encountered and FExpandEntities=False, also builds
- a node chain starting from AttrData.FNext. Node chain is built only for the
- first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
- procedure TXMLReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
- var
- wc: WideChar;
- Delim: WideChar;
- ent: TEntityDecl;
- start: TObject;
- curr: PNodeData;
- StartPos: Integer;
- begin
- SkipQuote(Delim);
- curr := AttrData;
- FValue.Length := 0;
- StartPos := 0;
- start := FSource.FEntity;
- repeat
- wc := FSource.SkipUntil(FValue, AttrDelims);
- if wc = '<' then
- FatalError('Character ''<'' is not allowed in attribute value')
- else if wc = '&' then
- begin
- if ParseRef(FValue) or ResolvePredefined then
- Continue;
- ent := EntityCheck(True);
- if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
- begin
- if FValue.Length > StartPos then
- begin
- curr := AllocAttributeValueChunk(curr);
- curr^.FNodeType := ntText;
- // without PWideChar typecast and in {$T-}, FPC treats '@' result as PAnsiChar...
- SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
- end;
- curr := AllocAttributeValueChunk(curr);
- curr^.FNodeType := ntEntityReference;
- // TODO: this probably should be placed to 'name'
- if ent = nil then
- SetString(curr^.FValueStr, FName.Buffer, FName.Length)
- else
- curr^.FValueStr := ent.FName;
- end;
- StartPos := FValue.Length;
- if Assigned(ent) then
- ContextPush(ent);
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- if (wc = Delim) and (FSource.FEntity = start) then
- Break;
- if (wc = #10) or (wc = #9) or (wc = #13) then
- wc := #32;
- BufAppend(FValue, wc);
- end
- else
- begin
- if (FSource.FEntity = start) or not ContextPop then // #0
- FatalError('Literal has no closing quote', -1);
- StartPos := FValue.Length;
- end;
- until False;
- if Assigned(attrData^.FNext) then
- begin
- FAttrCleanupFlag := True;
- if FValue.Length > StartPos then
- begin
- curr := AllocAttributeValueChunk(curr);
- curr^.FNodeType := ntText;
- SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
- end;
- end;
- if nonCDATA then
- BufNormalize(FValue, attrData^.FDenormalized)
- else
- attrData^.FDenormalized := False;
- SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
- end;
- const
- PrefixChar: array[Boolean] of string = ('', '%');
- procedure TXMLReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
- begin
- if AEntity.FOnStack then
- FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
- if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
- begin
- if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
- begin
- // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
- ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
- Src := nil;
- Exit;
- end;
- end
- else
- begin
- Src := TXMLCharSource.Create(AEntity.FReplacementText);
- Src.FLineNo := AEntity.FStartLocation.Line;
- Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
- // needed in case of prefetched external PE
- if AEntity.FSystemID <> '' then
- Src.SystemID := AEntity.FURI;
- end;
- AEntity.FOnStack := True;
- Src.FEntity := AEntity;
- end;
- function TXMLReader.ContextPush(AEntity: TEntityDecl): Boolean;
- var
- Src: TXMLCharSource;
- begin
- EntityToSource(AEntity, Src);
- Result := Assigned(Src);
- if Result then
- Initialize(Src);
- end;
- function TXMLReader.ContextPop(Forced: Boolean): Boolean;
- var
- Src: TXMLCharSource;
- Error: Boolean;
- begin
- Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
- if Result then
- begin
- Src := FSource.FParent;
- Error := False;
- if Assigned(FSource.FEntity) then
- begin
- FSource.FEntity.FOnStack := False;
- FSource.FEntity.FCharCount := FSource.FCharCount;
- // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
- Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
- end;
- FSource.Free;
- FSource := Src;
- // correct position of this error is after PE reference
- if Error then
- BadPENesting(esFatal);
- end;
- end;
- function TXMLReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
- var
- RefName: WideString;
- cnt: Integer;
- begin
- Result := nil;
- SetString(RefName, FName.Buffer, FName.Length);
- cnt := FName.Length+2;
- if Assigned(FDocType) then
- Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
- if Result = nil then
- begin
- if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
- FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
- else
- ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
- Exit;
- end;
- if FStandalone and Result.ExternallyDeclared then
- FatalError('Standalone constraint violation', cnt);
- if Result.FNotationName <> '' then
- FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
- if NoExternals and (Result.FSystemID <> '') then
- FatalError('External entity reference is not allowed in attribute value', cnt);
- if not Result.FResolved then
- LoadEntity(Result);
- // at this point we know the charcount of the entity being included
- if Result.FCharCount >= cnt then
- CheckMaxChars(Result.FCharCount - cnt);
- end;
- procedure TXMLReader.StartPE;
- var
- PEnt: TEntityDecl;
- begin
- PEnt := nil;
- if Assigned(FPEMap) then
- PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
- if PEnt = nil then
- begin
- ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
- // cease processing declarations, unless document is standalone.
- FDTDProcessed := FStandalone;
- Exit;
- end;
- { cache an external PE so it's only fetched once }
- if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
- begin
- FDTDProcessed := FStandalone;
- Exit;
- end;
- CheckMaxChars(PEnt.FCharCount);
- PEnt.FBetweenDecls := not FInsideDecl;
- ContextPush(PEnt);
- FHavePERefs := True;
- end;
- function TXMLReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
- begin
- Result := ContextPush(AEntity);
- if Result then
- try
- FValue.Length := 0;
- FSource.SkipUntil(FValue, [#0]);
- SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
- AEntity.FCharCount := FValue.Length;
- AEntity.FStartLocation.Line := 1;
- AEntity.FStartLocation.LinePos := 1;
- AEntity.FURI := FSource.SystemID; // replace base URI with absolute one
- finally
- ContextPop;
- AEntity.FPrefetched := True;
- FValue.Length := 0;
- end;
- end;
- const
- LiteralDelims: array[TLiteralType] of TSetOfChar = (
- [#0, '''', '"'], // ltPlain
- [#0, '''', '"', #13, #10], // ltPubid
- [#0, '%', '&', '''', '"'] // ltEntity
- );
- function TXMLReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
- Required: Boolean): Boolean;
- var
- start: TObject;
- wc, Delim: WideChar;
- dummy: Boolean;
- begin
- SkipQuote(Delim, Required);
- Result := (Delim <> #0);
- if not Result then
- Exit;
- ToFill.Length := 0;
- start := FSource.FEntity;
- repeat
- wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
- if wc = '%' then { ltEntity only }
- begin
- FSource.NextChar;
- CheckName;
- ExpectChar(';');
- if FSource.Kind = skInternalSubset then
- FatalError('PE reference not allowed here in internal subset', FName.Length+2);
- StartPE;
- end
- else if wc = '&' then { ltEntity }
- begin
- if ParseRef(ToFill) then // charRefs always expanded
- Continue;
- BufAppend(ToFill, '&');
- BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
- BufAppend(ToFill, ';');
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- if (wc = #10) or (wc = #13) then
- wc := #32
- // terminating delimiter must be in the same context as the starting one
- else if (wc = Delim) and (start = FSource.FEntity) then
- Break;
- BufAppend(ToFill, wc);
- end
- else if (FSource.FEntity = start) or not ContextPop then // #0
- FatalError('Literal has no closing quote', -1);
- until False;
- if aType = ltPubid then
- BufNormalize(ToFill, dummy);
- end;
- function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- StoreLocation(FTokenStart);
- repeat
- wc := FSource.SkipUntil(FValue, Delim);
- if wc <> #0 then
- begin
- FSource.NextChar;
- if (FValue.Length > ord(c2 <> #0)) then
- begin
- if (FValue.Buffer[FValue.Length-1] = c1) and
- ((c2 = #0) or ((c2 <> #0) and (FValue.Buffer[FValue.Length-2] = c2))) then
- begin
- Dec(FValue.Length, ord(c2 <> #0) + 1);
- Result := True;
- Exit;
- end;
- end;
- BufAppend(FValue, wc);
- end;
- until wc = #0;
- end;
- procedure TXMLReader.ParseComment(discard: Boolean); // [15]
- var
- SaveLength: Integer;
- begin
- ExpectString('--');
- SaveLength := FValue.Length;
- if not SkipUntilSeq([#0, '-'], '-') then
- FatalError('Unterminated comment', -1);
- ExpectChar('>');
- if not discard then
- begin
- FCurrNode := @FNodeStack[FNesting+1];
- FCurrNode^.FNodeType := ntComment;
- FCurrNode^.FQName := nil;
- FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
- FCurrNode^.FValueLength := FValue.Length-SaveLength;
- end;
- FValue.Length := SaveLength;
- end;
- procedure TXMLReader.ParsePI; // [16]
- begin
- FSource.NextChar; // skip '?'
- CheckName;
- CheckNCName;
- with FName do
- if (Length = 3) and
- ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
- ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
- ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
- begin
- if not BufEquals(FName, 'xml') then
- FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
- else
- FatalError('XML declaration is not allowed here', FName.Length);
- end;
- if FSource.FBuf^ <> '?' then
- SkipS(True);
- FValue.Length := 0;
- if not SkipUntilSeq(GT_Delim, '?') then
- FatalError('Unterminated processing instruction', -1);
- SetNodeInfoWithValue(ntProcessingInstruction,
- FNameTable.FindOrAdd(FName.Buffer, FName.Length));
- end;
- function TXMLReader.CreatePINode: TDOMNode;
- var
- NameStr, ValueStr: WideString;
- begin
- SetString(NameStr, FName.Buffer, FName.Length);
- SetString(ValueStr, FValue.Buffer, FValue.Length);
- result := Doc.CreateProcessingInstruction(NameStr, ValueStr);
- end;
- const
- vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
- procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
- var
- Delim: WideChar;
- buf: array[0..31] of WideChar;
- I: Integer;
- begin
- SkipS(True);
- // [24] VersionInfo: optional in TextDecl, required in XmlDecl
- if (not TextDecl) or (FSource.FBuf^ = 'v') then
- begin
- ExpectString('version');
- ExpectEq;
- SkipQuote(Delim);
- I := 0;
- while (I < 3) and (FSource.FBuf^ <> Delim) do
- begin
- buf[I] := FSource.FBuf^;
- Inc(I);
- FSource.NextChar;
- end;
- if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
- ((buf[2] <> '0') and (buf[2] <> '1')) then
- FatalError('Illegal version number', -1);
- ExpectChar(Delim);
- FSource.FXMLVersion := vers[buf[2] = '1'];
- if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
- FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
- if TextDecl or (FSource.FBuf^ <> '?') then
- SkipS(True);
- end;
- // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
- if TextDecl or (FSource.FBuf^ = 'e') then
- begin
- ExpectString('encoding');
- ExpectEq;
- SkipQuote(Delim);
- I := 0;
- while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
- ((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
- ((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
- begin
- buf[I] := FSource.FBuf^;
- Inc(I);
- FSource.NextChar;
- end;
- if not CheckForChar(Delim) then
- FatalError('Illegal encoding name', i);
- SetString(FSource.FXMLEncoding, buf, i);
- if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
- FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
- if FSource.FBuf^ <> '?' then
- SkipS(not TextDecl);
- end;
- // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
- if (not TextDecl) and (FSource.FBuf^ = 's') then
- begin
- ExpectString('standalone');
- ExpectEq;
- SkipQuote(Delim);
- if FSource.Matches('yes') then
- FStandalone := True
- else if not FSource.Matches('no') then
- FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
- ExpectChar(Delim);
- SkipS;
- end;
- ExpectString('?>');
- { Switch to 1.1 rules only after declaration is parsed completely. This is to
- ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
- if FSource.FXMLVersion = xmlVersion11 then
- FXML11 := True;
- end;
- procedure TXMLReader.DTDReloadHook;
- var
- p: PWideChar;
- begin
- { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
- We must further remove the CR chars and have only LF's left. }
- p := FDTDStartPos;
- while p < FSource.FBuf do
- begin
- while (p < FSource.FBuf) and (p^ <> #13) do
- Inc(p);
- BufAppendChunk(FIntSubset, FDTDStartPos, p);
- if p^ = #13 then
- Inc(p);
- FDTDStartPos := p;
- end;
- FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
- end;
- procedure TXMLReader.ParseDoctypeDecl; // [28]
- var
- Src: TXMLCharSource;
- begin
- if FState >= rsDTD then
- FatalError('Markup declaration is not allowed here');
- if FDisallowDoctype then
- FatalError('Document type is prohibited by parser settings');
- ExpectString('DOCTYPE');
- SkipS(True);
- FDocType := TDTDModel.Create(FNameTable);
- FDTDProcessed := True; // assume success
- FState := rsDTD;
- FDocType.FName := ExpectName;
- SkipS(True);
- ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
- SkipS;
- if CheckForChar('[') then
- begin
- BufAllocate(FIntSubset, 256);
- FSource.Kind := skInternalSubset;
- try
- FDTDStartPos := FSource.FBuf;
- ParseMarkupDecl;
- DTDReloadHook; // fetch last chunk
- SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
- finally
- FreeMem(FIntSubset.Buffer);
- FSource.Kind := skNone;
- end;
- ExpectChar(']');
- SkipS;
- end;
- ExpectChar('>');
- if (FDocType.FSystemID <> '') then
- begin
- if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SystemID, Src) then
- begin
- Initialize(Src);
- try
- Src.Kind := skManualPop;
- ParseMarkupDecl;
- finally
- ContextPop(True);
- end;
- end
- else
- begin
- ValidationError('Unable to resolve external DTD subset', []);
- FDTDProcessed := FStandalone;
- end;
- end;
- FState := rsAfterDTD;
- FCurrNode^.FNodeType := ntDocumentType;
- end;
- procedure TXMLReader.ExpectEq; // [25]
- begin
- if FSource.FBuf^ <> '=' then
- SkipS;
- if FSource.FBuf^ <> '=' then
- FatalError('Expected "="');
- FSource.NextChar;
- SkipS;
- end;
- { DTD stuff }
- procedure TXMLReader.BadPENesting(S: TErrorSeverity);
- begin
- if (S = esFatal) or FValidate then
- DoError(S, 'Parameter entities must be properly nested');
- end;
- procedure TXMLReader.StandaloneError(LineOffs: Integer);
- begin
- ValidationError('Standalone constriant violation', [], LineOffs);
- end;
- function TXMLReader.ParseQuantity: TCPQuant;
- begin
- case FSource.FBuf^ of
- '?': Result := cqZeroOrOnce;
- '*': Result := cqZeroOrMore;
- '+': Result := cqOnceOrMore;
- else
- Result := cqOnce;
- Exit;
- end;
- FSource.NextChar;
- end;
- function TXMLReader.FindOrCreateElDef: TElementDecl;
- var
- p: PHashItem;
- begin
- CheckName;
- p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- Result := TElementDecl(p^.Data);
- if Result = nil then
- begin
- Result := TElementDecl.Create;
- p^.Data := Result;
- end;
- end;
- procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
- var
- Delim: WideChar;
- CurrentCP: TContentParticle;
- begin
- Delim := #0;
- repeat
- CurrentCP := CP.Add;
- SkipWhitespace;
- if CheckForChar('(') then
- ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
- else
- CurrentCP.Def := FindOrCreateElDef;
- CurrentCP.CPQuant := ParseQuantity;
- SkipWhitespace;
- if FSource.FBuf^ = ')' then
- Break;
- if Delim = #0 then
- begin
- if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
- Delim := FSource.FBuf^
- else
- FatalError('Expected pipe or comma delimiter');
- end
- else
- if FSource.FBuf^ <> Delim then
- FatalError(Delim);
- FSource.NextChar; // skip delimiter
- until False;
- if MustEndIn <> FSource.FEntity then
- BadPENesting;
- FSource.NextChar;
- if Delim = '|' then
- CP.CPType := ctChoice
- else
- CP.CPType := ctSeq; // '(foo)' is a sequence!
- end;
- procedure TXMLReader.ParseElementDecl; // [45]
- var
- ElDef: TElementDecl;
- CurrentEntity: TObject;
- I: Integer;
- CP: TContentParticle;
- Typ: TElementContentType;
- ExtDecl: Boolean;
- begin
- CP := nil;
- Typ := ctUndeclared; // satisfy compiler
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- if ElDef.ContentType <> ctUndeclared then
- ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
- ExtDecl := FSource.Kind <> skInternalSubset;
- ExpectWhitespace;
- if FSource.Matches('EMPTY') then
- Typ := ctEmpty
- else if FSource.Matches('ANY') then
- Typ := ctAny
- else if CheckForChar('(') then
- begin
- CP := TContentParticle.Create;
- try
- CurrentEntity := FSource.FEntity;
- SkipWhitespace;
- if FSource.Matches('#PCDATA') then // Mixed section [51]
- begin
- SkipWhitespace;
- Typ := ctMixed;
- while FSource.FBuf^ <> ')' do
- begin
- ExpectChar('|');
- SkipWhitespace;
- with CP.Add do
- begin
- Def := FindOrCreateElDef;
- for I := CP.ChildCount-2 downto 0 do
- if Def = CP.Children[I].Def then
- ValidationError('Duplicate token in mixed section', [], FName.Length);
- end;
- SkipWhitespace;
- end;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- FSource.NextChar;
- if (not CheckForChar('*')) and (CP.ChildCount > 0) then
- FatalError(WideChar('*'));
- CP.CPQuant := cqZeroOrMore;
- CP.CPType := ctChoice;
- end
- else // Children section [47]
- begin
- Typ := ctChildren;
- ExpectChoiceOrSeq(CP, CurrentEntity);
- CP.CPQuant := ParseQuantity;
- end;
- except
- CP.Free;
- raise;
- end;
- end
- else
- FatalError('Invalid content specification');
- // SAX: DeclHandler.ElementDecl(name, model);
- if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
- begin
- ElDef.ExternallyDeclared := ExtDecl;
- ElDef.ContentType := Typ;
- ElDef.RootCP := CP;
- end
- else
- CP.Free;
- end;
- procedure TXMLReader.ParseNotationDecl; // [82]
- var
- NameStr, SysID, PubID: WideString;
- begin
- ExpectWhitespace;
- NameStr := ExpectName;
- CheckNCName;
- ExpectWhitespace;
- if not ParseExternalID(SysID, PubID, True) then
- FatalError('Expected external or public ID');
- if FDTDProcessed then
- DoNotationDecl(NameStr, PubID, SysID);
- end;
- const
- AttrDataTypeNames: array[TAttrDataType] of WideString = (
- 'CDATA',
- 'ID',
- 'IDREF',
- 'IDREFS',
- 'ENTITY',
- 'ENTITIES',
- 'NMTOKEN',
- 'NMTOKENS',
- 'NOTATION'
- );
- procedure TXMLReader.ParseAttlistDecl; // [52]
- var
- ElDef: TElementDecl;
- AttDef: TAttributeDef;
- dt: TAttrDataType;
- Found, DiscardIt: Boolean;
- Offsets: array [Boolean] of Integer;
- attrName: PHashItem;
- begin
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- SkipWhitespace;
- while FSource.FBuf^ <> '>' do
- begin
- CheckName;
- ExpectWhitespace;
- attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- AttDef := TAttributeDef.Create(attrName, FColonPos);
- try
- AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
- // In case of duplicate declaration of the same attribute, we must discard it,
- // not modifying ElDef, and suppressing certain validation errors.
- DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
- if CheckForChar('(') then // [59]
- begin
- AttDef.DataType := dtNmToken;
- repeat
- SkipWhitespace;
- CheckName([cnToken]);
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end
- else
- begin
- StoreLocation(FTokenStart);
- // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
- for dt := dtNotation downto dtCData do
- begin
- Found := FSource.Matches(AttrDataTypeNames[dt]);
- if Found then
- Break;
- end;
- if Found and SkipWhitespace then
- begin
- AttDef.DataType := dt;
- if (dt = dtId) and not DiscardIt then
- begin
- if Assigned(ElDef.IDAttr) then
- ValidationError('Only one attribute of type ID is allowed per element',[])
- else
- ElDef.IDAttr := AttDef;
- end
- else if dt = dtNotation then // no test cases for these ?!
- begin
- if not DiscardIt then
- begin
- if Assigned(ElDef.NotationAttr) then
- ValidationError('Only one attribute of type NOTATION is allowed per element',[])
- else
- ElDef.NotationAttr := AttDef;
- if ElDef.ContentType = ctEmpty then
- ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
- end;
- ExpectChar('(');
- repeat
- SkipWhitespace;
- StoreLocation(FTokenStart);
- CheckName;
- CheckNCName;
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
- if (not DiscardIt) and FValidate then
- AddForwardRef(FName.Buffer, FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end;
- end
- else
- begin
- // don't report 'expected whitespace' if token does not match completely
- Offsets[False] := 0;
- Offsets[True] := Length(AttrDataTypeNames[dt]);
- if Found and (FSource.FBuf^ < 'A') then
- ExpectWhitespace
- else
- FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
- end;
- end;
- StoreLocation(FTokenStart);
- if FSource.Matches('#REQUIRED') then
- AttDef.Default := adRequired
- else if FSource.Matches('#IMPLIED') then
- AttDef.Default := adImplied
- else if FSource.Matches('#FIXED') then
- begin
- AttDef.Default := adFixed;
- ExpectWhitespace;
- end
- else
- AttDef.Default := adDefault;
- if AttDef.Default in [adDefault, adFixed] then
- begin
- if AttDef.DataType = dtId then
- ValidationError('An attribute of type ID cannot have a default value',[]);
- // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
- ExpectAttValue(AttDef.Data, dt <> dtCDATA);
- if not ValidateAttrSyntax(AttDef, AttDef.Data^.FValueStr) then
- ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
- end;
- // SAX: DeclHandler.AttributeDecl(...)
- if DiscardIt then
- AttDef.Free
- else
- ElDef.AddAttrDef(AttDef);
- except
- AttDef.Free;
- raise;
- end;
- SkipWhitespace;
- end;
- end;
- procedure TXMLReader.ParseEntityDecl; // [70]
- var
- IsPE, Exists: Boolean;
- Entity: TEntityDecl;
- Map: THashTable;
- Item: PHashItem;
- begin
- if not SkipWhitespace(True) then
- FatalError('Expected whitespace');
- IsPE := CheckForChar('%');
- if IsPE then // [72]
- begin
- ExpectWhitespace;
- if FPEMap = nil then
- FPEMap := THashTable.Create(64, True);
- Map := FPEMap;
- end
- else
- Map := FDocType.Entities;
- Entity := TEntityDecl.Create;
- try
- Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
- Entity.FIsPE := IsPE;
- CheckName;
- CheckNCName;
- Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
- ExpectWhitespace;
- // remember where the entity is declared
- Entity.FURI := FSource.SystemID;
- if FEntityValue.Buffer = nil then
- BufAllocate(FEntityValue, 256);
- if ParseLiteral(FEntityValue, ltEntity, False) then
- begin
- SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
- Entity.FCharCount := FEntityValue.Length;
- Entity.FStartLocation := FTokenStart;
- end
- else
- begin
- if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
- FatalError('Expected entity value or external ID');
- if not IsPE then // [76]
- begin
- if FSource.FBuf^ <> '>' then
- ExpectWhitespace;
- if FSource.Matches('NDATA') then
- begin
- ExpectWhitespace;
- StoreLocation(FTokenStart);
- Entity.FNotationName := ExpectName;
- if FValidate then
- AddForwardRef(FName.Buffer, FName.Length);
- // SAX: DTDHandler.UnparsedEntityDecl(...);
- end;
- end;
- end;
- except
- Entity.Free;
- raise;
- end;
- // Repeated declarations of same entity are legal but must be ignored
- if FDTDProcessed and not Exists then
- begin
- Item^.Data := Entity;
- Entity.FName := Item^.Key;
- end
- else
- Entity.Free;
- end;
- procedure TXMLReader.ParseMarkupDecl; // [29]
- var
- IncludeLevel: Integer;
- IgnoreLevel: Integer;
- CurrentEntity: TObject;
- IncludeLoc: TLocation;
- IgnoreLoc: TLocation;
- wc: WideChar;
- CondType: (ctUnknown, ctInclude, ctIgnore);
- begin
- IncludeLevel := 0;
- IgnoreLevel := 0;
- repeat
- SkipWhitespace;
- if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
- begin
- ExpectString(']]>');
- Dec(IncludeLevel);
- Continue;
- end;
- if not CheckForChar('<') then
- Break;
- CurrentEntity := FSource.FEntity;
- if FSource.FBuf^ = '?' then
- begin
- ParsePI;
- doc.AppendChild(CreatePINode);
- end
- else
- begin
- ExpectChar('!');
- if FSource.FBuf^ = '-' then
- ParseComment(True)
- else if CheckForChar('[') then
- begin
- if FSource.Kind = skInternalSubset then
- FatalError('Conditional sections are not allowed in internal subset', 1);
- SkipWhitespace;
- CondType := ctUnknown; // satisfy compiler
- if FSource.Matches('INCLUDE') then
- CondType := ctInclude
- else if FSource.Matches('IGNORE') then
- CondType := ctIgnore
- else
- FatalError('Expected "INCLUDE" or "IGNORE"');
- SkipWhitespace;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- ExpectChar('[');
- if CondType = ctInclude then
- begin
- if IncludeLevel = 0 then
- StoreLocation(IncludeLoc);
- Inc(IncludeLevel);
- end
- else if CondType = ctIgnore then
- begin
- StoreLocation(IgnoreLoc);
- IgnoreLevel := 1;
- repeat
- FValue.Length := 0;
- wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
- if FSource.Matches('<![') then
- Inc(IgnoreLevel)
- else if FSource.Matches(']]>') then
- Dec(IgnoreLevel)
- else if wc <> #0 then
- FSource.NextChar
- else // PE's aren't recognized in ignore section, cannot ContextPop()
- DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
- until IgnoreLevel=0;
- end;
- end
- else
- begin
- FInsideDecl := True;
- if FSource.Matches('ELEMENT') then
- ParseElementDecl
- else if FSource.Matches('ENTITY') then
- ParseEntityDecl
- else if FSource.Matches('ATTLIST') then
- ParseAttlistDecl
- else if FSource.Matches('NOTATION') then
- ParseNotationDecl
- else
- FatalError('Illegal markup declaration');
- SkipWhitespace;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- ExpectChar('>');
- FInsideDecl := False;
- end;
- end;
- until False;
- if IncludeLevel > 0 then
- DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
- if (FSource.Kind = skInternalSubset) and (FSource.FBuf^ = ']') then
- Exit;
- if FSource.FBuf^ <> #0 then
- FatalError('Illegal character in DTD');
- end;
- procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
- begin
- doc := TXMLDocument.Create;
- FNameTable := doc.Names;
- FDocType := TDTDModel.Create(FNameTable);
- // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
- doc.AppendChild(TDOMDocumentType.Create(doc, FDocType));
- NSPrepare;
- Initialize(ASource);
- ParseMarkupDecl;
- end;
- procedure TXMLReader.LoadEntity(AEntity: TEntityDecl);
- var
- InnerReader: TXMLReader;
- Src: TXMLCharSource;
- Ent: TDOMEntityEx;
- DoctypeNode: TDOMDocumentType;
- begin
- DoctypeNode := doc.DocType;
- if DoctypeNode = nil then
- Exit;
- Ent := TDOMEntityEx(DocTypeNode.Entities.GetNamedItem(AEntity.FName));
- if Ent = nil then
- Exit;
- InnerReader := TXMLReader.Create(FCtrl);
- try
- InnerReader.FAttrTag := FAttrTag;
- InnerReader.FDocType := FDocType.Reference;
- EntityToSource(AEntity, Src);
- Ent.SetReadOnly(False);
- if Assigned(Src) then
- InnerReader.ProcessFragment(Src, Ent);
- AEntity.FResolved := True;
- finally
- FAttrTag := InnerReader.FAttrTag;
- InnerReader.Free;
- AEntity.FOnStack := False;
- Ent.SetReadOnly(True);
- end;
- end;
- procedure TXMLReader.ValidateCurrentNode;
- var
- ElDef: TElementDecl;
- AttDef: TAttributeDef;
- attr: PNodeData;
- i: Integer;
- begin
- case FCurrNode^.FNodeType of
- ntElement:
- begin
- if (FNesting = 1) and (not FFragmentMode) then
- begin
- if Assigned(FDocType) then
- begin
- if FDocType.FName <> FCurrNode^.FQName^.Key then
- DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
- end
- else
- DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
- end;
- ElDef := TElementDecl(FCurrNode^.FQName^.Data);
- if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
- DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
- if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
- DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
- PushVC(ElDef);
- { Validate attributes }
- for i := 1 to FAttrCount do
- begin
- attr := @FNodeStack[FNesting+i];
- AttDef := TAttributeDef(attr^.FTypeInfo);
- if AttDef = nil then
- DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
- [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
- else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
- begin
- if FStandalone and AttDef.ExternallyDeclared then
- { TODO: perhaps should use different and more descriptive messages }
- if attr^.FDenormalized then
- DoErrorPos(esError, 'Standalone constraint violation', attr^.FLoc2)
- else if i > FSpecifiedAttrs then
- DoError(esError, 'Standalone constraint violation');
- // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
- if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
- DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
- if not ValidateAttrSyntax(AttDef, attr^.FValueStr) then
- DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
- ValidateAttrValue(AttDef, attr);
- end;
- end;
- end;
- ntEndElement:
- begin
- if FValidators[FValidatorNesting].Incomplete then
- ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
- if FValidatorNesting > 0 then
- Dec(FValidatorNesting);
- end;
- ntText, ntSignificantWhitespace:
- case FValidators[FValidatorNesting].FContentType of
- ctChildren:
- if FCurrNode^.FNodeType = ntText then
- ValidationError('Character data is not allowed in element-only content',[])
- else
- begin
- if FValidators[FValidatorNesting].FSaViolation then
- StandaloneError(-1);
- FCurrNode^.FNodeType := ntWhitespace;
- end;
- ctEmpty:
- ValidationError('Character data is not allowed in EMPTY elements', []);
- end;
- ntCDATA:
- if FValidators[FValidatorNesting].FContentType = ctChildren then
- ValidationError('CDATA sections are not allowed in element-only content',[]);
- ntProcessingInstruction:
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('Processing instructions are not allowed within EMPTY elements', []);
- ntComment:
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('Comments are not allowed within EMPTY elements', []);
- ntDocumentType:
- ValidateDTD;
- end;
- end;
- procedure TXMLReader.HandleEntityStart;
- begin
- { FNesting+1 is available due to overallocation in AllocNodeData() }
- FCurrNode := @FNodeStack[FNesting+1];
- FCurrNode^.FNodeType := ntEntityReference;
- FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- FCurrNode^.FValueStart := nil;
- FCurrNode^.FValueLength := 0;
- end;
- procedure TXMLReader.HandleEntityEnd;
- begin
- ContextPop(True);
- if FNesting > 0 then Dec(FNesting);
- FCurrNode := @FNodeStack[FNesting+1];
- FCurrNode^.FNodeType := ntEndEntity;
- // TODO: other properties of FCurrNode
- FNext := xtText;
- end;
- procedure TXMLReader.ResolveEntity;
- begin
- if FCurrNode^.FNodeType <> ntEntityReference then
- raise EInvalidOperation.Create('Wrong node type');
- {... here must actually call EntityCheck, but it's called in main loop}
- FNext := xtPushEntity;
- end;
- procedure TXMLReader.DoStartEntity;
- var
- src: TXMLCharSource;
- begin
- Inc(FNesting);
- FCurrNode := AllocNodeData(FNesting);
- if Assigned(FCurrEntity) then
- ContextPush(FCurrEntity)
- else
- begin
- // Undefined entity -- use a dummy inputsource, in order to get a matching EndEntity event
- src := TXMLCharSource.Create('');
- src.Kind := skManualPop;
- Initialize(src);
- end;
- FNext := xtText;
- end;
- function TXMLReader.DoStartElement: TDOMElement;
- var
- Attr: TDOMAttr;
- i: Integer;
- begin
- with FCurrNode^.FQName^ do
- Result := doc.CreateElementBuf(PWideChar(Key), Length(Key));
- if Assigned(FCurrNode^.FNsUri) then
- Result.SetNSI(FCurrNode^.FNsUri^.Key, FCurrNode^.FColonPos+1);
- for i := 1 to FAttrCount do
- begin
- Attr := LoadAttribute(doc, @FNodeStack[FNesting+i]);
- Result.SetAttributeNode(Attr);
- // Attach element to ID map entry if necessary
- if Assigned(FNodeStack[FNesting+i].FIDEntry) then
- FNodeStack[FNesting+i].FIDEntry^.Data := Result;
- end;
- end;
- // The code below does the bulk of the parsing, and must be as fast as possible.
- // To minimize CPU cache effects, methods from different classes are kept together
- function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- wc: WideChar;
- begin
- nonws := False;
- repeat
- old := FBuf;
- repeat
- wc := FBuf^;
- if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
- (wc = #$2028))) then
- begin
- // strictly this is needed only for 2-byte lineendings
- BufAppendChunk(ToFill, old, FBuf);
- NewLine;
- old := FBuf;
- wc := FBuf^
- end
- else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
- (wc <> #9)) or (wc > #$FFFD) or
- (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
- FReader.FatalError('Invalid character');
- if (wc < #255) and (Char(ord(wc)) in Delim) then
- Break;
- // the checks above filter away everything below #32 that isn't a whitespace
- if wc > #32 then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := wc;
- BufAppendChunk(ToFill, old, FBuf);
- until (Result <> #0) or (not Reload);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- const
- TextDelims: array[Boolean] of TSetOfChar = (
- [#0, '<', '&', '>'],
- [#0, '>']
- );
- textNodeTypes: array[Boolean] of TXMLNodeType = (
- ntSignificantWhitespace,
- ntText
- );
- procedure TXMLReader.ParseContent(parent: TDOMNode_WithChildren);
- var
- cursor: TDOMNode_WithChildren;
- element: TDOMElement;
- begin
- cursor := parent;
- while Read do
- begin
- if FValidate then
- ValidateCurrentNode;
- case FCurrNode^.FNodeType of
- ntText:
- cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, False));
- ntWhitespace, ntSignificantWhitespace:
- if FPreserveWhitespace then
- cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, FCurrNode^.FNodeType = ntWhitespace));
- ntCDATA:
- cursor.InternalAppend(DoCDSect(FValue.Buffer, FValue.Length));
- ntProcessingInstruction:
- cursor.InternalAppend(CreatePINode);
- ntComment:
- if not FIgnoreComments then
- cursor.InternalAppend(doc.CreateCommentBuf(FCurrNode^.FValueStart, FCurrNode^.FValueLength));
- ntElement:
- begin
- element := DoStartElement;
- cursor.InternalAppend(element);
- cursor := element;
- end;
- ntEndElement:
- cursor := TDOMNode_WithChildren(cursor.ParentNode);
- ntDocumentType:
- if not FCanonical then
- cursor.InternalAppend(TDOMDocumentType.Create(doc, FDocType));
- ntEntityReference:
- cursor.InternalAppend(doc.CreateEntityReference(FCurrNode^.FQName^.Key));
- end;
- end;
- end;
- function TXMLReader.Read: Boolean;
- var
- nonWs: Boolean;
- wc: WideChar;
- InCDATA: Boolean;
- tok: TXMLToken;
- begin
- if FNext = xtPopEmptyElement then
- begin
- FNext := xtPopElement;
- FCurrNode^.FNodeType := ntEndElement;
- if FAttrCleanupFlag then
- CleanupAttributes;
- FAttrCount := 0;
- Result := True;
- Exit;
- end;
- if FNext = xtPushElement then
- begin
- if FAttrCleanupFlag then
- CleanupAttributes;
- FAttrCount := 0;
- FNext := xtText;
- end
- else if FNext = xtPopElement then
- begin
- if FNamespaces then
- FNSHelper.EndElement;
- PopVC;
- end
- else if FNext = xtPushEntity then
- DoStartEntity;
- InCDATA := (FNext = xtCDSect);
- StoreLocation(FTokenStart);
- nonWs := False;
- FValue.Length := 0;
- if FNext in [xtCDSect, xtText] then
- repeat
- wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
- if wc = '<' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBufEnd < FSource.FBuf + 2 then
- FSource.Reload;
- if FSource.FBuf^ = '/' then
- tok := xtEndElement
- else if CheckName([cnOptional]) then
- tok := xtElement
- else if FSource.FBuf^ = '!' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf^ = '[' then
- begin
- ExpectString('[CDATA[');
- if FState <> rsRoot then
- FatalError('Illegal at document level');
- StoreLocation(FTokenStart);
- InCDATA := True;
- if FCDSectionsAsText or (FValue.Length = 0) then
- Continue;
- tok := xtCDSect;
- end
- else if FSource.FBuf^ = '-' then
- begin
- { Ignoring comments is tricky in validating mode; discarding a comment which
- is the only child of an EMPTY element will make that element erroneously appear
- as valid. Therefore, at this point we discard only comments which are preceded
- by some text (since presence of text already renders an EMPTY element invalid).
- Other comments should be reported to validation part and discarded there. }
- if FIgnoreComments and (FValue.Length > 0) then
- begin
- ParseComment(True);
- Continue;
- end;
- tok := xtComment;
- end
- else
- tok := xtDoctype;
- end
- else if FSource.FBuf^ = '?' then
- tok := xtPI
- else
- RaiseNameNotFound;
- end
- else if wc = #0 then
- begin
- if InCDATA then
- FatalError('Unterminated CDATA section', -1);
- if FNesting > FSource.FStartNesting then
- FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting].FQName^.Key]);
- if Assigned(FSource.FParent) then
- begin
- if FExpandEntities and ContextPop then
- Continue
- else
- tok := xtEntityEnd;
- end
- else
- tok := xtEOF;
- end
- else if wc = '>' then
- begin
- BufAppend(FValue, wc);
- FSource.NextChar;
- if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
- (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
- if InCData then // got a ']]>' separator
- begin
- Dec(FValue.Length, 3);
- InCDATA := False;
- if FCDSectionsAsText then
- Continue;
- SetNodeInfoWithValue(ntCDATA);
- FNext := xtText;
- Result := True;
- Exit;
- end
- else
- FatalError('Literal '']]>'' is not allowed in text', 3);
- end
- else if wc = '&' then
- begin
- if FState <> rsRoot then
- FatalError('Illegal at document level');
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('References are illegal in EMPTY elements', []);
- if ParseRef(FValue) or ResolvePredefined then
- begin
- nonWs := True; // CharRef to whitespace is not considered whitespace
- Continue;
- end
- else
- begin
- FCurrEntity := EntityCheck;
- if Assigned(FCurrEntity) and FExpandEntities then
- begin
- ContextPush(FCurrEntity);
- Continue;
- end;
- tok := xtEntity;
- end;
- end;
- if FValue.Length <> 0 then
- begin
- if FState <> rsRoot then
- if nonWs then
- FatalError('Illegal at document level', -1)
- else
- Break;
- SetNodeInfoWithValue(textNodeTypes[nonWs]);
- FNext := tok;
- Result := True;
- Exit;
- end;
- Break;
- until False
- else // not (FNext in [xtText, xtCDSect])
- tok := FNext;
- FNext := xtText;
- case tok of
- xtEntity: HandleEntityStart;
- xtEntityEnd: HandleEntityEnd;
- xtElement: ParseStartTag;
- xtEndElement: ParseEndTag;
- xtPI: ParsePI;
- xtDoctype: ParseDoctypeDecl;
- xtComment: ParseComment(False);
- end;
- Result := tok <> xtEOF;
- end;
- procedure TXMLCharSource.NextChar;
- begin
- Inc(FBuf);
- if FBuf >= FBufEnd then
- Reload;
- end;
- procedure TXMLReader.ExpectChar(wc: WideChar);
- begin
- if FSource.FBuf^ = wc then
- FSource.NextChar
- else
- FatalError(wc);
- end;
- // Element name already in FNameBuffer
- procedure TXMLReader.ParseStartTag; // [39] [40] [44]
- var
- ElDef: TElementDecl;
- IsEmpty: Boolean;
- ElName: PHashItem;
- b: TBinding;
- begin
- if FState > rsRoot then
- FatalError('Only one top-level element allowed', FName.Length)
- else if FState < rsRoot then
- begin
- // dispose notation refs from DTD, if any
- ClearForwardRefs;
- FState := rsRoot;
- end;
- // we're about to process a new set of attributes
- Inc(FAttrTag);
- // Get hash entry for element name
- ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- // Find declaration for this element
- ElDef := TElementDecl(ElName^.Data);
- IsEmpty := False;
- FAttrCount := 0;
- FPrefixedAttrs := 0;
- FSpecifiedAttrs := 0;
- Inc(FNesting);
- FCurrNode := AllocNodeData(FNesting);
- FCurrNode^.FQName := ElName;
- FCurrNode^.FNodeType := ntElement;
- FCurrNode^.FColonPos := FColonPos;
- StoreLocation(FCurrNode^.FLoc);
- Dec(FCurrNode^.FLoc.LinePos, FName.Length);
- if FNamespaces then
- begin
- FNSHelper.StartElement;
- if FColonPos > 0 then
- FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
- end;
- while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
- begin
- SkipS(True);
- if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
- Break;
- ParseAttribute(ElDef);
- end;
- if FSource.FBuf^ = '/' then
- begin
- IsEmpty := True;
- FSource.NextChar;
- end;
- ExpectChar('>');
- if Assigned(ElDef) and ElDef.NeedsDefaultPass then
- ProcessDefaultAttributes(ElDef);
- // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
- FCurrNode := @FNodeStack[FNesting];
- if FNamespaces then
- begin
- { Assign namespace URIs to prefixed attrs }
- if FPrefixedAttrs <> 0 then
- ProcessNamespaceAtts;
- { Expand the element name }
- if Assigned(FCurrNode^.FPrefix) then
- begin
- b := TBinding(FCurrNode^.FPrefix^.Data);
- if not (Assigned(b) and (b.uri <> '')) then
- DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
- FCurrNode^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
- end
- else
- begin
- b := FNSHelper.DefaultNSBinding;
- if Assigned(b) then
- FCurrNode^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
- end;
- end;
- if not IsEmpty then
- begin
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipS;
- FNext := xtPushElement;
- end
- else
- FNext := xtPopEmptyElement;
- end;
- procedure TXMLReader.ParseEndTag; // [42]
- var
- ElName: PHashItem;
- begin
- if FNesting <= FSource.FStartNesting then
- FatalError('End-tag is not allowed here');
- Inc(FSource.FBuf);
- FCurrNode := @FNodeStack[FNesting]; // move off the possible child
- FCurrNode^.FNodeType := ntEndElement;
- ElName := FCurrNode^.FQName;
- CheckName;
- if not BufEquals(FName, ElName^.Key) then
- FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
- if FSource.FBuf^ = '>' then // this handles majority of cases
- FSource.NextChar
- else
- begin
- SkipS;
- ExpectChar('>');
- end;
- Inc(FTokenStart.LinePos, 2); // move over '</' chars
- FNext := xtPopElement;
- end;
- procedure TXMLReader.ParseAttribute(ElDef: TElementDecl);
- var
- attrName: PHashItem;
- attrData: PNodeData;
- AttDef: TAttributeDef;
- i: Integer;
- begin
- CheckName;
- attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- attrData := AllocAttributeData;
- attrData^.FQName := attrName;
- attrData^.FColonPos := FColonPos;
- StoreLocation(attrData^.FLoc);
- Dec(attrData^.FLoc.LinePos, FName.Length);
- FSpecifiedAttrs := FAttrCount;
- if Assigned(ElDef) then
- begin
- AttDef := ElDef.GetAttrDef(attrName);
- if Assigned(AttDef) then
- AttDef.Tag := FAttrTag; // indicates that this one is specified
- end
- else
- AttDef := nil;
- attrData^.FTypeInfo := AttDef;
- // check for duplicates
- for i := 1 to FAttrCount-1 do
- if FNodeStack[FNesting+i].FQName = attrName then
- FatalError('Duplicate attribute', FName.Length);
- if FNamespaces then
- begin
- if ((FName.Length = 5) or (FColonPos = 5)) and
- (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
- (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
- (FName.Buffer[4] = 's') then
- begin
- if FColonPos > 0 then
- attrData^.FPrefix := FStdPrefix_xmlns;
- attrData^.FNsUri := FStdUri_xmlns;
- end
- else if FColonPos > 0 then
- begin
- attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
- Inc(FPrefixedAttrs);
- end;
- end;
- ExpectEq;
- ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
- attrData^.FLoc2 := FTokenStart;
- if Assigned(attrData^.FNsUri) then
- begin
- if (not AddBinding(attrData)) and FCanonical then
- begin
- CleanupAttribute(attrData);
- Dec(FAttrCount);
- Dec(FSpecifiedAttrs);
- end;
- end;
- end;
- procedure TXMLReader.AddForwardRef(Buf: PWideChar; Length: Integer);
- var
- w: PForwardRef;
- begin
- New(w);
- SetString(w^.Value, Buf, Length);
- w^.Loc := FTokenStart;
- FForwardRefs.Add(w);
- end;
- procedure TXMLReader.ClearForwardRefs;
- var
- I: Integer;
- begin
- for I := 0 to FForwardRefs.Count-1 do
- Dispose(PForwardRef(FForwardRefs.List^[I]));
- FForwardRefs.Clear;
- end;
- procedure TXMLReader.ValidateIdRefs;
- var
- I: Integer;
- begin
- for I := 0 to FForwardRefs.Count-1 do
- with PForwardRef(FForwardRefs.List^[I])^ do
- if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
- DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
- ClearForwardRefs;
- end;
- procedure TXMLReader.ProcessDefaultAttributes(ElDef: TElementDecl);
- var
- I: Integer;
- AttDef: TAttributeDef;
- attrData: PNodeData;
- begin
- for I := 0 to ElDef.AttrDefCount-1 do
- begin
- AttDef := ElDef.AttrDefs[I];
- if AttDef.Tag <> FAttrTag then // this one wasn't specified
- begin
- case AttDef.Default of
- adDefault, adFixed: begin
- attrData := AllocAttributeData;
- attrData^ := AttDef.Data^;
- if FCanonical then
- attrData^.FIsDefault := False;
- if FNamespaces then
- begin
- if AttDef.IsNamespaceDecl then
- begin
- if attrData^.FColonPos > 0 then
- attrData^.FPrefix := FStdPrefix_xmlns;
- attrData^.FNsUri := FStdUri_xmlns;
- if (not AddBinding(attrData)) and FCanonical then
- Dec(FAttrCount);
- end
- else if attrData^.FColonPos > 0 then
- begin
- attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
- Inc(FPrefixedAttrs);
- end;
- end;
- end;
- adRequired:
- ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
- [AttDef.Data^.FQName^.Key, FNodeStack[FNesting].FQName^.Key], 0)
- end;
- end;
- end;
- end;
- function TXMLReader.AddBinding(attrData: PNodeData): Boolean;
- var
- nsUri, Pfx: PHashItem;
- begin
- nsUri := FNameTable.FindOrAdd(PWideChar(attrData^.FValueStr), Length(attrData^.FValueStr));
- if attrData^.FColonPos > 0 then
- Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
- else
- Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
- { 'xml' is allowed to be bound to the correct namespace }
- if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
- (Pfx = FStdPrefix_xmlns) or
- (nsUri = FStduri_xmlns) then
- begin
- if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
- DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
- else
- DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
- end;
- if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
- DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
- Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> attrData^.FValueStr);
- if Result then
- FNSHelper.BindPrefix(attrData^.FValueStr, Pfx);
- end;
- procedure TXMLReader.ProcessNamespaceAtts;
- var
- I, J: Integer;
- Pfx, AttrName: PHashItem;
- attrData: PNodeData;
- b: TBinding;
- begin
- FNsAttHash.Init(FPrefixedAttrs);
- for I := 1 to FAttrCount do
- begin
- attrData := @FNodeStack[FNesting+i];
- if (attrData^.FColonPos < 1) or Assigned(attrData^.FNsUri) then
- Continue;
- Pfx := attrData^.FPrefix;
- b := TBinding(Pfx^.Data);
- if not (Assigned(b) and (b.uri <> '')) then
- DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
- { detect duplicates }
- J := attrData^.FColonPos+1;
- AttrName := attrData^.FQName;
- if FNsAttHash.Locate(@b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
- DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
- attrData^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
- end;
- end;
- function TXMLReader.ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- var
- I: Integer;
- wc: WideChar;
- begin
- Result := False;
- if FSource.Matches('SYSTEM') then
- SysIdOptional := False
- else if FSource.Matches('PUBLIC') then
- begin
- ExpectWhitespace;
- ParseLiteral(FValue, ltPubid, True);
- SetString(PubID, FValue.Buffer, FValue.Length);
- for I := 1 to Length(PubID) do
- begin
- wc := PubID[I];
- if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
- FatalError('Illegal Public ID literal', -1);
- end;
- end
- else
- Exit;
- if SysIdOptional then
- SkipWhitespace
- else
- ExpectWhitespace;
- if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
- SetString(SysID, FValue.Buffer, FValue.Length);
- Result := True;
- end;
- function TXMLReader.ValidateAttrSyntax(AttrDef: TAttributeDef; const aValue: WideString): Boolean;
- begin
- case AttrDef.DataType of
- dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11) and
- ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
- dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11) and
- ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
- dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
- dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
- // IsXmlName() not necessary - enum is never empty and contains valid names
- dtNotation: Result := AttrDef.HasEnumToken(aValue);
- else
- Result := True;
- end;
- end;
- procedure TXMLReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
- var
- L, StartPos, EndPos: Integer;
- Entity: TEntityDecl;
- begin
- L := Length(attrData^.FValueStr);
- case AttrDef.DataType of
- dtId: begin
- if not AddID(attrData) then
- DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
- end;
- dtIdRef, dtIdRefs: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
- Inc(EndPos);
- if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
- AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
- StartPos := EndPos + 1;
- end;
- end;
- dtEntity, dtEntities: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
- Inc(EndPos);
- Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
- if (Entity = nil) or (Entity.FNotationName = '') then
- ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
- StartPos := EndPos + 1;
- end;
- end;
- end;
- end;
- procedure TXMLReader.ValidateDTD;
- var
- I: Integer;
- begin
- for I := 0 to FForwardRefs.Count-1 do
- with PForwardRef(FForwardRefs[I])^ do
- if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
- DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
- end;
- function TXMLReader.DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
- var
- s: WideString;
- begin
- Assert(not FCDSectionsAsText, 'Should not be called when CDSectionsAsText=True');
- SetString(s, ch, Count);
- result := doc.CreateCDATASection(s);
- end;
- procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString);
- var
- Notation: TNotationDecl;
- Entry: PHashItem;
- begin
- Entry := FDocType.Notations.FindOrAdd(PWideChar(aName), Length(aName));
- if Entry^.Data = nil then
- begin
- Notation := TNotationDecl.Create;
- Notation.FName := aName;
- Notation.FPublicID := aPubID;
- Notation.FSystemID := aSysID;
- Entry^.Data := Notation;
- end
- else
- ValidationError('Duplicate notation declaration: ''%s''', [aName]);
- end;
- function TXMLReader.AddId(aNodeData: PNodeData): Boolean;
- var
- e: PHashItem;
- begin
- if FIDMap = nil then
- FIDMap := THashTable.Create(256, False);
- e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
- Result := not Result;
- if Result then
- aNodeData^.FIDEntry := e;
- end;
- function TXMLReader.AllocAttributeData: PNodeData;
- begin
- Result := AllocNodeData(FNesting + FAttrCount + 1);
- Result^.FNodeType := ntAttribute;
- Result^.FIsDefault := False;
- Inc(FAttrCount);
- end;
- function TXMLReader.AllocNodeData(AIndex: Integer): PNodeData;
- begin
- {make sure we have an extra slot to place child text/comment/etc}
- if AIndex >= Length(FNodeStack)-1 then
- SetLength(FNodeStack, AIndex * 2 + 2);
- Result := @FNodeStack[AIndex];
- Result^.FPrefix := nil;
- Result^.FNsUri := nil;
- Result^.FIDEntry := nil;
- end;
- function TXMLReader.AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
- begin
- { when parsing DTD, don't take ownership of allocated data }
- if FState = rsDTD then
- begin
- New(result);
- FillChar(result^, sizeof(TNodeData), 0);
- end
- else
- begin
- result := FFreeAttrChunk;
- if Assigned(result) then
- begin
- FFreeAttrChunk := result^.FNext;
- result^.FNext := nil;
- end
- else { no free chunks, create a new one }
- begin
- New(result);
- FillChar(result^, sizeof(TNodeData), 0);
- FAttrChunks.Add(result);
- end;
- end;
- APrev^.FNext := result;
- end;
- procedure TXMLReader.CleanupAttributes;
- var
- i: Integer;
- begin
- {cleanup only specified attributes; default ones are owned by DTD}
- for i := 1 to FSpecifiedAttrs do
- CleanupAttribute(@FNodeStack[FNesting+i]);
- FAttrCleanupFlag := False;
- end;
- procedure TXMLReader.CleanupAttribute(aNode: PNodeData);
- var
- chunk, tmp: PNodeData;
- begin
- chunk := aNode^.FNext;
- while Assigned(chunk) do
- begin
- tmp := chunk^.FNext;
- chunk^.FNext := FFreeAttrChunk;
- FFreeAttrChunk := chunk;
- chunk := tmp;
- end;
- aNode^.FNext := nil;
- end;
- procedure TXMLReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
- begin
- {FNesting+1 is available due to overallocation in AllocNodeData() }
- FCurrNode := @FNodeStack[FNesting+1];
- FCurrNode^.FNodeType := typ;
- FCurrNode^.FQName := AName;
- FCurrNode^.FValueStart := FValue.Buffer;
- FCurrNode^.FValueLength := FValue.Length;
- end;
- procedure TXMLReader.PushVC(aElDef: TElementDecl);
- begin
- Inc(FValidatorNesting);
- if FValidatorNesting >= Length(FValidators) then
- SetLength(FValidators, FValidatorNesting * 2);
- with FValidators[FValidatorNesting] do
- begin
- FElementDef := aElDef;
- FCurCP := nil;
- FFailed := False;
- FContentType := ctAny;
- FSaViolation := False;
- if Assigned(aElDef) then
- begin
- FContentType := aElDef.ContentType;
- FSaViolation := FStandalone and aElDef.ExternallyDeclared;
- end;
- end;
- end;
- procedure TXMLReader.PopVC;
- begin
- if (FNesting = 1) and (not FFragmentMode) then
- FState := rsEpilog;
- if FNesting > 0 then Dec(FNesting);
- FCurrNode := @FNodeStack[FNesting];
- FNext := xtText;
- end;
- { TElementValidator }
- function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
- var
- Next: TContentParticle;
- begin
- Result := True;
- // if element is not declared, non-validity has been already reported, no need to report again...
- if Assigned(Def) and Assigned(FElementDef) then
- begin
- case FElementDef.ContentType of
- ctEmpty: Result := False;
- ctChildren, ctMixed: begin
- if FFailed then // if already detected a mismatch, don't waste time
- Exit;
- if FCurCP = nil then
- Next := FElementDef.RootCP.FindFirst(Def)
- else
- Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
- Result := Assigned(Next);
- if Result then
- FCurCP := Next
- else
- FFailed := True; // used to prevent extra error at the end of element
- end;
- // ctAny, ctUndeclared: returns True by default
- end;
- end;
- end;
- function TElementValidator.Incomplete: Boolean;
- begin
- if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
- begin
- if FCurCP <> nil then
- Result := FCurCP.MoreRequired(0) { arg ignored here }
- else
- Result := FElementDef.RootCP.IsRequired;
- end
- else
- Result := False;
- end;
- { plain calls }
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Src := TXMLFileInputSource.Create(f);
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.Doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream);
- begin
- ReadXMLFile(ADoc, f, 'stream:');
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- FileStream: TStream;
- begin
- ADoc := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
- finally
- FileStream.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream);
- begin
- ReadXMLFragment(AParentNode, f, 'stream:');
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
- var
- Stream: TStream;
- begin
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Reader.ProcessDTD(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessDTD(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream);
- begin
- ReadDTDFile(ADoc, f, 'stream:');
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- Stream: TStream;
- begin
- ADoc := nil;
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- end.
|