1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802 |
- {
- This file is part of the Free Component Library
- XML reading routines.
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- Modified in 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- unit XMLRead;
- {$ifdef fpc}
- {$MODE objfpc}{$H+}
- {$endif}
- interface
- uses
- SysUtils, Classes, DOM;
- type
- TErrorSeverity = (esWarning, esError, esFatal);
- EXMLReadError = class(Exception)
- private
- FSeverity: TErrorSeverity;
- FErrorMessage: string;
- FLine: Integer;
- FLinePos: Integer;
- public
- property Severity: TErrorSeverity read FSeverity;
- property ErrorMessage: string read FErrorMessage;
- property Line: Integer read FLine;
- property LinePos: Integer read FLinePos;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream); overload;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream); overload;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
- type
- TDOMParseOptions = class(TObject)
- private
- FValidate: Boolean;
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FResolveExternals: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- function GetCanonical: Boolean;
- procedure SetCanonical(aValue: Boolean);
- public
- property Validate: Boolean read FValidate write FValidate;
- property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
- property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
- property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
- property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
- property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
- property Namespaces: Boolean read FNamespaces write FNamespaces;
- property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
- property MaxChars: Cardinal read FMaxChars write FMaxChars;
- property CanonicalForm: Boolean read GetCanonical write SetCanonical;
- end;
- // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
- TXMLContextAction = (
- xaAppendAsChildren = 1,
- xaReplaceChildren,
- xaInsertBefore,
- xaInsertAfter,
- xaReplace);
- TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
- TXMLInputSource = class(TObject)
- private
- FStream: TStream;
- FStringData: string;
- FBaseURI: WideString;
- FSystemID: WideString;
- FPublicID: WideString;
- // FEncoding: string;
- public
- constructor Create(AStream: TStream); overload;
- constructor Create(const AStringData: string); overload;
- property Stream: TStream read FStream;
- property StringData: string read FStringData;
- property BaseURI: WideString read FBaseURI write FBaseURI;
- property SystemID: WideString read FSystemID write FSystemID;
- property PublicID: WideString read FPublicID write FPublicID;
- // property Encoding: string read FEncoding write FEncoding;
- end;
- TDOMParser = class(TObject)
- private
- FOptions: TDOMParseOptions;
- FOnError: TXMLErrorEvent;
- public
- constructor Create;
- destructor Destroy; override;
- procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
- procedure ParseUri(const URI: WideString; out ADoc: TXMLDocument);
- function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
- Action: TXMLContextAction): TDOMNode;
- property Options: TDOMParseOptions read FOptions;
- property OnError: TXMLErrorEvent read FOnError write FOnError;
- end;
- TDecoder = record
- Context: Pointer;
- Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- Cleanup: procedure(Context: Pointer); stdcall;
- end;
- TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- // =======================================================
- implementation
- uses
- UriParser, xmlutils;
- const
- PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- type
- TDOMNotationEx = class(TDOMNotation);
- TDOMDocumentTypeEx = class(TDOMDocumentType);
- TDOMElementDef = class;
- TDTDSubsetType = (dsNone, dsInternal, dsExternal);
- // This may be augmented with ByteOffset, UTF8Offset, etc.
- TLocation = record
- Line: Integer;
- LinePos: Integer;
- end;
- TDOMEntityEx = class(TDOMEntity)
- protected
- FExternallyDeclared: Boolean;
- FResolved: Boolean;
- FOnStack: Boolean;
- FBetweenDecls: Boolean;
- FReplacementText: DOMString;
- FURI: DOMString;
- FStartLocation: TLocation;
- FCharCount: Cardinal;
- end;
- PWideCharBuf = ^TWideCharBuf;
- TWideCharBuf = record
- Buffer: PWideChar;
- Length: Integer;
- MaxLength: Integer;
- end;
- TXMLReader = class;
- TXMLCharSource = class(TObject)
- private
- FBuf: PWideChar;
- FBufEnd: PWideChar;
- FReader: TXMLReader;
- FParent: TXMLCharSource;
- FEntity: TObject; // weak reference
- FLineNo: Integer;
- LFPos: PWideChar;
- FXML11Rules: Boolean;
- FSystemID: WideString;
- FCharCount: Cardinal;
- function GetSystemID: WideString;
- protected
- function Reload: Boolean; virtual;
- public
- DTDSubsetType: TDTDSubsetType;
- constructor Create(const AData: WideString);
- procedure NextChar;
- procedure NewLine; virtual;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; virtual;
- procedure Initialize; virtual;
- function SetEncoding(const AEncoding: string): Boolean; virtual;
- function Matches(const arg: WideString): Boolean;
- property SystemID: WideString read GetSystemID write FSystemID;
- end;
- TXMLDecodingSource = class(TXMLCharSource)
- private
- FCharBuf: PChar;
- FCharBufEnd: PChar;
- FBufStart: PWideChar;
- FDecoder: TDecoder;
- FHasBOM: Boolean;
- FFixedUCS2: string;
- FBufSize: Integer;
- procedure DecodingError(const Msg: string);
- protected
- function Reload: Boolean; override;
- procedure FetchData; virtual;
- public
- procedure AfterConstruction; override;
- destructor Destroy; override;
- function SetEncoding(const AEncoding: string): Boolean; override;
- procedure NewLine; override;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; override;
- procedure Initialize; override;
- end;
- TXMLStreamInputSource = class(TXMLDecodingSource)
- private
- FAllocated: PChar;
- FStream: TStream;
- FCapacity: Integer;
- FOwnStream: Boolean;
- FEof: Boolean;
- public
- constructor Create(AStream: TStream; AOwnStream: Boolean);
- destructor Destroy; override;
- procedure FetchData; override;
- end;
- TXMLFileInputSource = class(TXMLDecodingSource)
- private
- FFile: ^Text;
- FString: string;
- public
- constructor Create(var AFile: Text);
- procedure FetchData; override;
- end;
- PForwardRef = ^TForwardRef;
- TForwardRef = record
- Value: WideString;
- Loc: TLocation;
- end;
- TCPType = (ctName, ctChoice, ctSeq);
- TCPQuant = (cqOnce, cqZeroOrOnce, cqZeroOrMore, cqOnceOrMore);
- TContentParticle = class(TObject)
- private
- FParent: TContentParticle;
- FChildren: TFPList;
- FIndex: Integer;
- function GetChildCount: Integer;
- function GetChild(Index: Integer): TContentParticle;
- public
- CPType: TCPType;
- CPQuant: TCPQuant;
- Def: TDOMElementDef;
- destructor Destroy; override;
- function Add: TContentParticle;
- function IsRequired: Boolean;
- function FindFirst(aDef: TDOMElementDef): TContentParticle;
- function FindNext(aDef: TDOMElementDef; ChildIdx: Integer): TContentParticle;
- function MoreRequired(ChildIdx: Integer): Boolean;
- property ChildCount: Integer read GetChildCount;
- property Children[Index: Integer]: TContentParticle read GetChild;
- end;
- TElementValidator = object
- FElement: TDOMElement;
- FElementDef: TDOMElementDef;
- FCurCP: TContentParticle;
- FFailed: Boolean;
- function IsElementAllowed(Def: TDOMElementDef): Boolean;
- function Incomplete: Boolean;
- end;
- TXMLReadState = (rsProlog, rsDTD, rsRoot, rsEpilog);
- TElementContentType = (
- ctUndeclared,
- ctAny,
- ctEmpty,
- ctMixed,
- ctChildren
- );
- TCheckNameFlags = set of (cnOptional, cnToken);
-
- TPrefixedAttr = record
- Attr: TDOMAttr;
- PrefixLen: Integer; // to avoid recalculation
- end;
- TXMLReader = class
- private
- FSource: TXMLCharSource;
- FCtrl: TDOMParser;
- FXML11: Boolean;
- FState: TXMLReadState;
- FRecognizePE: Boolean;
- FHavePERefs: Boolean;
- FInsideDecl: Boolean;
- FDocNotValid: Boolean;
- FValue: TWideCharBuf;
- FEntityValue: TWideCharBuf;
- FName: TWideCharBuf;
- FTokenStart: TLocation;
- FStandalone: Boolean; // property of Doc ?
- FNamePages: PByteArray;
- FDocType: TDOMDocumentTypeEx; // a shortcut
- FPEMap: TDOMNamedNodeMap;
- FIDRefs: TFPList;
- FNotationRefs: TFPList;
- FCurrContentType: TElementContentType;
- FSaViolation: Boolean;
- FDTDStartPos: PWideChar;
- FIntSubset: TWideCharBuf;
- FAttrTag: Cardinal;
- FOwnsDoctype: Boolean;
- FNSHelper: TNSSupport;
- FWorkAtts: array of TPrefixedAttr;
- FNsAttHash: TDblHashArray;
- FStdPrefix_xml: PHashItem;
- FStdPrefix_xmlns: PHashItem;
- FColonPos: Integer;
- FValidate: Boolean; // parsing options, copy of FCtrl.Options
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FResolveExternals: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
- procedure Initialize(ASource: TXMLCharSource);
- function DoParseAttValue(Delim: WideChar): Boolean;
- function ContextPush(AEntity: TDOMEntityEx): Boolean;
- function ContextPop: Boolean;
- procedure XML11_BuildTables;
- procedure ParseQuantity(CP: TContentParticle);
- procedure StoreLocation(out Loc: TLocation);
- function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
- procedure ValidateAttrValue(Attr: TDOMAttr; const aValue: WideString);
- procedure AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
- procedure ClearRefs(aList: TFPList);
- procedure ValidateIdRefs;
- procedure StandaloneError(LineOffs: Integer = 0);
- procedure CallErrorHandler(E: EXMLReadError);
- function FindOrCreateElDef: TDOMElementDef;
- function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
- procedure CheckMaxChars;
- protected
- FCursor: TDOMNode_WithChildren;
- FNesting: Integer;
- FValidator: array of TElementValidator;
- procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
- procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const ErrPos: TLocation);
- procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
- procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
- procedure FatalError(Expected: WideChar); overload;
- function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
- function SkipS(required: Boolean = False): Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- procedure ExpectChar(wc: WideChar);
- function CheckForChar(c: WideChar): Boolean;
- procedure RaiseNameNotFound;
- function CheckName(aFlags: TCheckNameFlags = []): Boolean;
- procedure CheckNCName;
- function ExpectName: WideString; // [5]
- procedure SkipQuotedLiteral(out Literal: WideString; required: Boolean = True);
- procedure ExpectAttValue; // [10]
- procedure ParseComment; // [15]
- procedure ParsePI; // [16]
- procedure ParseCDSect; // [18]
- procedure ParseXmlOrTextDecl(TextDecl: Boolean);
- procedure ExpectEq;
- procedure ParseDoctypeDecl; // [28]
- procedure ParseMarkupDecl; // [29]
- procedure ParseElement; // [39]
- procedure ParseEndTag; // [42]
- procedure DoEndElement(ErrOffset: Integer);
- procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
- procedure ParseContent; // [43]
- function ResolvePredefined: Boolean;
- procedure IncludeEntity(InAttr: Boolean);
- procedure StartPE;
- function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- function ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- procedure BadPENesting(S: TErrorSeverity = esError);
- procedure ParseEntityDecl;
- function ParseEntityDeclValue(Delim: WideChar): Boolean;
- procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq(CP: TContentParticle);
- procedure ParseElementDecl;
- procedure ParseNotationDecl;
- function ResolveEntity(const SystemID, PublicID, BaseURI: WideString; out Source: TXMLCharSource): Boolean;
- procedure ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
- procedure ProcessNamespaceAtts(Element: TDOMElement);
- procedure AddBinding(Attr: TDOMAttr; PrefixPtr: PWideChar; PrefixLen: Integer);
- procedure PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
- procedure PopVC;
- procedure UpdateConstraints;
- procedure ValidateDTD;
- procedure ValidateRoot;
- procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
- procedure DoAttrText(ch: PWideChar; Count: Integer);
- procedure DTDReloadHook;
- procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- // Some SAX-alike stuff (at a very early stage)
- procedure DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean=False);
- procedure DoComment(ch: PWideChar; Count: Integer);
- procedure DoCDSect(ch: PWideChar; Count: Integer);
- procedure DoNotationDecl(const aName, aPubID, aSysID: WideString);
- public
- doc: TDOMDocument;
- constructor Create; overload;
- constructor Create(AParser: TDOMParser); overload;
- destructor Destroy; override;
- procedure ProcessXML(ASource: TXMLCharSource); // [1]
- procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
- procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
- end;
- // Attribute/Element declarations
- TDOMElementDef = class(TDOMElement)
- public
- FExternallyDeclared: Boolean;
- ContentType: TElementContentType;
- IDAttr: TDOMAttrDef;
- NotationAttr: TDOMAttrDef;
- RootCP: TContentParticle;
- destructor Destroy; override;
- end;
- const
- NullLocation: TLocation = (Line: 0; LinePos: 0);
- { Decoders }
- var
- Decoders: array of TGetDecoderProc;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- var
- L: Integer;
- begin
- L := Length(Decoders);
- SetLength(Decoders, L+1);
- Decoders[L] := Proc;
- end;
- function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- for I := 0 to High(Decoders) do
- if Decoders[I](AEncoding, Decoder) then
- begin
- Result := True;
- Exit;
- end;
- end;
- function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- var
- cnt: Cardinal;
- begin
- cnt := OutCnt; // num of widechars
- if cnt > InCnt div sizeof(WideChar) then
- cnt := InCnt div sizeof(WideChar);
- Move(InBuf^, OutBuf^, cnt * sizeof(WideChar));
- Dec(InCnt, cnt*sizeof(WideChar));
- Dec(OutCnt, cnt);
- Result := cnt;
- end;
- function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- var
- I: Integer;
- cnt: Cardinal;
- InPtr: PChar;
- begin
- cnt := OutCnt; // num of widechars
- if cnt > InCnt div sizeof(WideChar) then
- cnt := InCnt div sizeof(WideChar);
- InPtr := InBuf;
- for I := 0 to cnt-1 do
- begin
- OutBuf[I] := WideChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
- Inc(InPtr, 2);
- end;
- Dec(InCnt, cnt*sizeof(WideChar));
- Dec(OutCnt, cnt);
- Result := cnt;
- end;
- function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- var
- I: Integer;
- cnt: Cardinal;
- begin
- cnt := OutCnt; // num of widechars
- if cnt > InCnt then
- cnt := InCnt;
- for I := 0 to cnt-1 do
- OutBuf[I] := WideChar(ord(InBuf[I]));
- Dec(InCnt, cnt);
- Dec(OutCnt, cnt);
- Result := cnt;
- end;
- function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- const
- MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
- var
- i, j, bc: Cardinal;
- Value: Cardinal;
- begin
- result := 0;
- i := OutCnt;
- while (i > 0) and (InCnt > 0) do
- begin
- bc := 1;
- Value := ord(InBuf^);
- if Value < $80 then
- OutBuf^ := WideChar(Value)
- else
- begin
- if Value < $C2 then
- begin
- Result := -1;
- Break;
- end;
- Inc(bc);
- if Value > $DF then
- begin
- Inc(bc);
- if Value > $EF then
- begin
- Inc(bc);
- if Value > $F7 then // never encountered in the tests.
- begin
- Result := -1;
- Break;
- end;
- end;
- end;
- if InCnt < bc then
- Break;
- j := 1;
- while j < bc do
- begin
- if InBuf[j] in [#$80..#$BF] then
- Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
- else
- begin
- Result := -1;
- Break;
- end;
- Inc(j);
- end;
- Value := Value and MaxCode[bc];
- // RFC2279 check
- if Value <= MaxCode[bc-1] then
- begin
- Result := -1;
- Break;
- end;
- case Value of
- 0..$D7FF, $E000..$FFFF: OutBuf^ := WideChar(Value);
- $10000..$10FFFF:
- begin
- if i < 2 then Break;
- OutBuf^ := WideChar($D7C0 + (Value shr 10));
- OutBuf[1] := WideChar($DC00 xor (Value and $3FF));
- Inc(OutBuf); // once here
- Dec(i);
- end
- else
- begin
- Result := -1;
- Break;
- end;
- end;
- end;
- Inc(OutBuf);
- Inc(InBuf, bc);
- Dec(InCnt, bc);
- Dec(i);
- end;
- if Result >= 0 then
- Result := OutCnt-i;
- OutCnt := i;
- end;
- function Is_8859_1(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'ISO-8859-1') or
- SameText(AEncoding, 'ISO_8859-1') or
- SameText(AEncoding, 'latin1') or
- SameText(AEncoding, 'iso-ir-100') or
- SameText(AEncoding, 'l1') or
- SameText(AEncoding, 'IBM819') or
- SameText(AEncoding, 'CP819') or
- SameText(AEncoding, 'csISOLatin1') or
- // This one is not in character-sets.txt, but used in most FPC documentation...
- SameText(AEncoding, 'ISO8859-1');
- end;
- procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
- begin
- ABuffer.MaxLength := ALength;
- ABuffer.Length := 0;
- ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(WideChar));
- end;
- procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
- begin
- if ABuffer.Length >= ABuffer.MaxLength then
- begin
- ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(WideChar));
- FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(WideChar),0);
- ABuffer.MaxLength := ABuffer.MaxLength * 2;
- end;
- ABuffer.Buffer[ABuffer.Length] := wc;
- Inc(ABuffer.Length);
- end;
- procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
- var
- Len: Integer;
- begin
- Len := PEnd - PStart;
- if Len <= 0 then
- Exit;
- if Len >= ABuf.MaxLength - ABuf.Length then
- begin
- ABuf.MaxLength := (Len + ABuf.Length)*2;
- // note: memory clean isn't necessary here.
- // To avoid garbage, control Length field.
- ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(WideChar));
- end;
- Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(WideChar));
- Inc(ABuf.Length, Len);
- end;
- function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
- begin
- Result := (ABuf.Length = Length(Arg)) and
- CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
- end;
- { TDOMParseOptions }
- function TDOMParseOptions.GetCanonical: Boolean;
- begin
- Result := FCanonical and FExpandEntities and FCDSectionsAsText and
- { (not normalizeCharacters) and } FNamespaces and
- { namespaceDeclarations and } FPreserveWhitespace;
- end;
- procedure TDOMParseOptions.SetCanonical(aValue: Boolean);
- begin
- FCanonical := aValue;
- if aValue then
- begin
- FExpandEntities := True;
- FCDSectionsAsText := True;
- FNamespaces := True;
- FPreserveWhitespace := True;
- { normalizeCharacters := False; }
- { namespaceDeclarations := True; }
- { wellFormed := True; }
- end;
- end;
- { TXMLInputSource }
- constructor TXMLInputSource.Create(AStream: TStream);
- begin
- inherited Create;
- FStream := AStream;
- end;
- constructor TXMLInputSource.Create(const AStringData: string);
- begin
- inherited Create;
- FStringData := AStringData;
- end;
- { TDOMParser }
- constructor TDOMParser.Create;
- begin
- FOptions := TDOMParseOptions.Create;
- end;
- destructor TDOMParser.Destroy;
- begin
- FOptions.Free;
- inherited Destroy;
- end;
- procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
- var
- InputSrc: TXMLCharSource;
- begin
- with TXMLReader.Create(Self) do
- try
- ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
- ProcessXML(InputSrc)
- finally
- ADoc := TXMLDocument(doc);
- Free;
- end;
- end;
- procedure TDOMParser.ParseUri(const URI: WideString; out ADoc: TXMLDocument);
- var
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- with TXMLReader.Create(Self) do
- try
- if ResolveEntity(URI, '', '', Src) then
- ProcessXML(Src)
- else
- DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
- finally
- ADoc := TXMLDocument(doc);
- Free;
- end;
- end;
- function TDOMParser.ParseWithContext(Src: TXMLInputSource;
- Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
- var
- InputSrc: TXMLCharSource;
- Frag: TDOMDocumentFragment;
- node: TDOMNode;
- begin
- if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
- node := Context.ParentNode
- else
- node := Context;
- // TODO: replacing document isn't yet supported
- if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
- raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
- if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
- raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
- with TXMLReader.Create(Self) do
- try
- ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
- Frag := Context.OwnerDocument.CreateDocumentFragment;
- try
- ProcessFragment(InputSrc, Frag);
- Result := Frag.FirstChild;
- case Action of
- xaAppendAsChildren: Context.AppendChild(Frag);
- xaReplaceChildren: begin
- Context.TextContent := ''; // removes children
- Context.ReplaceChild(Frag, Context.FirstChild);
- end;
- xaInsertBefore: node.InsertBefore(Frag, Context);
- xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
- xaReplace: node.ReplaceChild(Frag, Context);
- end;
- finally
- Frag.Free;
- end;
- finally
- Free;
- end;
- end;
- { TXMLCharSource }
- constructor TXMLCharSource.Create(const AData: WideString);
- begin
- inherited Create;
- FLineNo := 1;
- FBuf := PWideChar(AData);
- FBufEnd := FBuf + Length(AData);
- LFPos := FBuf-1;
- FCharCount := Length(AData);
- end;
- procedure TXMLCharSource.Initialize;
- begin
- end;
- function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
- begin
- Result := True; // always succeed
- end;
- function TXMLCharSource.GetSystemID: WideString;
- begin
- if FSystemID <> '' then
- Result := FSystemID
- else if Assigned(FParent) then
- Result := FParent.SystemID
- else
- Result := '';
- end;
- function TXMLCharSource.Reload: Boolean;
- begin
- Result := False;
- end;
- procedure TXMLCharSource.NewLine;
- begin
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- begin
- old := FBuf;
- nonws := False;
- repeat
- if FBuf^ = #10 then
- NewLine;
- if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
- Break;
- if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := FBuf^;
- BufAppendChunk(ToFill, old, FBuf);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- function TXMLCharSource.Matches(const arg: WideString): Boolean;
- begin
- Result := False;
- if (FBufEnd >= FBuf + Length(arg)) or Reload then
- Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
- if Result then
- Inc(FBuf, Length(arg));
- end;
- { TXMLDecodingSource }
- procedure TXMLDecodingSource.AfterConstruction;
- begin
- inherited AfterConstruction;
- FBufStart := AllocMem(4096);
- FBuf := FBufStart;
- FBufEnd := FBuf;
- LFPos := FBuf-1;
- end;
- destructor TXMLDecodingSource.Destroy;
- begin
- FreeMem(FBufStart);
- if Assigned(FDecoder.Cleanup) then
- FDecoder.Cleanup(FDecoder.Context);
- inherited Destroy;
- end;
- procedure TXMLDecodingSource.FetchData;
- begin
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string);
- begin
- // count line endings to obtain correct error location
- while FBuf < FBufEnd do
- begin
- if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
- begin
- if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
- ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
- Inc(FBuf);
- LFPos := FBuf;
- Inc(FLineNo);
- end;
- Inc(FBuf);
- end;
- FReader.FatalError(Msg);
- end;
- function TXMLDecodingSource.Reload: Boolean;
- var
- Remainder: PtrInt;
- r, inLeft: Cardinal;
- rslt: Integer;
- begin
- if DTDSubsetType = dsInternal then
- FReader.DTDReloadHook;
- Remainder := FBufEnd - FBuf;
- if Remainder > 0 then
- Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
- Dec(LFPos, FBuf-FBufStart);
- FBuf := FBufStart;
- FBufEnd := FBufStart + Remainder;
- repeat
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft < 4 then // may contain an incomplete char
- begin
- FetchData;
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft <= 0 then
- Break;
- end;
- r := FBufStart + FBufSize - FBufEnd;
- if r = 0 then
- Break;
- rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
- { Sanity checks: r and inLeft must not increase. }
- if inLeft + FCharBuf <= FCharBufEnd then
- FCharBuf := FCharBufEnd - inLeft
- else
- DecodingError('Decoder error: input byte count out of bounds');
- if r + FBufEnd <= FBufStart + FBufSize then
- FBufEnd := FBufStart + FBufSize - r
- else
- DecodingError('Decoder error: output char count out of bounds');
- if rslt = 0 then
- Break
- else if rslt < 0 then
- DecodingError('Invalid character in input stream')
- else
- begin
- Inc(FCharCount, rslt);
- FReader.CheckMaxChars;
- end;
- until False;
- FBufEnd^ := #0;
- Result := FBuf < FBufEnd;
- end;
- const
- XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
- procedure TXMLDecodingSource.Initialize;
- begin
- inherited;
- FLineNo := 1;
- FXml11Rules := FReader.FXML11;
- FDecoder.Decode := @Decode_UTF8;
- FFixedUCS2 := '';
- if FCharBufEnd-FCharBuf > 1 then
- begin
- if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
- begin
- FFixedUCS2 := 'UTF-16BE';
- FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end
- else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
- begin
- FFixedUCS2 := 'UTF-16LE';
- FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end;
- end;
- FBufSize := 6; // possible BOM and '<?xml'
- Reload;
- if FBuf^ = #$FEFF then
- begin
- FHasBOM := True;
- Inc(FBuf);
- end;
- LFPos := FBuf-1;
- if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
- begin
- FBufSize := 3; // don't decode past XML declaration
- Inc(FBuf, Length(XmlSign));
- FReader.ParseXmlOrTextDecl(FParent <> nil);
- end;
- FBufSize := 2047;
- end;
- function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
- var
- NewDecoder: TDecoder;
- begin
- Result := True;
- if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
- Exit;
- if FFixedUCS2 <> '' then
- begin
- Result := SameText(AEncoding, FFixedUCS2) or
- SameText(AEncoding, 'UTF-16') or
- SameText(AEncoding, 'unicode');
- Exit;
- end;
- // TODO: must fail when a byte-based stream is labeled as word-based.
- // see rmt-e2e-61, it now fails but for a completely different reason.
- FillChar(NewDecoder, sizeof(TDecoder), 0);
- if Is_8859_1(AEncoding) then
- FDecoder.Decode := @Decode_88591
- else if FindDecoder(AEncoding, NewDecoder) then
- FDecoder := NewDecoder
- else
- Result := False;
- end;
- procedure TXMLDecodingSource.NewLine;
- begin
- case FBuf^ of
- #10: begin
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- #13: begin
- Inc(FLineNo);
- LFPos := FBuf;
- // Reload trashes the buffer, it should be consumed beforehand
- if (FBufEnd >= FBuf+2) or Reload then
- begin
- if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
- begin
- Inc(FBuf);
- Inc(LFPos);
- end;
- FBuf^ := #10;
- end;
- end;
- #$85, #$2028: if FXML11Rules then
- begin
- FBuf^ := #10;
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- end;
- end;
- { TXMLStreamInputSource }
- const
- Slack = 16;
- constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
- begin
- FStream := AStream;
- FCapacity := 4096;
- GetMem(FAllocated, FCapacity+Slack);
- FCharBuf := FAllocated+(Slack-4);
- FCharBufEnd := FCharBuf;
- FOwnStream := AOwnStream;
- FetchData;
- end;
- destructor TXMLStreamInputSource.Destroy;
- begin
- FreeMem(FAllocated);
- if FOwnStream then
- FStream.Free;
- inherited Destroy;
- end;
- procedure TXMLStreamInputSource.FetchData;
- var
- Remainder, BytesRead: Integer;
- OldBuf: PChar;
- begin
- Assert(FCharBufEnd - FCharBuf < Slack-4);
- if FEof then
- Exit;
- OldBuf := FCharBuf;
- Remainder := FCharBufEnd - FCharBuf;
- if Remainder < 0 then
- Remainder := 0;
- FCharBuf := FAllocated+Slack-4-Remainder;
- if Remainder > 0 then
- Move(OldBuf^, FCharBuf^, Remainder);
- BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
- if BytesRead < FCapacity then
- FEof := True;
- FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
- PWideChar(FCharBufEnd)^ := #0;
- end;
- { TXMLFileInputSource }
- constructor TXMLFileInputSource.Create(var AFile: Text);
- begin
- FFile := @AFile;
- SystemID := FilenameToURI(TTextRec(AFile).Name);
- FetchData;
- end;
- procedure TXMLFileInputSource.FetchData;
- begin
- if not Eof(FFile^) then
- begin
- ReadLn(FFile^, FString);
- FString := FString + #10; // bad solution...
- FCharBuf := PChar(FString);
- FCharBufEnd := FCharBuf + Length(FString);
- end;
- end;
- { helper that closes handle upon destruction }
- type
- THandleOwnerStream = class(THandleStream)
- public
- destructor Destroy; override;
- end;
- destructor THandleOwnerStream.Destroy;
- begin
- if Handle >= 0 then FileClose(Handle);
- inherited Destroy;
- end;
- { TXMLReader }
- procedure TXMLReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- begin
- SrcOut := nil;
- if Assigned(SrcIn) then
- begin
- if Assigned(SrcIn.FStream) then
- SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
- else if SrcIn.FStringData <> '' then
- SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
- else if (SrcIn.SystemID <> '') then
- ResolveEntity(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
- end;
- if (SrcOut = nil) and (FSource = nil) then
- DoErrorPos(esFatal, 'No input source specified', NullLocation);
- end;
- procedure TXMLReader.StoreLocation(out Loc: TLocation);
- begin
- Loc.Line := FSource.FLineNo;
- Loc.LinePos := FSource.FBuf-FSource.LFPos;
- end;
- function TXMLReader.ResolveEntity(const SystemID, PublicID, BaseURI: WideString; out Source: TXMLCharSource): Boolean;
- var
- AbsSysID: WideString;
- Filename: string;
- Stream: TStream;
- fd: THandle;
- begin
- Source := nil;
- Result := False;
- if not ResolveRelativeURI(BaseURI, SystemID, AbsSysID) then
- Exit;
- { TODO: alternative resolvers
- These may be 'internal' resolvers or a handler set by application.
- Internal resolvers should probably produce a TStream
- ( so that internal classes need not be exported ).
- External resolver will produce TXMLInputSource that should be converted.
- External resolver must NOT be called for root entity.
- External resolver can return nil, in which case we do the default }
- if URIToFilename(AbsSysID, Filename) then
- begin
- fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
- if fd <> THandle(-1) then
- begin
- Stream := THandleOwnerStream.Create(fd);
- Source := TXMLStreamInputSource.Create(Stream, True);
- Source.SystemID := AbsSysID; // <- Revisit: Really need absolute sysID?
- end;
- end;
- Result := Assigned(Source);
- end;
- procedure TXMLReader.Initialize(ASource: TXMLCharSource);
- begin
- ASource.FParent := FSource;
- FSource := ASource;
- FSource.FReader := Self;
- FSource.Initialize;
- end;
- procedure TXMLReader.FatalError(Expected: WideChar);
- begin
- // FIX: don't output what is found - anything may be found, including exploits...
- FatalError('Expected "%1s"', [string(Expected)]);
- end;
- procedure TXMLReader.FatalError(const descr: String; LineOffs: Integer);
- begin
- DoError(esFatal, descr, LineOffs);
- end;
- procedure TXMLReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
- begin
- DoError(esFatal, Format(descr, args), LineOffs);
- end;
- procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
- begin
- FDocNotValid := True;
- if FValidate then
- DoError(esError, Format(Msg, Args), LineOffs);
- end;
- procedure TXMLReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
- var
- Loc: TLocation;
- begin
- StoreLocation(Loc);
- if LineOffs >= 0 then
- begin
- Dec(Loc.LinePos, LineOffs);
- DoErrorPos(Severity, descr, Loc);
- end
- else
- DoErrorPos(Severity, descr, FTokenStart);
- end;
- procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
- var
- E: EXMLReadError;
- sysid: WideString;
- begin
- if Assigned(FSource) then
- begin
- sysid := FSource.FSystemID;
- if (sysid = '') and Assigned(FSource.FEntity) then
- sysid := TDOMEntityEx(FSource.FEntity).FURI;
- E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [sysid, ErrPos.Line, ErrPos.LinePos, descr]);
- end
- else
- E := EXMLReadError.Create(descr);
- E.FSeverity := Severity;
- E.FErrorMessage := descr;
- E.FLine := ErrPos.Line;
- E.FLinePos := ErrPos.LinePos;
- CallErrorHandler(E);
- // No 'finally'! If user handler raises exception, control should not get here
- // and the exception will be freed in CallErrorHandler (below)
- E.Free;
- end;
- procedure TXMLReader.CheckMaxChars;
- var
- src: TXMLCharSource;
- total: Cardinal;
- begin
- if FMaxChars = 0 then
- Exit;
- src := FSource;
- total := 0;
- repeat
- Inc(total, src.FCharCount);
- if total > FMaxChars then
- FatalError('Exceeded character count limit');
- src := src.FParent;
- until src = nil;
- end;
- procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
- begin
- try
- if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
- FCtrl.FOnError(E);
- if E.Severity = esFatal then
- raise E;
- except
- if ExceptObject <> E then
- E.Free;
- raise;
- end;
- end;
- function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
- begin
- Result := False;
- repeat
- Result := SkipS or Result;
- if FSource.FBuf^ = #0 then
- begin
- Result := True; // report whitespace upon exiting the PE
- if not ContextPop then
- Break;
- end
- else if FSource.FBuf^ = '%' then
- begin
- if not FRecognizePE then
- Break;
- // This is the only case where look-ahead is needed
- if FSource.FBuf > FSource.FBufEnd-2 then
- FSource.Reload;
- if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[$100+hi(Word(FSource.FBuf[1]))]]) or
- (FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
- begin
- Inc(FSource.FBuf); // skip '%'
- CheckName;
- ExpectChar(';');
- StartPE;
- Result := True; // report whitespace upon entering the PE
- end
- else Break;
- end
- else
- Break;
- until False;
- end;
- procedure TXMLReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- FatalError('Expected whitespace');
- end;
- function TXMLReader.SkipS(Required: Boolean): Boolean;
- var
- p: PWideChar;
- begin
- Result := False;
- repeat
- p := FSource.FBuf;
- repeat
- if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
- begin
- FSource.FBuf := p;
- FSource.NewLine;
- p := FSource.FBuf;
- end
- else if (p^ <> #32) and (p^ <> #9) then
- Break;
- Inc(p);
- Result := True;
- until False;
- FSource.FBuf := p;
- until (p^ <> #0) or (not FSource.Reload);
- if (not Result) and Required then
- FatalError('Expected whitespace');
- end;
- procedure TXMLReader.ExpectString(const s: String);
- var
- I: Integer;
- begin
- for I := 1 to Length(s) do
- begin
- if FSource.FBuf^ <> WideChar(ord(s[i])) then
- FatalError('Expected "%s"', [s], i-1);
- FSource.NextChar;
- end;
- end;
- function TXMLReader.CheckForChar(c: WideChar): Boolean;
- begin
- Result := (FSource.FBuf^ = c);
- if Result then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf >= FSource.FBufEnd then
- FSource.Reload;
- end;
- end;
- procedure TXMLReader.SkipQuote(out Delim: WideChar; required: Boolean);
- begin
- Delim := #0;
- if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
- begin
- Delim := FSource.FBuf^;
- FSource.NextChar; // skip quote
- end
- else if required then
- FatalError('Expected single or double quote');
- end;
- const
- PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
- constructor TXMLReader.Create;
- begin
- inherited Create;
- BufAllocate(FName, 128);
- BufAllocate(FValue, 512);
- FIDRefs := TFPList.Create;
- FNotationRefs := TFPList.Create;
- FNSHelper := TNSSupport.Create;
- FNsAttHash := TDblHashArray.Create;
- SetLength(FWorkAtts, 16);
- FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
- FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
- // Set char rules to XML 1.0
- FNamePages := @NamePages;
- SetLength(FValidator, 16);
- end;
- constructor TXMLReader.Create(AParser: TDOMParser);
- begin
- Create;
- FCtrl := AParser;
- FValidate := FCtrl.Options.Validate;
- FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
- FExpandEntities := FCtrl.Options.ExpandEntities;
- FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
- FIgnoreComments := FCtrl.Options.IgnoreComments;
- FResolveExternals := FCtrl.Options.ResolveExternals;
- FNamespaces := FCtrl.Options.Namespaces;
- FDisallowDoctype := FCtrl.Options.DisallowDoctype;
- FCanonical := FCtrl.Options.CanonicalForm;
- FMaxChars := FCtrl.Options.MaxChars;
- end;
- destructor TXMLReader.Destroy;
- begin
- if Assigned(FEntityValue.Buffer) then
- FreeMem(FEntityValue.Buffer);
- FreeMem(FName.Buffer);
- FreeMem(FValue.Buffer);
- if Assigned(FSource) then
- while ContextPop do; // clean input stack
- FSource.Free;
- FPEMap.Free;
- ClearRefs(FNotationRefs);
- ClearRefs(FIDRefs);
- FNsAttHash.Free;
- FNSHelper.Free;
- if FOwnsDoctype then
- FDocType.Free;
- FNotationRefs.Free;
- FIDRefs.Free;
- inherited Destroy;
- end;
- procedure TXMLReader.XML11_BuildTables;
- begin
- FNamePages := Xml11NamePages;
- FXML11 := True;
- FSource.FXml11Rules := True;
- end;
- procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
- begin
- doc := TXMLDocument.Create;
- doc.documentURI := ASource.SystemID; // TODO: to be changed to URI or BaseURI
- FCursor := doc;
- FState := rsProlog;
- FNesting := 0;
- Initialize(ASource);
- ParseContent;
- if FState < rsRoot then
- FatalError('Root element is missing');
- if FValidate and Assigned(FDocType) then
- ValidateIdRefs;
- end;
- procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
- begin
- doc := AOwner.OwnerDocument;
- FCursor := AOwner as TDOMNode_WithChildren;
- FState := rsRoot;
- Initialize(ASource);
- FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
- ParseContent;
- end;
- function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
- var
- p: PWideChar;
- NameStartFlag: Boolean;
- begin
- p := FSource.FBuf;
- FName.Length := 0;
- FColonPos := -1;
- NameStartFlag := not (cnToken in aFlags);
- repeat
- if NameStartFlag then
- begin
- if (Byte(p^) in NamingBitmap[FNamePages^[hi(Word(p^))]]) or
- ((p^ = ':') and (not FNamespaces)) then
- Inc(p)
- else if FXML11 and ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p, 2)
- else
- begin
- // here we come either when first char of name is bad (it may be a colon),
- // or when a colon is not followed by a valid NameStartChar
- FSource.FBuf := p;
- Result := False;
- Break;
- end;
- NameStartFlag := False;
- end;
- if FXML11 then
- repeat
- if Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] then
- Inc(p)
- else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p,2)
- else
- Break;
- until False
- else
- while Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] do
- Inc(p);
- if p^ = ':' then
- begin
- if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
- begin
- Inc(p);
- if p^ <> #0 then Continue;
- end
- else if FColonPos = -1 then // this is the first colon, remember it
- begin
- FColonPos := p-FSource.FBuf+FName.Length;
- NameStartFlag := True;
- Inc(p);
- if p^ <> #0 then Continue;
- end;
- end;
- BufAppendChunk(FName, FSource.FBuf, p);
- Result := (FName.Length > 0);
- FSource.FBuf := p;
- if (p^ <> #0) or not FSource.Reload then
- Break;
- p := FSource.FBuf;
- until False;
- if not (Result or (cnOptional in aFlags)) then
- RaiseNameNotFound;
- end;
- procedure TXMLReader.CheckNCName;
- begin
- if FNamespaces and (FColonPos <> -1) then
- FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
- end;
- procedure TXMLReader.RaiseNameNotFound;
- begin
- if FColonPos <> -1 then
- FatalError('Bad QName syntax, local part is missing')
- else
- // Coming at no cost, this allows more user-friendly error messages
- with FSource do
- if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
- FatalError('Whitespace is not allowed here')
- else
- FatalError('Name starts with invalid character');
- end;
- function TXMLReader.ExpectName: WideString;
- begin
- CheckName;
- SetString(Result, FName.Buffer, FName.Length);
- end;
- function TXMLReader.ResolvePredefined: Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- with FName do
- begin
- if (Length = 2) and (Buffer[1] = 't') then
- begin
- if Buffer[0] = 'l' then
- wc := '<'
- else if Buffer[0] = 'g' then
- wc := '>'
- else Exit;
- end
- else if Buffer[0] = 'a' then
- begin
- if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
- wc := '&'
- else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
- (Buffer[3] = 's') then
- wc := ''''
- else Exit;
- end
- else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
- (Buffer[2] = 'o') and (Buffer[3] ='t') then
- wc := '"'
- else
- Exit;
- end; // with
- BufAppend(FValue, wc);
- Result := True;
- end;
- function TXMLReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- var
- Value: Integer;
- begin
- FSource.NextChar; // skip '&'
- Result := CheckForChar('#');
- if Result then
- begin
- Value := 0;
- if CheckForChar('x') then
- repeat
- case FSource.FBuf^ of
- '0'..'9': Value := Value * 16 + Ord(FSource.FBuf^) - Ord('0');
- 'a'..'f': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
- 'A'..'F': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
- else
- Break;
- end;
- FSource.NextChar;
- until Value > $10FFFF
- else
- repeat
- case FSource.FBuf^ of
- '0'..'9': Value := Value * 10 + Ord(FSource.FBuf^) - Ord('0');
- else
- Break;
- end;
- FSource.NextChar;
- until Value > $10FFFF;
- case Value of
- $01..$08, $0B..$0C, $0E..$1F:
- if FXML11 then
- BufAppend(ToFill, WideChar(Value))
- else
- FatalError('Invalid character reference');
- $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
- BufAppend(ToFill, WideChar(Value));
- $10000..$10FFFF:
- begin
- BufAppend(ToFill, WideChar($D7C0 + (Value shr 10)));
- BufAppend(ToFill, WideChar($DC00 xor (Value and $3FF)));
- end;
- else
- FatalError('Invalid character reference');
- end;
- end
- else CheckName;
- ExpectChar(';');
- end;
- const
- AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
- EntityValueDelims: TSetOfChar = [#0, '%', '&', '''', '"'];
- SQ_Delim: TSetOfChar = [#0, ''''];
- DQ_Delim: TSetOfChar = [#0, '"'];
- GT_Delim: TSetOfChar = [#0, '>'];
- function TXMLReader.DoParseAttValue(Delim: WideChar): Boolean;
- var
- wc: WideChar;
- begin
- FValue.Length := 0;
- repeat
- wc := FSource.SkipUntil(FValue, AttrDelims);
- if wc = '<' then
- FatalError('Character ''<'' is not allowed in attribute value')
- else if wc = '&' then
- begin
- if ParseRef(FValue) or ResolvePredefined then
- Continue;
- // have to insert entity or reference
- if FValue.Length > 0 then
- begin
- DoAttrText(FValue.Buffer, FValue.Length);
- FValue.Length := 0;
- end;
- IncludeEntity(True);
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- if wc = Delim then
- Break;
- if (wc = #10) or (wc = #9) or (wc = #13) then
- wc := #32;
- BufAppend(FValue, wc);
- end;
- until wc = #0;
- // When processing the included entity, Delim = #0, so getting here isn't a error
- if FValue.Length > 0 then
- DoAttrText(FValue.Buffer, FValue.Length);
- FValue.Length := 0;
- Result := wc <> #0;
- end;
- function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
- var
- Src: TXMLCharSource;
- begin
- if (AEntity.SystemID <> '') and not AEntity.FResolved then
- begin
- Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
- if not Result then
- begin
- // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
- ValidationError('Unable to resolve external entity ''%s''', [AEntity.NodeName]);
- Exit;
- end;
- end
- else
- begin
- Src := TXMLCharSource.Create(AEntity.FReplacementText);
- Src.FLineNo := AEntity.FStartLocation.Line;
- Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
- // needed in case of prefetched external PE
- if AEntity.SystemID <> '' then
- Src.SystemID := AEntity.FURI;
- end;
- AEntity.FOnStack := True;
- Src.FEntity := AEntity;
- Initialize(Src);
- Result := True;
- end;
- function TXMLReader.ContextPop: Boolean;
- var
- Src: TXMLCharSource;
- Error: Boolean;
- begin
- Result := Assigned(FSource.FParent) and (FSource.DTDSubsetType = dsNone);
- if Result then
- begin
- Src := FSource.FParent;
- Error := False;
- if Assigned(FSource.FEntity) then
- begin
- TDOMEntityEx(FSource.FEntity).FOnStack := False;
- TDOMEntityEx(FSource.FEntity).FCharCount := FSource.FCharCount;
- // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
- Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
- end;
- FSource.Free;
- FSource := Src;
- // correct position of this error is after PE reference
- if Error then
- BadPENesting(esFatal);
- end;
- end;
- procedure TXMLReader.IncludeEntity(InAttr: Boolean);
- var
- AEntity: TDOMEntityEx;
- RefName: WideString;
- Child: TDOMNode;
- SaveCursor: TDOMNode_WithChildren;
- cnt: Cardinal;
- begin
- AEntity := nil;
- SetString(RefName, FName.Buffer, FName.Length);
- cnt := FName.Length+2;
- if Assigned(FDocType) then
- AEntity := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
- if AEntity = nil then
- begin
- if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then
- FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
- else
- ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
- FCursor.AppendChild(doc.CreateEntityReference(RefName));
- Exit;
- end;
- if InAttr and (AEntity.SystemID <> '') then
- FatalError('External entity reference is not allowed in attribute value', cnt);
- if FStandalone and AEntity.FExternallyDeclared then
- FatalError('Standalone constraint violation', cnt);
- if AEntity.NotationName <> '' then
- FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
- if not AEntity.FResolved then
- begin
- if AEntity.FOnStack then
- FatalError('Entity ''%s'' recursively references itself', [RefName]);
- if ContextPush(AEntity) then
- begin
- SaveCursor := FCursor;
- FCursor := AEntity; // build child node tree for the entity
- try
- AEntity.SetReadOnly(False);
- if InAttr then
- DoParseAttValue(#0)
- else
- ParseContent;
- AEntity.FResolved := True;
- finally
- AEntity.SetReadOnly(True);
- ContextPop;
- FCursor := SaveCursor;
- FValue.Length := 0;
- end;
- end;
- end;
- // charcount of the entity included is known at this point
- Inc(FSource.FCharCount, AEntity.FCharCount - cnt);
- CheckMaxChars;
- if (not FExpandEntities) or (not AEntity.FResolved) then
- begin
- // This will clone Entity children
- FCursor.AppendChild(doc.CreateEntityReference(RefName));
- Exit;
- end;
- Child := AEntity.FirstChild; // clone the entity node tree
- while Assigned(Child) do
- begin
- FCursor.AppendChild(Child.CloneNode(True));
- Child := Child.NextSibling;
- end;
- end;
- procedure TXMLReader.StartPE;
- var
- PEName: WideString;
- PEnt: TDOMEntityEx;
- begin
- SetString(PEName, FName.Buffer, FName.Length);
- PEnt := nil;
- if Assigned(FPEMap) then
- PEnt := FPEMap.GetNamedItem(PEName) as TDOMEntityEx;
- if PEnt = nil then // TODO -cVC: Referencing undefined PE
- begin // (These are classified as 'optional errors'...)
- // ValidationError('Undefined parameter entity referenced: %s', [PEName]);
- Exit;
- end;
- if PEnt.FOnStack then
- FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
- { cache an external PE so it's only fetched once }
- if (PEnt.SystemID <> '') and not PEnt.FResolved then
- begin
- if ContextPush(PEnt) then
- try
- FValue.Length := 0;
- FSource.SkipUntil(FValue, [#0]);
- SetString(PEnt.FReplacementText, FValue.Buffer, FValue.Length);
- PEnt.FCharCount := FValue.Length;
- PEnt.FStartLocation.Line := 1;
- PEnt.FStartLocation.LinePos := 1;
- PEnt.FURI := FSource.SystemID; // replace base URI with absolute one
- finally
- ContextPop;
- PEnt.FResolved := True;
- FValue.Length := 0;
- end;
- end;
- Inc(FSource.FCharCount, PEnt.FCharCount);
- CheckMaxChars;
- PEnt.FBetweenDecls := not FInsideDecl;
- ContextPush(PEnt);
- FHavePERefs := True;
- end;
- procedure TXMLReader.ExpectAttValue; // [10]
- var
- Delim: WideChar;
- begin
- SkipQuote(Delim);
- StoreLocation(FTokenStart);
- if not DoParseAttValue(Delim) then
- FatalError('Literal has no closing quote',-1);
- end;
- procedure TXMLReader.SkipQuotedLiteral(out Literal: WideString; required: Boolean);
- var
- Delim: WideChar;
- begin
- SkipQuote(Delim, required);
- if Delim <> #0 then
- begin
- StoreLocation(FTokenStart);
- FValue.Length := 0;
- if Delim = '''' then
- Delim := FSource.SkipUntil(FValue, SQ_Delim)
- else
- Delim := FSource.SkipUntil(FValue, DQ_Delim);
- if Delim = #0 then
- FatalError('Literal has no closing quote', -1);
- FSource.NextChar;
- SetString(Literal, FValue.Buffer, FValue.Length);
- end;
- end;
- function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- FValue.Length := 0;
- StoreLocation(FTokenStart);
- repeat
- wc := FSource.SkipUntil(FValue, Delim);
- if wc <> #0 then
- begin
- FSource.NextChar;
- if (FValue.Length > ord(c2 <> #0)) then
- begin
- if (FValue.Buffer[FValue.Length-1] = c1) and
- ((c2 = #0) or ((c2 <> #0) and (FValue.Buffer[FValue.Length-2] = c2))) then
- begin
- Dec(FValue.Length, ord(c2 <> #0) + 1);
- Result := True;
- Exit;
- end;
- end;
- BufAppend(FValue, wc);
- end;
- until wc = #0;
- end;
- procedure TXMLReader.ParseComment; // [15]
- begin
- ExpectString('--');
- if SkipUntilSeq([#0, '-'], '-') then
- begin
- ExpectChar('>');
- DoComment(FValue.Buffer, FValue.Length);
- end
- else
- FatalError('Unterminated comment', -1);
- end;
- procedure TXMLReader.ParsePI; // [16]
- var
- Name, Value: WideString;
- PINode: TDOMProcessingInstruction;
- begin
- FSource.NextChar; // skip '?'
- Name := ExpectName;
- CheckNCName;
- with FName do
- if (Length = 3) and
- ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
- ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
- ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
- begin
- if Name <> 'xml' then
- FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
- else
- FatalError('XML declaration is not allowed here', FName.Length);
- end;
- if FSource.FBuf^ <> '?' then
- SkipS(True);
- if SkipUntilSeq(GT_Delim, '?') then
- begin
- SetString(Value, FValue.Buffer, FValue.Length);
- // SAX: ContentHandler.ProcessingInstruction(Name, Value);
- if FCurrContentType = ctEmpty then
- ValidationError('Processing instructions are not allowed within EMPTY elements', []);
- PINode := Doc.CreateProcessingInstruction(Name, Value);
- if Assigned(FCursor) then
- FCursor.AppendChild(PINode)
- else // to comply with certain tests, insert PI from DTD before DTD
- Doc.InsertBefore(PINode, FDocType);
- end
- else
- FatalError('Unterminated processing instruction', -1);
- end;
- procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
- var
- TmpStr: WideString;
- IsXML11: Boolean;
- begin
- SkipS(True);
- // VersionInfo: optional in TextDecl, required in XmlDecl
- if (not TextDecl) or (FSource.FBuf^ = 'v') then
- begin
- ExpectString('version'); // [24]
- ExpectEq;
- SkipQuotedLiteral(TmpStr);
- IsXML11 := False;
- if TmpStr = '1.1' then // Checking for bad chars is implied
- IsXML11 := True
- else if TmpStr <> '1.0' then
- { should be no whitespace in these literals, but that isn't checked now }
- FatalError('Illegal version number', -1);
- if not TextDecl then
- begin
- if doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).XMLVersion := TmpStr;
- if IsXML11 then
- XML11_BuildTables;
- end
- else // parsing external entity
- if IsXML11 and not FXML11 then
- FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
- if FSource.FBuf^ <> '?' then
- SkipS(True);
- end;
- // EncodingDecl: required in TextDecl, optional in XmlDecl
- if TextDecl or (FSource.FBuf^ = 'e') then // [80]
- begin
- ExpectString('encoding');
- ExpectEq;
- SkipQuotedLiteral(TmpStr);
- if not IsValidXmlEncoding(TmpStr) then
- FatalError('Illegal encoding name', -1);
- if not FSource.SetEncoding(TmpStr) then // <-- Wide2Ansi conversion here
- FatalError('Encoding ''%s'' is not supported', [TmpStr], -1);
- // getting here means that specified encoding is supported
- // TODO: maybe assign the 'preferred' encoding name?
- if not TextDecl and doc.InheritsFrom(TXMLDocument) then
- TXMLDocument(doc).Encoding := TmpStr;
- if FSource.FBuf^ <> '?' then
- SkipS(True);
- end;
- // SDDecl: forbidden in TextDecl, optional in XmlDecl
- if (not TextDecl) and (FSource.FBuf^ = 's') then
- begin
- ExpectString('standalone');
- ExpectEq;
- SkipQuotedLiteral(TmpStr);
- if TmpStr = 'yes' then
- FStandalone := True
- else if TmpStr <> 'no' then
- FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
- SkipS;
- end;
- ExpectString('?>');
- end;
- procedure TXMLReader.DTDReloadHook;
- var
- p: PWideChar;
- begin
- { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
- We must further remove the CR chars and have only LF's left. }
- p := FDTDStartPos;
- while p < FSource.FBuf do
- begin
- while (p < FSource.FBuf) and (p^ <> #13) do
- Inc(p);
- BufAppendChunk(FIntSubset, FDTDStartPos, p);
- if p^ = #13 then
- Inc(p);
- FDTDStartPos := p;
- end;
- FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
- end;
- procedure TXMLReader.ParseDoctypeDecl; // [28]
- var
- Src: TXMLCharSource;
- begin
- if FState >= rsDTD then
- FatalError('Markup declaration is not allowed here');
- if FDisallowDoctype then
- FatalError('Document type is prohibited by parser settings');
- ExpectString('DOCTYPE');
- SkipS(True);
- FDocType := TDOMDocumentTypeEx(TDOMDocumentType.Create(doc));
- FState := rsDTD;
- try
- FDocType.FName := ExpectName;
- SkipS(True);
- ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
- SkipS;
- finally
- // DONE: append node after its name has been set; always append to avoid leak
- if FCanonical then
- FOwnsDoctype := True
- else
- Doc.AppendChild(FDocType);
- FCursor := nil;
- end;
- if CheckForChar('[') then
- begin
- BufAllocate(FIntSubset, 256);
- FSource.DTDSubsetType := dsInternal;
- try
- FDTDStartPos := FSource.FBuf;
- ParseMarkupDecl;
- DTDReloadHook; // fetch last chunk
- SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
- finally
- FreeMem(FIntSubset.Buffer);
- FSource.DTDSubsetType := dsNone;
- end;
- ExpectChar(']');
- SkipS;
- end;
- ExpectChar('>');
- if (FDocType.SystemID <> '') then
- begin
- if ResolveEntity(FDocType.SystemID, FDocType.PublicID, FSource.SystemID, Src) then
- begin
- Initialize(Src);
- try
- Src.DTDSubsetType := dsExternal;
- ParseMarkupDecl;
- finally
- Src.DTDSubsetType := dsNone;
- ContextPop;
- end;
- end
- else
- ValidationError('Unable to resolve external DTD subset', []);
- end;
- FCursor := Doc;
- ValidateDTD;
- FDocType.SetReadOnly(True);
- end;
- procedure TXMLReader.ExpectEq; // [25]
- begin
- if FSource.FBuf^ <> '=' then
- SkipS;
- if FSource.FBuf^ <> '=' then
- FatalError('Expected "="');
- FSource.NextChar;
- SkipS;
- end;
- { DTD stuff }
- procedure TXMLReader.BadPENesting(S: TErrorSeverity);
- begin
- if (S = esFatal) or FValidate then
- DoError(S, 'Parameter entities must be properly nested');
- end;
- procedure TXMLReader.StandaloneError(LineOffs: Integer);
- begin
- ValidationError('Standalone constriant violation', [], LineOffs);
- end;
- procedure TXMLReader.ParseQuantity(CP: TContentParticle);
- begin
- case FSource.FBuf^ of
- '?': CP.CPQuant := cqZeroOrOnce;
- '*': CP.CPQuant := cqZeroOrMore;
- '+': CP.CPQuant := cqOnceOrMore;
- else
- Exit;
- end;
- FSource.NextChar;
- end;
- function TXMLReader.FindOrCreateElDef: TDOMElementDef;
- var
- p: PHashItem;
- begin
- CheckName;
- p := doc.Names.FindOrAdd(FName.Buffer, FName.Length);
- Result := TDOMElementDef(p^.Data);
- if Result = nil then
- begin
- Result := TDOMElementDef.Create(doc);
- Result.FNSI.QName := p;
- p^.Data := Result;
- end;
- end;
- procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle); // [49], [50]
- var
- Delim: WideChar;
- CurrentEntity: TObject;
- CurrentCP: TContentParticle;
- begin
- Delim := #0;
- repeat
- CurrentCP := CP.Add;
- SkipWhitespace;
- if CheckForChar('(') then
- begin
- CurrentEntity := FSource.FEntity;
- ExpectChoiceOrSeq(CurrentCP);
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- FSource.NextChar;
- end
- else
- CurrentCP.Def := FindOrCreateElDef;
- ParseQuantity(CurrentCP);
- SkipWhitespace;
- if FSource.FBuf^ = ')' then
- Break;
- if Delim = #0 then
- begin
- if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
- Delim := FSource.FBuf^
- else
- FatalError('Expected pipe or comma delimiter');
- end
- else
- if FSource.FBuf^ <> Delim then
- FatalError(Delim);
- FSource.NextChar; // skip delimiter
- until False;
- if Delim = '|' then
- CP.CPType := ctChoice
- else
- CP.CPType := ctSeq; // '(foo)' is a sequence!
- end;
- procedure TXMLReader.ParseElementDecl; // [45]
- var
- ElDef: TDOMElementDef;
- CurrentEntity: TObject;
- I: Integer;
- CP: TContentParticle;
- Typ: TElementContentType;
- ExtDecl: Boolean;
- begin
- CP := nil;
- Typ := ctUndeclared; // satisfy compiler
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- if ElDef.ContentType <> ctUndeclared then
- ValidationError('Duplicate declaration of element ''%s''', [ElDef.TagName], FName.Length);
- ExtDecl := FSource.DTDSubsetType <> dsInternal;
- ExpectWhitespace;
- if FSource.Matches('EMPTY') then
- Typ := ctEmpty
- else if FSource.Matches('ANY') then
- Typ := ctAny
- else if CheckForChar('(') then
- begin
- CP := TContentParticle.Create;
- try
- CurrentEntity := FSource.FEntity;
- SkipWhitespace;
- if FSource.Matches('#PCDATA') then // Mixed section [51]
- begin
- SkipWhitespace;
- Typ := ctMixed;
- while FSource.FBuf^ <> ')' do
- begin
- ExpectChar('|');
- SkipWhitespace;
- with CP.Add do
- begin
- Def := FindOrCreateElDef;
- for I := CP.ChildCount-2 downto 0 do
- if Def = CP.Children[I].Def then
- ValidationError('Duplicate token in mixed section', [], FName.Length);
- end;
- SkipWhitespace;
- end;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- FSource.NextChar;
- if (not CheckForChar('*')) and (CP.ChildCount > 0) then
- FatalError(WideChar('*'));
- end
- else // Children section [47]
- begin
- Typ := ctChildren;
- ExpectChoiceOrSeq(CP);
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- FSource.NextChar;
- ParseQuantity(CP);
- end;
- except
- CP.Free;
- raise;
- end;
- end
- else
- FatalError('Invalid content specification');
- // SAX: DeclHandler.ElementDecl(name, model);
- if ElDef.ContentType = ctUndeclared then
- begin
- ElDef.FExternallyDeclared := ExtDecl;
- ElDef.ContentType := Typ;
- ElDef.RootCP := CP;
- end
- else
- CP.Free;
- end;
- procedure TXMLReader.ParseNotationDecl; // [82]
- var
- Name, SysID, PubID: WideString;
- begin
- ExpectWhitespace;
- Name := ExpectName;
- CheckNCName;
- ExpectWhitespace;
- if not ParseExternalID(SysID, PubID, True) then
- FatalError('Expected external or public ID');
- DoNotationDecl(Name, PubID, SysID);
- end;
- const
- AttrDataTypeNames: array[TAttrDataType] of WideString = (
- 'CDATA',
- 'ID',
- 'IDREF',
- 'IDREFS',
- 'ENTITY',
- 'ENTITIES',
- 'NMTOKEN',
- 'NMTOKENS',
- 'NOTATION'
- );
- procedure TXMLReader.ParseAttlistDecl; // [52]
- var
- ElDef: TDOMElementDef;
- AttDef: TDOMAttrDef;
- dt: TAttrDataType;
- Found, DiscardIt: Boolean;
- Offsets: array [Boolean] of Integer;
- begin
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- SkipWhitespace;
- while FSource.FBuf^ <> '>' do
- begin
- CheckName;
- ExpectWhitespace;
- AttDef := doc.CreateAttributeDef(FName.Buffer, FName.Length);
- try
- AttDef.ExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
- // In case of duplicate declaration of the same attribute, we must discard it,
- // not modifying ElDef, and suppressing certain validation errors.
- DiscardIt := Assigned(ElDef.GetAttributeNode(AttDef.Name));
- if not DiscardIt then
- ElDef.SetAttributeNode(AttDef);
- if CheckForChar('(') then // [59]
- begin
- AttDef.DataType := dtNmToken;
- repeat
- SkipWhitespace;
- CheckName([cnToken]);
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end
- else
- begin
- StoreLocation(FTokenStart);
- // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
- for dt := dtNotation downto dtCData do
- begin
- Found := FSource.Matches(AttrDataTypeNames[dt]);
- if Found then
- Break;
- end;
- if Found and SkipWhitespace then
- begin
- AttDef.DataType := dt;
- if (dt = dtId) and not DiscardIt then
- begin
- if Assigned(ElDef.IDAttr) then
- ValidationError('Only one attribute of type ID is allowed per element',[])
- else
- ElDef.IDAttr := AttDef;
- end
- else if dt = dtNotation then // no test cases for these ?!
- begin
- if not DiscardIt then
- begin
- if Assigned(ElDef.NotationAttr) then
- ValidationError('Only one attribute of type NOTATION is allowed per element',[])
- else
- ElDef.NotationAttr := AttDef;
- if ElDef.ContentType = ctEmpty then
- ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
- end;
- ExpectChar('(');
- repeat
- SkipWhitespace;
- StoreLocation(FTokenStart);
- CheckName;
- CheckNCName;
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
- if not DiscardIt then
- AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end;
- end
- else
- begin
- // don't report 'expected whitespace' if token does not match completely
- Offsets[False] := 0;
- Offsets[True] := Length(AttrDataTypeNames[dt]);
- if Found and (FSource.FBuf^ < 'A') then
- ExpectWhitespace
- else
- FatalError('Illegal attribute type for ''%s''', [AttDef.Name], Offsets[Found]);
- end;
- end;
- StoreLocation(FTokenStart);
- if FSource.Matches('#REQUIRED') then
- AttDef.Default := adRequired
- else if FSource.Matches('#IMPLIED') then
- AttDef.Default := adImplied
- else if FSource.Matches('#FIXED') then
- begin
- AttDef.Default := adFixed;
- ExpectWhitespace;
- end
- else
- AttDef.Default := adDefault;
- if AttDef.Default in [adDefault, adFixed] then
- begin
- if AttDef.DataType = dtId then
- ValidationError('An attribute of type ID cannot have a default value',[]);
- FCursor := AttDef;
- // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
- // ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
- // Saving/restoring FCursor is also redundant because it is always nil here.
- ExpectAttValue;
- FCursor := nil;
- if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
- ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
- end;
- // SAX: DeclHandler.AttributeDecl(...)
- if DiscardIt then
- AttDef.Free;
- except
- AttDef.Free;
- raise;
- end;
- SkipWhitespace;
- end;
- end;
- function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean; // [9]
- var
- CurrentEntity: TObject;
- wc: WideChar;
- begin
- CurrentEntity := FSource.FEntity;
- if FEntityValue.Buffer = nil then
- BufAllocate(FEntityValue, 256);
- FEntityValue.Length := 0;
- repeat
- wc := FSource.SkipUntil(FEntityValue, EntityValueDelims);
- if wc = '%' then
- begin
- FSource.NextChar;
- CheckName;
- ExpectChar(';');
- if FSource.DTDSubsetType = dsInternal then
- FatalError('PE reference not allowed here in internal subset', FName.Length+2);
- StartPE;
- end
- else if wc = '&' then
- begin
- // expand CharRefs, bypass (but check for well-formedness) EntityRefs
- if not ParseRef(FEntityValue) then
- begin
- BufAppend(FEntityValue, '&');
- BufAppendChunk(FEntityValue, FName.Buffer, FName.Buffer + FName.Length);
- BufAppend(FEntityValue, ';');
- end;
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- // terminating delimiter must be in the same context as the starting one
- if (wc = Delim) and (CurrentEntity = FSource.FEntity) then
- Break;
- BufAppend(FEntityValue, wc);
- end
- else if (FSource.FEntity = CurrentEntity) or not ContextPop then // #0
- Break;
- until False;
- Result := (wc <> #0);
- end;
- procedure TXMLReader.ParseEntityDecl; // [70]
- var
- NDataAllowed: Boolean;
- Delim: WideChar;
- Entity: TDOMEntityEx;
- Map: TDOMNamedNodeMap;
- begin
- if not SkipWhitespace(True) then
- FatalError('Expected whitespace');
- NDataAllowed := True;
- Map := FDocType.Entities;
- if CheckForChar('%') then // [72]
- begin
- ExpectWhitespace;
- NDataAllowed := False;
- if FPEMap = nil then
- FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
- Map := FPEMap;
- end;
- Entity := TDOMEntityEx.Create(Doc);
- Entity.SetReadOnly(True);
- try
- Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
- Entity.FName := ExpectName;
- CheckNCName;
- ExpectWhitespace;
- // remember where the entity is declared
- Entity.FURI := FSource.SystemID;
- if (FSource.FBuf^ = '"') or (FSource.FBuf^ = '''') then
- begin
- NDataAllowed := False;
- Delim := FSource.FBuf^;
- FSource.NextChar;
- StoreLocation(Entity.FStartLocation);
- if not ParseEntityDeclValue(Delim) then
- DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
- SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
- Entity.FCharCount := FEntityValue.Length;
- end
- else if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
- FatalError('Expected entity value or external ID');
- if NDataAllowed then // [76]
- begin
- if FSource.FBuf^ <> '>' then
- ExpectWhitespace;
- if FSource.Matches('NDATA') then
- begin
- ExpectWhitespace;
- StoreLocation(FTokenStart);
- Entity.FNotationName := ExpectName;
- AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
- // SAX: DTDHandler.UnparsedEntityDecl(...);
- end;
- end;
- except
- Entity.Free;
- raise;
- end;
- // Repeated declarations of same entity are legal but must be ignored
- if Map.GetNamedItem(Entity.NodeName) = nil then
- Map.SetNamedItem(Entity)
- else
- Entity.Free;
- end;
- procedure TXMLReader.ParseMarkupDecl; // [29]
- var
- IncludeLevel: Integer;
- IgnoreLevel: Integer;
- CurrentEntity: TObject;
- IncludeLoc: TLocation;
- IgnoreLoc: TLocation;
- wc: WideChar;
- CondType: (ctUnknown, ctInclude, ctIgnore);
- begin
- IncludeLevel := 0;
- IgnoreLevel := 0;
- repeat
- FRecognizePE := True; // PERef between declarations should always be recognized
- SkipWhitespace;
- FRecognizePE := False;
- if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
- begin
- ExpectString(']]>');
- Dec(IncludeLevel);
- Continue;
- end;
- if not CheckForChar('<') then
- Break;
- CurrentEntity := FSource.FEntity;
- if FSource.FBuf^ = '?' then
- ParsePI
- else
- begin
- ExpectChar('!');
- if FSource.FBuf^ = '-' then
- ParseComment
- else if CheckForChar('[') then
- begin
- if FSource.DTDSubsetType = dsInternal then
- FatalError('Conditional sections are not allowed in internal subset', 1);
- FRecognizePE := True;
- SkipWhitespace;
- CondType := ctUnknown; // satisfy compiler
- if FSource.Matches('INCLUDE') then
- CondType := ctInclude
- else if FSource.Matches('IGNORE') then
- CondType := ctIgnore
- else
- FatalError('Expected "INCLUDE" or "IGNORE"');
- SkipWhitespace;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- ExpectChar('[');
- if CondType = ctInclude then
- begin
- if IncludeLevel = 0 then
- StoreLocation(IncludeLoc);
- Inc(IncludeLevel);
- end
- else if CondType = ctIgnore then
- begin
- StoreLocation(IgnoreLoc);
- IgnoreLevel := 1;
- repeat
- FValue.Length := 0;
- wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
- if FSource.Matches('<![') then
- Inc(IgnoreLevel)
- else if FSource.Matches(']]>') then
- Dec(IgnoreLevel)
- else if wc <> #0 then
- FSource.NextChar
- else // PE's aren't recognized in ignore section, cannot ContextPop()
- DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
- until IgnoreLevel=0;
- end;
- end
- else
- begin
- FRecognizePE := FSource.DTDSubsetType <> dsInternal;
- FInsideDecl := True;
- if FSource.Matches('ELEMENT') then
- ParseElementDecl
- else if FSource.Matches('ENTITY') then
- ParseEntityDecl
- else if FSource.Matches('ATTLIST') then
- ParseAttlistDecl
- else if FSource.Matches('NOTATION') then
- ParseNotationDecl
- else
- FatalError('Illegal markup declaration');
- SkipWhitespace;
- FRecognizePE := False;
- if CurrentEntity <> FSource.FEntity then
- BadPENesting;
- ExpectChar('>');
- FInsideDecl := False;
- end;
- end;
- until False;
- FRecognizePE := False;
- if IncludeLevel > 0 then
- DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
- if (FSource.DTDSubsetType = dsInternal) and (FSource.FBuf^ = ']') then
- Exit;
- if FSource.FBuf^ <> #0 then
- FatalError('Illegal character in DTD');
- end;
- procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
- begin
- doc := TXMLDocument.Create;
- FDocType := TDOMDocumentTypeEx.Create(doc);
- // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
- // DONE: It's ok to have FCursor=nil now
- doc.AppendChild(FDocType);
- Initialize(ASource);
- ParseMarkupDecl;
- end;
- procedure TXMLReader.ParseCDSect; // [18]
- begin
- ExpectString('[CDATA[');
- if FState <> rsRoot then
- FatalError('Illegal at document level');
- if SkipUntilSeq(GT_Delim, ']', ']') then
- DoCDSect(FValue.Buffer, FValue.Length)
- else
- FatalError('Unterminated CDATA section', -1);
- end;
- // The code below does the bulk of the parsing, and must be as fast as possible.
- // To minimize CPU cache effects, methods from different classes are kept together
- function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- wc: WideChar;
- begin
- nonws := False;
- repeat
- old := FBuf;
- repeat
- wc := FBuf^;
- if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
- (wc = #$2028))) then
- begin
- // strictly this is needed only for 2-byte lineendings
- BufAppendChunk(ToFill, old, FBuf);
- NewLine;
- old := FBuf;
- wc := FBuf^
- end
- else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
- (wc <> #9)) or (wc > #$FFFD) or
- (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
- FReader.FatalError('Invalid character');
- if (wc < #255) and (Char(ord(wc)) in Delim) then
- Break;
- // the checks above filter away everything below #32 that isn't a whitespace
- if wc > #32 then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := wc;
- BufAppendChunk(ToFill, old, FBuf);
- until (Result <> #0) or (not Reload);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- procedure TXMLReader.ParseContent;
- var
- nonWs: Boolean;
- wc: WideChar;
- StartNesting: Integer;
- begin
- StartNesting := FNesting;
- with FSource do
- repeat
- if FBuf^ = '<' then
- begin
- Inc(FBuf);
- if FBufEnd < FBuf + 2 then
- Reload;
- if FBuf^ = '/' then
- begin
- if FNesting <= StartNesting then
- FatalError('End-tag is not allowed here');
- Inc(FBuf);
- ParseEndTag;
- end
- else if CheckName([cnOptional]) then
- ParseElement
- else if FBuf^ = '!' then
- begin
- Inc(FBuf);
- if FBuf^ = '[' then
- ParseCDSect
- else if FBuf^ = '-' then
- ParseComment
- else
- ParseDoctypeDecl;
- end
- else if FBuf^ = '?' then
- ParsePI
- else
- RaiseNameNotFound;
- end
- else
- begin
- FValue.Length := 0;
- nonWs := False;
- StoreLocation(FTokenStart);
- repeat
- wc := SkipUntil(FValue, [#0, '<', '&', '>'], @nonWs);
- if (wc = '<') or (wc = #0) then
- Break
- else if wc = '>' then
- begin
- with FValue do if (Length > 1) and (Buffer[Length-1] = ']') and
- (Buffer[Length-2] = ']') then
- FatalError('Literal '']]>'' is not allowed in text', 2);
- BufAppend(FValue, wc);
- NextChar;
- end
- else if wc = '&' then
- begin
- if FState <> rsRoot then
- FatalError('Illegal at document level');
- if FCurrContentType = ctEmpty then
- ValidationError('References are illegal in EMPTY elements', []);
- if ParseRef(FValue) or ResolvePredefined then
- nonWs := True // CharRef to whitespace is not considered whitespace
- else
- begin
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- // 'Reference illegal at root' is checked above, no need to check here
- DoText(FValue.Buffer, FValue.Length, not nonWs);
- FValue.Length := 0;
- end;
- IncludeEntity(False);
- end;
- end;
- until False;
- if FState = rsRoot then
- begin
- if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
- begin
- DoText(FValue.Buffer, FValue.Length, not nonWs);
- FValue.Length := 0;
- end;
- end
- else if nonWs then
- FatalError('Illegal at document level', -1);
- end;
- until FBuf^ = #0;
- if FNesting > StartNesting then
- FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
- end;
- procedure TXMLCharSource.NextChar;
- begin
- Inc(FBuf);
- if FBuf >= FBufEnd then
- Reload;
- end;
- procedure TXMLReader.ExpectChar(wc: WideChar);
- begin
- if FSource.FBuf^ = wc then
- FSource.NextChar
- else
- FatalError(wc);
- end;
- // Element name already in FNameBuffer
- procedure TXMLReader.ParseElement; // [39] [40] [44]
- var
- NewElem: TDOMElement;
- ElDef: TDOMElementDef;
- IsEmpty: Boolean;
- ElName: PHashItem;
- begin
- if FState > rsRoot then
- FatalError('Only one top-level element allowed', FName.Length)
- else if FState < rsRoot then
- begin
- if FValidate then
- ValidateRoot;
- FState := rsRoot;
- end;
- NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
- FCursor.AppendChild(NewElem);
- // we're about to process a new set of attributes
- Inc(FAttrTag);
- // Remember the hash entry, we'll need it often
- ElName := NewElem.NSI.QName;
- // Find declaration for this element
- ElDef := TDOMElementDef(ElName^.Data);
- if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
- ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length);
- // Check if new element is allowed in current context
- if FValidate and not FValidator[FNesting].IsElementAllowed(ElDef) then
- ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length);
- IsEmpty := False;
- while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
- begin
- SkipS(True);
- if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
- Break;
- ParseAttribute(NewElem, ElDef);
- end;
- if FSource.FBuf^ = '/' then
- begin
- IsEmpty := True;
- FSource.NextChar;
- end;
- ExpectChar('>');
- if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
- ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
- PushVC(NewElem, ElDef); // this increases FNesting
- if FNamespaces then
- ProcessNamespaceAtts(NewElem);
- if not IsEmpty then
- begin
- FCursor := NewElem;
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipS;
- end
- else
- DoEndElement(0);
- end;
- procedure TXMLReader.DoEndElement(ErrOffset: Integer);
- var
- NewElem: TDOMElement;
- begin
- NewElem := FValidator[FNesting].FElement;
- TDOMNode(FCursor) := NewElem.ParentNode;
- if FCursor = doc then
- FState := rsEpilog;
- if FValidate and FValidator[FNesting].Incomplete then
- ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
- if FNamespaces then
- FNSHelper.EndElement;
- PopVC;
- end;
- procedure TXMLReader.ParseEndTag; // [42]
- var
- ErrOffset: Integer;
- ElName: PHashItem;
- begin
- ElName := FValidator[FNesting].FElement.NSI.QName;
- CheckName;
- if not BufEquals(FName, ElName^.Key) then
- FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
- if FSource.FBuf^ = '>' then // this handles majority of cases
- begin
- ErrOffset := FName.Length+1;
- FSource.NextChar;
- end
- else // but if closing '>' is preceded by whitespace,
- begin // skipping it is likely to lose position info.
- StoreLocation(FTokenStart);
- Dec(FTokenStart.LinePos, FName.Length);
- ErrOffset := -1;
- SkipS;
- ExpectChar('>');
- end;
- DoEndElement(ErrOffset);
- end;
- procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
- var
- attr: TDOMAttr;
- AttDef: TDOMAttrDef;
- OldAttr: TDOMNode;
- procedure CheckValue;
- var
- AttValue, OldValue: WideString;
- begin
- if FStandalone and AttDef.ExternallyDeclared then
- begin
- OldValue := Attr.Value;
- Attr.DataType := AttDef.DataType;
- AttValue := Attr.Value;
- if AttValue <> OldValue then
- StandaloneError(-1);
- end
- else
- begin
- Attr.DataType := AttDef.DataType;
- AttValue := Attr.Value;
- end;
- // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
- if (AttDef.Default = adFixed) and (AttDef.Value <> AttValue) then
- ValidationError('Value of attribute ''%s'' does not match its #FIXED default',[AttDef.Name], -1);
- if not ValidateAttrSyntax(AttDef, AttValue) then
- ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name], -1);
- ValidateAttrValue(Attr, AttValue);
- end;
- begin
- CheckName;
- attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
- if Assigned(ElDef) then
- begin
- AttDef := TDOMAttrDef(ElDef.GetAttributeNode(attr.NSI.QName^.Key));
- if AttDef = nil then
- ValidationError('Using undeclared attribute ''%s'' on element ''%s''',[attr.NSI.QName^.Key, Elem.NSI.QName^.Key], FName.Length)
- else
- AttDef.Tag := FAttrTag; // indicates that this one is specified
- end
- else
- AttDef := nil;
- // !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
- OldAttr := Elem.Attributes.SetNamedItem(Attr);
- if Assigned(OldAttr) then
- begin
- OldAttr.Free;
- FatalError('Duplicate attribute', FName.Length);
- end;
- ExpectEq;
- FCursor := attr;
- ExpectAttValue;
- if Assigned(AttDef) and ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
- CheckValue;
- end;
- procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
- var
- w: PForwardRef;
- begin
- New(w);
- SetString(w^.Value, Buf, Length);
- w^.Loc := FTokenStart;
- aList.Add(w);
- end;
- procedure TXMLReader.ClearRefs(aList: TFPList);
- var
- I: Integer;
- begin
- for I := 0 to aList.Count-1 do
- Dispose(PForwardRef(aList.List^[I]));
- aList.Clear;
- end;
- procedure TXMLReader.ValidateIdRefs;
- var
- I: Integer;
- begin
- for I := 0 to FIDRefs.Count-1 do
- with PForwardRef(FIDRefs.List^[I])^ do
- if Doc.GetElementById(Value) = nil then
- DoErrorPos(esError, Format('The ID ''%s'' does not match any element', [Value]), Loc);
- ClearRefs(FIDRefs);
- end;
- procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
- var
- I: Integer;
- AttDef: TDOMAttrDef;
- Attr: TDOMAttr;
- begin
- for I := 0 to Map.Length-1 do
- begin
- AttDef := Map[I] as TDOMAttrDef;
- if AttDef.Tag <> FAttrTag then // this one wasn't specified
- begin
- case AttDef.Default of
- adDefault, adFixed: begin
- if FStandalone and AttDef.ExternallyDeclared then
- StandaloneError;
- Attr := TDOMAttr(AttDef.CloneNode(True));
- Element.SetAttributeNode(Attr);
- ValidateAttrValue(Attr, Attr.Value);
- end;
- adRequired: ValidationError('Required attribute ''%s'' of element ''%s'' is missing',[AttDef.Name, Element.TagName], 0)
- end;
- end;
- end;
- end;
- procedure TXMLReader.AddBinding(Attr: TDOMAttr; PrefixPtr: PWideChar; PrefixLen: Integer);
- var
- nsUri: DOMString;
- Prefix: PHashItem;
- begin
- nsUri := Attr.NodeValue;
- Prefix := FNSHelper.GetPrefix(PrefixPtr, PrefixLen);
- { 'xml' is allowed to be bound to the correct namespace }
- if ((nsUri = stduri_xml) <> (Prefix = FStdPrefix_xml)) or
- (Prefix = FStdPrefix_xmlns) or
- (nsUri = stduri_xmlns) then
- begin
- if (Prefix = FStdPrefix_xml) or (Prefix = FStdPrefix_xmlns) then
- FatalError('Illegal usage of reserved prefix ''%s''', [Prefix^.Key])
- else
- FatalError('Illegal usage of reserved namespace URI ''%s''', [nsUri]);
- end;
- if (nsUri = '') and not (FXML11 or (Prefix^.Key = '')) then
- FatalError('Illegal undefining of namespace'); { position - ? }
- FNSHelper.BindPrefix(nsURI, Prefix);
- end;
- procedure TXMLReader.ProcessNamespaceAtts(Element: TDOMElement);
- var
- I, J: Integer;
- Map: TDOMNamedNodeMap;
- Prefix, AttrName: PHashItem;
- Attr: TDOMAttr;
- PrefixCount: Integer;
- b: TBinding;
- begin
- FNSHelper.StartElement;
- PrefixCount := 0;
- if Element.HasAttributes then
- begin
- Map := Element.Attributes;
- if Map.Length > LongWord(Length(FWorkAtts)) then
- SetLength(FWorkAtts, Map.Length+10);
- { Pass 1, identify prefixed attrs and assign prefixes }
- for I := 0 to Map.Length-1 do
- begin
- Attr := TDOMAttr(Map[I]);
- AttrName := Attr.NSI.QName;
- if Pos(WideString('xmlns'), AttrName^.Key) = 1 then
- begin
- { this is a namespace declaration }
- if Length(AttrName^.Key) = 5 then
- begin
- // TODO: check all consequences of having zero PrefixLength
- Attr.SetNSI(stduri_xmlns, 0);
- AddBinding(Attr, nil, 0);
- end
- else if AttrName^.Key[6] = ':' then
- begin
- Attr.SetNSI(stduri_xmlns, 6);
- AddBinding(Attr, @AttrName^.Key[7], Length(AttrName^.Key)-6);
- end;
- end
- else
- begin
- J := Pos(WideChar(':'), AttrName^.Key);
- if J > 1 then
- begin
- FWorkAtts[PrefixCount].Attr := Attr;
- FWorkAtts[PrefixCount].PrefixLen := J;
- Inc(PrefixCount);
- end;
- end;
- end;
- end;
- { Pass 2, now all bindings are known, handle remaining prefixed attributes }
- if PrefixCount > 0 then
- begin
- FNsAttHash.Init(PrefixCount);
- for I := 0 to PrefixCount-1 do
- begin
- AttrName := FWorkAtts[I].Attr.NSI.QName;
- if not FNSHelper.IsPrefixBound(PWideChar(AttrName^.Key), FWorkAtts[I].PrefixLen-1, Prefix) then
- FatalError('Unbound prefix "%s"', [Prefix^.Key]);
- b := TBinding(Prefix^.Data);
- { detect duplicates }
- J := FWorkAtts[I].PrefixLen+1;
- if FNsAttHash.Locate(@b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J) then
- FatalError('Duplicate prefixed attribute');
- // convert Attr into namespaced one (by hack for the time being)
- FWorkAtts[I].Attr.SetNSI(b.uri, J-1);
- end;
- end;
- { Finally, expand the element name }
- J := Pos(WideChar(':'), Element.NSI.QName^.Key);
- if J > 1 then
- begin
- if not FNSHelper.IsPrefixBound(PWideChar(Element.NSI.QName^.Key), J-1, Prefix) then
- FatalError('Unbound prefix "%s"', [Prefix^.Key]);
- b := TBinding(Prefix^.Data);
- Element.SetNSI(b.uri, J);
- end
- else
- begin
- b := FNSHelper.DefaultNSBinding;
- if Assigned(b) then
- Element.SetNSI(b.uri, 0);
- end;
- end;
- function TXMLReader.ParseExternalID(out SysID, PubID: WideString; // [75]
- SysIdOptional: Boolean): Boolean;
- var
- I: Integer;
- wc: WideChar;
- begin
- if FSource.Matches('SYSTEM') then
- begin
- ExpectWhitespace;
- SkipQuotedLiteral(SysID);
- Result := True;
- end
- else if FSource.Matches('PUBLIC') then
- begin
- ExpectWhitespace;
- SkipQuotedLiteral(PubID);
- for I := 1 to Length(PubID) do
- begin
- wc := PubID[I];
- if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
- FatalError('Illegal Public ID literal', -1);
- if (wc = #10) or (wc = #13) then
- PubID[I] := #32;
- end;
- NormalizeSpaces(PubID);
- if SysIdOptional then
- SkipWhitespace
- else
- ExpectWhitespace;
- SkipQuotedLiteral(SysID, not SysIdOptional);
- Result := True;
- end else
- Result := False;
- end;
- function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
- begin
- case AttrDef.DataType of
- dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11) and
- ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
- dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11) and
- ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
- dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
- dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
- // IsXmlName() not necessary - enum is never empty and contains valid names
- dtNotation: Result := AttrDef.HasEnumToken(aValue);
- else
- Result := True;
- end;
- end;
- procedure TXMLReader.ValidateAttrValue(Attr: TDOMAttr; const aValue: WideString);
- var
- L, StartPos, EndPos: Integer;
- Entity: TDOMEntity;
- begin
- L := Length(aValue);
- case Attr.DataType of
- dtId: if not Doc.AddID(Attr) then
- ValidationError('The ID ''%s'' is not unique', [aValue], -1);
- dtIdRef, dtIdRefs: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (aValue[EndPos] <> #32) do
- Inc(EndPos);
- AddForwardRef(FIDRefs, @aValue[StartPos], EndPos-StartPos);
- StartPos := EndPos + 1;
- end;
- end;
- dtEntity, dtEntities: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (aValue[EndPos] <> #32) do
- Inc(EndPos);
- Entity := TDOMEntity(FDocType.Entities.GetNamedItem(Copy(aValue, StartPos, EndPos-StartPos)));
- if (Entity = nil) or (Entity.NotationName = '') then
- ValidationError('Attribute ''%s'' type mismatch', [Attr.Name], -1);
- StartPos := EndPos + 1;
- end;
- end;
- end;
- end;
- procedure TXMLReader.ValidateRoot;
- begin
- if Assigned(FDocType) then
- begin
- if not BufEquals(FName, FDocType.Name) then
- ValidationError('Root element name does not match DTD', [], FName.Length);
- end
- else
- ValidationError('Missing DTD', [], FName.Length);
- end;
- procedure TXMLReader.ValidateDTD;
- var
- I: Integer;
- begin
- if FValidate then
- for I := 0 to FNotationRefs.Count-1 do
- with PForwardRef(FNotationRefs[I])^ do
- if FDocType.Notations.GetNamedItem(Value) = nil then
- DoErrorPos(esError, Format('Notation ''%s'' is not declared', [Value]), Loc);
- ClearRefs(FNotationRefs);
- end;
- procedure TXMLReader.DoText(ch: PWideChar; Count: Integer; Whitespace: Boolean);
- var
- TextNode: TDOMText;
- begin
- // Validating filter part
- case FCurrContentType of
- ctChildren:
- if not Whitespace then
- ValidationError('Character data is not allowed in element-only content',[])
- else
- if FSaViolation then
- StandaloneError(-1);
- ctEmpty:
- ValidationError('Character data is not allowed in EMPTY elements', []);
- end;
- // Document builder part
- TextNode := Doc.CreateTextNodeBuf(ch, Count, Whitespace and (FCurrContentType = ctChildren));
- FCursor.AppendChild(TextNode);
- end;
- procedure TXMLReader.DoAttrText(ch: PWideChar; Count: Integer);
- begin
- FCursor.AppendChild(Doc.CreateTextNodeBuf(ch, Count, False));
- end;
- procedure TXMLReader.DoComment(ch: PWideChar; Count: Integer);
- var
- Node: TDOMComment;
- begin
- // validation filter part
- if FCurrContentType = ctEmpty then
- ValidationError('Comments are not allowed within EMPTY elements', []);
- // DOM builder part
- if (not FIgnoreComments) and Assigned(FCursor) then
- begin
- Node := Doc.CreateCommentBuf(ch, Count);
- FCursor.AppendChild(Node);
- end;
- end;
- procedure TXMLReader.DoCDSect(ch: PWideChar; Count: Integer);
- var
- s: WideString;
- begin
- if FCurrContentType = ctChildren then
- ValidationError('CDATA sections are not allowed in element-only content',[]);
- if not FCDSectionsAsText then
- begin
- SetString(s, ch, Count);
- // SAX: LexicalHandler.StartCDATA;
- // SAX: ContentHandler.Characters(...);
- FCursor.AppendChild(doc.CreateCDATASection(s));
- // SAX: LexicalHandler.EndCDATA;
- end
- else
- FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count, False));
- end;
- procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString);
- var
- Notation: TDOMNotationEx;
- begin
- if FDocType.Notations.GetNamedItem(aName) = nil then
- begin
- Notation := TDOMNotationEx(TDOMNotation.Create(doc));
- Notation.FName := aName;
- Notation.FPublicID := aPubID;
- Notation.FSystemID := aSysID;
- FDocType.Notations.SetNamedItem(Notation);
- end
- else
- ValidationError('Duplicate notation declaration: ''%s''', [aName]);
- end;
- procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
- begin
- Inc(FNesting);
- if FNesting >= Length(FValidator) then
- SetLength(FValidator, FNesting * 2);
- FValidator[FNesting].FElement := aElement;
- FValidator[FNesting].FElementDef := aElDef;
- FValidator[FNesting].FCurCP := nil;
- FValidator[FNesting].FFailed := False;
- UpdateConstraints;
- end;
- procedure TXMLReader.PopVC;
- begin
- if FNesting > 0 then Dec(FNesting);
- UpdateConstraints;
- end;
- procedure TXMLReader.UpdateConstraints;
- begin
- if FValidate and Assigned(FValidator[FNesting].FElementDef) then
- begin
- FCurrContentType := FValidator[FNesting].FElementDef.ContentType;
- FSaViolation := FStandalone and (FValidator[FNesting].FElementDef.FExternallyDeclared);
- end
- else
- begin
- FCurrContentType := ctAny;
- FSaViolation := False;
- end;
- end;
- { TElementValidator }
- function TElementValidator.IsElementAllowed(Def: TDOMElementDef): Boolean;
- var
- I: Integer;
- Next: TContentParticle;
- begin
- Result := True;
- // if element is not declared, non-validity has been already reported, no need to report again...
- if Assigned(Def) and Assigned(FElementDef) then
- begin
- case FElementDef.ContentType of
- ctMixed: begin
- for I := 0 to FElementDef.RootCP.ChildCount-1 do
- begin
- if Def = FElementDef.RootCP.Children[I].Def then
- Exit;
- end;
- Result := False;
- end;
- ctEmpty: Result := False;
- ctChildren: begin
- if FCurCP = nil then
- Next := FElementDef.RootCP.FindFirst(Def)
- else
- Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
- Result := Assigned(Next);
- if Result then
- FCurCP := Next
- else
- FFailed := True; // used to prevent extra error at the end of element
- end;
- // ctAny, ctUndeclared: returns True by default
- end;
- end;
- end;
- function TElementValidator.Incomplete: Boolean;
- begin
- if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
- begin
- if FCurCP <> nil then
- Result := FCurCP.MoreRequired(0) { arg ignored here }
- else
- Result := FElementDef.RootCP.IsRequired;
- end
- else
- Result := False;
- end;
- { TContentParticle }
- function TContentParticle.Add: TContentParticle;
- begin
- if FChildren = nil then
- FChildren := TFPList.Create;
- Result := TContentParticle.Create;
- Result.FParent := Self;
- Result.FIndex := FChildren.Add(Result);
- end;
- destructor TContentParticle.Destroy;
- var
- I: Integer;
- begin
- if Assigned(FChildren) then
- for I := FChildren.Count-1 downto 0 do
- TObject(FChildren[I]).Free;
- FChildren.Free;
- inherited Destroy;
- end;
- function TContentParticle.GetChild(Index: Integer): TContentParticle;
- begin
- Result := TContentParticle(FChildren[Index]);
- end;
- function TContentParticle.GetChildCount: Integer;
- begin
- if Assigned(FChildren) then
- Result := FChildren.Count
- else
- Result := 0;
- end;
- function TContentParticle.IsRequired: Boolean;
- var
- I: Integer;
- begin
- Result := (CPQuant = cqOnce) or (CPQuant = cqOnceOrMore);
- // do not return True if all children are optional
- if (CPType <> ctName) and Result then
- begin
- for I := 0 to ChildCount-1 do
- begin
- Result := Children[I].IsRequired;
- if Result then Exit;
- end;
- end;
- end;
- function TContentParticle.MoreRequired(ChildIdx: Integer): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- if CPType = ctSeq then
- begin
- for I := ChildIdx + 1 to ChildCount-1 do
- begin
- Result := Children[I].IsRequired;
- if Result then Exit;
- end;
- end;
- if Assigned(FParent) then
- Result := FParent.MoreRequired(FIndex);
- end;
- function TContentParticle.FindFirst(aDef: TDOMElementDef): TContentParticle;
- var
- I: Integer;
- begin
- Result := nil;
- case CPType of
- ctSeq:
- for I := 0 to ChildCount-1 do with Children[I] do
- begin
- Result := FindFirst(aDef);
- if Assigned(Result) or IsRequired then
- Exit;
- end;
- ctChoice:
- for I := 0 to ChildCount-1 do with Children[I] do
- begin
- Result := FindFirst(aDef);
- if Assigned(Result) then
- Exit;
- end;
- else // ctName
- if aDef = Self.Def then
- Result := Self
- end;
- end;
- function TContentParticle.FindNext(aDef: TDOMElementDef;
- ChildIdx: Integer): TContentParticle;
- var
- I: Integer;
- begin
- Result := nil;
- if CPType = ctSeq then // search sequence to its end
- begin
- for I := ChildIdx + 1 to ChildCount-1 do with Children[I] do
- begin
- Result := FindFirst(aDef);
- if (Result <> nil) or IsRequired then
- Exit;
- end;
- end;
- if (CPQuant = cqZeroOrMore) or (CPQuant = cqOnceOrMore) then
- Result := FindFirst(aDef);
- if (Result = nil) and Assigned(FParent) then
- Result := FParent.FindNext(aDef, FIndex);
- end;
- { TDOMElementDef }
- destructor TDOMElementDef.Destroy;
- begin
- RootCP.Free;
- inherited Destroy;
- end;
- { plain calls }
- procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Src := TXMLFileInputSource.Create(f);
- Reader := TXMLReader.Create;
- try
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.Doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessXML(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream);
- begin
- ReadXMLFile(ADoc, f, 'stream:');
- end;
- procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- FileStream: TStream;
- begin
- ADoc := nil;
- FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
- finally
- FileStream.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessFragment(Src, AParentNode);
- finally
- Reader.Free;
- end;
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
- begin
- ReadXMLFragment(AParentNode, f, 'stream:');
- end;
- procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
- var
- Stream: TStream;
- begin
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLFileInputSource.Create(f);
- Reader.ProcessDTD(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
- var
- Reader: TXMLReader;
- Src: TXMLCharSource;
- begin
- ADoc := nil;
- Reader := TXMLReader.Create;
- try
- Src := TXMLStreamInputSource.Create(f, False);
- Src.SystemID := ABaseURI;
- Reader.ProcessDTD(Src);
- finally
- ADoc := TXMLDocument(Reader.doc);
- Reader.Free;
- end;
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
- begin
- ReadDTDFile(ADoc, f, 'stream:');
- end;
- procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
- var
- Stream: TStream;
- begin
- ADoc := nil;
- Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
- try
- ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
- finally
- Stream.Free;
- end;
- end;
- end.
|