1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012 |
- {
- This file is part of the Free Component Library
- TXMLTextReader, a streaming text XML reader
- Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
- Modified in 2006 by Sergei Gorelkin, [email protected]
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$IFNDEF FPC_DOTTEDUNITS}
- unit xmltextreader;
- {$ENDIF FPC_DOTTEDUNITS}
- {$mode objfpc}{$h+}
- interface
- {$IFDEF FPC_DOTTEDUNITS}
- uses
- System.SysUtils, System.Classes, Xml.Utils, Xml.Reader, Xml.DtdModel;
- {$ELSE FPC_DOTTEDUNITS}
- uses
- SysUtils, Classes, xmlutils, xmlreader, dtdmodel;
- {$ENDIF FPC_DOTTEDUNITS}
- type
- TDecoder = record
- Context: Pointer;
- Decode: function(Context: Pointer; InBuf: PAnsiChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
- Cleanup: procedure(Context: Pointer); stdcall;
- end;
- TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
- TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
- TXMLTextReader = class;
- TXMLCharSource = class(TObject)
- private
- FBuf: PWideChar;
- FBufEnd: PWideChar;
- FReader: TXMLTextReader;
- FParent: TXMLCharSource;
- FEntity: TEntityDecl;
- FLineNo: Integer;
- LFPos: PWideChar;
- FXML11Rules: Boolean;
- FSourceURI: XMLString;
- FCharCount: Cardinal;
- FStartNesting: Integer;
- FXMLVersion: TXMLVersion;
- FXMLEncoding: XMLString;
- function GetSourceURI: XMLString;
- protected
- function Reload: Boolean; virtual;
- public
- Kind: TXMLSourceKind;
- constructor Create(const AData: XMLString);
- procedure NextChar;
- procedure NewLine; virtual;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; virtual;
- procedure Initialize; virtual;
- function SetEncoding(const AEncoding: string): Boolean; virtual;
- function Matches(const arg: XMLString): Boolean;
- function MatchesLong(const arg: XMLString): Boolean;
- property SourceURI: XMLString read GetSourceURI write FSourceURI;
- end;
- TElementValidator = object
- FElementDef: TElementDecl;
- FCurCP: TContentParticle;
- FFailed: Boolean;
- FSaViolation: Boolean;
- FContentType: TElementContentType; // =ctAny when FElementDef is nil
- function IsElementAllowed(Def: TElementDecl): Boolean;
- function Incomplete: Boolean;
- end;
- TNodeDataDynArray = array of TNodeData;
- TValidatorDynArray = array of TElementValidator;
- TCheckNameFlags = set of (cnOptional, cnToken);
- TXMLToken = (xtNone, xtEOF, xtText, xtElement, xtEndElement,
- xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
- xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity, xtFakeLF);
- TAttributeReadState = (arsNone, arsText, arsEntity, arsEntityEnd, arsPushEntity);
- TLiteralType = (ltPlain, ltPubid, ltEntity);
- TEntityEvent = procedure(Sender: TXMLTextReader; AEntity: TEntityDecl) of object;
- TXMLTextReader = class(TXMLReader, IXmlLineInfo, IGetNodeDataPtr)
- private
- FSource: TXMLCharSource;
- FNameTable: THashTable;
- FXML11: Boolean;
- FNameTableOwned: Boolean;
- FState: (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
- FHavePERefs: Boolean;
- FInsideDecl: Boolean;
- FValue: TWideCharBuf;
- FEntityValue: TWideCharBuf;
- FName: TWideCharBuf;
- FTokenStart: TLocation;
- FStandalone: Boolean;
- FDocType: TDTDModel;
- FPEMap: THashTable;
- FForwardRefs: TFPList;
- FDTDStartPos: PWideChar;
- FIntSubset: TWideCharBuf;
- FAttrTag: Cardinal;
- FDTDProcessed: Boolean;
- FFragmentMode: Boolean;
- FNext: TXMLToken;
- FCurrEntity: TEntityDecl;
- FIDMap: THashTable;
- FAttrDefIndex: array of Cardinal;
- FNSHelper: TNSSupport;
- FNsAttHash: TDblHashArray;
- FEmptyStr: PHashItem;
- FStdPrefix_xml: PHashItem;
- FStdPrefix_xmlns: PHashItem;
- FStdUri_xml: PHashItem;
- FStdUri_xmlns: PHashItem;
- FColonPos: Integer;
- FValidate: Boolean; // parsing options, copy of FCtrl.Options
- FPreserveWhitespace: Boolean;
- FExpandEntities: Boolean;
- FIgnoreComments: Boolean;
- FCDSectionsAsText: Boolean;
- FNamespaces: Boolean;
- FDisallowDoctype: Boolean;
- FCanonical: Boolean;
- FMaxChars: Cardinal;
- FOnError: TXMLErrorEvent;
- FCurrAttrIndex: Integer;
- FOnEntity: TEntityEvent;
- procedure CleanAttrReadState;
- procedure SetEOFState;
- procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
- procedure SetSource(ASource: TXMLCharSource);
- function ContextPush(AEntity: TEntityDecl; DummySource: Boolean = False): Boolean;
- function ContextPop(Forced: Boolean = False): Boolean;
- function ParseQuantity: TCPQuant;
- procedure StoreLocation(out Loc: TLocation);
- procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
- procedure AddForwardRef(Buf: PWideChar; Length: Integer);
- procedure ClearForwardRefs;
- procedure CallErrorHandler(E: EXMLReadError);
- function FindOrCreateElDef: TElementDecl;
- function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
- procedure CheckMaxChars(ToAdd: Cardinal);
- function AllocNodeData(AIndex: Integer): PNodeData;
- function AllocAttributeData: PNodeData;
- procedure AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
- procedure AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
- const nameLoc, valueLoc: TLocation);
- procedure CleanupAttribute(aNode: PNodeData);
- procedure CleanupAttributes;
- procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
- function SetupFakeLF(nextstate: TXMLToken): Boolean;
- function AddId(aNodeData: PNodeData): Boolean;
- function QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- function _AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- function _Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- procedure SetFragmentMode(aValue: Boolean);
- protected
- FNesting: Integer;
- FCurrNode: PNodeData;
- FAttrCount: Integer;
- FPrefixedAttrs: Integer;
- FSpecifiedAttrs: Integer;
- FNodeStack: TNodeDataDynArray;
- FValidatorNesting: Integer;
- FValidators: TValidatorDynArray;
- FFreeAttrChunk: PNodeData;
- FAttrCleanupFlag: Boolean;
- // ReadAttributeValue state
- FAttrReadState: TAttributeReadState;
- FAttrBaseSource: TObject;
- procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
- procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const ErrPos: TLocation); overload;
- procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const args: array of const; const ErrPos: TLocation); overload;
- procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
- procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
- procedure FatalError(Expected: WideChar); overload;
- function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
- function SkipS(required: Boolean = False): Boolean;
- procedure ExpectWhitespace;
- procedure ExpectString(const s: String);
- procedure ExpectChar(wc: WideChar);
- function CheckForChar(c: WideChar): Boolean;
- procedure RaiseNameNotFound;
- function CheckName(aFlags: TCheckNameFlags = []): Boolean;
- procedure CheckNCName;
- function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
- Required: Boolean): Boolean;
- procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
- procedure ParseComment(discard: Boolean); // [15]
- procedure ParsePI; // [16]
- procedure ParseXmlOrTextDecl(TextDecl: Boolean);
- procedure ExpectEq;
- procedure ParseDoctypeDecl; // [28]
- procedure ParseMarkupDecl; // [29]
- procedure ParseIgnoreSection;
- procedure ParseStartTag; // [39]
- procedure ParseEndTag; // [42]
- procedure HandleEntityStart;
- procedure HandleEntityEnd;
- procedure DoStartEntity;
- procedure ParseAttribute(ElDef: TElementDecl);
- function ReadTopLevel: Boolean;
- procedure NextAttrValueChunk;
- function GetHasLineInfo: Boolean;
- function GetLineNumber: Integer;
- function GetLinePosition: Integer;
- function CurrentNodePtr: PPNodeData;
- public
- function Read: Boolean; override;
- function MoveToFirstAttribute: Boolean; override;
- function MoveToNextAttribute: Boolean; override;
- function MoveToElement: Boolean; override;
- function ReadAttributeValue: Boolean; override;
- procedure Close; override;
- procedure ResolveEntity; override;
- function GetAttribute(i: Integer): XMLString; override;
- function GetAttribute(const AName: XMLString): XMLString; override;
- function GetAttribute(const ALocalName, nsuri: XMLString): XMLString; override;
- function LookupNamespace(const APrefix: XMLString): XMLString; override;
- property LineNumber: Integer read GetLineNumber;
- property LinePosition: Integer read GetLinePosition;
- protected
- function GetXmlVersion: TXMLVersion;
- function GetXmlEncoding: XMLString;
- function GetNameTable: THashTable; override;
- function GetDepth: Integer; override;
- function GetNodeType: TXmlNodeType; override;
- function GetName: XMLString; override;
- function GetValue: XMLString; override;
- function GetLocalName: XMLString; override;
- function GetPrefix: XMLString; override;
- function GetNamespaceUri: XMLString; override;
- function GetHasValue: Boolean; override;
- function GetAttributeCount: Integer; override;
- function GetBaseUri: XMLString; override;
- function GetIsDefault: Boolean; override;
- function ResolvePredefined: Boolean;
- function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
- function PrefetchEntity(AEntity: TEntityDecl): Boolean;
- procedure StartPE;
- function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- function ParseExternalID(out SysID, PubID: XMLString; // [75]
- out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
- procedure CheckPENesting(aExpected: TObject);
- procedure ParseEntityDecl;
- procedure ParseAttlistDecl;
- procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
- procedure ParseElementDecl;
- procedure ParseNotationDecl;
- function ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
- procedure ProcessDefaultAttributes(ElDef: TElementDecl);
- procedure ProcessNamespaceAtts;
- function AddBinding(attrData: PNodeData): Boolean;
- procedure PushVC(aElDef: TElementDecl);
- procedure PopElement;
- procedure ValidateDTD;
- procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
- procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
- procedure DTDReloadHook;
- procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- procedure SetOptions(AValue: TXMLReaderSettings);
- procedure SetNametable(ANameTable: THashTable);
- public
- constructor Create(var AFile: Text; ANameTable: THashTable); overload;
- constructor Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable); overload;
- constructor Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
- constructor Create(ASrc: TXMLCharSource; AParent: TXMLTextReader); overload;
- constructor Create(const uri: XMLString; ASettings: TXMLReaderSettings); overload;
- constructor Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings); overload;
- destructor Destroy; override;
- procedure AfterConstruction; override;
- property OnEntity: TEntityEvent read FOnEntity write FOnEntity;
- { stuff needed for TLoader }
- property Standalone: Boolean read FStandalone;
- property DtdSchemaInfo: TDTDModel read FDocType write FDocType;
- property XML11: Boolean write FXML11;
- property XMLVersion: TXMLVersion read GetXMLVersion;
- property XMLEncoding: XMLString read GetXMLEncoding;
- property IDMap: THashTable read FIDMap write FIDMap;
- property ExpandEntities: Boolean read FExpandEntities;
- property Validate: Boolean read FValidate;
- property PreserveWhitespace: Boolean read FPreserveWhitespace;
- property IgnoreComments: Boolean read FIgnoreComments;
- property FragmentMode: Boolean read FFragmentMode write SetFragmentMode;
- procedure ValidateCurrentNode;
- procedure ValidateIdRefs;
- procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
- procedure ParseDTD;
- end;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- implementation
- {$IFDEF FPC_DOTTEDUNITS}
- uses
- Fcl.UriParser;
- {$ELSE FPC_DOTTEDUNITS}
- uses
- UriParser;
- {$ENDIF FPC_DOTTEDUNITS}
- type
- TXMLDecodingSource = class(TXMLCharSource)
- private
- FCharBuf: PAnsiChar;
- FCharBufEnd: PAnsiChar;
- FBufStart: PWideChar;
- FDecoder: TDecoder;
- FHasBOM: Boolean;
- FFixedUCS2: string;
- FBufSize: Integer;
- FAssumeUTF16: Boolean;
- procedure DecodingError(const Msg: string);
- protected
- function Reload: Boolean; override;
- procedure FetchData; virtual;
- public
- procedure AfterConstruction; override;
- destructor Destroy; override;
- function SetEncoding(const AEncoding: string): Boolean; override;
- procedure NewLine; override;
- function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean = nil): WideChar; override;
- procedure Initialize; override;
- end;
- { TXMLStreamInputSource }
- TXMLStreamInputSource = class(TXMLDecodingSource)
- private
- FAllocated: PAnsiChar;
- FStream: TStream;
- FCapacity: Integer;
- FOwnStream: Boolean;
- FEof: Boolean;
- public
- constructor Create(AStream: TStream; AOwnStream: Boolean; aAssumeUTF16 : Boolean = False);
- destructor Destroy; override;
- procedure FetchData; override;
- Property AssumeUTF16 : Boolean Read FAssumeUTF16;
- end;
- TXMLFileInputSource = class(TXMLDecodingSource)
- private
- FFile: ^Text;
- FString: string;
- FTmp: string;
- public
- constructor Create(var AFile: Text);
- procedure FetchData; override;
- end;
- PForwardRef = ^TForwardRef;
- TForwardRef = record
- Value: XMLString;
- Loc: TLocation;
- end;
- const
- PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
- '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
- '#', '@', '$', '_', '%'];
- NullLocation: TLocation = (Line: 0; LinePos: 0);
- { Decoders }
- var
- Decoders: array of TGetDecoderProc;
- procedure RegisterDecoder(Proc: TGetDecoderProc);
- var
- L: Integer;
- begin
- L := Length(Decoders);
- SetLength(Decoders, L+1);
- Decoders[L] := Proc;
- end;
- function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
- var
- I: Integer;
- begin
- Result := False;
- for I := 0 to High(Decoders) do
- if Decoders[I](AEncoding, Decoder) then
- begin
- Result := True;
- Exit;
- end;
- end;
- function Is_8859_1(const AEncoding: string): Boolean;
- begin
- Result := SameText(AEncoding, 'ISO-8859-1') or
- SameText(AEncoding, 'ISO_8859-1') or
- SameText(AEncoding, 'latin1') or
- SameText(AEncoding, 'iso-ir-100') or
- SameText(AEncoding, 'l1') or
- SameText(AEncoding, 'IBM819') or
- SameText(AEncoding, 'CP819') or
- SameText(AEncoding, 'csISOLatin1') or
- // This one is not in character-sets.txt, but was used in FPC documentation,
- // and still being used in fcl-registry package
- SameText(AEncoding, 'ISO8859-1');
- end;
- { TXMLCharSource }
- constructor TXMLCharSource.Create(const AData: XMLString);
- begin
- inherited Create;
- FLineNo := 1;
- FBuf := PWideChar(AData);
- FBufEnd := FBuf + Length(AData);
- LFPos := FBuf-1;
- FCharCount := Length(AData);
- end;
- procedure TXMLCharSource.Initialize;
- begin
- end;
- function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
- begin
- Result := True; // always succeed
- end;
- function TXMLCharSource.GetSourceURI: XMLString;
- begin
- if FSourceURI <> '' then
- Result := FSourceURI
- else if Assigned(FParent) then
- Result := FParent.SourceURI
- else
- Result := '';
- end;
- function TXMLCharSource.Reload: Boolean;
- begin
- Result := False;
- end;
- procedure TXMLCharSource.NewLine;
- begin
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- begin
- old := FBuf;
- nonws := False;
- repeat
- if FBuf^ = #10 then
- NewLine;
- if (FBuf^ < #255) and (AnsiChar(ord(FBuf^)) in Delim) then
- Break;
- if (FBuf^ > #32) or not (AnsiChar(ord(FBuf^)) in [#32, #9, #10, #13]) then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := FBuf^;
- BufAppendChunk(ToFill, old, FBuf);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- function TXMLCharSource.Matches(const arg: XMLString): Boolean;
- begin
- Result := False;
- if (FBufEnd >= FBuf + Length(arg)) or Reload then
- Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
- if Result then
- begin
- Inc(FBuf, Length(arg));
- if FBuf >= FBufEnd then
- Reload;
- end;
- end;
- { Used to check element name in end-tags, difference from Matches is that
- buffer may be reloaded more than once. XML has no restriction on name
- length, so a name longer than input buffer may be encountered. }
- function TXMLCharSource.MatchesLong(const arg: XMLString): Boolean;
- var
- idx, len, chunk: Integer;
- begin
- Result := False;
- idx := 1;
- len := Length(arg);
- repeat
- if (FBuf >= FBufEnd) and not Reload then
- Exit;
- if FBufEnd >= FBuf + len then
- chunk := len
- else
- chunk := FBufEnd - FBuf;
- if not CompareMem(@arg[idx], FBuf, chunk*sizeof(WideChar)) then
- Exit;
- Inc(FBuf, chunk);
- Inc(idx,chunk);
- Dec(len,chunk);
- until len = 0;
- Result := True;
- if FBuf >= FBufEnd then
- Reload;
- end;
- { TXMLDecodingSource }
- procedure TXMLDecodingSource.AfterConstruction;
- begin
- inherited AfterConstruction;
- FBufStart := AllocMem(4096);
- FBuf := FBufStart;
- FBufEnd := FBuf;
- LFPos := FBuf-1;
- end;
- destructor TXMLDecodingSource.Destroy;
- begin
- FreeMem(FBufStart);
- if Assigned(FDecoder.Cleanup) then
- FDecoder.Cleanup(FDecoder.Context);
- inherited Destroy;
- end;
- procedure TXMLDecodingSource.FetchData;
- begin
- end;
- procedure TXMLDecodingSource.DecodingError(const Msg: string);
- begin
- // count line endings to obtain correct error location
- while FBuf < FBufEnd do
- begin
- if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
- begin
- if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
- ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
- Inc(FBuf);
- LFPos := FBuf;
- Inc(FLineNo);
- end;
- Inc(FBuf);
- end;
- FReader.FatalError(Msg);
- end;
- function TXMLDecodingSource.Reload: Boolean;
- var
- Remainder: PtrInt;
- r, inLeft: Cardinal;
- rslt: Integer;
- begin
- if Kind = skInternalSubset then
- FReader.DTDReloadHook;
- Remainder := FBufEnd - FBuf;
- if Remainder > 0 then
- Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
- Dec(LFPos, FBuf-FBufStart);
- FBuf := FBufStart;
- FBufEnd := FBufStart + Remainder;
- repeat
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft < 4 then // may contain an incomplete AnsiChar
- begin
- FetchData;
- inLeft := FCharBufEnd - FCharBuf;
- if inLeft <= 0 then
- Break;
- end;
- r := FBufStart + FBufSize - FBufEnd;
- if r = 0 then
- Break;
- rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
- { Sanity checks: r and inLeft must not increase. }
- if inLeft + FCharBuf <= FCharBufEnd then
- FCharBuf := FCharBufEnd - inLeft
- else
- DecodingError('Decoder error: input byte count out of bounds');
- if r + FBufEnd <= FBufStart + FBufSize then
- FBufEnd := FBufStart + FBufSize - r
- else
- DecodingError('Decoder error: output AnsiChar count out of bounds');
- if rslt = 0 then
- Break
- else if rslt < 0 then
- DecodingError('Invalid character in input stream')
- else
- FReader.CheckMaxChars(rslt);
- until False;
- FBufEnd^ := #0;
- Result := FBuf < FBufEnd;
- end;
- const
- XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
- procedure TXMLDecodingSource.Initialize;
- begin
- inherited;
- FLineNo := 1;
- if FAssumeUTF16 then
- begin
- FFixedUCS2 := {$IFNDEF ENDIAN_BIG} 'UTF-16BE' {$ELSE} 'UTF-16LE' {$ENDIF};
- FDecoder.Decode := @Decode_UCS2;
- end
- else
- FDecoder.Decode := @Decode_UTF8;
- FFixedUCS2 := '';
- if FCharBufEnd-FCharBuf > 1 then
- begin
- if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
- begin
- FFixedUCS2 := 'UTF-16BE';
- FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end
- else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
- begin
- FFixedUCS2 := 'UTF-16LE';
- FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
- end;
- end;
- FBufSize := 6; // possible BOM and '<?xml'
- Reload;
- if FBuf^ = #$FEFF then
- begin
- FHasBOM := True;
- Inc(FBuf);
- end;
- LFPos := FBuf-1;
- if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
- begin
- FBufSize := 3; // don't decode past XML declaration
- Inc(FBuf, Length(XmlSign));
- FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
- end;
- FBufSize := 2047;
- if FReader.FXML11 then
- FXml11Rules := True;
- end;
- function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
- var
- NewDecoder: TDecoder;
- begin
- Result := True;
- if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
- Exit;
- if FFixedUCS2 <> '' then
- begin
- Result := SameText(AEncoding, FFixedUCS2) or
- SameText(AEncoding, 'UTF-16') or
- SameText(AEncoding, 'unicode');
- Exit;
- end;
- // TODO: must fail when a byte-based stream is labeled as word-based.
- // see rmt-e2e-61, it now fails but for a completely different reason.
- FillChar(NewDecoder, sizeof(TDecoder), 0);
- if Is_8859_1(AEncoding) then
- FDecoder.Decode := @Decode_8859_1
- else if FindDecoder(AEncoding, NewDecoder) then
- FDecoder := NewDecoder
- else
- Result := False;
- end;
- procedure TXMLDecodingSource.NewLine;
- begin
- case FBuf^ of
- #10: ;
- #13: begin
- // Reload trashes the buffer, it should be consumed beforehand
- if (FBufEnd >= FBuf+2) or Reload then
- begin
- if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
- Inc(FBuf);
- end;
- FBuf^ := #10;
- end;
- #$85, #$2028: if FXML11Rules then
- FBuf^ := #10
- else
- Exit;
- else
- Exit;
- end;
- Inc(FLineNo);
- LFPos := FBuf;
- end;
- { TXMLStreamInputSource }
- const
- Slack = 16;
- constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean; aAssumeUTF16 : Boolean = False);
- begin
- FStream := AStream;
- FCapacity := 4096;
- GetMem(FAllocated, FCapacity+Slack);
- FCharBuf := FAllocated+(Slack-4);
- FCharBufEnd := FCharBuf;
- FOwnStream := AOwnStream;
- FetchData;
- FAssumeUTF16:=aAssumeUTF16;
- end;
- destructor TXMLStreamInputSource.Destroy;
- begin
- FreeMem(FAllocated);
- if FOwnStream then
- FStream.Free;
- inherited Destroy;
- end;
- procedure TXMLStreamInputSource.FetchData;
- var
- Remainder, BytesRead: Integer;
- OldBuf: PAnsiChar;
- begin
- Assert(FCharBufEnd - FCharBuf < Slack-4);
- if FEof then
- Exit;
- OldBuf := FCharBuf;
- Remainder := FCharBufEnd - FCharBuf;
- if Remainder < 0 then
- Remainder := 0;
- FCharBuf := FAllocated+Slack-4-Remainder;
- if Remainder > 0 then
- Move(OldBuf^, FCharBuf^, Remainder);
- BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
- if BytesRead < FCapacity then
- FEof := True;
- FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
- { Null-termination has been removed:
- 1) Built-in decoders don't need it because they respect the buffer length.
- 2) It was causing unaligned access errors on ARM CPUs.
- }
- //PWideChar(FCharBufEnd)^ := #0;
- end;
- { TXMLFileInputSource }
- constructor TXMLFileInputSource.Create(var AFile: Text);
- begin
- FFile := @AFile;
- SourceURI := FilenameToURI(TTextRec(AFile).Name);
- FetchData;
- end;
- procedure TXMLFileInputSource.FetchData;
- var
- Remainder: Integer;
- begin
- if not Eof(FFile^) then
- begin
- Remainder := FCharBufEnd - FCharBuf;
- if Remainder > 0 then
- SetString(FTmp, FCharBuf, Remainder);
- ReadLn(FFile^, FString);
- FString := FString + #10; // bad solution...
- if Remainder > 0 then
- Insert(FTmp, FString, 1);
- FCharBuf := PAnsiChar(FString);
- FCharBufEnd := FCharBuf + Length(FString);
- end;
- end;
- { helper that closes handle upon destruction }
- type
- THandleOwnerStream = class(THandleStream)
- public
- destructor Destroy; override;
- end;
- destructor THandleOwnerStream.Destroy;
- begin
- FileClose(Handle);
- inherited Destroy;
- end;
- { TXMLTextReader }
- function TXMLTextReader.QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- begin
- if GetInterface(iid,obj) then
- result := S_OK
- else
- result:= E_NOINTERFACE;
- end;
- function TXMLTextReader._AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- begin
- result := -1;
- end;
- function TXMLTextReader._Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
- begin
- result := -1;
- end;
- procedure TXMLTextReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
- begin
- SrcOut := nil;
- if Assigned(SrcIn) then
- begin
- Case SrcIn.InputSourceType of
- istStream:
- SrcOut := TXMLStreamInputSource.Create(SrcIn.Stream, False);
- istAnsi:
- SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.AnsiStringData), True, False);
- istUnicode:
- SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.UnicodeStringData,TEncoding.Unicode), True, True);
- istSystemID:
- ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
- end;
- end;
- if (SrcOut = nil) and (FSource = nil) then
- DoErrorPos(esFatal, 'No input source specified', NullLocation);
- end;
- procedure TXMLTextReader.StoreLocation(out Loc: TLocation);
- begin
- Loc.Line := FSource.FLineNo;
- Loc.LinePos := FSource.FBuf-FSource.LFPos;
- end;
- function TXMLTextReader.ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
- var
- SrcURI: XMLString;
- Filename: string;
- Stream: TStream;
- fd: THandle;
- begin
- Source := nil;
- Result := False;
- if not ResolveRelativeURI(ABaseURI, ASystemID, SrcURI) then
- Exit;
- { TODO: alternative resolvers
- These may be 'internal' resolvers or a handler set by application.
- Internal resolvers should probably produce a TStream
- ( so that internal classes need not be exported ).
- External resolver will produce TXMLInputSource that should be converted.
- External resolver must NOT be called for root entity.
- External resolver can return nil, in which case we do the default }
- if URIToFilename(SrcURI, Filename) then
- begin
- fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
- if fd <> THandle(-1) then
- begin
- Stream := THandleOwnerStream.Create(fd);
- Source := TXMLStreamInputSource.Create(Stream, True, False);
- Source.SourceURI := SrcURI;
- end;
- end;
- Result := Assigned(Source);
- end;
- procedure TXMLTextReader.SetSource(ASource: TXMLCharSource);
- begin
- ASource.FParent := FSource;
- FSource := ASource;
- FSource.FReader := Self;
- FSource.FStartNesting := FNesting;
- end;
- procedure TXMLTextReader.FatalError(Expected: WideChar);
- begin
- // FIX: don't output what is found - anything may be found, including exploits...
- FatalError('Expected "%1s"', [string(Expected)]);
- end;
- procedure TXMLTextReader.FatalError(const descr: String; LineOffs: Integer);
- begin
- DoError(esFatal, descr, LineOffs);
- end;
- procedure TXMLTextReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
- begin
- DoError(esFatal, Format(descr, args), LineOffs);
- end;
- procedure TXMLTextReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
- begin
- if FValidate then
- DoError(esError, Format(Msg, Args), LineOffs);
- end;
- procedure TXMLTextReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
- var
- ws: XMLString;
- begin
- SetString(ws, FName.Buffer, FName.Length);
- ValidationError(Msg, [ws], LineOffs);
- end;
- procedure TXMLTextReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
- var
- Loc: TLocation;
- begin
- StoreLocation(Loc);
- if LineOffs >= 0 then
- begin
- Dec(Loc.LinePos, LineOffs);
- DoErrorPos(Severity, descr, Loc);
- end
- else
- DoErrorPos(Severity, descr, FTokenStart);
- end;
- procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
- const args: array of const; const ErrPos: TLocation);
- begin
- DoErrorPos(Severity, Format(descr, args), ErrPos);
- end;
- procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
- var
- E: EXMLReadError;
- srcuri: XMLString;
- begin
- if Assigned(FSource) then
- begin
- srcuri := FSource.FSourceURI;
- if (srcuri = '') and Assigned(FSource.FEntity) then
- srcuri := FSource.FEntity.FURI;
- E := EXMLReadError.Create(severity, descr, ErrPos.Line, ErrPos.LinePos, srcuri);
- end
- else
- E := EXMLReadError.Create(descr);
- CallErrorHandler(E);
- // No 'finally'! If user handler raises exception, control should not get here
- // and the exception will be freed in CallErrorHandler (below)
- E.Free;
- end;
- procedure TXMLTextReader.CheckMaxChars(ToAdd: Cardinal);
- var
- src: TXMLCharSource;
- total: Cardinal;
- begin
- Inc(FSource.FCharCount, ToAdd);
- if FMaxChars = 0 then
- Exit;
- src := FSource;
- total := 0;
- repeat
- Inc(total, src.FCharCount);
- if total > FMaxChars then
- FatalError('Exceeded character count limit');
- src := src.FParent;
- until src = nil;
- end;
- procedure TXMLTextReader.CallErrorHandler(E: EXMLReadError);
- begin
- try
- if Assigned(FOnError) then
- FOnError(E);
- if E.Severity = esFatal then
- raise E;
- except
- FReadState := rsError;
- if ExceptObject <> E then
- E.Free;
- raise;
- end;
- end;
- function TXMLTextReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
- begin
- Result := False;
- repeat
- Result := SkipS or Result;
- if FSource.FBuf >= FSource.FBufEnd then
- begin
- Result := True; // report whitespace upon exiting the PE
- if not ContextPop then
- Break;
- end
- else if FSource.FBuf^ = '%' then
- begin
- if (FState <> rsDTD) then
- Break;
- // This is the only case where look-ahead is needed
- if FSource.FBuf > FSource.FBufEnd-2 then
- FSource.Reload;
- if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[NamePages[hi(Word(FSource.FBuf[1]))]]) or
- ((FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
- begin
- StartPE;
- Result := True; // report whitespace upon entering the PE
- end
- else Break;
- end
- else
- Break;
- until False;
- end;
- procedure TXMLTextReader.ExpectWhitespace;
- begin
- if not SkipWhitespace then
- FatalError('Expected whitespace');
- end;
- function TXMLTextReader.SkipS(Required: Boolean): Boolean;
- var
- p: PWideChar;
- begin
- Result := False;
- repeat
- p := FSource.FBuf;
- repeat
- if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
- begin
- FSource.FBuf := p;
- FSource.NewLine;
- p := FSource.FBuf;
- end
- else if (p^ <> #32) and (p^ <> #9) then
- Break;
- Inc(p);
- Result := True;
- until False;
- FSource.FBuf := p;
- until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
- if (not Result) and Required then
- FatalError('Expected whitespace');
- end;
- procedure TXMLTextReader.ExpectString(const s: String);
- var
- I: Integer;
- begin
- for I := 1 to Length(s) do
- begin
- if FSource.FBuf^ <> WideChar(ord(s[i])) then
- FatalError('Expected "%s"', [s], i-1);
- FSource.NextChar;
- end;
- end;
- function TXMLTextReader.CheckForChar(c: WideChar): Boolean;
- begin
- Result := (FSource.FBuf^ = c);
- if Result then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf >= FSource.FBufEnd then
- FSource.Reload;
- end;
- end;
- procedure TXMLTextReader.SkipQuote(out Delim: WideChar; required: Boolean);
- begin
- Delim := #0;
- if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
- begin
- Delim := FSource.FBuf^;
- FSource.NextChar; // skip quote
- StoreLocation(FTokenStart);
- end
- else if required then
- FatalError('Expected single or double quote');
- end;
- const
- PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
- procedure TXMLTextReader.SetOptions(AValue: TXMLReaderSettings);
- begin
- FValidate := AValue.Validate;
- FPreserveWhitespace := AValue.PreserveWhitespace;
- FExpandEntities := AValue.ExpandEntities;
- FCDSectionsAsText := AValue.CDSectionsAsText;
- FIgnoreComments := AValue.IgnoreComments;
- FNamespaces := AValue.Namespaces;
- FDisallowDoctype := AValue.DisallowDoctype;
- FCanonical := AValue.CanonicalForm;
- FMaxChars := AValue.MaxChars;
- FOnError := AValue.OnError;
- SetFragmentMode(AValue.ConformanceLevel = clFragment);
- end;
- procedure TXMLTextReader.SetFragmentMode(aValue: Boolean);
- begin
- FFragmentMode := aValue;
- if FFragmentMode then
- FState := rsRoot
- else
- FState := rsProlog;
- end;
- constructor TXMLTextReader.Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings);
- var
- InputSrc: TXMLCharSource;
- begin
- SetNametable(ASettings.NameTable);
- SetOptions(ASettings);
- ConvertSource(ASrc, InputSrc);
- FSource := InputSrc;
- FSource.FReader := Self;
- end;
- constructor TXMLTextReader.Create(const uri: XMLString; ASettings: TXMLReaderSettings);
- begin
- SetNametable(ASettings.NameTable);
- SetOptions(ASettings);
- if ResolveResource(uri, '', '', FSource) then
- FSource.FReader := Self
- else
- DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
- end;
- procedure TXMLTextReader.SetNametable(ANameTable: THashTable);
- begin
- if ANameTable = nil then
- begin
- ANameTable := THashTable.Create(256, True);
- FNameTableOwned := True;
- end;
- FNameTable := ANameTable;
- end;
- constructor TXMLTextReader.Create(var AFile: Text; ANameTable: THashTable);
- begin
- SetNametable(ANameTable);
- FSource := TXMLFileInputSource.Create(AFile);
- FSource.FReader := Self;
- end;
- constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable);
- begin
- SetNametable(ANameTable);
- FSource := TXMLStreamInputSource.Create(AStream, False);
- FSource.SourceURI := ABaseUri;
- FSource.FReader := Self;
- end;
- constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
- begin
- SetNametable(ASettings.NameTable);
- SetOptions(ASettings);
- FSource := TXMLStreamInputSource.Create(AStream, False);
- FSource.SourceURI := ABaseUri;
- FSource.FReader := Self;
- end;
- constructor TXMLTextReader.Create(ASrc: TXMLCharSource; AParent: TXMLTextReader);
- begin
- FNameTable := AParent.FNameTable;
- FSource := ASrc;
- FSource.FReader := Self;
- FValidate := AParent.FValidate;
- FPreserveWhitespace := AParent.FPreserveWhitespace;
- FExpandEntities := AParent.FExpandEntities;
- FCDSectionsAsText := AParent.FCDSectionsAsText;
- FIgnoreComments := AParent.FIgnoreComments;
- FNamespaces := AParent.FNamespaces;
- FDisallowDoctype := AParent.FDisallowDoctype;
- FCanonical := AParent.FCanonical;
- FMaxChars := AParent.FMaxChars;
- FOnError := AParent.FOnError;
- end;
- destructor TXMLTextReader.Destroy;
- var
- cur: PNodeData;
- begin
- if FAttrCleanupFlag then
- CleanupAttributes;
- while Assigned(FFreeAttrChunk) do
- begin
- cur := FFreeAttrChunk;
- FFreeAttrChunk := cur^.FNext;
- Dispose(cur);
- end;
- if Assigned(FEntityValue.Buffer) then
- FreeMem(FEntityValue.Buffer);
- FreeMem(FName.Buffer);
- FreeMem(FValue.Buffer);
- if Assigned(FSource) then
- while ContextPop(True) do; // clean input stack
- FSource.Free;
- FPEMap.Free;
- ClearForwardRefs;
- FNsAttHash.Free;
- FNSHelper.Free;
- FDocType.Release;
- FIDMap.Free;
- FForwardRefs.Free;
- if FNameTableOwned then
- FNameTable.Free;
- inherited Destroy;
- end;
- procedure TXMLTextReader.AfterConstruction;
- begin
- BufAllocate(FName, 128);
- BufAllocate(FValue, 512);
- SetLength(FNodeStack, 16);
- SetLength(FValidators, 16);
- FNesting := 0;
- FValidatorNesting := 0;
- FCurrNode := @FNodeStack[0];
- FCurrAttrIndex := -1;
- FEmptyStr := FNameTable.FindOrAdd('');
- if FNamespaces then
- begin
- FNSHelper := TNSSupport.Create(FNameTable);
- FNsAttHash := TDblHashArray.Create;
- FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
- FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
- FStdUri_xmlns := FNameTable.FindOrAdd(stduri_xmlns);
- FStdUri_xml := FNameTable.FindOrAdd(stduri_xml);
- end;
- end;
- function TXMLTextReader.CheckName(aFlags: TCheckNameFlags): Boolean;
- var
- p: PWideChar;
- NameStartFlag: Boolean;
- begin
- p := FSource.FBuf;
- FName.Length := 0;
- FColonPos := -1;
- NameStartFlag := not (cnToken in aFlags);
- repeat
- if NameStartFlag then
- begin
- if (Byte(p^) in NamingBitmap[NamePages[hi(Word(p^))]]) or
- ((p^ = ':') and (not FNamespaces)) then
- Inc(p)
- else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p, 2)
- else
- begin
- // here we come either when first AnsiChar of name is bad (it may be a colon),
- // or when a colon is not followed by a valid NameStartChar
- FSource.FBuf := p;
- Result := False;
- Break;
- end;
- NameStartFlag := False;
- end;
- repeat
- if (Byte(p^) in NamingBitmap[NamePages[$100+hi(Word(p^))]]) or
- ((p^= ':') and ((cnToken in aFlags) or not FNamespaces)) then
- Inc(p)
- else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
- (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
- Inc(p,2)
- else
- Break;
- until False;
- if (p^ = ':') and (FColonPos < 0) then
- begin
- FColonPos := p-FSource.FBuf+FName.Length;
- NameStartFlag := True;
- Inc(p);
- if p < FSource.FBufEnd then Continue;
- end;
- BufAppendChunk(FName, FSource.FBuf, p);
- Result := (FName.Length > 0);
- FSource.FBuf := p;
- if (p < FSource.FBufEnd) or not FSource.Reload then
- Break;
- p := FSource.FBuf;
- until False;
- if not (Result or (cnOptional in aFlags)) then
- RaiseNameNotFound;
- end;
- procedure TXMLTextReader.CheckNCName;
- begin
- if FNamespaces and (FColonPos <> -1) then
- FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
- end;
- procedure TXMLTextReader.RaiseNameNotFound;
- begin
- if FColonPos <> -1 then
- FatalError('Bad QName syntax, local part is missing')
- else
- // Coming at no cost, this allows more user-friendly error messages
- with FSource do
- if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
- FatalError('Whitespace is not allowed here')
- else
- FatalError('Name starts with invalid character');
- end;
- function TXMLTextReader.ResolvePredefined: Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- with FName do
- begin
- if (Length = 2) and (Buffer[1] = 't') then
- begin
- if Buffer[0] = 'l' then
- wc := '<'
- else if Buffer[0] = 'g' then
- wc := '>'
- else Exit;
- end
- else if Buffer[0] = 'a' then
- begin
- if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
- wc := '&'
- else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
- (Buffer[3] = 's') then
- wc := ''''
- else Exit;
- end
- else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
- (Buffer[2] = 'o') and (Buffer[3] ='t') then
- wc := '"'
- else
- Exit;
- end; // with
- BufAppend(FValue, wc);
- Result := True;
- end;
- function TXMLTextReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
- var
- Code: Integer;
- begin
- FSource.NextChar; // skip '&'
- Result := CheckForChar('#');
- if Result then
- begin
- Code := 0;
- if CheckForChar('x') then
- repeat
- case FSource.FBuf^ of
- '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
- 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
- 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
- else
- Break;
- end;
- FSource.NextChar;
- until Code > $10FFFF
- else
- repeat
- case FSource.FBuf^ of
- '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
- else
- Break;
- end;
- FSource.NextChar;
- until Code > $10FFFF;
- case Code of
- $01..$08, $0B..$0C, $0E..$1F:
- if FXML11 then
- BufAppend(ToFill, WideChar(Code))
- else
- FatalError('Invalid character reference');
- $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
- BufAppend(ToFill, WideChar(Code));
- $10000..$10FFFF:
- begin
- BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
- BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
- end;
- else
- FatalError('Invalid character reference');
- end;
- end
- else CheckName;
- ExpectChar(';');
- end;
- const
- AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
- GT_Delim: TSetOfChar = [#0, '>'];
- { Parse attribute literal, producing plain string value in AttrData.FValueStr.
- If entity references are encountered and FExpandEntities=False, also builds
- a node chain starting from AttrData.FNext. Node chain is built only for the
- first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
- procedure TXMLTextReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
- var
- wc: WideChar;
- Delim: WideChar;
- ent: TEntityDecl;
- start: TObject;
- curr: PNodeData;
- StartPos: Integer;
- StartLoc: TLocation;
- entName: PHashItem;
- begin
- SkipQuote(Delim);
- AttrData^.FLoc2 := FTokenStart;
- StartLoc := FTokenStart;
- curr := AttrData;
- FValue.Length := 0;
- StartPos := 0;
- start := FSource.FEntity;
- repeat
- wc := FSource.SkipUntil(FValue, AttrDelims);
- if wc = '<' then
- FatalError('Character ''<'' is not allowed in attribute value')
- else if wc = '&' then
- begin
- if ParseRef(FValue) or ResolvePredefined then
- Continue;
- entName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- ent := EntityCheck(True);
- if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
- begin
- if FValue.Length > StartPos then
- begin
- AllocAttributeValueChunk(curr, StartPos);
- curr^.FLoc := StartLoc;
- end;
- AllocAttributeValueChunk(curr, FValue.Length);
- curr^.FNodeType := ntEntityReference;
- curr^.FQName := entName;
- StoreLocation(StartLoc);
- curr^.FLoc := StartLoc;
- Dec(curr^.FLoc.LinePos, FName.Length+1);
- end;
- StartPos := FValue.Length;
- if Assigned(ent) then
- ContextPush(ent);
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- if (wc = Delim) and (FSource.FEntity = start) then
- Break;
- if (wc = #10) or (wc = #9) or (wc = #13) then
- wc := #32;
- BufAppend(FValue, wc);
- end
- else
- begin
- if (FSource.FEntity = start) or not ContextPop then // #0
- FatalError('Literal has no closing quote', -1);
- StartPos := FValue.Length;
- end;
- until False;
- if Assigned(attrData^.FNext) then
- begin
- FAttrCleanupFlag := True;
- if FValue.Length > StartPos then
- begin
- AllocAttributeValueChunk(curr, StartPos);
- curr^.FLoc := StartLoc;
- end;
- end;
- if nonCDATA then
- BufNormalize(FValue, attrData^.FDenormalized)
- else
- attrData^.FDenormalized := False;
- SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
- end;
- const
- PrefixChar: array[Boolean] of string = ('', '%');
- procedure TXMLTextReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
- begin
- if AEntity.FOnStack then
- FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
- if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
- begin
- if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
- begin
- // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
- ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
- Src := nil;
- Exit;
- end;
- end
- else
- begin
- Src := TXMLCharSource.Create(AEntity.FReplacementText);
- Src.FLineNo := AEntity.FStartLocation.Line;
- Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
- // needed in case of prefetched external PE
- if AEntity.FSystemID <> '' then
- Src.SourceURI := AEntity.FURI;
- end;
- AEntity.FOnStack := True;
- Src.FEntity := AEntity;
- end;
- function TXMLTextReader.ContextPush(AEntity: TEntityDecl; DummySource: Boolean): Boolean;
- var
- Src: TXMLCharSource;
- begin
- Src := nil;
- if Assigned(AEntity) then
- EntityToSource(AEntity, Src);
- if (Src = nil) and DummySource then
- begin
- Src := TXMLCharSource.Create('');
- if FExpandEntities then
- Src.Kind := skManualPop;
- end;
- Result := Assigned(Src);
- if Result then
- begin
- SetSource(Src);
- Src.Initialize;
- end;
- end;
- function TXMLTextReader.ContextPop(Forced: Boolean): Boolean;
- var
- Src: TXMLCharSource;
- Error: Boolean;
- begin
- Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
- if Result then
- begin
- Src := FSource.FParent;
- Error := False;
- if Assigned(FSource.FEntity) then
- begin
- FSource.FEntity.FOnStack := False;
- FSource.FEntity.FCharCount := FSource.FCharCount;
- // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
- Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
- end;
- FSource.Free;
- FSource := Src;
- // correct position of this error is after PE reference
- if Error then
- FatalError('Parameter entities must be properly nested');
- end;
- end;
- function TXMLTextReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
- var
- RefName: XMLString;
- cnt: Integer;
- begin
- Result := nil;
- SetString(RefName, FName.Buffer, FName.Length);
- cnt := FName.Length+2;
- if Assigned(FDocType) then
- Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
- if Result = nil then
- begin
- if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
- FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
- else
- ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
- Exit;
- end;
- if FStandalone and Result.ExternallyDeclared then
- FatalError('Standalone constraint violation', cnt);
- if Result.FNotationName <> '' then
- FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
- if NoExternals and (Result.FSystemID <> '') then
- FatalError('External entity reference is not allowed in attribute value', cnt);
- if not Result.FResolved then
- if Assigned(FOnEntity) then
- FOnEntity(Self, Result);
- // at this point we know the charcount of the entity being included
- if Result.FCharCount >= cnt then
- CheckMaxChars(Result.FCharCount - cnt);
- end;
- procedure TXMLTextReader.StartPE;
- var
- PEnt: TEntityDecl;
- begin
- FSource.NextChar; // skip '%'
- CheckName;
- ExpectChar(';');
- if (FSource.Kind = skInternalSubset) and FInsideDecl then
- FatalError('Parameter entity references cannot appear inside markup declarations in internal subset', FName.Length+2);
- PEnt := nil;
- if Assigned(FPEMap) then
- PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
- if PEnt = nil then
- begin
- ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
- // cease processing declarations, unless document is standalone.
- FDTDProcessed := FStandalone;
- Exit;
- end;
- { cache an external PE so it's only fetched once }
- if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
- begin
- FDTDProcessed := FStandalone;
- Exit;
- end;
- CheckMaxChars(PEnt.FCharCount);
- PEnt.FBetweenDecls := not FInsideDecl;
- ContextPush(PEnt);
- FHavePERefs := True;
- end;
- function TXMLTextReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
- begin
- Result := ContextPush(AEntity);
- if Result then
- try
- FValue.Length := 0;
- FSource.SkipUntil(FValue, [#0]);
- SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
- AEntity.FCharCount := FValue.Length;
- AEntity.FStartLocation.Line := 1;
- AEntity.FStartLocation.LinePos := 1;
- AEntity.FURI := FSource.SourceURI; // replace base URI with absolute one
- finally
- ContextPop;
- AEntity.FPrefetched := True;
- FValue.Length := 0;
- end;
- end;
- const
- LiteralDelims: array[TLiteralType] of TSetOfChar = (
- [#0, '''', '"'], // ltPlain
- [#0, '''', '"', #13, #10], // ltPubid
- [#0, '%', '&', '''', '"'] // ltEntity
- );
- function TXMLTextReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
- Required: Boolean): Boolean;
- var
- start: TObject;
- wc, Delim: WideChar;
- dummy: Boolean;
- begin
- SkipQuote(Delim, Required);
- Result := (Delim <> #0);
- if not Result then
- Exit;
- ToFill.Length := 0;
- start := FSource.FEntity;
- repeat
- wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
- if wc = '%' then { ltEntity only }
- StartPE
- else if wc = '&' then { ltEntity }
- begin
- if ParseRef(ToFill) then // charRefs always expanded
- Continue;
- BufAppend(ToFill, '&');
- BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
- BufAppend(ToFill, ';');
- end
- else if wc <> #0 then
- begin
- FSource.NextChar;
- if (wc = #10) or (wc = #13) then
- wc := #32
- // terminating delimiter must be in the same context as the starting one
- else if (wc = Delim) and (start = FSource.FEntity) then
- Break;
- BufAppend(ToFill, wc);
- end
- else if (FSource.FEntity = start) or not ContextPop then // #0
- FatalError('Literal has no closing quote', -1);
- until False;
- if aType = ltPubid then
- BufNormalize(ToFill, dummy);
- end;
- function TXMLTextReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
- var
- wc: WideChar;
- begin
- Result := False;
- StoreLocation(FTokenStart);
- repeat
- wc := FSource.SkipUntil(FValue, Delim);
- if wc <> #0 then
- begin
- FSource.NextChar;
- if (FValue.Length > 0) then
- begin
- if (FValue.Buffer[FValue.Length-1] = c1) then
- begin
- Dec(FValue.Length);
- Result := True;
- Exit;
- end;
- end;
- BufAppend(FValue, wc);
- end;
- until wc = #0;
- end;
- procedure TXMLTextReader.ParseComment(discard: Boolean); // [15]
- var
- SaveLength: Integer;
- begin
- ExpectString('--');
- SaveLength := FValue.Length;
- if not SkipUntilSeq([#0, '-'], '-') then
- FatalError('Unterminated comment', -1);
- ExpectChar('>');
- if not discard then
- begin
- FCurrNode := @FNodeStack[FNesting];
- FCurrNode^.FNodeType := ntComment;
- FCurrNode^.FQName := nil;
- FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
- FCurrNode^.FValueLength := FValue.Length-SaveLength;
- end;
- FValue.Length := SaveLength;
- end;
- procedure TXMLTextReader.ParsePI; // [16]
- begin
- FSource.NextChar; // skip '?'
- CheckName;
- CheckNCName;
- with FName do
- if (Length = 3) and
- ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
- ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
- ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
- begin
- if not BufEquals(FName, 'xml') then
- FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
- else
- FatalError('XML declaration is not allowed here', FName.Length);
- end;
- if FSource.FBuf^ <> '?' then
- SkipS(True);
- FValue.Length := 0;
- if not SkipUntilSeq(GT_Delim, '?') then
- FatalError('Unterminated processing instruction', -1);
- SetNodeInfoWithValue(ntProcessingInstruction,
- FNameTable.FindOrAdd(FName.Buffer, FName.Length));
- end;
- const
- vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
- procedure TXMLTextReader.ParseXmlOrTextDecl(TextDecl: Boolean);
- var
- Delim: WideChar;
- buf: array[0..31] of WideChar;
- I: Integer;
- begin
- SkipS(True);
- // [24] VersionInfo: optional in TextDecl, required in XmlDecl
- if (not TextDecl) or (FSource.FBuf^ = 'v') then
- begin
- ExpectString('version');
- ExpectEq;
- SkipQuote(Delim);
- { !! Definition "VersionNum ::= '1.' [0-9]+" per XML 1.0 Fifth Edition
- implies that version literal can have unlimited length. }
- I := 0;
- while (I < 3) and (FSource.FBuf^ <> Delim) do
- begin
- buf[I] := FSource.FBuf^;
- Inc(I);
- FSource.NextChar;
- end;
- if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
- (buf[2] < '0') or (buf[2] > '9') then
- FatalError('Illegal version number', -1);
- ExpectChar(Delim);
- FSource.FXMLVersion := vers[buf[2] = '1'];
- if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
- FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
- if TextDecl or (FSource.FBuf^ <> '?') then
- SkipS(True);
- end;
- // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
- if TextDecl or (FSource.FBuf^ = 'e') then
- begin
- ExpectString('encoding');
- ExpectEq;
- SkipQuote(Delim);
- I := 0;
- while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
- ((AnsiChar(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
- ((I > 0) and (AnsiChar(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
- begin
- buf[I] := FSource.FBuf^;
- Inc(I);
- FSource.NextChar;
- end;
- if not CheckForChar(Delim) then
- FatalError('Illegal encoding name', i);
- SetString(FSource.FXMLEncoding, buf, i);
- if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
- FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
- if FSource.FBuf^ <> '?' then
- SkipS(not TextDecl);
- end;
- // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
- if (not TextDecl) and (FSource.FBuf^ = 's') then
- begin
- ExpectString('standalone');
- ExpectEq;
- SkipQuote(Delim);
- if FSource.Matches('yes') then
- FStandalone := True
- else if not FSource.Matches('no') then
- FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
- ExpectChar(Delim);
- SkipS;
- end;
- ExpectString('?>');
- { Switch to 1.1 rules only after declaration is parsed completely. This is to
- ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
- if FSource.FXMLVersion = xmlVersion11 then
- FXML11 := True;
- end;
- procedure TXMLTextReader.DTDReloadHook;
- var
- p: PWideChar;
- begin
- { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
- We must further remove the CR chars and have only LF's left. }
- p := FDTDStartPos;
- while p < FSource.FBuf do
- begin
- while (p < FSource.FBuf) and (p^ <> #13) do
- Inc(p);
- BufAppendChunk(FIntSubset, FDTDStartPos, p);
- if p^ = #13 then
- Inc(p);
- FDTDStartPos := p;
- end;
- FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
- end;
- procedure TXMLTextReader.ParseDoctypeDecl; // [28]
- var
- Src: TXMLCharSource;
- DTDName: PHashItem;
- Locs: array [0..2] of TLocation;
- HasAtts: Boolean;
- begin
- if FState >= rsDTD then
- FatalError('Markup declaration is not allowed here');
- if FDisallowDoctype then
- FatalError('Document type is prohibited by parser settings');
- ExpectString('DOCTYPE');
- SkipS(True);
- FDocType := TDTDModel.Create(FNameTable);
- FDTDProcessed := True; // assume success
- FState := rsDTD;
- CheckName;
- SetString(FDocType.FName, FName.Buffer, FName.Length);
- DTDName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- if SkipS then
- begin
- StoreLocation(Locs[0]);
- HasAtts := ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, Locs[1], False);
- if HasAtts then
- Locs[2] := FTokenStart;
- SkipS;
- end;
- if CheckForChar('[') then
- begin
- BufAllocate(FIntSubset, 256);
- FSource.Kind := skInternalSubset;
- try
- FDTDStartPos := FSource.FBuf;
- ParseMarkupDecl;
- DTDReloadHook; // fetch last chunk
- SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
- finally
- FreeMem(FIntSubset.Buffer);
- FSource.Kind := skNone;
- end;
- ExpectChar(']');
- SkipS;
- end;
- ExpectChar('>');
- if (FDocType.FSystemID <> '') then
- begin
- if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SourceURI, Src) then
- begin
- SetSource(Src);
- Src.Initialize;
- try
- Src.Kind := skManualPop;
- ParseMarkupDecl;
- finally
- ContextPop(True);
- end;
- end
- else
- begin
- ValidationError('Unable to resolve external DTD subset', []);
- FDTDProcessed := FStandalone;
- end;
- end;
- FState := rsAfterDTD;
- FValue.Length := 0;
- BufAppendString(FValue, FDocType.FInternalSubset);
- SetNodeInfoWithValue(ntDocumentType, DTDName);
- if HasAtts then
- begin
- if FDocType.FPublicID <> '' then
- AddPseudoAttribute(FNameTable.FindOrAdd('PUBLIC'), FDocType.FPublicID, Locs[0], Locs[1]);
- AddPseudoAttribute(FNameTable.FindOrAdd('SYSTEM'), FDocType.FSystemID, Locs[0], Locs[2]);
- end;
- end;
- procedure TXMLTextReader.ExpectEq; // [25]
- begin
- if FSource.FBuf^ <> '=' then
- SkipS;
- if FSource.FBuf^ <> '=' then
- FatalError('Expected "="');
- FSource.NextChar;
- SkipS;
- end;
- { DTD stuff }
- procedure TXMLTextReader.CheckPENesting(aExpected: TObject);
- begin
- if FSource.FEntity <> aExpected then
- ValidationError('Parameter entities must be properly nested', [], 0);
- end;
- function TXMLTextReader.ParseQuantity: TCPQuant;
- begin
- case FSource.FBuf^ of
- '?': Result := cqZeroOrOnce;
- '*': Result := cqZeroOrMore;
- '+': Result := cqOnceOrMore;
- else
- Result := cqOnce;
- Exit;
- end;
- FSource.NextChar;
- end;
- function TXMLTextReader.FindOrCreateElDef: TElementDecl;
- var
- p: PHashItem;
- begin
- CheckName;
- p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- Result := TElementDecl(p^.Data);
- if Result = nil then
- begin
- Result := TElementDecl.Create;
- p^.Data := Result;
- end;
- end;
- procedure TXMLTextReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
- var
- Delim: WideChar;
- CurrentCP: TContentParticle;
- begin
- Delim := #0;
- repeat
- CurrentCP := CP.Add;
- SkipWhitespace;
- if CheckForChar('(') then
- ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
- else
- CurrentCP.Def := FindOrCreateElDef;
- CurrentCP.CPQuant := ParseQuantity;
- SkipWhitespace;
- if FSource.FBuf^ = ')' then
- Break;
- if Delim = #0 then
- begin
- if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
- Delim := FSource.FBuf^
- else
- FatalError('Expected pipe or comma delimiter');
- end
- else
- if FSource.FBuf^ <> Delim then
- FatalError(Delim);
- FSource.NextChar; // skip delimiter
- until False;
- CheckPENesting(MustEndIn);
- FSource.NextChar;
- if Delim = '|' then
- CP.CPType := ctChoice
- else
- CP.CPType := ctSeq; // '(foo)' is a sequence!
- end;
- procedure TXMLTextReader.ParseElementDecl; // [45]
- var
- ElDef: TElementDecl;
- CurrentEntity: TObject;
- I: Integer;
- CP: TContentParticle;
- Typ: TElementContentType;
- ExtDecl: Boolean;
- begin
- CP := nil;
- Typ := ctUndeclared; // satisfy compiler
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- if ElDef.ContentType <> ctUndeclared then
- ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
- ExtDecl := FSource.Kind <> skInternalSubset;
- ExpectWhitespace;
- if FSource.Matches('EMPTY') then
- Typ := ctEmpty
- else if FSource.Matches('ANY') then
- Typ := ctAny
- else if CheckForChar('(') then
- begin
- CP := TContentParticle.Create;
- try
- CurrentEntity := FSource.FEntity;
- SkipWhitespace;
- if FSource.Matches('#PCDATA') then // Mixed section [51]
- begin
- SkipWhitespace;
- Typ := ctMixed;
- while FSource.FBuf^ <> ')' do
- begin
- ExpectChar('|');
- SkipWhitespace;
- with CP.Add do
- begin
- Def := FindOrCreateElDef;
- for I := CP.ChildCount-2 downto 0 do
- if Def = CP.Children[I].Def then
- ValidationError('Duplicate token in mixed section', [], FName.Length);
- end;
- SkipWhitespace;
- end;
- CheckPENesting(CurrentEntity);
- FSource.NextChar;
- if (not CheckForChar('*')) and (CP.ChildCount > 0) then
- FatalError(WideChar('*'));
- CP.CPQuant := cqZeroOrMore;
- CP.CPType := ctChoice;
- end
- else // Children section [47]
- begin
- Typ := ctChildren;
- ExpectChoiceOrSeq(CP, CurrentEntity);
- CP.CPQuant := ParseQuantity;
- end;
- except
- CP.Free;
- raise;
- end;
- end
- else
- FatalError('Invalid content specification');
- if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
- begin
- ElDef.ExternallyDeclared := ExtDecl;
- ElDef.ContentType := Typ;
- ElDef.RootCP := CP;
- end
- else
- CP.Free;
- end;
- procedure TXMLTextReader.ParseNotationDecl; // [82]
- var
- NameStr, SysID, PubID: XMLString;
- Notation: TNotationDecl;
- Entry: PHashItem;
- Src: TXMLCharSource;
- dummy: TLocation;
- begin
- Src := FSource;
- ExpectWhitespace;
- CheckName;
- CheckNCName;
- SetString(NameStr, FName.Buffer, FName.Length);
- ExpectWhitespace;
- if not ParseExternalID(SysID, PubID, dummy, True) then
- FatalError('Expected external or public ID');
- if FDTDProcessed then
- begin
- Entry := FDocType.Notations.FindOrAdd(NameStr);
- if Entry^.Data = nil then
- begin
- Notation := TNotationDecl.Create;
- Notation.FName := NameStr;
- Notation.FPublicID := PubID;
- Notation.FSystemID := SysID;
- Notation.FURI := Src.SourceURI;
- Entry^.Data := Notation;
- end
- else
- ValidationError('Duplicate notation declaration: ''%s''', [NameStr]);
- end;
- end;
- const
- AttrDataTypeNames: array[TAttrDataType] of XMLString = (
- 'CDATA',
- 'ID',
- 'IDREF',
- 'IDREFS',
- 'ENTITY',
- 'ENTITIES',
- 'NMTOKEN',
- 'NMTOKENS',
- 'NOTATION'
- );
- procedure TXMLTextReader.ParseAttlistDecl; // [52]
- var
- ElDef: TElementDecl;
- AttDef: TAttributeDef;
- dt: TAttrDataType;
- Found, DiscardIt: Boolean;
- Offsets: array [Boolean] of Integer;
- attrName: PHashItem;
- begin
- ExpectWhitespace;
- ElDef := FindOrCreateElDef;
- SkipWhitespace;
- while FSource.FBuf^ <> '>' do
- begin
- CheckName;
- ExpectWhitespace;
- attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- AttDef := TAttributeDef.Create(attrName, FColonPos);
- try
- AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
- // In case of duplicate declaration of the same attribute, we must discard it,
- // not modifying ElDef, and suppressing certain validation errors.
- DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
- if CheckForChar('(') then // [59]
- begin
- AttDef.DataType := dtNmToken;
- repeat
- SkipWhitespace;
- CheckName([cnToken]);
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in enumerated attribute declaration', [], FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end
- else
- begin
- StoreLocation(FTokenStart);
- // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
- for dt := dtNotation downto dtCData do
- begin
- Found := FSource.Matches(AttrDataTypeNames[dt]);
- if Found then
- Break;
- end;
- if Found and SkipWhitespace then
- begin
- AttDef.DataType := dt;
- if (dt = dtId) and not DiscardIt then
- begin
- if Assigned(ElDef.IDAttr) then
- ValidationError('Only one attribute of type ID is allowed per element',[])
- else
- ElDef.IDAttr := AttDef;
- end
- else if dt = dtNotation then // no test cases for these ?!
- begin
- if not DiscardIt then
- begin
- if Assigned(ElDef.NotationAttr) then
- ValidationError('Only one attribute of type NOTATION is allowed per element',[])
- else
- ElDef.NotationAttr := AttDef;
- if ElDef.ContentType = ctEmpty then
- ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
- end;
- ExpectChar('(');
- repeat
- SkipWhitespace;
- StoreLocation(FTokenStart);
- CheckName;
- CheckNCName;
- if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
- ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
- if (not DiscardIt) and FValidate and
- (FDocType.Notations.Get(FName.Buffer,FName.Length)=nil) then
- AddForwardRef(FName.Buffer, FName.Length);
- SkipWhitespace;
- until not CheckForChar('|');
- ExpectChar(')');
- ExpectWhitespace;
- end;
- end
- else
- begin
- // don't report 'expected whitespace' if token does not match completely
- Offsets[False] := 0;
- Offsets[True] := Length(AttrDataTypeNames[dt]);
- if Found and (FSource.FBuf^ < 'A') then
- ExpectWhitespace
- else
- FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
- end;
- end;
- StoreLocation(FTokenStart);
- if FSource.Matches('#REQUIRED') then
- AttDef.Default := adRequired
- else if FSource.Matches('#IMPLIED') then
- AttDef.Default := adImplied
- else if FSource.Matches('#FIXED') then
- begin
- AttDef.Default := adFixed;
- ExpectWhitespace;
- end
- else
- AttDef.Default := adDefault;
- if AttDef.Default in [adDefault, adFixed] then
- begin
- if AttDef.DataType = dtId then
- ValidationError('An attribute of type ID cannot have a default value',[]);
- // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
- ExpectAttValue(AttDef.Data, dt <> dtCDATA);
- if not AttDef.ValidateSyntax(AttDef.Data^.FValueStr, FNamespaces) then
- ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
- end;
- if DiscardIt then
- AttDef.Free
- else
- ElDef.AddAttrDef(AttDef);
- except
- AttDef.Free;
- raise;
- end;
- SkipWhitespace;
- end;
- end;
- procedure TXMLTextReader.ParseEntityDecl; // [70]
- var
- IsPE, Exists: Boolean;
- Entity: TEntityDecl;
- Map: THashTable;
- Item: PHashItem;
- dummy: TLocation;
- begin
- Entity := TEntityDecl.Create;
- try
- Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
- Entity.FURI := FSource.SourceURI;
- if not SkipWhitespace(True) then
- FatalError('Expected whitespace');
- IsPE := CheckForChar('%');
- if IsPE then // [72]
- begin
- ExpectWhitespace;
- if FPEMap = nil then
- FPEMap := THashTable.Create(64, True);
- Map := FPEMap;
- end
- else
- Map := FDocType.Entities;
- Entity.FIsPE := IsPE;
- CheckName;
- CheckNCName;
- Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
- ExpectWhitespace;
- if FEntityValue.Buffer = nil then
- BufAllocate(FEntityValue, 256);
- if ParseLiteral(FEntityValue, ltEntity, False) then
- begin
- SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
- Entity.FCharCount := FEntityValue.Length;
- Entity.FStartLocation := FTokenStart;
- end
- else
- begin
- if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, dummy, False) then
- FatalError('Expected entity value or external ID');
- if not IsPE then // [76]
- begin
- if FSource.FBuf^ <> '>' then
- ExpectWhitespace;
- if FSource.Matches('NDATA') then
- begin
- ExpectWhitespace;
- StoreLocation(FTokenStart); { needed for AddForwardRef }
- CheckName;
- SetString(Entity.FNotationName, FName.Buffer, FName.Length);
- if FValidate and (FDocType.Notations.Get(FName.Buffer, FName.Length)=nil) then
- AddForwardRef(FName.Buffer, FName.Length);
- end;
- end;
- end;
- except
- Entity.Free;
- raise;
- end;
- // Repeated declarations of same entity are legal but must be ignored
- if FDTDProcessed and not Exists then
- begin
- Item^.Data := Entity;
- Entity.FName := Item^.Key;
- end
- else
- Entity.Free;
- end;
- procedure TXMLTextReader.ParseIgnoreSection;
- var
- IgnoreLoc: TLocation;
- IgnoreLevel: Integer;
- wc: WideChar;
- begin
- StoreLocation(IgnoreLoc);
- IgnoreLevel := 1;
- repeat
- FValue.Length := 0;
- wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
- if FSource.Matches('<![') then
- Inc(IgnoreLevel)
- else if FSource.Matches(']]>') then
- Dec(IgnoreLevel)
- else if wc <> #0 then
- FSource.NextChar
- else // PE's aren't recognized in ignore section, cannot ContextPop()
- DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
- until IgnoreLevel=0;
- end;
- procedure TXMLTextReader.ParseMarkupDecl; // [29]
- var
- IncludeLevel: Integer;
- CurrentEntity: TObject;
- IncludeLoc: TLocation;
- CondType: (ctUnknown, ctInclude, ctIgnore);
- begin
- IncludeLevel := 0;
- repeat
- SkipWhitespace;
- if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
- begin
- ExpectString(']]>');
- Dec(IncludeLevel);
- Continue;
- end;
- if not CheckForChar('<') then
- Break;
- CurrentEntity := FSource.FEntity;
- if FSource.FBuf^ = '?' then
- begin
- ParsePI;
- end
- else
- begin
- ExpectChar('!');
- if FSource.FBuf^ = '-' then
- ParseComment(True)
- else if CheckForChar('[') then
- begin
- if FSource.Kind = skInternalSubset then
- FatalError('Conditional sections are not allowed in internal subset', 1);
- SkipWhitespace;
- CondType := ctUnknown; // satisfy compiler
- if FSource.Matches('INCLUDE') then
- CondType := ctInclude
- else if FSource.Matches('IGNORE') then
- CondType := ctIgnore
- else
- FatalError('Expected "INCLUDE" or "IGNORE"');
- SkipWhitespace;
- CheckPENesting(CurrentEntity);
- ExpectChar('[');
- if CondType = ctInclude then
- begin
- if IncludeLevel = 0 then
- StoreLocation(IncludeLoc);
- Inc(IncludeLevel);
- end
- else if CondType = ctIgnore then
- ParseIgnoreSection;
- end
- else
- begin
- FInsideDecl := True;
- if FSource.Matches('ELEMENT') then
- ParseElementDecl
- else if FSource.Matches('ENTITY') then
- ParseEntityDecl
- else if FSource.Matches('ATTLIST') then
- ParseAttlistDecl
- else if FSource.Matches('NOTATION') then
- ParseNotationDecl
- else
- FatalError('Illegal markup declaration');
- SkipWhitespace;
- CheckPENesting(CurrentEntity);
- ExpectChar('>');
- FInsideDecl := False;
- end;
- end;
- until False;
- if IncludeLevel > 0 then
- DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
- if FSource.FBuf < FSource.FBufEnd then
- if (FSource.Kind <> skInternalSubset) or (FSource.FBuf^ <> ']') then
- FatalError('Illegal character in DTD');
- end;
- procedure TXMLTextReader.ParseDTD;
- begin
- FSource.Initialize;
- ParseMarkupDecl;
- end;
- procedure TXMLTextReader.Close;
- begin
- FReadState := rsClosed;
- FTokenStart.Line := 0;
- FTokenStart.LinePos := 0;
- end;
- function TXMLTextReader.GetAttributeCount: Integer;
- begin
- result := FAttrCount;
- end;
- function TXMLTextReader.GetAttribute(i: Integer): XMLString;
- begin
- if (i < 0) or (i >= FAttrCount) then
- raise EArgumentOutOfRangeException.Create('index');
- result := FNodeStack[FNesting+i+1].FValueStr;
- end;
- function TXMLTextReader.GetAttribute(const AName: XMLString): XMLString;
- var
- i: Integer;
- p: PHashItem;
- begin
- p := FNameTable.Find(PWideChar(AName), Length(AName));
- if Assigned(p) then
- for i := 1 to FAttrCount do
- if FNodeStack[FNesting+i].FQName = p then
- begin
- result := FNodeStack[FNesting+i].FValueStr;
- Exit;
- end;
- result := '';
- end;
- function TXMLTextReader.GetAttribute(const aLocalName, nsuri: XMLString): XMLString;
- var
- i: Integer;
- p: PWideChar;
- p1: PHashItem;
- node: PNodeData;
- begin
- p1 := FNameTable.Find(PWideChar(nsuri), Length(nsuri));
- if Assigned(p1) then
- for i := 1 to FAttrCount do
- begin
- node := @FNodeStack[FNesting+i];
- if node^.FNsUri = p1 then
- begin
- P := PWideChar(node^.FQName^.Key);
- if node^.FColonPos > 0 then
- Inc(P, node^.FColonPos+1);
- if (Length(node^.FQName^.Key)-node^.FColonPos-1 = Length(aLocalName)) and
- CompareMem(P, PWideChar(aLocalName), Length(aLocalName)*sizeof(WideChar)) then
- begin
- result := node^.FValueStr;
- Exit;
- end;
- end;
- end;
- result := '';
- end;
- function TXMLTextReader.GetDepth: Integer;
- begin
- result := FNesting;
- if FCurrAttrIndex >= 0 then
- Inc(result);
- if FAttrReadState <> arsNone then
- Inc(result);
- end;
- function TXMLTextReader.GetNameTable: THashTable;
- begin
- result := FNameTable;
- end;
- function TXMLTextReader.GetNodeType: TXmlNodeType;
- begin
- result := FCurrNode^.FNodeType;
- end;
- function TXMLTextReader.GetName: XMLString;
- begin
- if Assigned(FCurrNode^.FQName) then
- result := FCurrNode^.FQName^.Key
- else
- result := '';
- end;
- function TXMLTextReader.GetIsDefault: Boolean;
- begin
- result := FCurrNode^.FIsDefault;
- end;
- function TXMLTextReader.GetBaseUri: XMLString;
- begin
- result := FSource.SourceURI;
- end;
- function TXMLTextReader.GetXmlVersion: TXMLVersion;
- begin
- result := FSource.FXMLVersion;
- end;
- function TXMLTextReader.GetXmlEncoding: XMLString;
- begin
- result := FSource.FXMLEncoding;
- end;
- { IXmlLineInfo methods }
- function TXMLTextReader.GetHasLineInfo: Boolean;
- begin
- result := True;
- end;
- function TXMLTextReader.GetLineNumber: Integer;
- begin
- if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
- result := FCurrNode^.FLoc.Line
- else
- result := FTokenStart.Line;
- end;
- function TXMLTextReader.GetLinePosition: Integer;
- begin
- if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
- result := FCurrNode^.FLoc.LinePos
- else
- result := FTokenStart.LinePos;
- end;
- function TXMLTextReader.CurrentNodePtr: PPNodeData;
- begin
- result := @FCurrNode;
- end;
- function TXMLTextReader.LookupNamespace(const APrefix: XMLString): XMLString;
- begin
- if Assigned(FNSHelper) then
- result := FNSHelper.LookupNamespace(APrefix)
- else
- result := '';
- end;
- function TXMLTextReader.MoveToFirstAttribute: Boolean;
- begin
- result := False;
- if FAttrCount = 0 then
- exit;
- FCurrAttrIndex := 0;
- if FAttrReadState <> arsNone then
- CleanAttrReadState;
- FCurrNode := @FNodeStack[FNesting+1];
- result := True;
- end;
- function TXMLTextReader.MoveToNextAttribute: Boolean;
- begin
- result := False;
- if FCurrAttrIndex+1 >= FAttrCount then
- exit;
- Inc(FCurrAttrIndex);
- if FAttrReadState <> arsNone then
- CleanAttrReadState;
- FCurrNode := @FNodeStack[FNesting+1+FCurrAttrIndex];
- result := True;
- end;
- function TXMLTextReader.MoveToElement: Boolean;
- begin
- result := False;
- if FAttrReadState <> arsNone then
- CleanAttrReadState
- else if FCurrNode^.FNodeType <> ntAttribute then
- exit;
- FCurrNode := @FNodeStack[FNesting];
- FCurrAttrIndex := -1;
- result := True;
- end;
- function TXMLTextReader.ReadAttributeValue: Boolean;
- var
- attrNode: PNodeData;
- begin
- Result := False;
- if FAttrReadState = arsNone then
- begin
- if (FReadState <> rsInteractive) or (FCurrAttrIndex < 0) then
- Exit;
- attrNode := @FNodeStack[FNesting+FCurrAttrIndex+1];
- if attrNode^.FNext = nil then
- begin
- if attrNode^.FValueStr = '' then
- Exit; { we don't want to expose empty textnodes }
- FCurrNode := AllocNodeData(FNesting+FAttrCount+1);
- FCurrNode^.FNodeType := ntText;
- FCurrNode^.FValueStr := attrNode^.FValueStr;
- FCurrNode^.FLoc := attrNode^.FLoc2;
- end
- else
- FCurrNode := attrNode^.FNext;
- FAttrReadState := arsText;
- FAttrBaseSource := FSource;
- Result := True;
- end
- else // already reading, advance to next chunk
- begin
- if FSource = FAttrBaseSource then
- begin
- Result := Assigned(FCurrNode^.FNext);
- if Result then
- FCurrNode := FCurrNode^.FNext;
- end
- else
- begin
- NextAttrValueChunk;
- Result := True;
- end;
- end;
- end;
- procedure TXMLTextReader.NextAttrValueChunk;
- var
- wc: WideChar;
- tok: TAttributeReadState;
- begin
- if FAttrReadState = arsPushEntity then
- begin
- Inc(FNesting);
- { make sure that the location is available }
- AllocNodeData(FNesting+FAttrCount+1);
- FAttrReadState := arsText;
- end;
- FCurrNode := @FNodeStack[FNesting+FAttrCount+1];
- StoreLocation(FCurrNode^.FLoc);
- FValue.Length := 0;
- if FAttrReadState = arsText then
- repeat
- wc := FSource.SkipUntil(FValue, [#0, '&', #9, #10, #13]);
- if wc = '&' then
- begin
- if ParseRef(FValue) or ResolvePredefined then
- Continue;
- tok := arsEntity;
- end
- else if wc <> #0 then { #9,#10,#13 -> replace by #32 }
- begin
- FSource.NextChar;
- BufAppend(FValue, #32);
- Continue;
- end
- else // #0
- tok := arsEntityEnd;
- if FValue.Length <> 0 then
- begin
- FCurrNode^.FNodeType := ntText;
- FCurrNode^.FQName := nil;
- SetString(FCurrNode^.FValueStr, FValue.Buffer, FValue.Length);
- FAttrReadState := tok;
- Exit;
- end;
- Break;
- until False
- else
- tok := FAttrReadState;
- if tok = arsEntity then
- begin
- HandleEntityStart;
- FAttrReadState := arsText;
- end
- else if tok = arsEntityEnd then
- begin
- HandleEntityEnd;
- FAttrReadState := arsText;
- end;
- end;
- procedure TXMLTextReader.CleanAttrReadState;
- begin
- while FSource <> FAttrBaseSource do
- ContextPop(True);
- FAttrReadState := arsNone;
- end;
- function TXMLTextReader.GetHasValue: Boolean;
- begin
- result := FCurrNode^.FNodeType in [ntAttribute,ntText,ntCDATA,
- ntProcessingInstruction,ntComment,ntWhitespace,ntSignificantWhitespace,
- ntDocumentType];
- end;
- function TXMLTextReader.GetValue: XMLString;
- begin
- if (FCurrAttrIndex>=0) or (FAttrReadState <> arsNone) then
- result := FCurrNode^.FValueStr
- else
- SetString(result, FCurrNode^.FValueStart, FCurrNode^.FValueLength);
- end;
- function TXMLTextReader.GetPrefix: XMLString;
- begin
- if Assigned(FCurrNode^.FPrefix) then
- result := FCurrNode^.FPrefix^.Key
- else
- result := '';
- end;
- function TXMLTextReader.GetLocalName: XMLString;
- begin
- if FNamespaces and Assigned(FCurrNode^.FQName) then
- if FCurrNode^.FColonPos < 0 then
- Result := FCurrNode^.FQName^.Key
- else
- Result := Copy(FCurrNode^.FQName^.Key, FCurrNode^.FColonPos+2, MaxInt)
- else
- Result := '';
- end;
- function TXMLTextReader.GetNamespaceUri: XMLString;
- begin
- if Assigned(FCurrNode^.FNSURI) then
- result := FCurrNode^.FNSURI^.Key
- else
- result := '';
- end;
- procedure TXMLTextReader.SetEOFState;
- begin
- FCurrNode := @FNodeStack[0];
- Finalize(FCurrNode^);
- FillChar(FCurrNode^, sizeof(TNodeData), 0);
- FReadState := rsEndOfFile;
- end;
- procedure TXMLTextReader.ValidateCurrentNode;
- var
- ElDef: TElementDecl;
- AttDef: TAttributeDef;
- attr: PNodeData;
- i: Integer;
- begin
- case FCurrNode^.FNodeType of
- ntElement:
- begin
- if (FNesting = 0) and (not FFragmentMode) then
- begin
- if Assigned(FDocType) then
- begin
- if FDocType.FName <> FCurrNode^.FQName^.Key then
- DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
- end
- else
- DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
- end;
- ElDef := TElementDecl(FCurrNode^.FQName^.Data);
- if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
- DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
- if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
- DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
- PushVC(ElDef);
- if ElDef = nil then
- Exit;
- { Validate attributes }
- for i := 1 to FAttrCount do
- begin
- attr := @FNodeStack[FNesting+i];
- AttDef := TAttributeDef(attr^.FTypeInfo);
- if AttDef = nil then
- DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
- [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
- else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
- begin
- if FStandalone and AttDef.ExternallyDeclared then
- if attr^.FDenormalized then
- DoErrorPos(esError, 'In a standalone document, externally defined attribute cannot cause value normalization', attr^.FLoc2)
- else if i > FSpecifiedAttrs then
- DoError(esError, 'In a standalone document, attribute cannot have a default value defined externally');
- // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
- if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
- DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
- if not AttDef.ValidateSyntax(attr^.FValueStr, FNamespaces) then
- DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
- ValidateAttrValue(AttDef, attr);
- end;
- end;
- { Check presence of #REQUIRED attributes }
- if ElDef.HasRequiredAtts then
- for i := 0 to ElDef.AttrDefCount-1 do
- begin
- if FAttrDefIndex[i] = FAttrTag then
- Continue;
- AttDef := ElDef.AttrDefs[i];
- if AttDef.Default = adRequired then
- ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
- [AttDef.Data^.FQName^.Key, FCurrNode^.FQName^.Key], 0)
- end;
- end;
- ntEndElement:
- begin
- if FValidators[FValidatorNesting].Incomplete then
- ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
- if FValidatorNesting > 0 then
- Dec(FValidatorNesting);
- end;
- ntText, ntSignificantWhitespace:
- case FValidators[FValidatorNesting].FContentType of
- ctChildren:
- if FCurrNode^.FNodeType = ntText then
- ValidationError('Character data is not allowed in element-only content',[])
- else
- begin
- if FValidators[FValidatorNesting].FSaViolation then
- ValidationError('Standalone constraint violation',[]);
- FCurrNode^.FNodeType := ntWhitespace;
- end;
- ctEmpty:
- ValidationError('Character data is not allowed in EMPTY elements', []);
- end;
- ntCDATA:
- if FValidators[FValidatorNesting].FContentType = ctChildren then
- ValidationError('CDATA sections are not allowed in element-only content',[]);
- ntProcessingInstruction:
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('Processing instructions are not allowed within EMPTY elements', []);
- ntComment:
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('Comments are not allowed within EMPTY elements', []);
- ntDocumentType:
- ValidateDTD;
- end;
- end;
- procedure TXMLTextReader.HandleEntityStart;
- begin
- FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
- FCurrNode^.FNodeType := ntEntityReference;
- FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- FCurrNode^.FColonPos := -1;
- FCurrNode^.FValueStart := nil;
- FCurrNode^.FValueLength := 0;
- FCurrNode^.FValueStr := '';
- StoreLocation(FCurrNode^.FLoc);
- { point past '&' to first AnsiChar of entity name }
- Dec(FCurrNode^.FLoc.LinePos, FName.Length+1);
- end;
- procedure TXMLTextReader.HandleEntityEnd;
- begin
- ContextPop(True);
- if FNesting > 0 then Dec(FNesting);
- FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
- FCurrNode^.FNodeType := ntEndEntity;
- { point to trailing ';' }
- Inc(FCurrNode^.FLoc.LinePos, Length(FCurrNode^.FQName^.Key));
- end;
- procedure TXMLTextReader.ResolveEntity;
- var
- n: PNodeData;
- ent: TEntityDecl;
- begin
- if FCurrNode^.FNodeType <> ntEntityReference then
- raise EInvalidOperation.Create('Wrong node type');
- if FAttrReadState <> arsNone then
- begin
- { copy the EntityReference node to the stack if not already there }
- n := AllocNodeData(FNesting+FAttrCount+1);
- if n <> FCurrNode then
- n^ := FCurrNode^;
- ent := nil;
- if Assigned(FDocType) then
- ent := FDocType.Entities.Get(PWideChar(n^.FQName^.Key),Length(n^.FQName^.Key)) as TEntityDecl;
- ContextPush(ent, True);
- FAttrReadState := arsPushEntity;
- end
- else
- FNext := xtPushEntity;
- end;
- procedure TXMLTextReader.DoStartEntity;
- begin
- Inc(FNesting);
- FCurrNode := AllocNodeData(FNesting);
- ContextPush(FCurrEntity, True);
- FNext := xtText;
- end;
- // The code below does the bulk of the parsing, and must be as fast as possible.
- // To minimize CPU cache effects, methods from different classes are kept together
- function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
- wsflag: PBoolean): WideChar;
- var
- old: PWideChar;
- nonws: Boolean;
- wc: WideChar;
- begin
- nonws := False;
- repeat
- old := FBuf;
- repeat
- wc := FBuf^;
- if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
- (wc = #$2028))) then
- begin
- // strictly this is needed only for 2-byte lineendings
- BufAppendChunk(ToFill, old, FBuf);
- NewLine;
- old := FBuf;
- wc := FBuf^
- end
- else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
- (wc <> #9)) or (wc > #$FFFD) or
- (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
- FReader.FatalError('Invalid character');
- if (wc < #255) and (AnsiChar(ord(wc)) in Delim) then
- Break;
- // the checks above filter away everything below #32 that isn't a whitespace
- if wc > #32 then
- nonws := True;
- Inc(FBuf);
- until False;
- Result := wc;
- BufAppendChunk(ToFill, old, FBuf);
- until (Result <> #0) or (not Reload);
- if Assigned(wsflag) then
- wsflag^ := wsflag^ or nonws;
- end;
- const
- TextDelims: array[Boolean] of TSetOfChar = (
- [#0, '<', '&', '>'],
- [#0, '>']
- );
- textNodeTypes: array[Boolean] of TXMLNodeType = (
- ntSignificantWhitespace,
- ntText
- );
- function TXMLTextReader.ReadTopLevel: Boolean;
- var
- tok: TXMLToken;
- begin
- if FNext = xtFakeLF then
- begin
- Result := SetupFakeLF(xtText);
- Exit;
- end;
- StoreLocation(FTokenStart);
- if FNext = xtText then
- repeat
- SkipS;
- if FSource.FBuf^ = '<' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBufEnd < FSource.FBuf + 2 then
- FSource.Reload;
- if FSource.FBuf^ = '!' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf^ = '-' then
- begin
- if FIgnoreComments then
- begin
- ParseComment(True);
- Continue;
- end;
- tok := xtComment;
- end
- else
- tok := xtDoctype;
- end
- else if FSource.FBuf^ = '?' then
- tok := xtPI
- else
- begin
- CheckName;
- tok := xtElement;
- end;
- end
- else if FSource.FBuf >= FSource.FBufEnd then
- begin
- if FState < rsRoot then
- FatalError('Root element is missing');
- tok := xtEOF;
- end
- else
- FatalError('Illegal at document level');
- if FCanonical and (FState > rsRoot) and (tok <> xtEOF) then
- begin
- Result := SetupFakeLF(tok);
- Exit;
- end;
- Break;
- until False
- else // FNext <> xtText
- tok := FNext;
- if FCanonical and (FState < rsRoot) and (tok <> xtDoctype) then
- FNext := xtFakeLF
- else
- FNext := xtText;
- case tok of
- xtElement:
- begin
- if FState > rsRoot then
- FatalError('Only one top-level element allowed', FName.Length)
- else if FState < rsRoot then
- begin
- // dispose notation refs from DTD, if any
- ClearForwardRefs;
- FState := rsRoot;
- end;
- ParseStartTag;
- end;
- xtPI: ParsePI;
- xtComment: ParseComment(False);
- xtDoctype:
- begin
- ParseDoctypeDecl;
- if FCanonical then
- begin
- // recurse, effectively ignoring the DTD
- result := ReadTopLevel();
- Exit;
- end;
- end;
- xtEOF: SetEofState;
- end;
- Result := tok <> xtEOF;
- end;
- function TXMLTextReader.Read: Boolean;
- var
- nonWs: Boolean;
- wc: WideChar;
- InCDATA: Boolean;
- tok: TXMLToken;
- begin
- if FReadState > rsInteractive then
- begin
- Result := False;
- Exit;
- end;
- if FReadState = rsInitial then
- begin
- FReadState := rsInteractive;
- FSource.Initialize;
- FNext := xtText;
- end;
- if FAttrReadState <> arsNone then
- CleanAttrReadState;
- if FNext = xtPopEmptyElement then
- begin
- FNext := xtPopElement;
- FCurrNode^.FNodeType := ntEndElement;
- if FAttrCleanupFlag then
- CleanupAttributes;
- FAttrCount := 0;
- FCurrAttrIndex := -1;
- Result := True;
- Exit;
- end;
- if FNext = xtPushElement then
- begin
- if FAttrCleanupFlag then
- CleanupAttributes;
- FAttrCount := 0;
- Inc(FNesting);
- FCurrAttrIndex := -1;
- FNext := xtText;
- end
- else if FNext = xtPopElement then
- PopElement
- else if FNext = xtPushEntity then
- DoStartEntity;
- if FState <> rsRoot then
- begin
- Result := ReadTopLevel;
- Exit;
- end;
- InCDATA := (FNext = xtCDSect);
- StoreLocation(FTokenStart);
- nonWs := False;
- FValue.Length := 0;
- if FNext in [xtCDSect, xtText] then
- repeat
- wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
- if wc = '<' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBufEnd < FSource.FBuf + 2 then
- FSource.Reload;
- if FSource.FBuf^ = '/' then
- tok := xtEndElement
- else if CheckName([cnOptional]) then
- tok := xtElement
- else if FSource.FBuf^ = '!' then
- begin
- Inc(FSource.FBuf);
- if FSource.FBuf^ = '[' then
- begin
- ExpectString('[CDATA[');
- StoreLocation(FTokenStart);
- InCDATA := True;
- if FCDSectionsAsText or (FValue.Length = 0) then
- Continue;
- tok := xtCDSect;
- end
- else if FSource.FBuf^ = '-' then
- begin
- { Ignoring comments is tricky in validating mode; discarding a comment which
- is the only child of an EMPTY element will make that element erroneously appear
- as valid. Therefore, at this point we discard only comments which are preceded
- by some text (since presence of text already renders an EMPTY element invalid).
- Other comments should be reported to validation part and discarded there. }
- if FIgnoreComments and (FValue.Length > 0) then
- begin
- ParseComment(True);
- Continue;
- end;
- tok := xtComment;
- end
- else
- tok := xtDoctype;
- end
- else if FSource.FBuf^ = '?' then
- tok := xtPI
- else
- RaiseNameNotFound;
- end
- else if wc = #0 then
- begin
- if InCDATA then
- FatalError('Unterminated CDATA section', -1);
- if FNesting > FSource.FStartNesting then
- FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting-1].FQName^.Key]);
- if Assigned(FSource.FParent) then
- begin
- if FExpandEntities and ContextPop then
- Continue
- else
- tok := xtEntityEnd;
- end
- else
- tok := xtEOF;
- end
- else if wc = '>' then
- begin
- BufAppend(FValue, wc);
- FSource.NextChar;
- if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
- (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
- if InCData then // got a ']]>' separator
- begin
- Dec(FValue.Length, 3);
- InCDATA := False;
- if FCDSectionsAsText then
- Continue;
- SetNodeInfoWithValue(ntCDATA);
- FNext := xtText;
- Result := True;
- Exit;
- end
- else
- FatalError('Literal '']]>'' is not allowed in text', 3);
- end
- else if wc = '&' then
- begin
- if FValidators[FValidatorNesting].FContentType = ctEmpty then
- ValidationError('References are illegal in EMPTY elements', []);
- if ParseRef(FValue) or ResolvePredefined then
- begin
- nonWs := True; // CharRef to whitespace is not considered whitespace
- Continue;
- end
- else
- begin
- FCurrEntity := EntityCheck;
- if Assigned(FCurrEntity) and FExpandEntities then
- begin
- ContextPush(FCurrEntity);
- Continue;
- end;
- tok := xtEntity;
- end;
- end;
- if FValue.Length <> 0 then
- begin
- SetNodeInfoWithValue(textNodeTypes[nonWs]);
- FNext := tok;
- Result := True;
- Exit;
- end;
- Break;
- until False
- else // not (FNext in [xtText, xtCDSect])
- tok := FNext;
- FNext := xtText;
- case tok of
- xtEntity: HandleEntityStart;
- xtEntityEnd: HandleEntityEnd;
- xtElement: ParseStartTag;
- xtEndElement: ParseEndTag;
- xtPI: ParsePI;
- xtDoctype: ParseDoctypeDecl;
- xtComment: ParseComment(False);
- xtEOF: SetEofState;
- end;
- Result := tok <> xtEOF;
- end;
- procedure TXMLCharSource.NextChar;
- begin
- Inc(FBuf);
- if FBuf >= FBufEnd then
- Reload;
- end;
- procedure TXMLTextReader.ExpectChar(wc: WideChar);
- begin
- if FSource.FBuf^ = wc then
- FSource.NextChar
- else
- FatalError(wc);
- end;
- // Element name already in FNameBuffer
- procedure TXMLTextReader.ParseStartTag; // [39] [40] [44]
- var
- ElDef: TElementDecl;
- IsEmpty: Boolean;
- ElName: PHashItem;
- b: TBinding;
- Len: Integer;
- begin
- ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- ElDef := TElementDecl(ElName^.Data);
- if Assigned(ElDef) then
- Len := ElDef.AttrDefCount+8 { overallocate a bit }
- else
- Len := 0;
- // (re)initialize array of attribute definition tags
- if (Len-8 > Length(FAttrDefIndex)) or (FAttrTag = 0) then
- begin
- SetLength(FAttrDefIndex, Len);
- for Len := 0 to High(FAttrDefIndex) do
- FAttrDefIndex[Len] := FAttrTag;
- end;
- // we're about to process a new set of attributes
- {$push}{$r-,q-}
- Dec(FAttrTag);
- {$pop}
- IsEmpty := False;
- FAttrCount := 0;
- FCurrAttrIndex := -1;
- FPrefixedAttrs := 0;
- FSpecifiedAttrs := 0;
- FCurrNode := AllocNodeData(FNesting);
- FCurrNode^.FQName := ElName;
- FCurrNode^.FNodeType := ntElement;
- FCurrNode^.FColonPos := FColonPos;
- StoreLocation(FCurrNode^.FLoc);
- Dec(FCurrNode^.FLoc.LinePos, FName.Length);
- if FNamespaces then
- begin
- FNSHelper.PushScope;
- if FColonPos > 0 then
- FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
- end;
- while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
- begin
- SkipS(True);
- if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
- Break;
- ParseAttribute(ElDef);
- end;
- if FSource.FBuf^ = '/' then
- begin
- IsEmpty := True;
- FSource.NextChar;
- end;
- ExpectChar('>');
- if Assigned(ElDef) and ElDef.NeedsDefaultPass then
- ProcessDefaultAttributes(ElDef);
- // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
- FCurrNode := @FNodeStack[FNesting];
- if FNamespaces then
- begin
- { Assign namespace URIs to prefixed attrs }
- if FPrefixedAttrs <> 0 then
- ProcessNamespaceAtts;
- { Expand the element name }
- if Assigned(FCurrNode^.FPrefix) then
- begin
- b := TBinding(FCurrNode^.FPrefix^.Data);
- if not (Assigned(b) and Assigned(b.uri) and (b.uri^.Key <> '')) then
- DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
- FCurrNode^.FNsUri := b.uri;
- end
- else
- begin
- b := FNSHelper.DefaultNSBinding;
- if Assigned(b) then
- FCurrNode^.FNsUri := b.uri;
- end;
- end;
- if not IsEmpty then
- begin
- if not FPreserveWhitespace then // critical for testsuite compliance
- SkipS;
- FNext := xtPushElement;
- end
- else
- FNext := xtPopEmptyElement;
- end;
- procedure TXMLTextReader.ParseEndTag; // [42]
- var
- ElName: PHashItem;
- begin
- if FNesting <= FSource.FStartNesting then
- FatalError('End-tag is not allowed here');
- if FNesting > 0 then Dec(FNesting);
- Inc(FSource.FBuf);
- FCurrNode := @FNodeStack[FNesting]; // move off the possible child
- FCurrNode^.FNodeType := ntEndElement;
- StoreLocation(FTokenStart);
- FCurrNode^.FLoc := FTokenStart;
- ElName := FCurrNode^.FQName;
- if not FSource.MatchesLong(ElName^.Key) then
- FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], -1);
- if FSource.FBuf^ = '>' then // this handles majority of cases
- FSource.NextChar
- else
- begin // gives somewhat incorrect message for <a></aa>
- SkipS;
- ExpectChar('>');
- end;
- FNext := xtPopElement;
- end;
- procedure TXMLTextReader.ParseAttribute(ElDef: TElementDecl);
- var
- attrName: PHashItem;
- attrData: PNodeData;
- AttDef: TAttributeDef;
- i: Integer;
- begin
- CheckName;
- attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
- attrData := AllocAttributeData;
- attrData^.FQName := attrName;
- attrData^.FColonPos := FColonPos;
- StoreLocation(attrData^.FLoc);
- Dec(attrData^.FLoc.LinePos, FName.Length);
- FSpecifiedAttrs := FAttrCount;
- if Assigned(ElDef) then
- begin
- AttDef := ElDef.GetAttrDef(attrName);
- // mark attribute as specified
- if Assigned(AttDef) then
- FAttrDefIndex[AttDef.Index] := FAttrTag;
- end
- else
- AttDef := nil;
- attrData^.FTypeInfo := AttDef;
- // check for duplicates
- for i := 1 to FAttrCount-1 do
- if FNodeStack[FNesting+i].FQName = attrName then
- FatalError('Duplicate attribute', FName.Length);
- if FNamespaces then
- begin
- if ((FName.Length = 5) or (FColonPos = 5)) and
- (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
- (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
- (FName.Buffer[4] = 's') then
- begin
- if FColonPos > 0 then
- attrData^.FPrefix := FStdPrefix_xmlns;
- attrData^.FNsUri := FStdUri_xmlns;
- end
- else if FColonPos > 0 then
- begin
- attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
- Inc(FPrefixedAttrs);
- end;
- end;
- ExpectEq;
- ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
- if Assigned(attrData^.FNsUri) then
- begin
- if (not AddBinding(attrData)) and FCanonical then
- begin
- CleanupAttribute(attrData);
- Dec(FAttrCount);
- Dec(FSpecifiedAttrs);
- end;
- end;
- end;
- procedure TXMLTextReader.AddForwardRef(Buf: PWideChar; Length: Integer);
- var
- w: PForwardRef;
- begin
- if FForwardRefs = nil then
- FForwardRefs := TFPList.Create;
- New(w);
- SetString(w^.Value, Buf, Length);
- w^.Loc := FTokenStart;
- FForwardRefs.Add(w);
- end;
- procedure TXMLTextReader.ClearForwardRefs;
- var
- I: Integer;
- begin
- if Assigned(FForwardRefs) then
- begin
- for I := 0 to FForwardRefs.Count-1 do
- Dispose(PForwardRef(FForwardRefs.List^[I]));
- FForwardRefs.Clear;
- end;
- end;
- procedure TXMLTextReader.ValidateIdRefs;
- var
- I: Integer;
- begin
- if Assigned(FForwardRefs) then
- begin
- for I := 0 to FForwardRefs.Count-1 do
- with PForwardRef(FForwardRefs.List^[I])^ do
- if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
- DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
- ClearForwardRefs;
- end;
- end;
- procedure TXMLTextReader.ProcessDefaultAttributes(ElDef: TElementDecl);
- var
- I: Integer;
- AttDef: TAttributeDef;
- attrData: PNodeData;
- begin
- for I := 0 to ElDef.AttrDefCount-1 do
- begin
- if FAttrDefIndex[I] <> FAttrTag then // this one wasn't specified
- begin
- AttDef := ElDef.AttrDefs[I];
- case AttDef.Default of
- adDefault, adFixed: begin
- attrData := AllocAttributeData;
- attrData^ := AttDef.Data^;
- if FCanonical then
- attrData^.FIsDefault := False;
- if FNamespaces then
- begin
- if AttDef.IsNamespaceDecl then
- begin
- if attrData^.FColonPos > 0 then
- attrData^.FPrefix := FStdPrefix_xmlns;
- attrData^.FNsUri := FStdUri_xmlns;
- if (not AddBinding(attrData)) and FCanonical then
- Dec(FAttrCount);
- end
- else if attrData^.FColonPos > 0 then
- begin
- attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
- Inc(FPrefixedAttrs);
- end
- else
- attrData^.FNsUri := FEmptyStr;
- end;
- end;
- end;
- end;
- end;
- end;
- function TXMLTextReader.AddBinding(attrData: PNodeData): Boolean;
- var
- nsUri, Pfx: PHashItem;
- begin
- nsUri := FNameTable.FindOrAdd(attrData^.FValueStr);
- if attrData^.FColonPos > 0 then
- Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
- else
- Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
- { 'xml' is allowed to be bound to the correct namespace }
- if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
- (Pfx = FStdPrefix_xmlns) or
- (nsUri = FStduri_xmlns) then
- begin
- if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
- DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
- else
- DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
- end;
- if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
- DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
- Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsUri);
- if Result then
- FNSHelper.BindPrefix(nsUri, Pfx);
- end;
- procedure TXMLTextReader.ProcessNamespaceAtts;
- var
- I, J: Integer;
- Pfx, AttrName: PHashItem;
- attrData: PNodeData;
- b: TBinding;
- begin
- FNsAttHash.Init(FPrefixedAttrs);
- for I := 1 to FAttrCount do
- begin
- attrData := @FNodeStack[FNesting+i];
- if Assigned(attrData^.FNsUri) then
- Continue;
- if (attrData^.FColonPos < 1) then
- begin
- attrData^.FNsUri := FEmptyStr;
- Continue;
- end;
- Pfx := attrData^.FPrefix;
- b := TBinding(Pfx^.Data);
- if not (Assigned(b) and Assigned (b.uri) and (b.uri^.Key <> '')) then
- DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
- { detect duplicates }
- J := attrData^.FColonPos+1;
- AttrName := attrData^.FQName;
- if FNsAttHash.Locate(b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
- DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
- attrData^.FNsUri := b.uri;
- end;
- end;
- function TXMLTextReader.ParseExternalID(out SysID, PubID: XMLString; // [75]
- out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
- var
- I: Integer;
- wc: WideChar;
- begin
- Result := False;
- if FSource.Matches('SYSTEM') then
- SysIdOptional := False
- else if FSource.Matches('PUBLIC') then
- begin
- ExpectWhitespace;
- ParseLiteral(FValue, ltPubid, True);
- PubIDLoc := FTokenStart;
- SetString(PubID, FValue.Buffer, FValue.Length);
- for I := 1 to Length(PubID) do
- begin
- wc := PubID[I];
- if (wc > #255) or not (AnsiChar(ord(wc)) in PubidChars) then
- FatalError('Illegal Public ID literal', -1);
- end;
- end
- else
- Exit;
- if SysIdOptional then
- SkipWhitespace
- else
- ExpectWhitespace;
- if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
- SetString(SysID, FValue.Buffer, FValue.Length);
- Result := True;
- end;
- procedure TXMLTextReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
- var
- L, StartPos, EndPos: Integer;
- Entity: TEntityDecl;
- begin
- L := Length(attrData^.FValueStr);
- case AttrDef.DataType of
- dtId: begin
- if not AddID(attrData) then
- DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
- end;
- dtIdRef, dtIdRefs: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
- Inc(EndPos);
- if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
- AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
- StartPos := EndPos + 1;
- end;
- end;
- dtEntity, dtEntities: begin
- StartPos := 1;
- while StartPos <= L do
- begin
- EndPos := StartPos;
- while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
- Inc(EndPos);
- Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
- if (Entity = nil) or (Entity.FNotationName = '') then
- ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
- StartPos := EndPos + 1;
- end;
- end;
- end;
- end;
- procedure TXMLTextReader.ValidateDTD;
- var
- I: Integer;
- begin
- if Assigned(FForwardRefs) then
- begin
- for I := 0 to FForwardRefs.Count-1 do
- with PForwardRef(FForwardRefs[I])^ do
- if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
- DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
- end;
- end;
- function TXMLTextReader.AddId(aNodeData: PNodeData): Boolean;
- var
- e: PHashItem;
- begin
- if FIDMap = nil then
- FIDMap := THashTable.Create(256, False);
- e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
- Result := not Result;
- if Result then
- aNodeData^.FIDEntry := e;
- end;
- function TXMLTextReader.AllocAttributeData: PNodeData;
- begin
- Result := AllocNodeData(FNesting + FAttrCount + 1);
- Result^.FNodeType := ntAttribute;
- Result^.FIsDefault := False;
- Inc(FAttrCount);
- end;
- procedure TXMLTextReader.AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
- const nameLoc, valueLoc: TLocation);
- begin
- with AllocAttributeData^ do
- begin
- FQName := aName;
- FColonPos := -1;
- FValueStr := aValue;
- FLoc := nameLoc;
- FLoc2 := valueLoc;
- end;
- end;
- function TXMLTextReader.AllocNodeData(AIndex: Integer): PNodeData;
- begin
- {make sure we have an extra slot to place child text/comment/etc}
- if AIndex >= Length(FNodeStack)-1 then
- SetLength(FNodeStack, AIndex * 2 + 2);
- Result := @FNodeStack[AIndex];
- Result^.FNext := nil;
- Result^.FPrefix := nil;
- Result^.FNsUri := nil;
- Result^.FIDEntry := nil;
- Result^.FValueStart := nil;
- Result^.FValueLength := 0;
- end;
- procedure TXMLTextReader.AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
- var
- chunk: PNodeData;
- begin
- { when parsing DTD, don't take ownership of allocated data }
- chunk := FFreeAttrChunk;
- if Assigned(chunk) and (FState <> rsDTD) then
- begin
- FFreeAttrChunk := chunk^.FNext;
- chunk^.FNext := nil;
- end
- else { no free chunks, create a new one }
- chunk := AllocMem(sizeof(TNodeData));
- APrev^.FNext := chunk;
- APrev := chunk;
- { assume text node, for entity refs it is overridden later }
- chunk^.FNodeType := ntText;
- chunk^.FQName := nil;
- chunk^.FColonPos := -1;
- { without PWideChar typecast and in $T-, FPC treats '@' result as PAnsiChar... }
- SetString(chunk^.FValueStr, PWideChar(@FValue.Buffer[Offset]), FValue.Length-Offset);
- end;
- procedure TXMLTextReader.CleanupAttributes;
- var
- i: Integer;
- begin
- {cleanup only specified attributes; default ones are owned by DTD}
- for i := 1 to FSpecifiedAttrs do
- CleanupAttribute(@FNodeStack[FNesting+i]);
- FAttrCleanupFlag := False;
- end;
- procedure TXMLTextReader.CleanupAttribute(aNode: PNodeData);
- var
- chunk: PNodeData;
- begin
- if Assigned(aNode^.FNext) then
- begin
- chunk := aNode^.FNext;
- while Assigned(chunk^.FNext) do
- chunk := chunk^.FNext;
- chunk^.FNext := FFreeAttrChunk;
- FFreeAttrChunk := aNode^.FNext;
- aNode^.FNext := nil;
- end;
- end;
- procedure TXMLTextReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
- begin
- FCurrNode := @FNodeStack[FNesting];
- FCurrNode^.FNodeType := typ;
- FCurrNode^.FQName := AName;
- FCurrNode^.FColonPos := -1;
- FCurrNode^.FValueStart := FValue.Buffer;
- FCurrNode^.FValueLength := FValue.Length;
- end;
- function TXMLTextReader.SetupFakeLF(nextstate: TXMLToken): Boolean;
- begin
- FValue.Buffer[0] := #10;
- FValue.Length := 1;
- SetNodeInfoWithValue(ntWhitespace,nil);
- FNext := nextstate;
- Result := True;
- end;
- procedure TXMLTextReader.PushVC(aElDef: TElementDecl);
- begin
- Inc(FValidatorNesting);
- if FValidatorNesting >= Length(FValidators) then
- SetLength(FValidators, FValidatorNesting * 2);
- with FValidators[FValidatorNesting] do
- begin
- FElementDef := aElDef;
- FCurCP := nil;
- FFailed := False;
- FContentType := ctAny;
- FSaViolation := False;
- if Assigned(aElDef) then
- begin
- FContentType := aElDef.ContentType;
- FSaViolation := FStandalone and aElDef.ExternallyDeclared;
- end;
- end;
- end;
- procedure TXMLTextReader.PopElement;
- begin
- if FNamespaces then
- FNSHelper.PopScope;
- if (FNesting = 0) and (not FFragmentMode) then
- FState := rsEpilog;
- FCurrNode := @FNodeStack[FNesting];
- FNext := xtText;
- end;
- { TElementValidator }
- function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
- var
- Next: TContentParticle;
- begin
- Result := True;
- // if element is not declared, non-validity has been already reported, no need to report again...
- if Assigned(Def) and Assigned(FElementDef) then
- begin
- case FElementDef.ContentType of
- ctEmpty: Result := False;
- ctChildren, ctMixed: begin
- if FFailed then // if already detected a mismatch, don't waste time
- Exit;
- if FCurCP = nil then
- Next := FElementDef.RootCP.FindFirst(Def)
- else
- Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
- Result := Assigned(Next);
- if Result then
- FCurCP := Next
- else
- FFailed := True; // used to prevent extra error at the end of element
- end;
- // ctAny, ctUndeclared: returns True by default
- end;
- end;
- end;
- function TElementValidator.Incomplete: Boolean;
- begin
- if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
- begin
- if FCurCP <> nil then
- Result := FCurCP.MoreRequired(0) { arg ignored here }
- else
- Result := FElementDef.RootCP.IsRequired;
- end
- else
- Result := False;
- end;
- end.
|