xmltextreader.pp 110 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981
  1. {
  2. This file is part of the Free Component Library
  3. TXMLTextReader, a streaming text XML reader
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit xmltextreader;
  13. {$mode objfpc}{$h+}
  14. interface
  15. uses
  16. SysUtils, Classes, xmlutils, xmlreader, dtdmodel;
  17. type
  18. TDecoder = record
  19. Context: Pointer;
  20. Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  21. Cleanup: procedure(Context: Pointer); stdcall;
  22. end;
  23. TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
  24. TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
  25. TXMLTextReader = class;
  26. TXMLCharSource = class(TObject)
  27. private
  28. FBuf: PWideChar;
  29. FBufEnd: PWideChar;
  30. FReader: TXMLTextReader;
  31. FParent: TXMLCharSource;
  32. FEntity: TEntityDecl;
  33. FLineNo: Integer;
  34. LFPos: PWideChar;
  35. FXML11Rules: Boolean;
  36. FSourceURI: XMLString;
  37. FCharCount: Cardinal;
  38. FStartNesting: Integer;
  39. FXMLVersion: TXMLVersion;
  40. FXMLEncoding: XMLString;
  41. function GetSourceURI: XMLString;
  42. protected
  43. function Reload: Boolean; virtual;
  44. public
  45. Kind: TXMLSourceKind;
  46. constructor Create(const AData: XMLString);
  47. procedure NextChar;
  48. procedure NewLine; virtual;
  49. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  50. wsflag: PBoolean = nil): WideChar; virtual;
  51. procedure Initialize; virtual;
  52. function SetEncoding(const AEncoding: string): Boolean; virtual;
  53. function Matches(const arg: XMLString): Boolean;
  54. function MatchesLong(const arg: XMLString): Boolean;
  55. property SourceURI: XMLString read GetSourceURI write FSourceURI;
  56. end;
  57. TElementValidator = object
  58. FElementDef: TElementDecl;
  59. FCurCP: TContentParticle;
  60. FFailed: Boolean;
  61. FSaViolation: Boolean;
  62. FContentType: TElementContentType; // =ctAny when FElementDef is nil
  63. function IsElementAllowed(Def: TElementDecl): Boolean;
  64. function Incomplete: Boolean;
  65. end;
  66. TNodeDataDynArray = array of TNodeData;
  67. TValidatorDynArray = array of TElementValidator;
  68. TCheckNameFlags = set of (cnOptional, cnToken);
  69. TXMLToken = (xtNone, xtEOF, xtText, xtElement, xtEndElement,
  70. xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
  71. xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity, xtFakeLF);
  72. TAttributeReadState = (arsNone, arsText, arsEntity, arsEntityEnd, arsPushEntity);
  73. TLiteralType = (ltPlain, ltPubid, ltEntity);
  74. TEntityEvent = procedure(Sender: TXMLTextReader; AEntity: TEntityDecl) of object;
  75. TXMLTextReader = class(TXMLReader, IXmlLineInfo, IGetNodeDataPtr)
  76. private
  77. FSource: TXMLCharSource;
  78. FNameTable: THashTable;
  79. FXML11: Boolean;
  80. FNameTableOwned: Boolean;
  81. FState: (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
  82. FHavePERefs: Boolean;
  83. FInsideDecl: Boolean;
  84. FValue: TWideCharBuf;
  85. FEntityValue: TWideCharBuf;
  86. FName: TWideCharBuf;
  87. FTokenStart: TLocation;
  88. FStandalone: Boolean;
  89. FDocType: TDTDModel;
  90. FPEMap: THashTable;
  91. FForwardRefs: TFPList;
  92. FDTDStartPos: PWideChar;
  93. FIntSubset: TWideCharBuf;
  94. FAttrTag: Cardinal;
  95. FDTDProcessed: Boolean;
  96. FFragmentMode: Boolean;
  97. FNext: TXMLToken;
  98. FCurrEntity: TEntityDecl;
  99. FIDMap: THashTable;
  100. FAttrDefIndex: array of Cardinal;
  101. FNSHelper: TNSSupport;
  102. FNsAttHash: TDblHashArray;
  103. FEmptyStr: PHashItem;
  104. FStdPrefix_xml: PHashItem;
  105. FStdPrefix_xmlns: PHashItem;
  106. FStdUri_xml: PHashItem;
  107. FStdUri_xmlns: PHashItem;
  108. FColonPos: Integer;
  109. FValidate: Boolean; // parsing options, copy of FCtrl.Options
  110. FPreserveWhitespace: Boolean;
  111. FExpandEntities: Boolean;
  112. FIgnoreComments: Boolean;
  113. FCDSectionsAsText: Boolean;
  114. FNamespaces: Boolean;
  115. FDisallowDoctype: Boolean;
  116. FCanonical: Boolean;
  117. FMaxChars: Cardinal;
  118. FOnError: TXMLErrorEvent;
  119. FCurrAttrIndex: Integer;
  120. FOnEntity: TEntityEvent;
  121. procedure CleanAttrReadState;
  122. procedure SetEOFState;
  123. procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
  124. procedure SetSource(ASource: TXMLCharSource);
  125. function ContextPush(AEntity: TEntityDecl; DummySource: Boolean = False): Boolean;
  126. function ContextPop(Forced: Boolean = False): Boolean;
  127. function ParseQuantity: TCPQuant;
  128. procedure StoreLocation(out Loc: TLocation);
  129. procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  130. procedure AddForwardRef(Buf: PWideChar; Length: Integer);
  131. procedure ClearForwardRefs;
  132. procedure CallErrorHandler(E: EXMLReadError);
  133. function FindOrCreateElDef: TElementDecl;
  134. function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  135. procedure CheckMaxChars(ToAdd: Cardinal);
  136. function AllocNodeData(AIndex: Integer): PNodeData;
  137. function AllocAttributeData: PNodeData;
  138. procedure AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  139. procedure AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
  140. const nameLoc, valueLoc: TLocation);
  141. procedure CleanupAttribute(aNode: PNodeData);
  142. procedure CleanupAttributes;
  143. procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  144. function SetupFakeLF(nextstate: TXMLToken): Boolean;
  145. function AddId(aNodeData: PNodeData): Boolean;
  146. function QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  147. function _AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  148. function _Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  149. procedure SetFragmentMode(aValue: Boolean);
  150. protected
  151. FNesting: Integer;
  152. FCurrNode: PNodeData;
  153. FAttrCount: Integer;
  154. FPrefixedAttrs: Integer;
  155. FSpecifiedAttrs: Integer;
  156. FNodeStack: TNodeDataDynArray;
  157. FValidatorNesting: Integer;
  158. FValidators: TValidatorDynArray;
  159. FFreeAttrChunk: PNodeData;
  160. FAttrCleanupFlag: Boolean;
  161. // ReadAttributeValue state
  162. FAttrReadState: TAttributeReadState;
  163. FAttrBaseSource: TObject;
  164. procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
  165. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  166. const ErrPos: TLocation); overload;
  167. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  168. const args: array of const; const ErrPos: TLocation); overload;
  169. procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
  170. procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
  171. procedure FatalError(Expected: WideChar); overload;
  172. function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
  173. function SkipS(required: Boolean = False): Boolean;
  174. procedure ExpectWhitespace;
  175. procedure ExpectString(const s: String);
  176. procedure ExpectChar(wc: WideChar);
  177. function CheckForChar(c: WideChar): Boolean;
  178. procedure RaiseNameNotFound;
  179. function CheckName(aFlags: TCheckNameFlags = []): Boolean;
  180. procedure CheckNCName;
  181. function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  182. Required: Boolean): Boolean;
  183. procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
  184. procedure ParseComment(discard: Boolean); // [15]
  185. procedure ParsePI; // [16]
  186. procedure ParseXmlOrTextDecl(TextDecl: Boolean);
  187. procedure ExpectEq;
  188. procedure ParseDoctypeDecl; // [28]
  189. procedure ParseMarkupDecl; // [29]
  190. procedure ParseIgnoreSection;
  191. procedure ParseStartTag; // [39]
  192. procedure ParseEndTag; // [42]
  193. procedure HandleEntityStart;
  194. procedure HandleEntityEnd;
  195. procedure DoStartEntity;
  196. procedure ParseAttribute(ElDef: TElementDecl);
  197. function ReadTopLevel: Boolean;
  198. procedure NextAttrValueChunk;
  199. function GetHasLineInfo: Boolean;
  200. function GetLineNumber: Integer;
  201. function GetLinePosition: Integer;
  202. function CurrentNodePtr: PPNodeData;
  203. public
  204. function Read: Boolean; override;
  205. function MoveToFirstAttribute: Boolean; override;
  206. function MoveToNextAttribute: Boolean; override;
  207. function MoveToElement: Boolean; override;
  208. function ReadAttributeValue: Boolean; override;
  209. procedure Close; override;
  210. procedure ResolveEntity; override;
  211. function GetAttribute(i: Integer): XMLString; override;
  212. function GetAttribute(const AName: XMLString): XMLString; override;
  213. function GetAttribute(const ALocalName, nsuri: XMLString): XMLString; override;
  214. function LookupNamespace(const APrefix: XMLString): XMLString; override;
  215. property LineNumber: Integer read GetLineNumber;
  216. property LinePosition: Integer read GetLinePosition;
  217. protected
  218. function GetXmlVersion: TXMLVersion;
  219. function GetXmlEncoding: XMLString;
  220. function GetNameTable: THashTable; override;
  221. function GetDepth: Integer; override;
  222. function GetNodeType: TXmlNodeType; override;
  223. function GetName: XMLString; override;
  224. function GetValue: XMLString; override;
  225. function GetLocalName: XMLString; override;
  226. function GetPrefix: XMLString; override;
  227. function GetNamespaceUri: XMLString; override;
  228. function GetHasValue: Boolean; override;
  229. function GetAttributeCount: Integer; override;
  230. function GetBaseUri: XMLString; override;
  231. function GetIsDefault: Boolean; override;
  232. function ResolvePredefined: Boolean;
  233. function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
  234. function PrefetchEntity(AEntity: TEntityDecl): Boolean;
  235. procedure StartPE;
  236. function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  237. function ParseExternalID(out SysID, PubID: XMLString; // [75]
  238. out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
  239. procedure CheckPENesting(aExpected: TObject);
  240. procedure ParseEntityDecl;
  241. procedure ParseAttlistDecl;
  242. procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
  243. procedure ParseElementDecl;
  244. procedure ParseNotationDecl;
  245. function ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  246. procedure ProcessDefaultAttributes(ElDef: TElementDecl);
  247. procedure ProcessNamespaceAtts;
  248. function AddBinding(attrData: PNodeData): Boolean;
  249. procedure PushVC(aElDef: TElementDecl);
  250. procedure PopElement;
  251. procedure ValidateDTD;
  252. procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
  253. procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
  254. procedure DTDReloadHook;
  255. procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  256. procedure SetOptions(AValue: TXMLReaderSettings);
  257. procedure SetNametable(ANameTable: THashTable);
  258. public
  259. constructor Create(var AFile: Text; ANameTable: THashTable); overload;
  260. constructor Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable); overload;
  261. constructor Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
  262. constructor Create(ASrc: TXMLCharSource; AParent: TXMLTextReader); overload;
  263. constructor Create(const uri: XMLString; ASettings: TXMLReaderSettings); overload;
  264. constructor Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings); overload;
  265. destructor Destroy; override;
  266. procedure AfterConstruction; override;
  267. property OnEntity: TEntityEvent read FOnEntity write FOnEntity;
  268. { stuff needed for TLoader }
  269. property Standalone: Boolean read FStandalone;
  270. property DtdSchemaInfo: TDTDModel read FDocType write FDocType;
  271. property XML11: Boolean write FXML11;
  272. property XMLVersion: TXMLVersion read GetXMLVersion;
  273. property XMLEncoding: XMLString read GetXMLEncoding;
  274. property IDMap: THashTable read FIDMap write FIDMap;
  275. property ExpandEntities: Boolean read FExpandEntities;
  276. property Validate: Boolean read FValidate;
  277. property PreserveWhitespace: Boolean read FPreserveWhitespace;
  278. property IgnoreComments: Boolean read FIgnoreComments;
  279. property FragmentMode: Boolean read FFragmentMode write SetFragmentMode;
  280. procedure ValidateCurrentNode;
  281. procedure ValidateIdRefs;
  282. procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  283. procedure ParseDTD;
  284. end;
  285. procedure RegisterDecoder(Proc: TGetDecoderProc);
  286. implementation
  287. uses
  288. UriParser;
  289. type
  290. TXMLDecodingSource = class(TXMLCharSource)
  291. private
  292. FCharBuf: PChar;
  293. FCharBufEnd: PChar;
  294. FBufStart: PWideChar;
  295. FDecoder: TDecoder;
  296. FHasBOM: Boolean;
  297. FFixedUCS2: string;
  298. FBufSize: Integer;
  299. procedure DecodingError(const Msg: string);
  300. protected
  301. function Reload: Boolean; override;
  302. procedure FetchData; virtual;
  303. public
  304. procedure AfterConstruction; override;
  305. destructor Destroy; override;
  306. function SetEncoding(const AEncoding: string): Boolean; override;
  307. procedure NewLine; override;
  308. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  309. wsflag: PBoolean = nil): WideChar; override;
  310. procedure Initialize; override;
  311. end;
  312. TXMLStreamInputSource = class(TXMLDecodingSource)
  313. private
  314. FAllocated: PChar;
  315. FStream: TStream;
  316. FCapacity: Integer;
  317. FOwnStream: Boolean;
  318. FEof: Boolean;
  319. public
  320. constructor Create(AStream: TStream; AOwnStream: Boolean);
  321. destructor Destroy; override;
  322. procedure FetchData; override;
  323. end;
  324. TXMLFileInputSource = class(TXMLDecodingSource)
  325. private
  326. FFile: ^Text;
  327. FString: string;
  328. FTmp: string;
  329. public
  330. constructor Create(var AFile: Text);
  331. procedure FetchData; override;
  332. end;
  333. PForwardRef = ^TForwardRef;
  334. TForwardRef = record
  335. Value: XMLString;
  336. Loc: TLocation;
  337. end;
  338. const
  339. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  340. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  341. '#', '@', '$', '_', '%'];
  342. NullLocation: TLocation = (Line: 0; LinePos: 0);
  343. { Decoders }
  344. var
  345. Decoders: array of TGetDecoderProc;
  346. procedure RegisterDecoder(Proc: TGetDecoderProc);
  347. var
  348. L: Integer;
  349. begin
  350. L := Length(Decoders);
  351. SetLength(Decoders, L+1);
  352. Decoders[L] := Proc;
  353. end;
  354. function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
  355. var
  356. I: Integer;
  357. begin
  358. Result := False;
  359. for I := 0 to High(Decoders) do
  360. if Decoders[I](AEncoding, Decoder) then
  361. begin
  362. Result := True;
  363. Exit;
  364. end;
  365. end;
  366. function Is_8859_1(const AEncoding: string): Boolean;
  367. begin
  368. Result := SameText(AEncoding, 'ISO-8859-1') or
  369. SameText(AEncoding, 'ISO_8859-1') or
  370. SameText(AEncoding, 'latin1') or
  371. SameText(AEncoding, 'iso-ir-100') or
  372. SameText(AEncoding, 'l1') or
  373. SameText(AEncoding, 'IBM819') or
  374. SameText(AEncoding, 'CP819') or
  375. SameText(AEncoding, 'csISOLatin1') or
  376. // This one is not in character-sets.txt, but was used in FPC documentation,
  377. // and still being used in fcl-registry package
  378. SameText(AEncoding, 'ISO8859-1');
  379. end;
  380. { TXMLCharSource }
  381. constructor TXMLCharSource.Create(const AData: XMLString);
  382. begin
  383. inherited Create;
  384. FLineNo := 1;
  385. FBuf := PWideChar(AData);
  386. FBufEnd := FBuf + Length(AData);
  387. LFPos := FBuf-1;
  388. FCharCount := Length(AData);
  389. end;
  390. procedure TXMLCharSource.Initialize;
  391. begin
  392. end;
  393. function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
  394. begin
  395. Result := True; // always succeed
  396. end;
  397. function TXMLCharSource.GetSourceURI: XMLString;
  398. begin
  399. if FSourceURI <> '' then
  400. Result := FSourceURI
  401. else if Assigned(FParent) then
  402. Result := FParent.SourceURI
  403. else
  404. Result := '';
  405. end;
  406. function TXMLCharSource.Reload: Boolean;
  407. begin
  408. Result := False;
  409. end;
  410. procedure TXMLCharSource.NewLine;
  411. begin
  412. Inc(FLineNo);
  413. LFPos := FBuf;
  414. end;
  415. function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  416. wsflag: PBoolean): WideChar;
  417. var
  418. old: PWideChar;
  419. nonws: Boolean;
  420. begin
  421. old := FBuf;
  422. nonws := False;
  423. repeat
  424. if FBuf^ = #10 then
  425. NewLine;
  426. if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
  427. Break;
  428. if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
  429. nonws := True;
  430. Inc(FBuf);
  431. until False;
  432. Result := FBuf^;
  433. BufAppendChunk(ToFill, old, FBuf);
  434. if Assigned(wsflag) then
  435. wsflag^ := wsflag^ or nonws;
  436. end;
  437. function TXMLCharSource.Matches(const arg: XMLString): Boolean;
  438. begin
  439. Result := False;
  440. if (FBufEnd >= FBuf + Length(arg)) or Reload then
  441. Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
  442. if Result then
  443. begin
  444. Inc(FBuf, Length(arg));
  445. if FBuf >= FBufEnd then
  446. Reload;
  447. end;
  448. end;
  449. { Used to check element name in end-tags, difference from Matches is that
  450. buffer may be reloaded more than once. XML has no restriction on name
  451. length, so a name longer than input buffer may be encountered. }
  452. function TXMLCharSource.MatchesLong(const arg: XMLString): Boolean;
  453. var
  454. idx, len, chunk: Integer;
  455. begin
  456. Result := False;
  457. idx := 1;
  458. len := Length(arg);
  459. repeat
  460. if (FBuf >= FBufEnd) and not Reload then
  461. Exit;
  462. if FBufEnd >= FBuf + len then
  463. chunk := len
  464. else
  465. chunk := FBufEnd - FBuf;
  466. if not CompareMem(@arg[idx], FBuf, chunk*sizeof(WideChar)) then
  467. Exit;
  468. Inc(FBuf, chunk);
  469. Inc(idx,chunk);
  470. Dec(len,chunk);
  471. until len = 0;
  472. Result := True;
  473. if FBuf >= FBufEnd then
  474. Reload;
  475. end;
  476. { TXMLDecodingSource }
  477. procedure TXMLDecodingSource.AfterConstruction;
  478. begin
  479. inherited AfterConstruction;
  480. FBufStart := AllocMem(4096);
  481. FBuf := FBufStart;
  482. FBufEnd := FBuf;
  483. LFPos := FBuf-1;
  484. end;
  485. destructor TXMLDecodingSource.Destroy;
  486. begin
  487. FreeMem(FBufStart);
  488. if Assigned(FDecoder.Cleanup) then
  489. FDecoder.Cleanup(FDecoder.Context);
  490. inherited Destroy;
  491. end;
  492. procedure TXMLDecodingSource.FetchData;
  493. begin
  494. end;
  495. procedure TXMLDecodingSource.DecodingError(const Msg: string);
  496. begin
  497. // count line endings to obtain correct error location
  498. while FBuf < FBufEnd do
  499. begin
  500. if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
  501. begin
  502. if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
  503. ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
  504. Inc(FBuf);
  505. LFPos := FBuf;
  506. Inc(FLineNo);
  507. end;
  508. Inc(FBuf);
  509. end;
  510. FReader.FatalError(Msg);
  511. end;
  512. function TXMLDecodingSource.Reload: Boolean;
  513. var
  514. Remainder: PtrInt;
  515. r, inLeft: Cardinal;
  516. rslt: Integer;
  517. begin
  518. if Kind = skInternalSubset then
  519. FReader.DTDReloadHook;
  520. Remainder := FBufEnd - FBuf;
  521. if Remainder > 0 then
  522. Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
  523. Dec(LFPos, FBuf-FBufStart);
  524. FBuf := FBufStart;
  525. FBufEnd := FBufStart + Remainder;
  526. repeat
  527. inLeft := FCharBufEnd - FCharBuf;
  528. if inLeft < 4 then // may contain an incomplete char
  529. begin
  530. FetchData;
  531. inLeft := FCharBufEnd - FCharBuf;
  532. if inLeft <= 0 then
  533. Break;
  534. end;
  535. r := FBufStart + FBufSize - FBufEnd;
  536. if r = 0 then
  537. Break;
  538. rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
  539. { Sanity checks: r and inLeft must not increase. }
  540. if inLeft + FCharBuf <= FCharBufEnd then
  541. FCharBuf := FCharBufEnd - inLeft
  542. else
  543. DecodingError('Decoder error: input byte count out of bounds');
  544. if r + FBufEnd <= FBufStart + FBufSize then
  545. FBufEnd := FBufStart + FBufSize - r
  546. else
  547. DecodingError('Decoder error: output char count out of bounds');
  548. if rslt = 0 then
  549. Break
  550. else if rslt < 0 then
  551. DecodingError('Invalid character in input stream')
  552. else
  553. FReader.CheckMaxChars(rslt);
  554. until False;
  555. FBufEnd^ := #0;
  556. Result := FBuf < FBufEnd;
  557. end;
  558. const
  559. XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
  560. procedure TXMLDecodingSource.Initialize;
  561. begin
  562. inherited;
  563. FLineNo := 1;
  564. FDecoder.Decode := @Decode_UTF8;
  565. FFixedUCS2 := '';
  566. if FCharBufEnd-FCharBuf > 1 then
  567. begin
  568. if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
  569. begin
  570. FFixedUCS2 := 'UTF-16BE';
  571. FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  572. end
  573. else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
  574. begin
  575. FFixedUCS2 := 'UTF-16LE';
  576. FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  577. end;
  578. end;
  579. FBufSize := 6; // possible BOM and '<?xml'
  580. Reload;
  581. if FBuf^ = #$FEFF then
  582. begin
  583. FHasBOM := True;
  584. Inc(FBuf);
  585. end;
  586. LFPos := FBuf-1;
  587. if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
  588. begin
  589. FBufSize := 3; // don't decode past XML declaration
  590. Inc(FBuf, Length(XmlSign));
  591. FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
  592. end;
  593. FBufSize := 2047;
  594. if FReader.FXML11 then
  595. FXml11Rules := True;
  596. end;
  597. function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
  598. var
  599. NewDecoder: TDecoder;
  600. begin
  601. Result := True;
  602. if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
  603. Exit;
  604. if FFixedUCS2 <> '' then
  605. begin
  606. Result := SameText(AEncoding, FFixedUCS2) or
  607. SameText(AEncoding, 'UTF-16') or
  608. SameText(AEncoding, 'unicode');
  609. Exit;
  610. end;
  611. // TODO: must fail when a byte-based stream is labeled as word-based.
  612. // see rmt-e2e-61, it now fails but for a completely different reason.
  613. FillChar(NewDecoder, sizeof(TDecoder), 0);
  614. if Is_8859_1(AEncoding) then
  615. FDecoder.Decode := @Decode_8859_1
  616. else if FindDecoder(AEncoding, NewDecoder) then
  617. FDecoder := NewDecoder
  618. else
  619. Result := False;
  620. end;
  621. procedure TXMLDecodingSource.NewLine;
  622. begin
  623. case FBuf^ of
  624. #10: ;
  625. #13: begin
  626. // Reload trashes the buffer, it should be consumed beforehand
  627. if (FBufEnd >= FBuf+2) or Reload then
  628. begin
  629. if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
  630. Inc(FBuf);
  631. end;
  632. FBuf^ := #10;
  633. end;
  634. #$85, #$2028: if FXML11Rules then
  635. FBuf^ := #10
  636. else
  637. Exit;
  638. else
  639. Exit;
  640. end;
  641. Inc(FLineNo);
  642. LFPos := FBuf;
  643. end;
  644. { TXMLStreamInputSource }
  645. const
  646. Slack = 16;
  647. constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
  648. begin
  649. FStream := AStream;
  650. FCapacity := 4096;
  651. GetMem(FAllocated, FCapacity+Slack);
  652. FCharBuf := FAllocated+(Slack-4);
  653. FCharBufEnd := FCharBuf;
  654. FOwnStream := AOwnStream;
  655. FetchData;
  656. end;
  657. destructor TXMLStreamInputSource.Destroy;
  658. begin
  659. FreeMem(FAllocated);
  660. if FOwnStream then
  661. FStream.Free;
  662. inherited Destroy;
  663. end;
  664. procedure TXMLStreamInputSource.FetchData;
  665. var
  666. Remainder, BytesRead: Integer;
  667. OldBuf: PChar;
  668. begin
  669. Assert(FCharBufEnd - FCharBuf < Slack-4);
  670. if FEof then
  671. Exit;
  672. OldBuf := FCharBuf;
  673. Remainder := FCharBufEnd - FCharBuf;
  674. if Remainder < 0 then
  675. Remainder := 0;
  676. FCharBuf := FAllocated+Slack-4-Remainder;
  677. if Remainder > 0 then
  678. Move(OldBuf^, FCharBuf^, Remainder);
  679. BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
  680. if BytesRead < FCapacity then
  681. FEof := True;
  682. FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
  683. { Null-termination has been removed:
  684. 1) Built-in decoders don't need it because they respect the buffer length.
  685. 2) It was causing unaligned access errors on ARM CPUs.
  686. }
  687. //PWideChar(FCharBufEnd)^ := #0;
  688. end;
  689. { TXMLFileInputSource }
  690. constructor TXMLFileInputSource.Create(var AFile: Text);
  691. begin
  692. FFile := @AFile;
  693. SourceURI := FilenameToURI(TTextRec(AFile).Name);
  694. FetchData;
  695. end;
  696. procedure TXMLFileInputSource.FetchData;
  697. var
  698. Remainder: Integer;
  699. begin
  700. if not Eof(FFile^) then
  701. begin
  702. Remainder := FCharBufEnd - FCharBuf;
  703. if Remainder > 0 then
  704. SetString(FTmp, FCharBuf, Remainder);
  705. ReadLn(FFile^, FString);
  706. FString := FString + #10; // bad solution...
  707. if Remainder > 0 then
  708. Insert(FTmp, FString, 1);
  709. FCharBuf := PChar(FString);
  710. FCharBufEnd := FCharBuf + Length(FString);
  711. end;
  712. end;
  713. { helper that closes handle upon destruction }
  714. type
  715. THandleOwnerStream = class(THandleStream)
  716. public
  717. destructor Destroy; override;
  718. end;
  719. destructor THandleOwnerStream.Destroy;
  720. begin
  721. FileClose(Handle);
  722. inherited Destroy;
  723. end;
  724. { TXMLTextReader }
  725. function TXMLTextReader.QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  726. begin
  727. if GetInterface(iid,obj) then
  728. result := S_OK
  729. else
  730. result:= E_NOINTERFACE;
  731. end;
  732. function TXMLTextReader._AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  733. begin
  734. result := -1;
  735. end;
  736. function TXMLTextReader._Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  737. begin
  738. result := -1;
  739. end;
  740. procedure TXMLTextReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  741. begin
  742. SrcOut := nil;
  743. if Assigned(SrcIn) then
  744. begin
  745. if Assigned(SrcIn.Stream) then
  746. SrcOut := TXMLStreamInputSource.Create(SrcIn.Stream, False)
  747. else if SrcIn.StringData <> '' then
  748. SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.StringData), True)
  749. else if (SrcIn.SystemID <> '') then
  750. ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
  751. end;
  752. if (SrcOut = nil) and (FSource = nil) then
  753. DoErrorPos(esFatal, 'No input source specified', NullLocation);
  754. end;
  755. procedure TXMLTextReader.StoreLocation(out Loc: TLocation);
  756. begin
  757. Loc.Line := FSource.FLineNo;
  758. Loc.LinePos := FSource.FBuf-FSource.LFPos;
  759. end;
  760. function TXMLTextReader.ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  761. var
  762. SrcURI: XMLString;
  763. Filename: string;
  764. Stream: TStream;
  765. fd: THandle;
  766. begin
  767. Source := nil;
  768. Result := False;
  769. if not ResolveRelativeURI(ABaseURI, ASystemID, SrcURI) then
  770. Exit;
  771. { TODO: alternative resolvers
  772. These may be 'internal' resolvers or a handler set by application.
  773. Internal resolvers should probably produce a TStream
  774. ( so that internal classes need not be exported ).
  775. External resolver will produce TXMLInputSource that should be converted.
  776. External resolver must NOT be called for root entity.
  777. External resolver can return nil, in which case we do the default }
  778. if URIToFilename(SrcURI, Filename) then
  779. begin
  780. fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
  781. if fd <> THandle(-1) then
  782. begin
  783. Stream := THandleOwnerStream.Create(fd);
  784. Source := TXMLStreamInputSource.Create(Stream, True);
  785. Source.SourceURI := SrcURI;
  786. end;
  787. end;
  788. Result := Assigned(Source);
  789. end;
  790. procedure TXMLTextReader.SetSource(ASource: TXMLCharSource);
  791. begin
  792. ASource.FParent := FSource;
  793. FSource := ASource;
  794. FSource.FReader := Self;
  795. FSource.FStartNesting := FNesting;
  796. end;
  797. procedure TXMLTextReader.FatalError(Expected: WideChar);
  798. begin
  799. // FIX: don't output what is found - anything may be found, including exploits...
  800. FatalError('Expected "%1s"', [string(Expected)]);
  801. end;
  802. procedure TXMLTextReader.FatalError(const descr: String; LineOffs: Integer);
  803. begin
  804. DoError(esFatal, descr, LineOffs);
  805. end;
  806. procedure TXMLTextReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
  807. begin
  808. DoError(esFatal, Format(descr, args), LineOffs);
  809. end;
  810. procedure TXMLTextReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
  811. begin
  812. if FValidate then
  813. DoError(esError, Format(Msg, Args), LineOffs);
  814. end;
  815. procedure TXMLTextReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
  816. var
  817. ws: XMLString;
  818. begin
  819. SetString(ws, FName.Buffer, FName.Length);
  820. ValidationError(Msg, [ws], LineOffs);
  821. end;
  822. procedure TXMLTextReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
  823. var
  824. Loc: TLocation;
  825. begin
  826. StoreLocation(Loc);
  827. if LineOffs >= 0 then
  828. begin
  829. Dec(Loc.LinePos, LineOffs);
  830. DoErrorPos(Severity, descr, Loc);
  831. end
  832. else
  833. DoErrorPos(Severity, descr, FTokenStart);
  834. end;
  835. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
  836. const args: array of const; const ErrPos: TLocation);
  837. begin
  838. DoErrorPos(Severity, Format(descr, args), ErrPos);
  839. end;
  840. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
  841. var
  842. E: EXMLReadError;
  843. srcuri: XMLString;
  844. begin
  845. if Assigned(FSource) then
  846. begin
  847. srcuri := FSource.FSourceURI;
  848. if (srcuri = '') and Assigned(FSource.FEntity) then
  849. srcuri := FSource.FEntity.FURI;
  850. E := EXMLReadError.Create(severity, descr, ErrPos.Line, ErrPos.LinePos, srcuri);
  851. end
  852. else
  853. E := EXMLReadError.Create(descr);
  854. CallErrorHandler(E);
  855. // No 'finally'! If user handler raises exception, control should not get here
  856. // and the exception will be freed in CallErrorHandler (below)
  857. E.Free;
  858. end;
  859. procedure TXMLTextReader.CheckMaxChars(ToAdd: Cardinal);
  860. var
  861. src: TXMLCharSource;
  862. total: Cardinal;
  863. begin
  864. Inc(FSource.FCharCount, ToAdd);
  865. if FMaxChars = 0 then
  866. Exit;
  867. src := FSource;
  868. total := 0;
  869. repeat
  870. Inc(total, src.FCharCount);
  871. if total > FMaxChars then
  872. FatalError('Exceeded character count limit');
  873. src := src.FParent;
  874. until src = nil;
  875. end;
  876. procedure TXMLTextReader.CallErrorHandler(E: EXMLReadError);
  877. begin
  878. try
  879. if Assigned(FOnError) then
  880. FOnError(E);
  881. if E.Severity = esFatal then
  882. raise E;
  883. except
  884. FReadState := rsError;
  885. if ExceptObject <> E then
  886. E.Free;
  887. raise;
  888. end;
  889. end;
  890. function TXMLTextReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
  891. begin
  892. Result := False;
  893. repeat
  894. Result := SkipS or Result;
  895. if FSource.FBuf >= FSource.FBufEnd then
  896. begin
  897. Result := True; // report whitespace upon exiting the PE
  898. if not ContextPop then
  899. Break;
  900. end
  901. else if FSource.FBuf^ = '%' then
  902. begin
  903. if (FState <> rsDTD) then
  904. Break;
  905. // This is the only case where look-ahead is needed
  906. if FSource.FBuf > FSource.FBufEnd-2 then
  907. FSource.Reload;
  908. if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[NamePages[hi(Word(FSource.FBuf[1]))]]) or
  909. ((FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
  910. begin
  911. StartPE;
  912. Result := True; // report whitespace upon entering the PE
  913. end
  914. else Break;
  915. end
  916. else
  917. Break;
  918. until False;
  919. end;
  920. procedure TXMLTextReader.ExpectWhitespace;
  921. begin
  922. if not SkipWhitespace then
  923. FatalError('Expected whitespace');
  924. end;
  925. function TXMLTextReader.SkipS(Required: Boolean): Boolean;
  926. var
  927. p: PWideChar;
  928. begin
  929. Result := False;
  930. repeat
  931. p := FSource.FBuf;
  932. repeat
  933. if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
  934. begin
  935. FSource.FBuf := p;
  936. FSource.NewLine;
  937. p := FSource.FBuf;
  938. end
  939. else if (p^ <> #32) and (p^ <> #9) then
  940. Break;
  941. Inc(p);
  942. Result := True;
  943. until False;
  944. FSource.FBuf := p;
  945. until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
  946. if (not Result) and Required then
  947. FatalError('Expected whitespace');
  948. end;
  949. procedure TXMLTextReader.ExpectString(const s: String);
  950. var
  951. I: Integer;
  952. begin
  953. for I := 1 to Length(s) do
  954. begin
  955. if FSource.FBuf^ <> WideChar(ord(s[i])) then
  956. FatalError('Expected "%s"', [s], i-1);
  957. FSource.NextChar;
  958. end;
  959. end;
  960. function TXMLTextReader.CheckForChar(c: WideChar): Boolean;
  961. begin
  962. Result := (FSource.FBuf^ = c);
  963. if Result then
  964. begin
  965. Inc(FSource.FBuf);
  966. if FSource.FBuf >= FSource.FBufEnd then
  967. FSource.Reload;
  968. end;
  969. end;
  970. procedure TXMLTextReader.SkipQuote(out Delim: WideChar; required: Boolean);
  971. begin
  972. Delim := #0;
  973. if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
  974. begin
  975. Delim := FSource.FBuf^;
  976. FSource.NextChar; // skip quote
  977. StoreLocation(FTokenStart);
  978. end
  979. else if required then
  980. FatalError('Expected single or double quote');
  981. end;
  982. const
  983. PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
  984. procedure TXMLTextReader.SetOptions(AValue: TXMLReaderSettings);
  985. begin
  986. FValidate := AValue.Validate;
  987. FPreserveWhitespace := AValue.PreserveWhitespace;
  988. FExpandEntities := AValue.ExpandEntities;
  989. FCDSectionsAsText := AValue.CDSectionsAsText;
  990. FIgnoreComments := AValue.IgnoreComments;
  991. FNamespaces := AValue.Namespaces;
  992. FDisallowDoctype := AValue.DisallowDoctype;
  993. FCanonical := AValue.CanonicalForm;
  994. FMaxChars := AValue.MaxChars;
  995. FOnError := AValue.OnError;
  996. SetFragmentMode(AValue.ConformanceLevel = clFragment);
  997. end;
  998. procedure TXMLTextReader.SetFragmentMode(aValue: Boolean);
  999. begin
  1000. FFragmentMode := aValue;
  1001. if FFragmentMode then
  1002. FState := rsRoot
  1003. else
  1004. FState := rsProlog;
  1005. end;
  1006. constructor TXMLTextReader.Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings);
  1007. var
  1008. InputSrc: TXMLCharSource;
  1009. begin
  1010. SetNametable(ASettings.NameTable);
  1011. SetOptions(ASettings);
  1012. ConvertSource(ASrc, InputSrc);
  1013. FSource := InputSrc;
  1014. FSource.FReader := Self;
  1015. end;
  1016. constructor TXMLTextReader.Create(const uri: XMLString; ASettings: TXMLReaderSettings);
  1017. begin
  1018. SetNametable(ASettings.NameTable);
  1019. SetOptions(ASettings);
  1020. if ResolveResource(uri, '', '', FSource) then
  1021. FSource.FReader := Self
  1022. else
  1023. DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
  1024. end;
  1025. procedure TXMLTextReader.SetNametable(ANameTable: THashTable);
  1026. begin
  1027. if ANameTable = nil then
  1028. begin
  1029. ANameTable := THashTable.Create(256, True);
  1030. FNameTableOwned := True;
  1031. end;
  1032. FNameTable := ANameTable;
  1033. end;
  1034. constructor TXMLTextReader.Create(var AFile: Text; ANameTable: THashTable);
  1035. begin
  1036. SetNametable(ANameTable);
  1037. FSource := TXMLFileInputSource.Create(AFile);
  1038. FSource.FReader := Self;
  1039. end;
  1040. constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable);
  1041. begin
  1042. SetNametable(ANameTable);
  1043. FSource := TXMLStreamInputSource.Create(AStream, False);
  1044. FSource.SourceURI := ABaseUri;
  1045. FSource.FReader := Self;
  1046. end;
  1047. constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
  1048. begin
  1049. SetNametable(ASettings.NameTable);
  1050. SetOptions(ASettings);
  1051. FSource := TXMLStreamInputSource.Create(AStream, False);
  1052. FSource.SourceURI := ABaseUri;
  1053. FSource.FReader := Self;
  1054. end;
  1055. constructor TXMLTextReader.Create(ASrc: TXMLCharSource; AParent: TXMLTextReader);
  1056. begin
  1057. FNameTable := AParent.FNameTable;
  1058. FSource := ASrc;
  1059. FSource.FReader := Self;
  1060. FValidate := AParent.FValidate;
  1061. FPreserveWhitespace := AParent.FPreserveWhitespace;
  1062. FExpandEntities := AParent.FExpandEntities;
  1063. FCDSectionsAsText := AParent.FCDSectionsAsText;
  1064. FIgnoreComments := AParent.FIgnoreComments;
  1065. FNamespaces := AParent.FNamespaces;
  1066. FDisallowDoctype := AParent.FDisallowDoctype;
  1067. FCanonical := AParent.FCanonical;
  1068. FMaxChars := AParent.FMaxChars;
  1069. FOnError := AParent.FOnError;
  1070. end;
  1071. destructor TXMLTextReader.Destroy;
  1072. var
  1073. cur: PNodeData;
  1074. begin
  1075. if FAttrCleanupFlag then
  1076. CleanupAttributes;
  1077. while Assigned(FFreeAttrChunk) do
  1078. begin
  1079. cur := FFreeAttrChunk;
  1080. FFreeAttrChunk := cur^.FNext;
  1081. Dispose(cur);
  1082. end;
  1083. if Assigned(FEntityValue.Buffer) then
  1084. FreeMem(FEntityValue.Buffer);
  1085. FreeMem(FName.Buffer);
  1086. FreeMem(FValue.Buffer);
  1087. if Assigned(FSource) then
  1088. while ContextPop(True) do; // clean input stack
  1089. FSource.Free;
  1090. FPEMap.Free;
  1091. ClearForwardRefs;
  1092. FNsAttHash.Free;
  1093. FNSHelper.Free;
  1094. FDocType.Release;
  1095. FIDMap.Free;
  1096. FForwardRefs.Free;
  1097. if FNameTableOwned then
  1098. FNameTable.Free;
  1099. inherited Destroy;
  1100. end;
  1101. procedure TXMLTextReader.AfterConstruction;
  1102. begin
  1103. BufAllocate(FName, 128);
  1104. BufAllocate(FValue, 512);
  1105. SetLength(FNodeStack, 16);
  1106. SetLength(FValidators, 16);
  1107. FNesting := 0;
  1108. FValidatorNesting := 0;
  1109. FCurrNode := @FNodeStack[0];
  1110. FCurrAttrIndex := -1;
  1111. FEmptyStr := FNameTable.FindOrAdd('');
  1112. if FNamespaces then
  1113. begin
  1114. FNSHelper := TNSSupport.Create(FNameTable);
  1115. FNsAttHash := TDblHashArray.Create;
  1116. FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
  1117. FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
  1118. FStdUri_xmlns := FNameTable.FindOrAdd(stduri_xmlns);
  1119. FStdUri_xml := FNameTable.FindOrAdd(stduri_xml);
  1120. end;
  1121. end;
  1122. function TXMLTextReader.CheckName(aFlags: TCheckNameFlags): Boolean;
  1123. var
  1124. p: PWideChar;
  1125. NameStartFlag: Boolean;
  1126. begin
  1127. p := FSource.FBuf;
  1128. FName.Length := 0;
  1129. FColonPos := -1;
  1130. NameStartFlag := not (cnToken in aFlags);
  1131. repeat
  1132. if NameStartFlag then
  1133. begin
  1134. if (Byte(p^) in NamingBitmap[NamePages[hi(Word(p^))]]) or
  1135. ((p^ = ':') and (not FNamespaces)) then
  1136. Inc(p)
  1137. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1138. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1139. Inc(p, 2)
  1140. else
  1141. begin
  1142. // here we come either when first char of name is bad (it may be a colon),
  1143. // or when a colon is not followed by a valid NameStartChar
  1144. FSource.FBuf := p;
  1145. Result := False;
  1146. Break;
  1147. end;
  1148. NameStartFlag := False;
  1149. end;
  1150. repeat
  1151. if (Byte(p^) in NamingBitmap[NamePages[$100+hi(Word(p^))]]) or
  1152. ((p^= ':') and ((cnToken in aFlags) or not FNamespaces)) then
  1153. Inc(p)
  1154. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1155. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1156. Inc(p,2)
  1157. else
  1158. Break;
  1159. until False;
  1160. if (p^ = ':') and (FColonPos < 0) then
  1161. begin
  1162. FColonPos := p-FSource.FBuf+FName.Length;
  1163. NameStartFlag := True;
  1164. Inc(p);
  1165. if p < FSource.FBufEnd then Continue;
  1166. end;
  1167. BufAppendChunk(FName, FSource.FBuf, p);
  1168. Result := (FName.Length > 0);
  1169. FSource.FBuf := p;
  1170. if (p < FSource.FBufEnd) or not FSource.Reload then
  1171. Break;
  1172. p := FSource.FBuf;
  1173. until False;
  1174. if not (Result or (cnOptional in aFlags)) then
  1175. RaiseNameNotFound;
  1176. end;
  1177. procedure TXMLTextReader.CheckNCName;
  1178. begin
  1179. if FNamespaces and (FColonPos <> -1) then
  1180. FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
  1181. end;
  1182. procedure TXMLTextReader.RaiseNameNotFound;
  1183. begin
  1184. if FColonPos <> -1 then
  1185. FatalError('Bad QName syntax, local part is missing')
  1186. else
  1187. // Coming at no cost, this allows more user-friendly error messages
  1188. with FSource do
  1189. if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
  1190. FatalError('Whitespace is not allowed here')
  1191. else
  1192. FatalError('Name starts with invalid character');
  1193. end;
  1194. function TXMLTextReader.ResolvePredefined: Boolean;
  1195. var
  1196. wc: WideChar;
  1197. begin
  1198. Result := False;
  1199. with FName do
  1200. begin
  1201. if (Length = 2) and (Buffer[1] = 't') then
  1202. begin
  1203. if Buffer[0] = 'l' then
  1204. wc := '<'
  1205. else if Buffer[0] = 'g' then
  1206. wc := '>'
  1207. else Exit;
  1208. end
  1209. else if Buffer[0] = 'a' then
  1210. begin
  1211. if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
  1212. wc := '&'
  1213. else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
  1214. (Buffer[3] = 's') then
  1215. wc := ''''
  1216. else Exit;
  1217. end
  1218. else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
  1219. (Buffer[2] = 'o') and (Buffer[3] ='t') then
  1220. wc := '"'
  1221. else
  1222. Exit;
  1223. end; // with
  1224. BufAppend(FValue, wc);
  1225. Result := True;
  1226. end;
  1227. function TXMLTextReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  1228. var
  1229. Code: Integer;
  1230. begin
  1231. FSource.NextChar; // skip '&'
  1232. Result := CheckForChar('#');
  1233. if Result then
  1234. begin
  1235. Code := 0;
  1236. if CheckForChar('x') then
  1237. repeat
  1238. case FSource.FBuf^ of
  1239. '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
  1240. 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
  1241. 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
  1242. else
  1243. Break;
  1244. end;
  1245. FSource.NextChar;
  1246. until Code > $10FFFF
  1247. else
  1248. repeat
  1249. case FSource.FBuf^ of
  1250. '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
  1251. else
  1252. Break;
  1253. end;
  1254. FSource.NextChar;
  1255. until Code > $10FFFF;
  1256. case Code of
  1257. $01..$08, $0B..$0C, $0E..$1F:
  1258. if FXML11 then
  1259. BufAppend(ToFill, WideChar(Code))
  1260. else
  1261. FatalError('Invalid character reference');
  1262. $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
  1263. BufAppend(ToFill, WideChar(Code));
  1264. $10000..$10FFFF:
  1265. begin
  1266. BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
  1267. BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
  1268. end;
  1269. else
  1270. FatalError('Invalid character reference');
  1271. end;
  1272. end
  1273. else CheckName;
  1274. ExpectChar(';');
  1275. end;
  1276. const
  1277. AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
  1278. GT_Delim: TSetOfChar = [#0, '>'];
  1279. { Parse attribute literal, producing plain string value in AttrData.FValueStr.
  1280. If entity references are encountered and FExpandEntities=False, also builds
  1281. a node chain starting from AttrData.FNext. Node chain is built only for the
  1282. first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
  1283. procedure TXMLTextReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
  1284. var
  1285. wc: WideChar;
  1286. Delim: WideChar;
  1287. ent: TEntityDecl;
  1288. start: TObject;
  1289. curr: PNodeData;
  1290. StartPos: Integer;
  1291. StartLoc: TLocation;
  1292. entName: PHashItem;
  1293. begin
  1294. SkipQuote(Delim);
  1295. AttrData^.FLoc2 := FTokenStart;
  1296. StartLoc := FTokenStart;
  1297. curr := AttrData;
  1298. FValue.Length := 0;
  1299. StartPos := 0;
  1300. start := FSource.FEntity;
  1301. repeat
  1302. wc := FSource.SkipUntil(FValue, AttrDelims);
  1303. if wc = '<' then
  1304. FatalError('Character ''<'' is not allowed in attribute value')
  1305. else if wc = '&' then
  1306. begin
  1307. if ParseRef(FValue) or ResolvePredefined then
  1308. Continue;
  1309. entName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1310. ent := EntityCheck(True);
  1311. if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
  1312. begin
  1313. if FValue.Length > StartPos then
  1314. begin
  1315. AllocAttributeValueChunk(curr, StartPos);
  1316. curr^.FLoc := StartLoc;
  1317. end;
  1318. AllocAttributeValueChunk(curr, FValue.Length);
  1319. curr^.FNodeType := ntEntityReference;
  1320. curr^.FQName := entName;
  1321. StoreLocation(StartLoc);
  1322. curr^.FLoc := StartLoc;
  1323. Dec(curr^.FLoc.LinePos, FName.Length+1);
  1324. end;
  1325. StartPos := FValue.Length;
  1326. if Assigned(ent) then
  1327. ContextPush(ent);
  1328. end
  1329. else if wc <> #0 then
  1330. begin
  1331. FSource.NextChar;
  1332. if (wc = Delim) and (FSource.FEntity = start) then
  1333. Break;
  1334. if (wc = #10) or (wc = #9) or (wc = #13) then
  1335. wc := #32;
  1336. BufAppend(FValue, wc);
  1337. end
  1338. else
  1339. begin
  1340. if (FSource.FEntity = start) or not ContextPop then // #0
  1341. FatalError('Literal has no closing quote', -1);
  1342. StartPos := FValue.Length;
  1343. end;
  1344. until False;
  1345. if Assigned(attrData^.FNext) then
  1346. begin
  1347. FAttrCleanupFlag := True;
  1348. if FValue.Length > StartPos then
  1349. begin
  1350. AllocAttributeValueChunk(curr, StartPos);
  1351. curr^.FLoc := StartLoc;
  1352. end;
  1353. end;
  1354. if nonCDATA then
  1355. BufNormalize(FValue, attrData^.FDenormalized)
  1356. else
  1357. attrData^.FDenormalized := False;
  1358. SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
  1359. end;
  1360. const
  1361. PrefixChar: array[Boolean] of string = ('', '%');
  1362. procedure TXMLTextReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  1363. begin
  1364. if AEntity.FOnStack then
  1365. FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
  1366. if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
  1367. begin
  1368. if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
  1369. begin
  1370. // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
  1371. ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
  1372. Src := nil;
  1373. Exit;
  1374. end;
  1375. end
  1376. else
  1377. begin
  1378. Src := TXMLCharSource.Create(AEntity.FReplacementText);
  1379. Src.FLineNo := AEntity.FStartLocation.Line;
  1380. Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
  1381. // needed in case of prefetched external PE
  1382. if AEntity.FSystemID <> '' then
  1383. Src.SourceURI := AEntity.FURI;
  1384. end;
  1385. AEntity.FOnStack := True;
  1386. Src.FEntity := AEntity;
  1387. end;
  1388. function TXMLTextReader.ContextPush(AEntity: TEntityDecl; DummySource: Boolean): Boolean;
  1389. var
  1390. Src: TXMLCharSource;
  1391. begin
  1392. Src := nil;
  1393. if Assigned(AEntity) then
  1394. EntityToSource(AEntity, Src);
  1395. if (Src = nil) and DummySource then
  1396. begin
  1397. Src := TXMLCharSource.Create('');
  1398. if FExpandEntities then
  1399. Src.Kind := skManualPop;
  1400. end;
  1401. Result := Assigned(Src);
  1402. if Result then
  1403. begin
  1404. SetSource(Src);
  1405. Src.Initialize;
  1406. end;
  1407. end;
  1408. function TXMLTextReader.ContextPop(Forced: Boolean): Boolean;
  1409. var
  1410. Src: TXMLCharSource;
  1411. Error: Boolean;
  1412. begin
  1413. Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
  1414. if Result then
  1415. begin
  1416. Src := FSource.FParent;
  1417. Error := False;
  1418. if Assigned(FSource.FEntity) then
  1419. begin
  1420. FSource.FEntity.FOnStack := False;
  1421. FSource.FEntity.FCharCount := FSource.FCharCount;
  1422. // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
  1423. Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
  1424. end;
  1425. FSource.Free;
  1426. FSource := Src;
  1427. // correct position of this error is after PE reference
  1428. if Error then
  1429. FatalError('Parameter entities must be properly nested');
  1430. end;
  1431. end;
  1432. function TXMLTextReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
  1433. var
  1434. RefName: XMLString;
  1435. cnt: Integer;
  1436. begin
  1437. Result := nil;
  1438. SetString(RefName, FName.Buffer, FName.Length);
  1439. cnt := FName.Length+2;
  1440. if Assigned(FDocType) then
  1441. Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1442. if Result = nil then
  1443. begin
  1444. if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
  1445. FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
  1446. else
  1447. ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
  1448. Exit;
  1449. end;
  1450. if FStandalone and Result.ExternallyDeclared then
  1451. FatalError('Standalone constraint violation', cnt);
  1452. if Result.FNotationName <> '' then
  1453. FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
  1454. if NoExternals and (Result.FSystemID <> '') then
  1455. FatalError('External entity reference is not allowed in attribute value', cnt);
  1456. if not Result.FResolved then
  1457. if Assigned(FOnEntity) then
  1458. FOnEntity(Self, Result);
  1459. // at this point we know the charcount of the entity being included
  1460. if Result.FCharCount >= cnt then
  1461. CheckMaxChars(Result.FCharCount - cnt);
  1462. end;
  1463. procedure TXMLTextReader.StartPE;
  1464. var
  1465. PEnt: TEntityDecl;
  1466. begin
  1467. FSource.NextChar; // skip '%'
  1468. CheckName;
  1469. ExpectChar(';');
  1470. if (FSource.Kind = skInternalSubset) and FInsideDecl then
  1471. FatalError('Parameter entity references cannot appear inside markup declarations in internal subset', FName.Length+2);
  1472. PEnt := nil;
  1473. if Assigned(FPEMap) then
  1474. PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1475. if PEnt = nil then
  1476. begin
  1477. ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
  1478. // cease processing declarations, unless document is standalone.
  1479. FDTDProcessed := FStandalone;
  1480. Exit;
  1481. end;
  1482. { cache an external PE so it's only fetched once }
  1483. if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
  1484. begin
  1485. FDTDProcessed := FStandalone;
  1486. Exit;
  1487. end;
  1488. CheckMaxChars(PEnt.FCharCount);
  1489. PEnt.FBetweenDecls := not FInsideDecl;
  1490. ContextPush(PEnt);
  1491. FHavePERefs := True;
  1492. end;
  1493. function TXMLTextReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
  1494. begin
  1495. Result := ContextPush(AEntity);
  1496. if Result then
  1497. try
  1498. FValue.Length := 0;
  1499. FSource.SkipUntil(FValue, [#0]);
  1500. SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
  1501. AEntity.FCharCount := FValue.Length;
  1502. AEntity.FStartLocation.Line := 1;
  1503. AEntity.FStartLocation.LinePos := 1;
  1504. AEntity.FURI := FSource.SourceURI; // replace base URI with absolute one
  1505. finally
  1506. ContextPop;
  1507. AEntity.FPrefetched := True;
  1508. FValue.Length := 0;
  1509. end;
  1510. end;
  1511. const
  1512. LiteralDelims: array[TLiteralType] of TSetOfChar = (
  1513. [#0, '''', '"'], // ltPlain
  1514. [#0, '''', '"', #13, #10], // ltPubid
  1515. [#0, '%', '&', '''', '"'] // ltEntity
  1516. );
  1517. function TXMLTextReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  1518. Required: Boolean): Boolean;
  1519. var
  1520. start: TObject;
  1521. wc, Delim: WideChar;
  1522. dummy: Boolean;
  1523. begin
  1524. SkipQuote(Delim, Required);
  1525. Result := (Delim <> #0);
  1526. if not Result then
  1527. Exit;
  1528. ToFill.Length := 0;
  1529. start := FSource.FEntity;
  1530. repeat
  1531. wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
  1532. if wc = '%' then { ltEntity only }
  1533. StartPE
  1534. else if wc = '&' then { ltEntity }
  1535. begin
  1536. if ParseRef(ToFill) then // charRefs always expanded
  1537. Continue;
  1538. BufAppend(ToFill, '&');
  1539. BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
  1540. BufAppend(ToFill, ';');
  1541. end
  1542. else if wc <> #0 then
  1543. begin
  1544. FSource.NextChar;
  1545. if (wc = #10) or (wc = #13) then
  1546. wc := #32
  1547. // terminating delimiter must be in the same context as the starting one
  1548. else if (wc = Delim) and (start = FSource.FEntity) then
  1549. Break;
  1550. BufAppend(ToFill, wc);
  1551. end
  1552. else if (FSource.FEntity = start) or not ContextPop then // #0
  1553. FatalError('Literal has no closing quote', -1);
  1554. until False;
  1555. if aType = ltPubid then
  1556. BufNormalize(ToFill, dummy);
  1557. end;
  1558. function TXMLTextReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  1559. var
  1560. wc: WideChar;
  1561. begin
  1562. Result := False;
  1563. StoreLocation(FTokenStart);
  1564. repeat
  1565. wc := FSource.SkipUntil(FValue, Delim);
  1566. if wc <> #0 then
  1567. begin
  1568. FSource.NextChar;
  1569. if (FValue.Length > 0) then
  1570. begin
  1571. if (FValue.Buffer[FValue.Length-1] = c1) then
  1572. begin
  1573. Dec(FValue.Length);
  1574. Result := True;
  1575. Exit;
  1576. end;
  1577. end;
  1578. BufAppend(FValue, wc);
  1579. end;
  1580. until wc = #0;
  1581. end;
  1582. procedure TXMLTextReader.ParseComment(discard: Boolean); // [15]
  1583. var
  1584. SaveLength: Integer;
  1585. begin
  1586. ExpectString('--');
  1587. SaveLength := FValue.Length;
  1588. if not SkipUntilSeq([#0, '-'], '-') then
  1589. FatalError('Unterminated comment', -1);
  1590. ExpectChar('>');
  1591. if not discard then
  1592. begin
  1593. FCurrNode := @FNodeStack[FNesting];
  1594. FCurrNode^.FNodeType := ntComment;
  1595. FCurrNode^.FQName := nil;
  1596. FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
  1597. FCurrNode^.FValueLength := FValue.Length-SaveLength;
  1598. end;
  1599. FValue.Length := SaveLength;
  1600. end;
  1601. procedure TXMLTextReader.ParsePI; // [16]
  1602. begin
  1603. FSource.NextChar; // skip '?'
  1604. CheckName;
  1605. CheckNCName;
  1606. with FName do
  1607. if (Length = 3) and
  1608. ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
  1609. ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
  1610. ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
  1611. begin
  1612. if not BufEquals(FName, 'xml') then
  1613. FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
  1614. else
  1615. FatalError('XML declaration is not allowed here', FName.Length);
  1616. end;
  1617. if FSource.FBuf^ <> '?' then
  1618. SkipS(True);
  1619. FValue.Length := 0;
  1620. if not SkipUntilSeq(GT_Delim, '?') then
  1621. FatalError('Unterminated processing instruction', -1);
  1622. SetNodeInfoWithValue(ntProcessingInstruction,
  1623. FNameTable.FindOrAdd(FName.Buffer, FName.Length));
  1624. end;
  1625. const
  1626. vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
  1627. procedure TXMLTextReader.ParseXmlOrTextDecl(TextDecl: Boolean);
  1628. var
  1629. Delim: WideChar;
  1630. buf: array[0..31] of WideChar;
  1631. I: Integer;
  1632. begin
  1633. SkipS(True);
  1634. // [24] VersionInfo: optional in TextDecl, required in XmlDecl
  1635. if (not TextDecl) or (FSource.FBuf^ = 'v') then
  1636. begin
  1637. ExpectString('version');
  1638. ExpectEq;
  1639. SkipQuote(Delim);
  1640. { !! Definition "VersionNum ::= '1.' [0-9]+" per XML 1.0 Fifth Edition
  1641. implies that version literal can have unlimited length. }
  1642. I := 0;
  1643. while (I < 3) and (FSource.FBuf^ <> Delim) do
  1644. begin
  1645. buf[I] := FSource.FBuf^;
  1646. Inc(I);
  1647. FSource.NextChar;
  1648. end;
  1649. if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
  1650. (buf[2] < '0') or (buf[2] > '9') then
  1651. FatalError('Illegal version number', -1);
  1652. ExpectChar(Delim);
  1653. FSource.FXMLVersion := vers[buf[2] = '1'];
  1654. if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
  1655. FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
  1656. if TextDecl or (FSource.FBuf^ <> '?') then
  1657. SkipS(True);
  1658. end;
  1659. // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
  1660. if TextDecl or (FSource.FBuf^ = 'e') then
  1661. begin
  1662. ExpectString('encoding');
  1663. ExpectEq;
  1664. SkipQuote(Delim);
  1665. I := 0;
  1666. while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
  1667. ((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
  1668. ((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
  1669. begin
  1670. buf[I] := FSource.FBuf^;
  1671. Inc(I);
  1672. FSource.NextChar;
  1673. end;
  1674. if not CheckForChar(Delim) then
  1675. FatalError('Illegal encoding name', i);
  1676. SetString(FSource.FXMLEncoding, buf, i);
  1677. if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
  1678. FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
  1679. if FSource.FBuf^ <> '?' then
  1680. SkipS(not TextDecl);
  1681. end;
  1682. // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
  1683. if (not TextDecl) and (FSource.FBuf^ = 's') then
  1684. begin
  1685. ExpectString('standalone');
  1686. ExpectEq;
  1687. SkipQuote(Delim);
  1688. if FSource.Matches('yes') then
  1689. FStandalone := True
  1690. else if not FSource.Matches('no') then
  1691. FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
  1692. ExpectChar(Delim);
  1693. SkipS;
  1694. end;
  1695. ExpectString('?>');
  1696. { Switch to 1.1 rules only after declaration is parsed completely. This is to
  1697. ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
  1698. if FSource.FXMLVersion = xmlVersion11 then
  1699. FXML11 := True;
  1700. end;
  1701. procedure TXMLTextReader.DTDReloadHook;
  1702. var
  1703. p: PWideChar;
  1704. begin
  1705. { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
  1706. We must further remove the CR chars and have only LF's left. }
  1707. p := FDTDStartPos;
  1708. while p < FSource.FBuf do
  1709. begin
  1710. while (p < FSource.FBuf) and (p^ <> #13) do
  1711. Inc(p);
  1712. BufAppendChunk(FIntSubset, FDTDStartPos, p);
  1713. if p^ = #13 then
  1714. Inc(p);
  1715. FDTDStartPos := p;
  1716. end;
  1717. FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
  1718. end;
  1719. procedure TXMLTextReader.ParseDoctypeDecl; // [28]
  1720. var
  1721. Src: TXMLCharSource;
  1722. DTDName: PHashItem;
  1723. Locs: array [0..2] of TLocation;
  1724. HasAtts: Boolean;
  1725. begin
  1726. if FState >= rsDTD then
  1727. FatalError('Markup declaration is not allowed here');
  1728. if FDisallowDoctype then
  1729. FatalError('Document type is prohibited by parser settings');
  1730. ExpectString('DOCTYPE');
  1731. SkipS(True);
  1732. FDocType := TDTDModel.Create(FNameTable);
  1733. FDTDProcessed := True; // assume success
  1734. FState := rsDTD;
  1735. CheckName;
  1736. SetString(FDocType.FName, FName.Buffer, FName.Length);
  1737. DTDName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1738. SkipS(True);
  1739. StoreLocation(Locs[0]);
  1740. HasAtts := ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, Locs[1], False);
  1741. if HasAtts then
  1742. Locs[2] := FTokenStart;
  1743. SkipS;
  1744. if CheckForChar('[') then
  1745. begin
  1746. BufAllocate(FIntSubset, 256);
  1747. FSource.Kind := skInternalSubset;
  1748. try
  1749. FDTDStartPos := FSource.FBuf;
  1750. ParseMarkupDecl;
  1751. DTDReloadHook; // fetch last chunk
  1752. SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
  1753. finally
  1754. FreeMem(FIntSubset.Buffer);
  1755. FSource.Kind := skNone;
  1756. end;
  1757. ExpectChar(']');
  1758. SkipS;
  1759. end;
  1760. ExpectChar('>');
  1761. if (FDocType.FSystemID <> '') then
  1762. begin
  1763. if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SourceURI, Src) then
  1764. begin
  1765. SetSource(Src);
  1766. Src.Initialize;
  1767. try
  1768. Src.Kind := skManualPop;
  1769. ParseMarkupDecl;
  1770. finally
  1771. ContextPop(True);
  1772. end;
  1773. end
  1774. else
  1775. begin
  1776. ValidationError('Unable to resolve external DTD subset', []);
  1777. FDTDProcessed := FStandalone;
  1778. end;
  1779. end;
  1780. FState := rsAfterDTD;
  1781. FValue.Length := 0;
  1782. BufAppendString(FValue, FDocType.FInternalSubset);
  1783. SetNodeInfoWithValue(ntDocumentType, DTDName);
  1784. if HasAtts then
  1785. begin
  1786. if FDocType.FPublicID <> '' then
  1787. AddPseudoAttribute(FNameTable.FindOrAdd('PUBLIC'), FDocType.FPublicID, Locs[0], Locs[1]);
  1788. AddPseudoAttribute(FNameTable.FindOrAdd('SYSTEM'), FDocType.FSystemID, Locs[0], Locs[2]);
  1789. end;
  1790. end;
  1791. procedure TXMLTextReader.ExpectEq; // [25]
  1792. begin
  1793. if FSource.FBuf^ <> '=' then
  1794. SkipS;
  1795. if FSource.FBuf^ <> '=' then
  1796. FatalError('Expected "="');
  1797. FSource.NextChar;
  1798. SkipS;
  1799. end;
  1800. { DTD stuff }
  1801. procedure TXMLTextReader.CheckPENesting(aExpected: TObject);
  1802. begin
  1803. if FSource.FEntity <> aExpected then
  1804. ValidationError('Parameter entities must be properly nested', [], 0);
  1805. end;
  1806. function TXMLTextReader.ParseQuantity: TCPQuant;
  1807. begin
  1808. case FSource.FBuf^ of
  1809. '?': Result := cqZeroOrOnce;
  1810. '*': Result := cqZeroOrMore;
  1811. '+': Result := cqOnceOrMore;
  1812. else
  1813. Result := cqOnce;
  1814. Exit;
  1815. end;
  1816. FSource.NextChar;
  1817. end;
  1818. function TXMLTextReader.FindOrCreateElDef: TElementDecl;
  1819. var
  1820. p: PHashItem;
  1821. begin
  1822. CheckName;
  1823. p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1824. Result := TElementDecl(p^.Data);
  1825. if Result = nil then
  1826. begin
  1827. Result := TElementDecl.Create;
  1828. p^.Data := Result;
  1829. end;
  1830. end;
  1831. procedure TXMLTextReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
  1832. var
  1833. Delim: WideChar;
  1834. CurrentCP: TContentParticle;
  1835. begin
  1836. Delim := #0;
  1837. repeat
  1838. CurrentCP := CP.Add;
  1839. SkipWhitespace;
  1840. if CheckForChar('(') then
  1841. ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
  1842. else
  1843. CurrentCP.Def := FindOrCreateElDef;
  1844. CurrentCP.CPQuant := ParseQuantity;
  1845. SkipWhitespace;
  1846. if FSource.FBuf^ = ')' then
  1847. Break;
  1848. if Delim = #0 then
  1849. begin
  1850. if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
  1851. Delim := FSource.FBuf^
  1852. else
  1853. FatalError('Expected pipe or comma delimiter');
  1854. end
  1855. else
  1856. if FSource.FBuf^ <> Delim then
  1857. FatalError(Delim);
  1858. FSource.NextChar; // skip delimiter
  1859. until False;
  1860. CheckPENesting(MustEndIn);
  1861. FSource.NextChar;
  1862. if Delim = '|' then
  1863. CP.CPType := ctChoice
  1864. else
  1865. CP.CPType := ctSeq; // '(foo)' is a sequence!
  1866. end;
  1867. procedure TXMLTextReader.ParseElementDecl; // [45]
  1868. var
  1869. ElDef: TElementDecl;
  1870. CurrentEntity: TObject;
  1871. I: Integer;
  1872. CP: TContentParticle;
  1873. Typ: TElementContentType;
  1874. ExtDecl: Boolean;
  1875. begin
  1876. CP := nil;
  1877. Typ := ctUndeclared; // satisfy compiler
  1878. ExpectWhitespace;
  1879. ElDef := FindOrCreateElDef;
  1880. if ElDef.ContentType <> ctUndeclared then
  1881. ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
  1882. ExtDecl := FSource.Kind <> skInternalSubset;
  1883. ExpectWhitespace;
  1884. if FSource.Matches('EMPTY') then
  1885. Typ := ctEmpty
  1886. else if FSource.Matches('ANY') then
  1887. Typ := ctAny
  1888. else if CheckForChar('(') then
  1889. begin
  1890. CP := TContentParticle.Create;
  1891. try
  1892. CurrentEntity := FSource.FEntity;
  1893. SkipWhitespace;
  1894. if FSource.Matches('#PCDATA') then // Mixed section [51]
  1895. begin
  1896. SkipWhitespace;
  1897. Typ := ctMixed;
  1898. while FSource.FBuf^ <> ')' do
  1899. begin
  1900. ExpectChar('|');
  1901. SkipWhitespace;
  1902. with CP.Add do
  1903. begin
  1904. Def := FindOrCreateElDef;
  1905. for I := CP.ChildCount-2 downto 0 do
  1906. if Def = CP.Children[I].Def then
  1907. ValidationError('Duplicate token in mixed section', [], FName.Length);
  1908. end;
  1909. SkipWhitespace;
  1910. end;
  1911. CheckPENesting(CurrentEntity);
  1912. FSource.NextChar;
  1913. if (not CheckForChar('*')) and (CP.ChildCount > 0) then
  1914. FatalError(WideChar('*'));
  1915. CP.CPQuant := cqZeroOrMore;
  1916. CP.CPType := ctChoice;
  1917. end
  1918. else // Children section [47]
  1919. begin
  1920. Typ := ctChildren;
  1921. ExpectChoiceOrSeq(CP, CurrentEntity);
  1922. CP.CPQuant := ParseQuantity;
  1923. end;
  1924. except
  1925. CP.Free;
  1926. raise;
  1927. end;
  1928. end
  1929. else
  1930. FatalError('Invalid content specification');
  1931. if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
  1932. begin
  1933. ElDef.ExternallyDeclared := ExtDecl;
  1934. ElDef.ContentType := Typ;
  1935. ElDef.RootCP := CP;
  1936. end
  1937. else
  1938. CP.Free;
  1939. end;
  1940. procedure TXMLTextReader.ParseNotationDecl; // [82]
  1941. var
  1942. NameStr, SysID, PubID: XMLString;
  1943. Notation: TNotationDecl;
  1944. Entry: PHashItem;
  1945. Src: TXMLCharSource;
  1946. dummy: TLocation;
  1947. begin
  1948. Src := FSource;
  1949. ExpectWhitespace;
  1950. CheckName;
  1951. CheckNCName;
  1952. SetString(NameStr, FName.Buffer, FName.Length);
  1953. ExpectWhitespace;
  1954. if not ParseExternalID(SysID, PubID, dummy, True) then
  1955. FatalError('Expected external or public ID');
  1956. if FDTDProcessed then
  1957. begin
  1958. Entry := FDocType.Notations.FindOrAdd(NameStr);
  1959. if Entry^.Data = nil then
  1960. begin
  1961. Notation := TNotationDecl.Create;
  1962. Notation.FName := NameStr;
  1963. Notation.FPublicID := PubID;
  1964. Notation.FSystemID := SysID;
  1965. Notation.FURI := Src.SourceURI;
  1966. Entry^.Data := Notation;
  1967. end
  1968. else
  1969. ValidationError('Duplicate notation declaration: ''%s''', [NameStr]);
  1970. end;
  1971. end;
  1972. const
  1973. AttrDataTypeNames: array[TAttrDataType] of XMLString = (
  1974. 'CDATA',
  1975. 'ID',
  1976. 'IDREF',
  1977. 'IDREFS',
  1978. 'ENTITY',
  1979. 'ENTITIES',
  1980. 'NMTOKEN',
  1981. 'NMTOKENS',
  1982. 'NOTATION'
  1983. );
  1984. procedure TXMLTextReader.ParseAttlistDecl; // [52]
  1985. var
  1986. ElDef: TElementDecl;
  1987. AttDef: TAttributeDef;
  1988. dt: TAttrDataType;
  1989. Found, DiscardIt: Boolean;
  1990. Offsets: array [Boolean] of Integer;
  1991. attrName: PHashItem;
  1992. begin
  1993. ExpectWhitespace;
  1994. ElDef := FindOrCreateElDef;
  1995. SkipWhitespace;
  1996. while FSource.FBuf^ <> '>' do
  1997. begin
  1998. CheckName;
  1999. ExpectWhitespace;
  2000. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2001. AttDef := TAttributeDef.Create(attrName, FColonPos);
  2002. try
  2003. AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2004. // In case of duplicate declaration of the same attribute, we must discard it,
  2005. // not modifying ElDef, and suppressing certain validation errors.
  2006. DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
  2007. if CheckForChar('(') then // [59]
  2008. begin
  2009. AttDef.DataType := dtNmToken;
  2010. repeat
  2011. SkipWhitespace;
  2012. CheckName([cnToken]);
  2013. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2014. ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
  2015. SkipWhitespace;
  2016. until not CheckForChar('|');
  2017. ExpectChar(')');
  2018. ExpectWhitespace;
  2019. end
  2020. else
  2021. begin
  2022. StoreLocation(FTokenStart);
  2023. // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
  2024. for dt := dtNotation downto dtCData do
  2025. begin
  2026. Found := FSource.Matches(AttrDataTypeNames[dt]);
  2027. if Found then
  2028. Break;
  2029. end;
  2030. if Found and SkipWhitespace then
  2031. begin
  2032. AttDef.DataType := dt;
  2033. if (dt = dtId) and not DiscardIt then
  2034. begin
  2035. if Assigned(ElDef.IDAttr) then
  2036. ValidationError('Only one attribute of type ID is allowed per element',[])
  2037. else
  2038. ElDef.IDAttr := AttDef;
  2039. end
  2040. else if dt = dtNotation then // no test cases for these ?!
  2041. begin
  2042. if not DiscardIt then
  2043. begin
  2044. if Assigned(ElDef.NotationAttr) then
  2045. ValidationError('Only one attribute of type NOTATION is allowed per element',[])
  2046. else
  2047. ElDef.NotationAttr := AttDef;
  2048. if ElDef.ContentType = ctEmpty then
  2049. ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
  2050. end;
  2051. ExpectChar('(');
  2052. repeat
  2053. SkipWhitespace;
  2054. StoreLocation(FTokenStart);
  2055. CheckName;
  2056. CheckNCName;
  2057. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2058. ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
  2059. if (not DiscardIt) and FValidate and
  2060. (FDocType.Notations.Get(FName.Buffer,FName.Length)=nil) then
  2061. AddForwardRef(FName.Buffer, FName.Length);
  2062. SkipWhitespace;
  2063. until not CheckForChar('|');
  2064. ExpectChar(')');
  2065. ExpectWhitespace;
  2066. end;
  2067. end
  2068. else
  2069. begin
  2070. // don't report 'expected whitespace' if token does not match completely
  2071. Offsets[False] := 0;
  2072. Offsets[True] := Length(AttrDataTypeNames[dt]);
  2073. if Found and (FSource.FBuf^ < 'A') then
  2074. ExpectWhitespace
  2075. else
  2076. FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
  2077. end;
  2078. end;
  2079. StoreLocation(FTokenStart);
  2080. if FSource.Matches('#REQUIRED') then
  2081. AttDef.Default := adRequired
  2082. else if FSource.Matches('#IMPLIED') then
  2083. AttDef.Default := adImplied
  2084. else if FSource.Matches('#FIXED') then
  2085. begin
  2086. AttDef.Default := adFixed;
  2087. ExpectWhitespace;
  2088. end
  2089. else
  2090. AttDef.Default := adDefault;
  2091. if AttDef.Default in [adDefault, adFixed] then
  2092. begin
  2093. if AttDef.DataType = dtId then
  2094. ValidationError('An attribute of type ID cannot have a default value',[]);
  2095. // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
  2096. ExpectAttValue(AttDef.Data, dt <> dtCDATA);
  2097. if not AttDef.ValidateSyntax(AttDef.Data^.FValueStr, FNamespaces) then
  2098. ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
  2099. end;
  2100. if DiscardIt then
  2101. AttDef.Free
  2102. else
  2103. ElDef.AddAttrDef(AttDef);
  2104. except
  2105. AttDef.Free;
  2106. raise;
  2107. end;
  2108. SkipWhitespace;
  2109. end;
  2110. end;
  2111. procedure TXMLTextReader.ParseEntityDecl; // [70]
  2112. var
  2113. IsPE, Exists: Boolean;
  2114. Entity: TEntityDecl;
  2115. Map: THashTable;
  2116. Item: PHashItem;
  2117. dummy: TLocation;
  2118. begin
  2119. Entity := TEntityDecl.Create;
  2120. try
  2121. Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2122. Entity.FURI := FSource.SourceURI;
  2123. if not SkipWhitespace(True) then
  2124. FatalError('Expected whitespace');
  2125. IsPE := CheckForChar('%');
  2126. if IsPE then // [72]
  2127. begin
  2128. ExpectWhitespace;
  2129. if FPEMap = nil then
  2130. FPEMap := THashTable.Create(64, True);
  2131. Map := FPEMap;
  2132. end
  2133. else
  2134. Map := FDocType.Entities;
  2135. Entity.FIsPE := IsPE;
  2136. CheckName;
  2137. CheckNCName;
  2138. Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
  2139. ExpectWhitespace;
  2140. if FEntityValue.Buffer = nil then
  2141. BufAllocate(FEntityValue, 256);
  2142. if ParseLiteral(FEntityValue, ltEntity, False) then
  2143. begin
  2144. SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
  2145. Entity.FCharCount := FEntityValue.Length;
  2146. Entity.FStartLocation := FTokenStart;
  2147. end
  2148. else
  2149. begin
  2150. if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, dummy, False) then
  2151. FatalError('Expected entity value or external ID');
  2152. if not IsPE then // [76]
  2153. begin
  2154. if FSource.FBuf^ <> '>' then
  2155. ExpectWhitespace;
  2156. if FSource.Matches('NDATA') then
  2157. begin
  2158. ExpectWhitespace;
  2159. StoreLocation(FTokenStart); { needed for AddForwardRef }
  2160. CheckName;
  2161. SetString(Entity.FNotationName, FName.Buffer, FName.Length);
  2162. if FValidate and (FDocType.Notations.Get(FName.Buffer, FName.Length)=nil) then
  2163. AddForwardRef(FName.Buffer, FName.Length);
  2164. end;
  2165. end;
  2166. end;
  2167. except
  2168. Entity.Free;
  2169. raise;
  2170. end;
  2171. // Repeated declarations of same entity are legal but must be ignored
  2172. if FDTDProcessed and not Exists then
  2173. begin
  2174. Item^.Data := Entity;
  2175. Entity.FName := Item^.Key;
  2176. end
  2177. else
  2178. Entity.Free;
  2179. end;
  2180. procedure TXMLTextReader.ParseIgnoreSection;
  2181. var
  2182. IgnoreLoc: TLocation;
  2183. IgnoreLevel: Integer;
  2184. wc: WideChar;
  2185. begin
  2186. StoreLocation(IgnoreLoc);
  2187. IgnoreLevel := 1;
  2188. repeat
  2189. FValue.Length := 0;
  2190. wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
  2191. if FSource.Matches('<![') then
  2192. Inc(IgnoreLevel)
  2193. else if FSource.Matches(']]>') then
  2194. Dec(IgnoreLevel)
  2195. else if wc <> #0 then
  2196. FSource.NextChar
  2197. else // PE's aren't recognized in ignore section, cannot ContextPop()
  2198. DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
  2199. until IgnoreLevel=0;
  2200. end;
  2201. procedure TXMLTextReader.ParseMarkupDecl; // [29]
  2202. var
  2203. IncludeLevel: Integer;
  2204. CurrentEntity: TObject;
  2205. IncludeLoc: TLocation;
  2206. CondType: (ctUnknown, ctInclude, ctIgnore);
  2207. begin
  2208. IncludeLevel := 0;
  2209. repeat
  2210. SkipWhitespace;
  2211. if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
  2212. begin
  2213. ExpectString(']]>');
  2214. Dec(IncludeLevel);
  2215. Continue;
  2216. end;
  2217. if not CheckForChar('<') then
  2218. Break;
  2219. CurrentEntity := FSource.FEntity;
  2220. if FSource.FBuf^ = '?' then
  2221. begin
  2222. ParsePI;
  2223. end
  2224. else
  2225. begin
  2226. ExpectChar('!');
  2227. if FSource.FBuf^ = '-' then
  2228. ParseComment(True)
  2229. else if CheckForChar('[') then
  2230. begin
  2231. if FSource.Kind = skInternalSubset then
  2232. FatalError('Conditional sections are not allowed in internal subset', 1);
  2233. SkipWhitespace;
  2234. CondType := ctUnknown; // satisfy compiler
  2235. if FSource.Matches('INCLUDE') then
  2236. CondType := ctInclude
  2237. else if FSource.Matches('IGNORE') then
  2238. CondType := ctIgnore
  2239. else
  2240. FatalError('Expected "INCLUDE" or "IGNORE"');
  2241. SkipWhitespace;
  2242. CheckPENesting(CurrentEntity);
  2243. ExpectChar('[');
  2244. if CondType = ctInclude then
  2245. begin
  2246. if IncludeLevel = 0 then
  2247. StoreLocation(IncludeLoc);
  2248. Inc(IncludeLevel);
  2249. end
  2250. else if CondType = ctIgnore then
  2251. ParseIgnoreSection;
  2252. end
  2253. else
  2254. begin
  2255. FInsideDecl := True;
  2256. if FSource.Matches('ELEMENT') then
  2257. ParseElementDecl
  2258. else if FSource.Matches('ENTITY') then
  2259. ParseEntityDecl
  2260. else if FSource.Matches('ATTLIST') then
  2261. ParseAttlistDecl
  2262. else if FSource.Matches('NOTATION') then
  2263. ParseNotationDecl
  2264. else
  2265. FatalError('Illegal markup declaration');
  2266. SkipWhitespace;
  2267. CheckPENesting(CurrentEntity);
  2268. ExpectChar('>');
  2269. FInsideDecl := False;
  2270. end;
  2271. end;
  2272. until False;
  2273. if IncludeLevel > 0 then
  2274. DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
  2275. if FSource.FBuf < FSource.FBufEnd then
  2276. if (FSource.Kind <> skInternalSubset) or (FSource.FBuf^ <> ']') then
  2277. FatalError('Illegal character in DTD');
  2278. end;
  2279. procedure TXMLTextReader.ParseDTD;
  2280. begin
  2281. FSource.Initialize;
  2282. ParseMarkupDecl;
  2283. end;
  2284. procedure TXMLTextReader.Close;
  2285. begin
  2286. FReadState := rsClosed;
  2287. FTokenStart.Line := 0;
  2288. FTokenStart.LinePos := 0;
  2289. end;
  2290. function TXMLTextReader.GetAttributeCount: Integer;
  2291. begin
  2292. result := FAttrCount;
  2293. end;
  2294. function TXMLTextReader.GetAttribute(i: Integer): XMLString;
  2295. begin
  2296. if (i < 0) or (i >= FAttrCount) then
  2297. raise EArgumentOutOfRangeException.Create('index');
  2298. result := FNodeStack[FNesting+i+1].FValueStr;
  2299. end;
  2300. function TXMLTextReader.GetAttribute(const AName: XMLString): XMLString;
  2301. var
  2302. i: Integer;
  2303. p: PHashItem;
  2304. begin
  2305. p := FNameTable.Find(PWideChar(AName), Length(AName));
  2306. if Assigned(p) then
  2307. for i := 1 to FAttrCount do
  2308. if FNodeStack[FNesting+i].FQName = p then
  2309. begin
  2310. result := FNodeStack[FNesting+i].FValueStr;
  2311. Exit;
  2312. end;
  2313. result := '';
  2314. end;
  2315. function TXMLTextReader.GetAttribute(const aLocalName, nsuri: XMLString): XMLString;
  2316. var
  2317. i: Integer;
  2318. p: PWideChar;
  2319. p1: PHashItem;
  2320. node: PNodeData;
  2321. begin
  2322. p1 := FNameTable.Find(PWideChar(nsuri), Length(nsuri));
  2323. if Assigned(p1) then
  2324. for i := 1 to FAttrCount do
  2325. begin
  2326. node := @FNodeStack[FNesting+i];
  2327. if node^.FNsUri = p1 then
  2328. begin
  2329. P := PWideChar(node^.FQName^.Key);
  2330. if node^.FColonPos > 0 then
  2331. Inc(P, node^.FColonPos+1);
  2332. if (Length(node^.FQName^.Key)-node^.FColonPos-1 = Length(aLocalName)) and
  2333. CompareMem(P, PWideChar(aLocalName), Length(aLocalName)*sizeof(WideChar)) then
  2334. begin
  2335. result := node^.FValueStr;
  2336. Exit;
  2337. end;
  2338. end;
  2339. end;
  2340. result := '';
  2341. end;
  2342. function TXMLTextReader.GetDepth: Integer;
  2343. begin
  2344. result := FNesting;
  2345. if FCurrAttrIndex >= 0 then
  2346. Inc(result);
  2347. if FAttrReadState <> arsNone then
  2348. Inc(result);
  2349. end;
  2350. function TXMLTextReader.GetNameTable: THashTable;
  2351. begin
  2352. result := FNameTable;
  2353. end;
  2354. function TXMLTextReader.GetNodeType: TXmlNodeType;
  2355. begin
  2356. result := FCurrNode^.FNodeType;
  2357. end;
  2358. function TXMLTextReader.GetName: XMLString;
  2359. begin
  2360. if Assigned(FCurrNode^.FQName) then
  2361. result := FCurrNode^.FQName^.Key
  2362. else
  2363. result := '';
  2364. end;
  2365. function TXMLTextReader.GetIsDefault: Boolean;
  2366. begin
  2367. result := FCurrNode^.FIsDefault;
  2368. end;
  2369. function TXMLTextReader.GetBaseUri: XMLString;
  2370. begin
  2371. result := FSource.SourceURI;
  2372. end;
  2373. function TXMLTextReader.GetXmlVersion: TXMLVersion;
  2374. begin
  2375. result := FSource.FXMLVersion;
  2376. end;
  2377. function TXMLTextReader.GetXmlEncoding: XMLString;
  2378. begin
  2379. result := FSource.FXMLEncoding;
  2380. end;
  2381. { IXmlLineInfo methods }
  2382. function TXMLTextReader.GetHasLineInfo: Boolean;
  2383. begin
  2384. result := True;
  2385. end;
  2386. function TXMLTextReader.GetLineNumber: Integer;
  2387. begin
  2388. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2389. result := FCurrNode^.FLoc.Line
  2390. else
  2391. result := FTokenStart.Line;
  2392. end;
  2393. function TXMLTextReader.GetLinePosition: Integer;
  2394. begin
  2395. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2396. result := FCurrNode^.FLoc.LinePos
  2397. else
  2398. result := FTokenStart.LinePos;
  2399. end;
  2400. function TXMLTextReader.CurrentNodePtr: PPNodeData;
  2401. begin
  2402. result := @FCurrNode;
  2403. end;
  2404. function TXMLTextReader.LookupNamespace(const APrefix: XMLString): XMLString;
  2405. begin
  2406. if Assigned(FNSHelper) then
  2407. result := FNSHelper.LookupNamespace(APrefix)
  2408. else
  2409. result := '';
  2410. end;
  2411. function TXMLTextReader.MoveToFirstAttribute: Boolean;
  2412. begin
  2413. result := False;
  2414. if FAttrCount = 0 then
  2415. exit;
  2416. FCurrAttrIndex := 0;
  2417. if FAttrReadState <> arsNone then
  2418. CleanAttrReadState;
  2419. FCurrNode := @FNodeStack[FNesting+1];
  2420. result := True;
  2421. end;
  2422. function TXMLTextReader.MoveToNextAttribute: Boolean;
  2423. begin
  2424. result := False;
  2425. if FCurrAttrIndex+1 >= FAttrCount then
  2426. exit;
  2427. Inc(FCurrAttrIndex);
  2428. if FAttrReadState <> arsNone then
  2429. CleanAttrReadState;
  2430. FCurrNode := @FNodeStack[FNesting+1+FCurrAttrIndex];
  2431. result := True;
  2432. end;
  2433. function TXMLTextReader.MoveToElement: Boolean;
  2434. begin
  2435. result := False;
  2436. if FAttrReadState <> arsNone then
  2437. CleanAttrReadState
  2438. else if FCurrNode^.FNodeType <> ntAttribute then
  2439. exit;
  2440. FCurrNode := @FNodeStack[FNesting];
  2441. FCurrAttrIndex := -1;
  2442. result := True;
  2443. end;
  2444. function TXMLTextReader.ReadAttributeValue: Boolean;
  2445. var
  2446. attrNode: PNodeData;
  2447. begin
  2448. Result := False;
  2449. if FAttrReadState = arsNone then
  2450. begin
  2451. if (FReadState <> rsInteractive) or (FCurrAttrIndex < 0) then
  2452. Exit;
  2453. attrNode := @FNodeStack[FNesting+FCurrAttrIndex+1];
  2454. if attrNode^.FNext = nil then
  2455. begin
  2456. if attrNode^.FValueStr = '' then
  2457. Exit; { we don't want to expose empty textnodes }
  2458. FCurrNode := AllocNodeData(FNesting+FAttrCount+1);
  2459. FCurrNode^.FNodeType := ntText;
  2460. FCurrNode^.FValueStr := attrNode^.FValueStr;
  2461. FCurrNode^.FLoc := attrNode^.FLoc2;
  2462. end
  2463. else
  2464. FCurrNode := attrNode^.FNext;
  2465. FAttrReadState := arsText;
  2466. FAttrBaseSource := FSource;
  2467. Result := True;
  2468. end
  2469. else // already reading, advance to next chunk
  2470. begin
  2471. if FSource = FAttrBaseSource then
  2472. begin
  2473. Result := Assigned(FCurrNode^.FNext);
  2474. if Result then
  2475. FCurrNode := FCurrNode^.FNext;
  2476. end
  2477. else
  2478. begin
  2479. NextAttrValueChunk;
  2480. Result := True;
  2481. end;
  2482. end;
  2483. end;
  2484. procedure TXMLTextReader.NextAttrValueChunk;
  2485. var
  2486. wc: WideChar;
  2487. tok: TAttributeReadState;
  2488. begin
  2489. if FAttrReadState = arsPushEntity then
  2490. begin
  2491. Inc(FNesting);
  2492. { make sure that the location is available }
  2493. AllocNodeData(FNesting+FAttrCount+1);
  2494. FAttrReadState := arsText;
  2495. end;
  2496. FCurrNode := @FNodeStack[FNesting+FAttrCount+1];
  2497. StoreLocation(FCurrNode^.FLoc);
  2498. FValue.Length := 0;
  2499. if FAttrReadState = arsText then
  2500. repeat
  2501. wc := FSource.SkipUntil(FValue, [#0, '&', #9, #10, #13]);
  2502. if wc = '&' then
  2503. begin
  2504. if ParseRef(FValue) or ResolvePredefined then
  2505. Continue;
  2506. tok := arsEntity;
  2507. end
  2508. else if wc <> #0 then { #9,#10,#13 -> replace by #32 }
  2509. begin
  2510. FSource.NextChar;
  2511. BufAppend(FValue, #32);
  2512. Continue;
  2513. end
  2514. else // #0
  2515. tok := arsEntityEnd;
  2516. if FValue.Length <> 0 then
  2517. begin
  2518. FCurrNode^.FNodeType := ntText;
  2519. FCurrNode^.FQName := nil;
  2520. SetString(FCurrNode^.FValueStr, FValue.Buffer, FValue.Length);
  2521. FAttrReadState := tok;
  2522. Exit;
  2523. end;
  2524. Break;
  2525. until False
  2526. else
  2527. tok := FAttrReadState;
  2528. if tok = arsEntity then
  2529. begin
  2530. HandleEntityStart;
  2531. FAttrReadState := arsText;
  2532. end
  2533. else if tok = arsEntityEnd then
  2534. begin
  2535. HandleEntityEnd;
  2536. FAttrReadState := arsText;
  2537. end;
  2538. end;
  2539. procedure TXMLTextReader.CleanAttrReadState;
  2540. begin
  2541. while FSource <> FAttrBaseSource do
  2542. ContextPop(True);
  2543. FAttrReadState := arsNone;
  2544. end;
  2545. function TXMLTextReader.GetHasValue: Boolean;
  2546. begin
  2547. result := FCurrNode^.FNodeType in [ntAttribute,ntText,ntCDATA,
  2548. ntProcessingInstruction,ntComment,ntWhitespace,ntSignificantWhitespace,
  2549. ntDocumentType];
  2550. end;
  2551. function TXMLTextReader.GetValue: XMLString;
  2552. begin
  2553. if (FCurrAttrIndex>=0) or (FAttrReadState <> arsNone) then
  2554. result := FCurrNode^.FValueStr
  2555. else
  2556. SetString(result, FCurrNode^.FValueStart, FCurrNode^.FValueLength);
  2557. end;
  2558. function TXMLTextReader.GetPrefix: XMLString;
  2559. begin
  2560. if Assigned(FCurrNode^.FPrefix) then
  2561. result := FCurrNode^.FPrefix^.Key
  2562. else
  2563. result := '';
  2564. end;
  2565. function TXMLTextReader.GetLocalName: XMLString;
  2566. begin
  2567. if FNamespaces and Assigned(FCurrNode^.FQName) then
  2568. if FCurrNode^.FColonPos < 0 then
  2569. Result := FCurrNode^.FQName^.Key
  2570. else
  2571. Result := Copy(FCurrNode^.FQName^.Key, FCurrNode^.FColonPos+2, MaxInt)
  2572. else
  2573. Result := '';
  2574. end;
  2575. function TXMLTextReader.GetNamespaceUri: XMLString;
  2576. begin
  2577. if Assigned(FCurrNode^.FNSURI) then
  2578. result := FCurrNode^.FNSURI^.Key
  2579. else
  2580. result := '';
  2581. end;
  2582. procedure TXMLTextReader.SetEOFState;
  2583. begin
  2584. FCurrNode := @FNodeStack[0];
  2585. Finalize(FCurrNode^);
  2586. FillChar(FCurrNode^, sizeof(TNodeData), 0);
  2587. FReadState := rsEndOfFile;
  2588. end;
  2589. procedure TXMLTextReader.ValidateCurrentNode;
  2590. var
  2591. ElDef: TElementDecl;
  2592. AttDef: TAttributeDef;
  2593. attr: PNodeData;
  2594. i: Integer;
  2595. begin
  2596. case FCurrNode^.FNodeType of
  2597. ntElement:
  2598. begin
  2599. if (FNesting = 0) and (not FFragmentMode) then
  2600. begin
  2601. if Assigned(FDocType) then
  2602. begin
  2603. if FDocType.FName <> FCurrNode^.FQName^.Key then
  2604. DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
  2605. end
  2606. else
  2607. DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
  2608. end;
  2609. ElDef := TElementDecl(FCurrNode^.FQName^.Data);
  2610. if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
  2611. DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2612. if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
  2613. DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2614. PushVC(ElDef);
  2615. if ElDef = nil then
  2616. Exit;
  2617. { Validate attributes }
  2618. for i := 1 to FAttrCount do
  2619. begin
  2620. attr := @FNodeStack[FNesting+i];
  2621. AttDef := TAttributeDef(attr^.FTypeInfo);
  2622. if AttDef = nil then
  2623. DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
  2624. [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
  2625. else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
  2626. begin
  2627. if FStandalone and AttDef.ExternallyDeclared then
  2628. if attr^.FDenormalized then
  2629. DoErrorPos(esError, 'In a standalone document, externally defined attribute cannot cause value normalization', attr^.FLoc2)
  2630. else if i > FSpecifiedAttrs then
  2631. DoError(esError, 'In a standalone document, attribute cannot have a default value defined externally');
  2632. // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
  2633. if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
  2634. DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
  2635. if not AttDef.ValidateSyntax(attr^.FValueStr, FNamespaces) then
  2636. DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
  2637. ValidateAttrValue(AttDef, attr);
  2638. end;
  2639. end;
  2640. { Check presence of #REQUIRED attributes }
  2641. if ElDef.HasRequiredAtts then
  2642. for i := 0 to ElDef.AttrDefCount-1 do
  2643. begin
  2644. if FAttrDefIndex[i] = FAttrTag then
  2645. Continue;
  2646. AttDef := ElDef.AttrDefs[i];
  2647. if AttDef.Default = adRequired then
  2648. ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
  2649. [AttDef.Data^.FQName^.Key, FCurrNode^.FQName^.Key], 0)
  2650. end;
  2651. end;
  2652. ntEndElement:
  2653. begin
  2654. if FValidators[FValidatorNesting].Incomplete then
  2655. ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
  2656. if FValidatorNesting > 0 then
  2657. Dec(FValidatorNesting);
  2658. end;
  2659. ntText, ntSignificantWhitespace:
  2660. case FValidators[FValidatorNesting].FContentType of
  2661. ctChildren:
  2662. if FCurrNode^.FNodeType = ntText then
  2663. ValidationError('Character data is not allowed in element-only content',[])
  2664. else
  2665. begin
  2666. if FValidators[FValidatorNesting].FSaViolation then
  2667. ValidationError('Standalone constraint violation',[]);
  2668. FCurrNode^.FNodeType := ntWhitespace;
  2669. end;
  2670. ctEmpty:
  2671. ValidationError('Character data is not allowed in EMPTY elements', []);
  2672. end;
  2673. ntCDATA:
  2674. if FValidators[FValidatorNesting].FContentType = ctChildren then
  2675. ValidationError('CDATA sections are not allowed in element-only content',[]);
  2676. ntProcessingInstruction:
  2677. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2678. ValidationError('Processing instructions are not allowed within EMPTY elements', []);
  2679. ntComment:
  2680. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2681. ValidationError('Comments are not allowed within EMPTY elements', []);
  2682. ntDocumentType:
  2683. ValidateDTD;
  2684. end;
  2685. end;
  2686. procedure TXMLTextReader.HandleEntityStart;
  2687. begin
  2688. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  2689. FCurrNode^.FNodeType := ntEntityReference;
  2690. FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2691. FCurrNode^.FColonPos := -1;
  2692. FCurrNode^.FValueStart := nil;
  2693. FCurrNode^.FValueLength := 0;
  2694. FCurrNode^.FValueStr := '';
  2695. StoreLocation(FCurrNode^.FLoc);
  2696. { point past '&' to first char of entity name }
  2697. Dec(FCurrNode^.FLoc.LinePos, FName.Length+1);
  2698. end;
  2699. procedure TXMLTextReader.HandleEntityEnd;
  2700. begin
  2701. ContextPop(True);
  2702. if FNesting > 0 then Dec(FNesting);
  2703. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  2704. FCurrNode^.FNodeType := ntEndEntity;
  2705. { point to trailing ';' }
  2706. Inc(FCurrNode^.FLoc.LinePos, Length(FCurrNode^.FQName^.Key));
  2707. end;
  2708. procedure TXMLTextReader.ResolveEntity;
  2709. var
  2710. n: PNodeData;
  2711. ent: TEntityDecl;
  2712. begin
  2713. if FCurrNode^.FNodeType <> ntEntityReference then
  2714. raise EInvalidOperation.Create('Wrong node type');
  2715. if FAttrReadState <> arsNone then
  2716. begin
  2717. { copy the EntityReference node to the stack if not already there }
  2718. n := AllocNodeData(FNesting+FAttrCount+1);
  2719. if n <> FCurrNode then
  2720. n^ := FCurrNode^;
  2721. ent := nil;
  2722. if Assigned(FDocType) then
  2723. ent := FDocType.Entities.Get(PWideChar(n^.FQName^.Key),Length(n^.FQName^.Key)) as TEntityDecl;
  2724. ContextPush(ent, True);
  2725. FAttrReadState := arsPushEntity;
  2726. end
  2727. else
  2728. FNext := xtPushEntity;
  2729. end;
  2730. procedure TXMLTextReader.DoStartEntity;
  2731. begin
  2732. Inc(FNesting);
  2733. FCurrNode := AllocNodeData(FNesting);
  2734. ContextPush(FCurrEntity, True);
  2735. FNext := xtText;
  2736. end;
  2737. // The code below does the bulk of the parsing, and must be as fast as possible.
  2738. // To minimize CPU cache effects, methods from different classes are kept together
  2739. function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  2740. wsflag: PBoolean): WideChar;
  2741. var
  2742. old: PWideChar;
  2743. nonws: Boolean;
  2744. wc: WideChar;
  2745. begin
  2746. nonws := False;
  2747. repeat
  2748. old := FBuf;
  2749. repeat
  2750. wc := FBuf^;
  2751. if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
  2752. (wc = #$2028))) then
  2753. begin
  2754. // strictly this is needed only for 2-byte lineendings
  2755. BufAppendChunk(ToFill, old, FBuf);
  2756. NewLine;
  2757. old := FBuf;
  2758. wc := FBuf^
  2759. end
  2760. else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
  2761. (wc <> #9)) or (wc > #$FFFD) or
  2762. (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
  2763. FReader.FatalError('Invalid character');
  2764. if (wc < #255) and (Char(ord(wc)) in Delim) then
  2765. Break;
  2766. // the checks above filter away everything below #32 that isn't a whitespace
  2767. if wc > #32 then
  2768. nonws := True;
  2769. Inc(FBuf);
  2770. until False;
  2771. Result := wc;
  2772. BufAppendChunk(ToFill, old, FBuf);
  2773. until (Result <> #0) or (not Reload);
  2774. if Assigned(wsflag) then
  2775. wsflag^ := wsflag^ or nonws;
  2776. end;
  2777. const
  2778. TextDelims: array[Boolean] of TSetOfChar = (
  2779. [#0, '<', '&', '>'],
  2780. [#0, '>']
  2781. );
  2782. textNodeTypes: array[Boolean] of TXMLNodeType = (
  2783. ntSignificantWhitespace,
  2784. ntText
  2785. );
  2786. function TXMLTextReader.ReadTopLevel: Boolean;
  2787. var
  2788. tok: TXMLToken;
  2789. begin
  2790. if FNext = xtFakeLF then
  2791. begin
  2792. Result := SetupFakeLF(xtText);
  2793. Exit;
  2794. end;
  2795. StoreLocation(FTokenStart);
  2796. if FNext = xtText then
  2797. repeat
  2798. SkipS;
  2799. if FSource.FBuf^ = '<' then
  2800. begin
  2801. Inc(FSource.FBuf);
  2802. if FSource.FBufEnd < FSource.FBuf + 2 then
  2803. FSource.Reload;
  2804. if FSource.FBuf^ = '!' then
  2805. begin
  2806. Inc(FSource.FBuf);
  2807. if FSource.FBuf^ = '-' then
  2808. begin
  2809. if FIgnoreComments then
  2810. begin
  2811. ParseComment(True);
  2812. Continue;
  2813. end;
  2814. tok := xtComment;
  2815. end
  2816. else
  2817. tok := xtDoctype;
  2818. end
  2819. else if FSource.FBuf^ = '?' then
  2820. tok := xtPI
  2821. else
  2822. begin
  2823. CheckName;
  2824. tok := xtElement;
  2825. end;
  2826. end
  2827. else if FSource.FBuf >= FSource.FBufEnd then
  2828. begin
  2829. if FState < rsRoot then
  2830. FatalError('Root element is missing');
  2831. tok := xtEOF;
  2832. end
  2833. else
  2834. FatalError('Illegal at document level');
  2835. if FCanonical and (FState > rsRoot) and (tok <> xtEOF) then
  2836. begin
  2837. Result := SetupFakeLF(tok);
  2838. Exit;
  2839. end;
  2840. Break;
  2841. until False
  2842. else // FNext <> xtText
  2843. tok := FNext;
  2844. if FCanonical and (FState < rsRoot) and (tok <> xtDoctype) then
  2845. FNext := xtFakeLF
  2846. else
  2847. FNext := xtText;
  2848. case tok of
  2849. xtElement:
  2850. begin
  2851. if FState > rsRoot then
  2852. FatalError('Only one top-level element allowed', FName.Length)
  2853. else if FState < rsRoot then
  2854. begin
  2855. // dispose notation refs from DTD, if any
  2856. ClearForwardRefs;
  2857. FState := rsRoot;
  2858. end;
  2859. ParseStartTag;
  2860. end;
  2861. xtPI: ParsePI;
  2862. xtComment: ParseComment(False);
  2863. xtDoctype:
  2864. begin
  2865. ParseDoctypeDecl;
  2866. if FCanonical then
  2867. begin
  2868. // recurse, effectively ignoring the DTD
  2869. result := ReadTopLevel();
  2870. Exit;
  2871. end;
  2872. end;
  2873. xtEOF: SetEofState;
  2874. end;
  2875. Result := tok <> xtEOF;
  2876. end;
  2877. function TXMLTextReader.Read: Boolean;
  2878. var
  2879. nonWs: Boolean;
  2880. wc: WideChar;
  2881. InCDATA: Boolean;
  2882. tok: TXMLToken;
  2883. begin
  2884. if FReadState > rsInteractive then
  2885. begin
  2886. Result := False;
  2887. Exit;
  2888. end;
  2889. if FReadState = rsInitial then
  2890. begin
  2891. FReadState := rsInteractive;
  2892. FSource.Initialize;
  2893. FNext := xtText;
  2894. end;
  2895. if FAttrReadState <> arsNone then
  2896. CleanAttrReadState;
  2897. if FNext = xtPopEmptyElement then
  2898. begin
  2899. FNext := xtPopElement;
  2900. FCurrNode^.FNodeType := ntEndElement;
  2901. if FAttrCleanupFlag then
  2902. CleanupAttributes;
  2903. FAttrCount := 0;
  2904. FCurrAttrIndex := -1;
  2905. Result := True;
  2906. Exit;
  2907. end;
  2908. if FNext = xtPushElement then
  2909. begin
  2910. if FAttrCleanupFlag then
  2911. CleanupAttributes;
  2912. FAttrCount := 0;
  2913. Inc(FNesting);
  2914. FCurrAttrIndex := -1;
  2915. FNext := xtText;
  2916. end
  2917. else if FNext = xtPopElement then
  2918. PopElement
  2919. else if FNext = xtPushEntity then
  2920. DoStartEntity;
  2921. if FState <> rsRoot then
  2922. begin
  2923. Result := ReadTopLevel;
  2924. Exit;
  2925. end;
  2926. InCDATA := (FNext = xtCDSect);
  2927. StoreLocation(FTokenStart);
  2928. nonWs := False;
  2929. FValue.Length := 0;
  2930. if FNext in [xtCDSect, xtText] then
  2931. repeat
  2932. wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
  2933. if wc = '<' then
  2934. begin
  2935. Inc(FSource.FBuf);
  2936. if FSource.FBufEnd < FSource.FBuf + 2 then
  2937. FSource.Reload;
  2938. if FSource.FBuf^ = '/' then
  2939. tok := xtEndElement
  2940. else if CheckName([cnOptional]) then
  2941. tok := xtElement
  2942. else if FSource.FBuf^ = '!' then
  2943. begin
  2944. Inc(FSource.FBuf);
  2945. if FSource.FBuf^ = '[' then
  2946. begin
  2947. ExpectString('[CDATA[');
  2948. StoreLocation(FTokenStart);
  2949. InCDATA := True;
  2950. if FCDSectionsAsText or (FValue.Length = 0) then
  2951. Continue;
  2952. tok := xtCDSect;
  2953. end
  2954. else if FSource.FBuf^ = '-' then
  2955. begin
  2956. { Ignoring comments is tricky in validating mode; discarding a comment which
  2957. is the only child of an EMPTY element will make that element erroneously appear
  2958. as valid. Therefore, at this point we discard only comments which are preceded
  2959. by some text (since presence of text already renders an EMPTY element invalid).
  2960. Other comments should be reported to validation part and discarded there. }
  2961. if FIgnoreComments and (FValue.Length > 0) then
  2962. begin
  2963. ParseComment(True);
  2964. Continue;
  2965. end;
  2966. tok := xtComment;
  2967. end
  2968. else
  2969. tok := xtDoctype;
  2970. end
  2971. else if FSource.FBuf^ = '?' then
  2972. tok := xtPI
  2973. else
  2974. RaiseNameNotFound;
  2975. end
  2976. else if wc = #0 then
  2977. begin
  2978. if InCDATA then
  2979. FatalError('Unterminated CDATA section', -1);
  2980. if FNesting > FSource.FStartNesting then
  2981. FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting-1].FQName^.Key]);
  2982. if Assigned(FSource.FParent) then
  2983. begin
  2984. if FExpandEntities and ContextPop then
  2985. Continue
  2986. else
  2987. tok := xtEntityEnd;
  2988. end
  2989. else
  2990. tok := xtEOF;
  2991. end
  2992. else if wc = '>' then
  2993. begin
  2994. BufAppend(FValue, wc);
  2995. FSource.NextChar;
  2996. if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
  2997. (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
  2998. if InCData then // got a ']]>' separator
  2999. begin
  3000. Dec(FValue.Length, 3);
  3001. InCDATA := False;
  3002. if FCDSectionsAsText then
  3003. Continue;
  3004. SetNodeInfoWithValue(ntCDATA);
  3005. FNext := xtText;
  3006. Result := True;
  3007. Exit;
  3008. end
  3009. else
  3010. FatalError('Literal '']]>'' is not allowed in text', 3);
  3011. end
  3012. else if wc = '&' then
  3013. begin
  3014. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  3015. ValidationError('References are illegal in EMPTY elements', []);
  3016. if ParseRef(FValue) or ResolvePredefined then
  3017. begin
  3018. nonWs := True; // CharRef to whitespace is not considered whitespace
  3019. Continue;
  3020. end
  3021. else
  3022. begin
  3023. FCurrEntity := EntityCheck;
  3024. if Assigned(FCurrEntity) and FExpandEntities then
  3025. begin
  3026. ContextPush(FCurrEntity);
  3027. Continue;
  3028. end;
  3029. tok := xtEntity;
  3030. end;
  3031. end;
  3032. if FValue.Length <> 0 then
  3033. begin
  3034. SetNodeInfoWithValue(textNodeTypes[nonWs]);
  3035. FNext := tok;
  3036. Result := True;
  3037. Exit;
  3038. end;
  3039. Break;
  3040. until False
  3041. else // not (FNext in [xtText, xtCDSect])
  3042. tok := FNext;
  3043. FNext := xtText;
  3044. case tok of
  3045. xtEntity: HandleEntityStart;
  3046. xtEntityEnd: HandleEntityEnd;
  3047. xtElement: ParseStartTag;
  3048. xtEndElement: ParseEndTag;
  3049. xtPI: ParsePI;
  3050. xtDoctype: ParseDoctypeDecl;
  3051. xtComment: ParseComment(False);
  3052. xtEOF: SetEofState;
  3053. end;
  3054. Result := tok <> xtEOF;
  3055. end;
  3056. procedure TXMLCharSource.NextChar;
  3057. begin
  3058. Inc(FBuf);
  3059. if FBuf >= FBufEnd then
  3060. Reload;
  3061. end;
  3062. procedure TXMLTextReader.ExpectChar(wc: WideChar);
  3063. begin
  3064. if FSource.FBuf^ = wc then
  3065. FSource.NextChar
  3066. else
  3067. FatalError(wc);
  3068. end;
  3069. // Element name already in FNameBuffer
  3070. procedure TXMLTextReader.ParseStartTag; // [39] [40] [44]
  3071. var
  3072. ElDef: TElementDecl;
  3073. IsEmpty: Boolean;
  3074. ElName: PHashItem;
  3075. b: TBinding;
  3076. Len: Integer;
  3077. begin
  3078. ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3079. ElDef := TElementDecl(ElName^.Data);
  3080. if Assigned(ElDef) then
  3081. Len := ElDef.AttrDefCount+8 { overallocate a bit }
  3082. else
  3083. Len := 0;
  3084. // (re)initialize array of attribute definition tags
  3085. if (Len-8 > Length(FAttrDefIndex)) or (FAttrTag = 0) then
  3086. begin
  3087. SetLength(FAttrDefIndex, Len);
  3088. for Len := 0 to High(FAttrDefIndex) do
  3089. FAttrDefIndex[Len] := FAttrTag;
  3090. end;
  3091. // we're about to process a new set of attributes
  3092. {$push}{$r-,q-}
  3093. Dec(FAttrTag);
  3094. {$pop}
  3095. IsEmpty := False;
  3096. FAttrCount := 0;
  3097. FCurrAttrIndex := -1;
  3098. FPrefixedAttrs := 0;
  3099. FSpecifiedAttrs := 0;
  3100. FCurrNode := AllocNodeData(FNesting);
  3101. FCurrNode^.FQName := ElName;
  3102. FCurrNode^.FNodeType := ntElement;
  3103. FCurrNode^.FColonPos := FColonPos;
  3104. StoreLocation(FCurrNode^.FLoc);
  3105. Dec(FCurrNode^.FLoc.LinePos, FName.Length);
  3106. if FNamespaces then
  3107. begin
  3108. FNSHelper.PushScope;
  3109. if FColonPos > 0 then
  3110. FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3111. end;
  3112. while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
  3113. begin
  3114. SkipS(True);
  3115. if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
  3116. Break;
  3117. ParseAttribute(ElDef);
  3118. end;
  3119. if FSource.FBuf^ = '/' then
  3120. begin
  3121. IsEmpty := True;
  3122. FSource.NextChar;
  3123. end;
  3124. ExpectChar('>');
  3125. if Assigned(ElDef) and ElDef.NeedsDefaultPass then
  3126. ProcessDefaultAttributes(ElDef);
  3127. // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
  3128. FCurrNode := @FNodeStack[FNesting];
  3129. if FNamespaces then
  3130. begin
  3131. { Assign namespace URIs to prefixed attrs }
  3132. if FPrefixedAttrs <> 0 then
  3133. ProcessNamespaceAtts;
  3134. { Expand the element name }
  3135. if Assigned(FCurrNode^.FPrefix) then
  3136. begin
  3137. b := TBinding(FCurrNode^.FPrefix^.Data);
  3138. if not (Assigned(b) and Assigned(b.uri) and (b.uri^.Key <> '')) then
  3139. DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
  3140. FCurrNode^.FNsUri := b.uri;
  3141. end
  3142. else
  3143. begin
  3144. b := FNSHelper.DefaultNSBinding;
  3145. if Assigned(b) then
  3146. FCurrNode^.FNsUri := b.uri;
  3147. end;
  3148. end;
  3149. if not IsEmpty then
  3150. begin
  3151. if not FPreserveWhitespace then // critical for testsuite compliance
  3152. SkipS;
  3153. FNext := xtPushElement;
  3154. end
  3155. else
  3156. FNext := xtPopEmptyElement;
  3157. end;
  3158. procedure TXMLTextReader.ParseEndTag; // [42]
  3159. var
  3160. ElName: PHashItem;
  3161. begin
  3162. if FNesting <= FSource.FStartNesting then
  3163. FatalError('End-tag is not allowed here');
  3164. if FNesting > 0 then Dec(FNesting);
  3165. Inc(FSource.FBuf);
  3166. FCurrNode := @FNodeStack[FNesting]; // move off the possible child
  3167. FCurrNode^.FNodeType := ntEndElement;
  3168. StoreLocation(FTokenStart);
  3169. FCurrNode^.FLoc := FTokenStart;
  3170. ElName := FCurrNode^.FQName;
  3171. if not FSource.MatchesLong(ElName^.Key) then
  3172. FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], -1);
  3173. if FSource.FBuf^ = '>' then // this handles majority of cases
  3174. FSource.NextChar
  3175. else
  3176. begin // gives somewhat incorrect message for <a></aa>
  3177. SkipS;
  3178. ExpectChar('>');
  3179. end;
  3180. FNext := xtPopElement;
  3181. end;
  3182. procedure TXMLTextReader.ParseAttribute(ElDef: TElementDecl);
  3183. var
  3184. attrName: PHashItem;
  3185. attrData: PNodeData;
  3186. AttDef: TAttributeDef;
  3187. i: Integer;
  3188. begin
  3189. CheckName;
  3190. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3191. attrData := AllocAttributeData;
  3192. attrData^.FQName := attrName;
  3193. attrData^.FColonPos := FColonPos;
  3194. StoreLocation(attrData^.FLoc);
  3195. Dec(attrData^.FLoc.LinePos, FName.Length);
  3196. FSpecifiedAttrs := FAttrCount;
  3197. if Assigned(ElDef) then
  3198. begin
  3199. AttDef := ElDef.GetAttrDef(attrName);
  3200. // mark attribute as specified
  3201. if Assigned(AttDef) then
  3202. FAttrDefIndex[AttDef.Index] := FAttrTag;
  3203. end
  3204. else
  3205. AttDef := nil;
  3206. attrData^.FTypeInfo := AttDef;
  3207. // check for duplicates
  3208. for i := 1 to FAttrCount-1 do
  3209. if FNodeStack[FNesting+i].FQName = attrName then
  3210. FatalError('Duplicate attribute', FName.Length);
  3211. if FNamespaces then
  3212. begin
  3213. if ((FName.Length = 5) or (FColonPos = 5)) and
  3214. (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
  3215. (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
  3216. (FName.Buffer[4] = 's') then
  3217. begin
  3218. if FColonPos > 0 then
  3219. attrData^.FPrefix := FStdPrefix_xmlns;
  3220. attrData^.FNsUri := FStdUri_xmlns;
  3221. end
  3222. else if FColonPos > 0 then
  3223. begin
  3224. attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3225. Inc(FPrefixedAttrs);
  3226. end;
  3227. end;
  3228. ExpectEq;
  3229. ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
  3230. if Assigned(attrData^.FNsUri) then
  3231. begin
  3232. if (not AddBinding(attrData)) and FCanonical then
  3233. begin
  3234. CleanupAttribute(attrData);
  3235. Dec(FAttrCount);
  3236. Dec(FSpecifiedAttrs);
  3237. end;
  3238. end;
  3239. end;
  3240. procedure TXMLTextReader.AddForwardRef(Buf: PWideChar; Length: Integer);
  3241. var
  3242. w: PForwardRef;
  3243. begin
  3244. if FForwardRefs = nil then
  3245. FForwardRefs := TFPList.Create;
  3246. New(w);
  3247. SetString(w^.Value, Buf, Length);
  3248. w^.Loc := FTokenStart;
  3249. FForwardRefs.Add(w);
  3250. end;
  3251. procedure TXMLTextReader.ClearForwardRefs;
  3252. var
  3253. I: Integer;
  3254. begin
  3255. if Assigned(FForwardRefs) then
  3256. begin
  3257. for I := 0 to FForwardRefs.Count-1 do
  3258. Dispose(PForwardRef(FForwardRefs.List^[I]));
  3259. FForwardRefs.Clear;
  3260. end;
  3261. end;
  3262. procedure TXMLTextReader.ValidateIdRefs;
  3263. var
  3264. I: Integer;
  3265. begin
  3266. if Assigned(FForwardRefs) then
  3267. begin
  3268. for I := 0 to FForwardRefs.Count-1 do
  3269. with PForwardRef(FForwardRefs.List^[I])^ do
  3270. if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
  3271. DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
  3272. ClearForwardRefs;
  3273. end;
  3274. end;
  3275. procedure TXMLTextReader.ProcessDefaultAttributes(ElDef: TElementDecl);
  3276. var
  3277. I: Integer;
  3278. AttDef: TAttributeDef;
  3279. attrData: PNodeData;
  3280. begin
  3281. for I := 0 to ElDef.AttrDefCount-1 do
  3282. begin
  3283. if FAttrDefIndex[I] <> FAttrTag then // this one wasn't specified
  3284. begin
  3285. AttDef := ElDef.AttrDefs[I];
  3286. case AttDef.Default of
  3287. adDefault, adFixed: begin
  3288. attrData := AllocAttributeData;
  3289. attrData^ := AttDef.Data^;
  3290. if FCanonical then
  3291. attrData^.FIsDefault := False;
  3292. if FNamespaces then
  3293. begin
  3294. if AttDef.IsNamespaceDecl then
  3295. begin
  3296. if attrData^.FColonPos > 0 then
  3297. attrData^.FPrefix := FStdPrefix_xmlns;
  3298. attrData^.FNsUri := FStdUri_xmlns;
  3299. if (not AddBinding(attrData)) and FCanonical then
  3300. Dec(FAttrCount);
  3301. end
  3302. else if attrData^.FColonPos > 0 then
  3303. begin
  3304. attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
  3305. Inc(FPrefixedAttrs);
  3306. end
  3307. else
  3308. attrData^.FNsUri := FEmptyStr;
  3309. end;
  3310. end;
  3311. end;
  3312. end;
  3313. end;
  3314. end;
  3315. function TXMLTextReader.AddBinding(attrData: PNodeData): Boolean;
  3316. var
  3317. nsUri, Pfx: PHashItem;
  3318. begin
  3319. nsUri := FNameTable.FindOrAdd(attrData^.FValueStr);
  3320. if attrData^.FColonPos > 0 then
  3321. Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
  3322. else
  3323. Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
  3324. { 'xml' is allowed to be bound to the correct namespace }
  3325. if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
  3326. (Pfx = FStdPrefix_xmlns) or
  3327. (nsUri = FStduri_xmlns) then
  3328. begin
  3329. if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
  3330. DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
  3331. else
  3332. DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
  3333. end;
  3334. if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
  3335. DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
  3336. Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsUri);
  3337. if Result then
  3338. FNSHelper.BindPrefix(nsUri, Pfx);
  3339. end;
  3340. procedure TXMLTextReader.ProcessNamespaceAtts;
  3341. var
  3342. I, J: Integer;
  3343. Pfx, AttrName: PHashItem;
  3344. attrData: PNodeData;
  3345. b: TBinding;
  3346. begin
  3347. FNsAttHash.Init(FPrefixedAttrs);
  3348. for I := 1 to FAttrCount do
  3349. begin
  3350. attrData := @FNodeStack[FNesting+i];
  3351. if Assigned(attrData^.FNsUri) then
  3352. Continue;
  3353. if (attrData^.FColonPos < 1) then
  3354. begin
  3355. attrData^.FNsUri := FEmptyStr;
  3356. Continue;
  3357. end;
  3358. Pfx := attrData^.FPrefix;
  3359. b := TBinding(Pfx^.Data);
  3360. if not (Assigned(b) and Assigned (b.uri) and (b.uri^.Key <> '')) then
  3361. DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
  3362. { detect duplicates }
  3363. J := attrData^.FColonPos+1;
  3364. AttrName := attrData^.FQName;
  3365. if FNsAttHash.Locate(b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
  3366. DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
  3367. attrData^.FNsUri := b.uri;
  3368. end;
  3369. end;
  3370. function TXMLTextReader.ParseExternalID(out SysID, PubID: XMLString; // [75]
  3371. out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
  3372. var
  3373. I: Integer;
  3374. wc: WideChar;
  3375. begin
  3376. Result := False;
  3377. if FSource.Matches('SYSTEM') then
  3378. SysIdOptional := False
  3379. else if FSource.Matches('PUBLIC') then
  3380. begin
  3381. ExpectWhitespace;
  3382. ParseLiteral(FValue, ltPubid, True);
  3383. PubIDLoc := FTokenStart;
  3384. SetString(PubID, FValue.Buffer, FValue.Length);
  3385. for I := 1 to Length(PubID) do
  3386. begin
  3387. wc := PubID[I];
  3388. if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
  3389. FatalError('Illegal Public ID literal', -1);
  3390. end;
  3391. end
  3392. else
  3393. Exit;
  3394. if SysIdOptional then
  3395. SkipWhitespace
  3396. else
  3397. ExpectWhitespace;
  3398. if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
  3399. SetString(SysID, FValue.Buffer, FValue.Length);
  3400. Result := True;
  3401. end;
  3402. procedure TXMLTextReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  3403. var
  3404. L, StartPos, EndPos: Integer;
  3405. Entity: TEntityDecl;
  3406. begin
  3407. L := Length(attrData^.FValueStr);
  3408. case AttrDef.DataType of
  3409. dtId: begin
  3410. if not AddID(attrData) then
  3411. DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
  3412. end;
  3413. dtIdRef, dtIdRefs: begin
  3414. StartPos := 1;
  3415. while StartPos <= L do
  3416. begin
  3417. EndPos := StartPos;
  3418. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3419. Inc(EndPos);
  3420. if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
  3421. AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
  3422. StartPos := EndPos + 1;
  3423. end;
  3424. end;
  3425. dtEntity, dtEntities: begin
  3426. StartPos := 1;
  3427. while StartPos <= L do
  3428. begin
  3429. EndPos := StartPos;
  3430. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3431. Inc(EndPos);
  3432. Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
  3433. if (Entity = nil) or (Entity.FNotationName = '') then
  3434. ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
  3435. StartPos := EndPos + 1;
  3436. end;
  3437. end;
  3438. end;
  3439. end;
  3440. procedure TXMLTextReader.ValidateDTD;
  3441. var
  3442. I: Integer;
  3443. begin
  3444. if Assigned(FForwardRefs) then
  3445. begin
  3446. for I := 0 to FForwardRefs.Count-1 do
  3447. with PForwardRef(FForwardRefs[I])^ do
  3448. if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
  3449. DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
  3450. end;
  3451. end;
  3452. function TXMLTextReader.AddId(aNodeData: PNodeData): Boolean;
  3453. var
  3454. e: PHashItem;
  3455. begin
  3456. if FIDMap = nil then
  3457. FIDMap := THashTable.Create(256, False);
  3458. e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
  3459. Result := not Result;
  3460. if Result then
  3461. aNodeData^.FIDEntry := e;
  3462. end;
  3463. function TXMLTextReader.AllocAttributeData: PNodeData;
  3464. begin
  3465. Result := AllocNodeData(FNesting + FAttrCount + 1);
  3466. Result^.FNodeType := ntAttribute;
  3467. Result^.FIsDefault := False;
  3468. Inc(FAttrCount);
  3469. end;
  3470. procedure TXMLTextReader.AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
  3471. const nameLoc, valueLoc: TLocation);
  3472. begin
  3473. with AllocAttributeData^ do
  3474. begin
  3475. FQName := aName;
  3476. FColonPos := -1;
  3477. FValueStr := aValue;
  3478. FLoc := nameLoc;
  3479. FLoc2 := valueLoc;
  3480. end;
  3481. end;
  3482. function TXMLTextReader.AllocNodeData(AIndex: Integer): PNodeData;
  3483. begin
  3484. {make sure we have an extra slot to place child text/comment/etc}
  3485. if AIndex >= Length(FNodeStack)-1 then
  3486. SetLength(FNodeStack, AIndex * 2 + 2);
  3487. Result := @FNodeStack[AIndex];
  3488. Result^.FNext := nil;
  3489. Result^.FPrefix := nil;
  3490. Result^.FNsUri := nil;
  3491. Result^.FIDEntry := nil;
  3492. Result^.FValueStart := nil;
  3493. Result^.FValueLength := 0;
  3494. end;
  3495. procedure TXMLTextReader.AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  3496. var
  3497. chunk: PNodeData;
  3498. begin
  3499. { when parsing DTD, don't take ownership of allocated data }
  3500. chunk := FFreeAttrChunk;
  3501. if Assigned(chunk) and (FState <> rsDTD) then
  3502. begin
  3503. FFreeAttrChunk := chunk^.FNext;
  3504. chunk^.FNext := nil;
  3505. end
  3506. else { no free chunks, create a new one }
  3507. chunk := AllocMem(sizeof(TNodeData));
  3508. APrev^.FNext := chunk;
  3509. APrev := chunk;
  3510. { assume text node, for entity refs it is overridden later }
  3511. chunk^.FNodeType := ntText;
  3512. chunk^.FQName := nil;
  3513. chunk^.FColonPos := -1;
  3514. { without PWideChar typecast and in $T-, FPC treats '@' result as PAnsiChar... }
  3515. SetString(chunk^.FValueStr, PWideChar(@FValue.Buffer[Offset]), FValue.Length-Offset);
  3516. end;
  3517. procedure TXMLTextReader.CleanupAttributes;
  3518. var
  3519. i: Integer;
  3520. begin
  3521. {cleanup only specified attributes; default ones are owned by DTD}
  3522. for i := 1 to FSpecifiedAttrs do
  3523. CleanupAttribute(@FNodeStack[FNesting+i]);
  3524. FAttrCleanupFlag := False;
  3525. end;
  3526. procedure TXMLTextReader.CleanupAttribute(aNode: PNodeData);
  3527. var
  3528. chunk: PNodeData;
  3529. begin
  3530. if Assigned(aNode^.FNext) then
  3531. begin
  3532. chunk := aNode^.FNext;
  3533. while Assigned(chunk^.FNext) do
  3534. chunk := chunk^.FNext;
  3535. chunk^.FNext := FFreeAttrChunk;
  3536. FFreeAttrChunk := aNode^.FNext;
  3537. aNode^.FNext := nil;
  3538. end;
  3539. end;
  3540. procedure TXMLTextReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  3541. begin
  3542. FCurrNode := @FNodeStack[FNesting];
  3543. FCurrNode^.FNodeType := typ;
  3544. FCurrNode^.FQName := AName;
  3545. FCurrNode^.FColonPos := -1;
  3546. FCurrNode^.FValueStart := FValue.Buffer;
  3547. FCurrNode^.FValueLength := FValue.Length;
  3548. end;
  3549. function TXMLTextReader.SetupFakeLF(nextstate: TXMLToken): Boolean;
  3550. begin
  3551. FValue.Buffer[0] := #10;
  3552. FValue.Length := 1;
  3553. SetNodeInfoWithValue(ntWhitespace,nil);
  3554. FNext := nextstate;
  3555. Result := True;
  3556. end;
  3557. procedure TXMLTextReader.PushVC(aElDef: TElementDecl);
  3558. begin
  3559. Inc(FValidatorNesting);
  3560. if FValidatorNesting >= Length(FValidators) then
  3561. SetLength(FValidators, FValidatorNesting * 2);
  3562. with FValidators[FValidatorNesting] do
  3563. begin
  3564. FElementDef := aElDef;
  3565. FCurCP := nil;
  3566. FFailed := False;
  3567. FContentType := ctAny;
  3568. FSaViolation := False;
  3569. if Assigned(aElDef) then
  3570. begin
  3571. FContentType := aElDef.ContentType;
  3572. FSaViolation := FStandalone and aElDef.ExternallyDeclared;
  3573. end;
  3574. end;
  3575. end;
  3576. procedure TXMLTextReader.PopElement;
  3577. begin
  3578. if FNamespaces then
  3579. FNSHelper.PopScope;
  3580. if (FNesting = 0) and (not FFragmentMode) then
  3581. FState := rsEpilog;
  3582. FCurrNode := @FNodeStack[FNesting];
  3583. FNext := xtText;
  3584. end;
  3585. { TElementValidator }
  3586. function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
  3587. var
  3588. Next: TContentParticle;
  3589. begin
  3590. Result := True;
  3591. // if element is not declared, non-validity has been already reported, no need to report again...
  3592. if Assigned(Def) and Assigned(FElementDef) then
  3593. begin
  3594. case FElementDef.ContentType of
  3595. ctEmpty: Result := False;
  3596. ctChildren, ctMixed: begin
  3597. if FFailed then // if already detected a mismatch, don't waste time
  3598. Exit;
  3599. if FCurCP = nil then
  3600. Next := FElementDef.RootCP.FindFirst(Def)
  3601. else
  3602. Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
  3603. Result := Assigned(Next);
  3604. if Result then
  3605. FCurCP := Next
  3606. else
  3607. FFailed := True; // used to prevent extra error at the end of element
  3608. end;
  3609. // ctAny, ctUndeclared: returns True by default
  3610. end;
  3611. end;
  3612. end;
  3613. function TElementValidator.Incomplete: Boolean;
  3614. begin
  3615. if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
  3616. begin
  3617. if FCurCP <> nil then
  3618. Result := FCurCP.MoreRequired(0) { arg ignored here }
  3619. else
  3620. Result := FElementDef.RootCP.IsRequired;
  3621. end
  3622. else
  3623. Result := False;
  3624. end;
  3625. end.