xmltextreader.pp 110 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984
  1. {
  2. This file is part of the Free Component Library
  3. TXMLTextReader, a streaming text XML reader
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit xmltextreader;
  13. {$mode objfpc}{$h+}
  14. interface
  15. uses
  16. SysUtils, Classes, xmlutils, xmlreader, dtdmodel;
  17. type
  18. TDecoder = record
  19. Context: Pointer;
  20. Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  21. Cleanup: procedure(Context: Pointer); stdcall;
  22. end;
  23. TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
  24. TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
  25. TXMLTextReader = class;
  26. TXMLCharSource = class(TObject)
  27. private
  28. FBuf: PWideChar;
  29. FBufEnd: PWideChar;
  30. FReader: TXMLTextReader;
  31. FParent: TXMLCharSource;
  32. FEntity: TEntityDecl;
  33. FLineNo: Integer;
  34. LFPos: PWideChar;
  35. FXML11Rules: Boolean;
  36. FSourceURI: XMLString;
  37. FCharCount: Cardinal;
  38. FStartNesting: Integer;
  39. FXMLVersion: TXMLVersion;
  40. FXMLEncoding: XMLString;
  41. function GetSourceURI: XMLString;
  42. protected
  43. function Reload: Boolean; virtual;
  44. public
  45. Kind: TXMLSourceKind;
  46. constructor Create(const AData: XMLString);
  47. procedure NextChar;
  48. procedure NewLine; virtual;
  49. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  50. wsflag: PBoolean = nil): WideChar; virtual;
  51. procedure Initialize; virtual;
  52. function SetEncoding(const AEncoding: string): Boolean; virtual;
  53. function Matches(const arg: XMLString): Boolean;
  54. function MatchesLong(const arg: XMLString): Boolean;
  55. property SourceURI: XMLString read GetSourceURI write FSourceURI;
  56. end;
  57. TElementValidator = object
  58. FElementDef: TElementDecl;
  59. FCurCP: TContentParticle;
  60. FFailed: Boolean;
  61. FSaViolation: Boolean;
  62. FContentType: TElementContentType; // =ctAny when FElementDef is nil
  63. function IsElementAllowed(Def: TElementDecl): Boolean;
  64. function Incomplete: Boolean;
  65. end;
  66. TNodeDataDynArray = array of TNodeData;
  67. TValidatorDynArray = array of TElementValidator;
  68. TCheckNameFlags = set of (cnOptional, cnToken);
  69. TXMLToken = (xtNone, xtEOF, xtText, xtElement, xtEndElement,
  70. xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
  71. xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity, xtFakeLF);
  72. TAttributeReadState = (arsNone, arsText, arsEntity, arsEntityEnd, arsPushEntity);
  73. TLiteralType = (ltPlain, ltPubid, ltEntity);
  74. TEntityEvent = procedure(Sender: TXMLTextReader; AEntity: TEntityDecl) of object;
  75. TXMLTextReader = class(TXMLReader, IXmlLineInfo, IGetNodeDataPtr)
  76. private
  77. FSource: TXMLCharSource;
  78. FNameTable: THashTable;
  79. FXML11: Boolean;
  80. FNameTableOwned: Boolean;
  81. FState: (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
  82. FHavePERefs: Boolean;
  83. FInsideDecl: Boolean;
  84. FValue: TWideCharBuf;
  85. FEntityValue: TWideCharBuf;
  86. FName: TWideCharBuf;
  87. FTokenStart: TLocation;
  88. FStandalone: Boolean;
  89. FDocType: TDTDModel;
  90. FPEMap: THashTable;
  91. FForwardRefs: TFPList;
  92. FDTDStartPos: PWideChar;
  93. FIntSubset: TWideCharBuf;
  94. FAttrTag: Cardinal;
  95. FDTDProcessed: Boolean;
  96. FFragmentMode: Boolean;
  97. FNext: TXMLToken;
  98. FCurrEntity: TEntityDecl;
  99. FIDMap: THashTable;
  100. FAttrDefIndex: array of Cardinal;
  101. FNSHelper: TNSSupport;
  102. FNsAttHash: TDblHashArray;
  103. FEmptyStr: PHashItem;
  104. FStdPrefix_xml: PHashItem;
  105. FStdPrefix_xmlns: PHashItem;
  106. FStdUri_xml: PHashItem;
  107. FStdUri_xmlns: PHashItem;
  108. FColonPos: Integer;
  109. FValidate: Boolean; // parsing options, copy of FCtrl.Options
  110. FPreserveWhitespace: Boolean;
  111. FExpandEntities: Boolean;
  112. FIgnoreComments: Boolean;
  113. FCDSectionsAsText: Boolean;
  114. FNamespaces: Boolean;
  115. FDisallowDoctype: Boolean;
  116. FCanonical: Boolean;
  117. FMaxChars: Cardinal;
  118. FOnError: TXMLErrorEvent;
  119. FCurrAttrIndex: Integer;
  120. FOnEntity: TEntityEvent;
  121. procedure CleanAttrReadState;
  122. procedure SetEOFState;
  123. procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
  124. procedure SetSource(ASource: TXMLCharSource);
  125. function ContextPush(AEntity: TEntityDecl; DummySource: Boolean = False): Boolean;
  126. function ContextPop(Forced: Boolean = False): Boolean;
  127. function ParseQuantity: TCPQuant;
  128. procedure StoreLocation(out Loc: TLocation);
  129. procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  130. procedure AddForwardRef(Buf: PWideChar; Length: Integer);
  131. procedure ClearForwardRefs;
  132. procedure CallErrorHandler(E: EXMLReadError);
  133. function FindOrCreateElDef: TElementDecl;
  134. function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  135. procedure CheckMaxChars(ToAdd: Cardinal);
  136. function AllocNodeData(AIndex: Integer): PNodeData;
  137. function AllocAttributeData: PNodeData;
  138. procedure AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  139. procedure AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
  140. const nameLoc, valueLoc: TLocation);
  141. procedure CleanupAttribute(aNode: PNodeData);
  142. procedure CleanupAttributes;
  143. procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  144. function SetupFakeLF(nextstate: TXMLToken): Boolean;
  145. function AddId(aNodeData: PNodeData): Boolean;
  146. function QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  147. function _AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  148. function _Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  149. procedure SetFragmentMode(aValue: Boolean);
  150. protected
  151. FNesting: Integer;
  152. FCurrNode: PNodeData;
  153. FAttrCount: Integer;
  154. FPrefixedAttrs: Integer;
  155. FSpecifiedAttrs: Integer;
  156. FNodeStack: TNodeDataDynArray;
  157. FValidatorNesting: Integer;
  158. FValidators: TValidatorDynArray;
  159. FFreeAttrChunk: PNodeData;
  160. FAttrCleanupFlag: Boolean;
  161. // ReadAttributeValue state
  162. FAttrReadState: TAttributeReadState;
  163. FAttrBaseSource: TObject;
  164. procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
  165. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  166. const ErrPos: TLocation); overload;
  167. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  168. const args: array of const; const ErrPos: TLocation); overload;
  169. procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
  170. procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
  171. procedure FatalError(Expected: WideChar); overload;
  172. function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
  173. function SkipS(required: Boolean = False): Boolean;
  174. procedure ExpectWhitespace;
  175. procedure ExpectString(const s: String);
  176. procedure ExpectChar(wc: WideChar);
  177. function CheckForChar(c: WideChar): Boolean;
  178. procedure RaiseNameNotFound;
  179. function CheckName(aFlags: TCheckNameFlags = []): Boolean;
  180. procedure CheckNCName;
  181. function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  182. Required: Boolean): Boolean;
  183. procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
  184. procedure ParseComment(discard: Boolean); // [15]
  185. procedure ParsePI; // [16]
  186. procedure ParseXmlOrTextDecl(TextDecl: Boolean);
  187. procedure ExpectEq;
  188. procedure ParseDoctypeDecl; // [28]
  189. procedure ParseMarkupDecl; // [29]
  190. procedure ParseIgnoreSection;
  191. procedure ParseStartTag; // [39]
  192. procedure ParseEndTag; // [42]
  193. procedure HandleEntityStart;
  194. procedure HandleEntityEnd;
  195. procedure DoStartEntity;
  196. procedure ParseAttribute(ElDef: TElementDecl);
  197. function ReadTopLevel: Boolean;
  198. procedure NextAttrValueChunk;
  199. function GetHasLineInfo: Boolean;
  200. function GetLineNumber: Integer;
  201. function GetLinePosition: Integer;
  202. function CurrentNodePtr: PPNodeData;
  203. public
  204. function Read: Boolean; override;
  205. function MoveToFirstAttribute: Boolean; override;
  206. function MoveToNextAttribute: Boolean; override;
  207. function MoveToElement: Boolean; override;
  208. function ReadAttributeValue: Boolean; override;
  209. procedure Close; override;
  210. procedure ResolveEntity; override;
  211. function GetAttribute(i: Integer): XMLString; override;
  212. function GetAttribute(const AName: XMLString): XMLString; override;
  213. function GetAttribute(const ALocalName, nsuri: XMLString): XMLString; override;
  214. function LookupNamespace(const APrefix: XMLString): XMLString; override;
  215. property LineNumber: Integer read GetLineNumber;
  216. property LinePosition: Integer read GetLinePosition;
  217. protected
  218. function GetXmlVersion: TXMLVersion;
  219. function GetXmlEncoding: XMLString;
  220. function GetNameTable: THashTable; override;
  221. function GetDepth: Integer; override;
  222. function GetNodeType: TXmlNodeType; override;
  223. function GetName: XMLString; override;
  224. function GetValue: XMLString; override;
  225. function GetLocalName: XMLString; override;
  226. function GetPrefix: XMLString; override;
  227. function GetNamespaceUri: XMLString; override;
  228. function GetHasValue: Boolean; override;
  229. function GetAttributeCount: Integer; override;
  230. function GetBaseUri: XMLString; override;
  231. function GetIsDefault: Boolean; override;
  232. function ResolvePredefined: Boolean;
  233. function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
  234. function PrefetchEntity(AEntity: TEntityDecl): Boolean;
  235. procedure StartPE;
  236. function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  237. function ParseExternalID(out SysID, PubID: XMLString; // [75]
  238. out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
  239. procedure CheckPENesting(aExpected: TObject);
  240. procedure ParseEntityDecl;
  241. procedure ParseAttlistDecl;
  242. procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
  243. procedure ParseElementDecl;
  244. procedure ParseNotationDecl;
  245. function ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  246. procedure ProcessDefaultAttributes(ElDef: TElementDecl);
  247. procedure ProcessNamespaceAtts;
  248. function AddBinding(attrData: PNodeData): Boolean;
  249. procedure PushVC(aElDef: TElementDecl);
  250. procedure PopElement;
  251. procedure ValidateDTD;
  252. procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
  253. procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
  254. procedure DTDReloadHook;
  255. procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  256. procedure SetOptions(AValue: TXMLReaderSettings);
  257. procedure SetNametable(ANameTable: THashTable);
  258. public
  259. constructor Create(var AFile: Text; ANameTable: THashTable); overload;
  260. constructor Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable); overload;
  261. constructor Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
  262. constructor Create(ASrc: TXMLCharSource; AParent: TXMLTextReader); overload;
  263. constructor Create(const uri: XMLString; ASettings: TXMLReaderSettings); overload;
  264. constructor Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings); overload;
  265. destructor Destroy; override;
  266. procedure AfterConstruction; override;
  267. property OnEntity: TEntityEvent read FOnEntity write FOnEntity;
  268. { stuff needed for TLoader }
  269. property Standalone: Boolean read FStandalone;
  270. property DtdSchemaInfo: TDTDModel read FDocType write FDocType;
  271. property XML11: Boolean write FXML11;
  272. property XMLVersion: TXMLVersion read GetXMLVersion;
  273. property XMLEncoding: XMLString read GetXMLEncoding;
  274. property IDMap: THashTable read FIDMap write FIDMap;
  275. property ExpandEntities: Boolean read FExpandEntities;
  276. property Validate: Boolean read FValidate;
  277. property PreserveWhitespace: Boolean read FPreserveWhitespace;
  278. property IgnoreComments: Boolean read FIgnoreComments;
  279. property FragmentMode: Boolean read FFragmentMode write SetFragmentMode;
  280. procedure ValidateCurrentNode;
  281. procedure ValidateIdRefs;
  282. procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  283. procedure ParseDTD;
  284. end;
  285. procedure RegisterDecoder(Proc: TGetDecoderProc);
  286. implementation
  287. uses
  288. UriParser;
  289. type
  290. TXMLDecodingSource = class(TXMLCharSource)
  291. private
  292. FCharBuf: PChar;
  293. FCharBufEnd: PChar;
  294. FBufStart: PWideChar;
  295. FDecoder: TDecoder;
  296. FHasBOM: Boolean;
  297. FFixedUCS2: string;
  298. FBufSize: Integer;
  299. procedure DecodingError(const Msg: string);
  300. protected
  301. function Reload: Boolean; override;
  302. procedure FetchData; virtual;
  303. public
  304. procedure AfterConstruction; override;
  305. destructor Destroy; override;
  306. function SetEncoding(const AEncoding: string): Boolean; override;
  307. procedure NewLine; override;
  308. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  309. wsflag: PBoolean = nil): WideChar; override;
  310. procedure Initialize; override;
  311. end;
  312. TXMLStreamInputSource = class(TXMLDecodingSource)
  313. private
  314. FAllocated: PChar;
  315. FStream: TStream;
  316. FCapacity: Integer;
  317. FOwnStream: Boolean;
  318. FEof: Boolean;
  319. public
  320. constructor Create(AStream: TStream; AOwnStream: Boolean);
  321. destructor Destroy; override;
  322. procedure FetchData; override;
  323. end;
  324. TXMLFileInputSource = class(TXMLDecodingSource)
  325. private
  326. FFile: ^Text;
  327. FString: string;
  328. FTmp: string;
  329. public
  330. constructor Create(var AFile: Text);
  331. procedure FetchData; override;
  332. end;
  333. PForwardRef = ^TForwardRef;
  334. TForwardRef = record
  335. Value: XMLString;
  336. Loc: TLocation;
  337. end;
  338. const
  339. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  340. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  341. '#', '@', '$', '_', '%'];
  342. NullLocation: TLocation = (Line: 0; LinePos: 0);
  343. { Decoders }
  344. var
  345. Decoders: array of TGetDecoderProc;
  346. procedure RegisterDecoder(Proc: TGetDecoderProc);
  347. var
  348. L: Integer;
  349. begin
  350. L := Length(Decoders);
  351. SetLength(Decoders, L+1);
  352. Decoders[L] := Proc;
  353. end;
  354. function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
  355. var
  356. I: Integer;
  357. begin
  358. Result := False;
  359. for I := 0 to High(Decoders) do
  360. if Decoders[I](AEncoding, Decoder) then
  361. begin
  362. Result := True;
  363. Exit;
  364. end;
  365. end;
  366. function Is_8859_1(const AEncoding: string): Boolean;
  367. begin
  368. Result := SameText(AEncoding, 'ISO-8859-1') or
  369. SameText(AEncoding, 'ISO_8859-1') or
  370. SameText(AEncoding, 'latin1') or
  371. SameText(AEncoding, 'iso-ir-100') or
  372. SameText(AEncoding, 'l1') or
  373. SameText(AEncoding, 'IBM819') or
  374. SameText(AEncoding, 'CP819') or
  375. SameText(AEncoding, 'csISOLatin1') or
  376. // This one is not in character-sets.txt, but was used in FPC documentation,
  377. // and still being used in fcl-registry package
  378. SameText(AEncoding, 'ISO8859-1');
  379. end;
  380. { TXMLCharSource }
  381. constructor TXMLCharSource.Create(const AData: XMLString);
  382. begin
  383. inherited Create;
  384. FLineNo := 1;
  385. FBuf := PWideChar(AData);
  386. FBufEnd := FBuf + Length(AData);
  387. LFPos := FBuf-1;
  388. FCharCount := Length(AData);
  389. end;
  390. procedure TXMLCharSource.Initialize;
  391. begin
  392. end;
  393. function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
  394. begin
  395. Result := True; // always succeed
  396. end;
  397. function TXMLCharSource.GetSourceURI: XMLString;
  398. begin
  399. if FSourceURI <> '' then
  400. Result := FSourceURI
  401. else if Assigned(FParent) then
  402. Result := FParent.SourceURI
  403. else
  404. Result := '';
  405. end;
  406. function TXMLCharSource.Reload: Boolean;
  407. begin
  408. Result := False;
  409. end;
  410. procedure TXMLCharSource.NewLine;
  411. begin
  412. Inc(FLineNo);
  413. LFPos := FBuf;
  414. end;
  415. function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  416. wsflag: PBoolean): WideChar;
  417. var
  418. old: PWideChar;
  419. nonws: Boolean;
  420. begin
  421. old := FBuf;
  422. nonws := False;
  423. repeat
  424. if FBuf^ = #10 then
  425. NewLine;
  426. if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
  427. Break;
  428. if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
  429. nonws := True;
  430. Inc(FBuf);
  431. until False;
  432. Result := FBuf^;
  433. BufAppendChunk(ToFill, old, FBuf);
  434. if Assigned(wsflag) then
  435. wsflag^ := wsflag^ or nonws;
  436. end;
  437. function TXMLCharSource.Matches(const arg: XMLString): Boolean;
  438. begin
  439. Result := False;
  440. if (FBufEnd >= FBuf + Length(arg)) or Reload then
  441. Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
  442. if Result then
  443. begin
  444. Inc(FBuf, Length(arg));
  445. if FBuf >= FBufEnd then
  446. Reload;
  447. end;
  448. end;
  449. { Used to check element name in end-tags, difference from Matches is that
  450. buffer may be reloaded more than once. XML has no restriction on name
  451. length, so a name longer than input buffer may be encountered. }
  452. function TXMLCharSource.MatchesLong(const arg: XMLString): Boolean;
  453. var
  454. idx, len, chunk: Integer;
  455. begin
  456. Result := False;
  457. idx := 1;
  458. len := Length(arg);
  459. repeat
  460. if (FBuf >= FBufEnd) and not Reload then
  461. Exit;
  462. if FBufEnd >= FBuf + len then
  463. chunk := len
  464. else
  465. chunk := FBufEnd - FBuf;
  466. if not CompareMem(@arg[idx], FBuf, chunk*sizeof(WideChar)) then
  467. Exit;
  468. Inc(FBuf, chunk);
  469. Inc(idx,chunk);
  470. Dec(len,chunk);
  471. until len = 0;
  472. Result := True;
  473. if FBuf >= FBufEnd then
  474. Reload;
  475. end;
  476. { TXMLDecodingSource }
  477. procedure TXMLDecodingSource.AfterConstruction;
  478. begin
  479. inherited AfterConstruction;
  480. FBufStart := AllocMem(4096);
  481. FBuf := FBufStart;
  482. FBufEnd := FBuf;
  483. LFPos := FBuf-1;
  484. end;
  485. destructor TXMLDecodingSource.Destroy;
  486. begin
  487. FreeMem(FBufStart);
  488. if Assigned(FDecoder.Cleanup) then
  489. FDecoder.Cleanup(FDecoder.Context);
  490. inherited Destroy;
  491. end;
  492. procedure TXMLDecodingSource.FetchData;
  493. begin
  494. end;
  495. procedure TXMLDecodingSource.DecodingError(const Msg: string);
  496. begin
  497. // count line endings to obtain correct error location
  498. while FBuf < FBufEnd do
  499. begin
  500. if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
  501. begin
  502. if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
  503. ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
  504. Inc(FBuf);
  505. LFPos := FBuf;
  506. Inc(FLineNo);
  507. end;
  508. Inc(FBuf);
  509. end;
  510. FReader.FatalError(Msg);
  511. end;
  512. function TXMLDecodingSource.Reload: Boolean;
  513. var
  514. Remainder: PtrInt;
  515. r, inLeft: Cardinal;
  516. rslt: Integer;
  517. begin
  518. if Kind = skInternalSubset then
  519. FReader.DTDReloadHook;
  520. Remainder := FBufEnd - FBuf;
  521. if Remainder > 0 then
  522. Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
  523. Dec(LFPos, FBuf-FBufStart);
  524. FBuf := FBufStart;
  525. FBufEnd := FBufStart + Remainder;
  526. repeat
  527. inLeft := FCharBufEnd - FCharBuf;
  528. if inLeft < 4 then // may contain an incomplete char
  529. begin
  530. FetchData;
  531. inLeft := FCharBufEnd - FCharBuf;
  532. if inLeft <= 0 then
  533. Break;
  534. end;
  535. r := FBufStart + FBufSize - FBufEnd;
  536. if r = 0 then
  537. Break;
  538. rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
  539. { Sanity checks: r and inLeft must not increase. }
  540. if inLeft + FCharBuf <= FCharBufEnd then
  541. FCharBuf := FCharBufEnd - inLeft
  542. else
  543. DecodingError('Decoder error: input byte count out of bounds');
  544. if r + FBufEnd <= FBufStart + FBufSize then
  545. FBufEnd := FBufStart + FBufSize - r
  546. else
  547. DecodingError('Decoder error: output char count out of bounds');
  548. if rslt = 0 then
  549. Break
  550. else if rslt < 0 then
  551. DecodingError('Invalid character in input stream')
  552. else
  553. FReader.CheckMaxChars(rslt);
  554. until False;
  555. FBufEnd^ := #0;
  556. Result := FBuf < FBufEnd;
  557. end;
  558. const
  559. XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
  560. procedure TXMLDecodingSource.Initialize;
  561. begin
  562. inherited;
  563. FLineNo := 1;
  564. FDecoder.Decode := @Decode_UTF8;
  565. FFixedUCS2 := '';
  566. if FCharBufEnd-FCharBuf > 1 then
  567. begin
  568. if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
  569. begin
  570. FFixedUCS2 := 'UTF-16BE';
  571. FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  572. end
  573. else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
  574. begin
  575. FFixedUCS2 := 'UTF-16LE';
  576. FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  577. end;
  578. end;
  579. FBufSize := 6; // possible BOM and '<?xml'
  580. Reload;
  581. if FBuf^ = #$FEFF then
  582. begin
  583. FHasBOM := True;
  584. Inc(FBuf);
  585. end;
  586. LFPos := FBuf-1;
  587. if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
  588. begin
  589. FBufSize := 3; // don't decode past XML declaration
  590. Inc(FBuf, Length(XmlSign));
  591. FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
  592. end;
  593. FBufSize := 2047;
  594. if FReader.FXML11 then
  595. FXml11Rules := True;
  596. end;
  597. function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
  598. var
  599. NewDecoder: TDecoder;
  600. begin
  601. Result := True;
  602. if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
  603. Exit;
  604. if FFixedUCS2 <> '' then
  605. begin
  606. Result := SameText(AEncoding, FFixedUCS2) or
  607. SameText(AEncoding, 'UTF-16') or
  608. SameText(AEncoding, 'unicode');
  609. Exit;
  610. end;
  611. // TODO: must fail when a byte-based stream is labeled as word-based.
  612. // see rmt-e2e-61, it now fails but for a completely different reason.
  613. FillChar(NewDecoder, sizeof(TDecoder), 0);
  614. if Is_8859_1(AEncoding) then
  615. FDecoder.Decode := @Decode_8859_1
  616. else if FindDecoder(AEncoding, NewDecoder) then
  617. FDecoder := NewDecoder
  618. else
  619. Result := False;
  620. end;
  621. procedure TXMLDecodingSource.NewLine;
  622. begin
  623. case FBuf^ of
  624. #10: ;
  625. #13: begin
  626. // Reload trashes the buffer, it should be consumed beforehand
  627. if (FBufEnd >= FBuf+2) or Reload then
  628. begin
  629. if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
  630. Inc(FBuf);
  631. end;
  632. FBuf^ := #10;
  633. end;
  634. #$85, #$2028: if FXML11Rules then
  635. FBuf^ := #10
  636. else
  637. Exit;
  638. else
  639. Exit;
  640. end;
  641. Inc(FLineNo);
  642. LFPos := FBuf;
  643. end;
  644. { TXMLStreamInputSource }
  645. const
  646. Slack = 16;
  647. constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
  648. begin
  649. FStream := AStream;
  650. FCapacity := 4096;
  651. GetMem(FAllocated, FCapacity+Slack);
  652. FCharBuf := FAllocated+(Slack-4);
  653. FCharBufEnd := FCharBuf;
  654. FOwnStream := AOwnStream;
  655. FetchData;
  656. end;
  657. destructor TXMLStreamInputSource.Destroy;
  658. begin
  659. FreeMem(FAllocated);
  660. if FOwnStream then
  661. FStream.Free;
  662. inherited Destroy;
  663. end;
  664. procedure TXMLStreamInputSource.FetchData;
  665. var
  666. Remainder, BytesRead: Integer;
  667. OldBuf: PChar;
  668. begin
  669. Assert(FCharBufEnd - FCharBuf < Slack-4);
  670. if FEof then
  671. Exit;
  672. OldBuf := FCharBuf;
  673. Remainder := FCharBufEnd - FCharBuf;
  674. if Remainder < 0 then
  675. Remainder := 0;
  676. FCharBuf := FAllocated+Slack-4-Remainder;
  677. if Remainder > 0 then
  678. Move(OldBuf^, FCharBuf^, Remainder);
  679. BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
  680. if BytesRead < FCapacity then
  681. FEof := True;
  682. FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
  683. { Null-termination has been removed:
  684. 1) Built-in decoders don't need it because they respect the buffer length.
  685. 2) It was causing unaligned access errors on ARM CPUs.
  686. }
  687. //PWideChar(FCharBufEnd)^ := #0;
  688. end;
  689. { TXMLFileInputSource }
  690. constructor TXMLFileInputSource.Create(var AFile: Text);
  691. begin
  692. FFile := @AFile;
  693. SourceURI := FilenameToURI(TTextRec(AFile).Name);
  694. FetchData;
  695. end;
  696. procedure TXMLFileInputSource.FetchData;
  697. var
  698. Remainder: Integer;
  699. begin
  700. if not Eof(FFile^) then
  701. begin
  702. Remainder := FCharBufEnd - FCharBuf;
  703. if Remainder > 0 then
  704. SetString(FTmp, FCharBuf, Remainder);
  705. ReadLn(FFile^, FString);
  706. FString := FString + #10; // bad solution...
  707. if Remainder > 0 then
  708. Insert(FTmp, FString, 1);
  709. FCharBuf := PChar(FString);
  710. FCharBufEnd := FCharBuf + Length(FString);
  711. end;
  712. end;
  713. { helper that closes handle upon destruction }
  714. type
  715. THandleOwnerStream = class(THandleStream)
  716. public
  717. destructor Destroy; override;
  718. end;
  719. destructor THandleOwnerStream.Destroy;
  720. begin
  721. FileClose(Handle);
  722. inherited Destroy;
  723. end;
  724. { TXMLTextReader }
  725. function TXMLTextReader.QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  726. begin
  727. if GetInterface(iid,obj) then
  728. result := S_OK
  729. else
  730. result:= E_NOINTERFACE;
  731. end;
  732. function TXMLTextReader._AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  733. begin
  734. result := -1;
  735. end;
  736. function TXMLTextReader._Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  737. begin
  738. result := -1;
  739. end;
  740. procedure TXMLTextReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  741. begin
  742. SrcOut := nil;
  743. if Assigned(SrcIn) then
  744. begin
  745. if Assigned(SrcIn.Stream) then
  746. SrcOut := TXMLStreamInputSource.Create(SrcIn.Stream, False)
  747. else if SrcIn.StringData <> '' then
  748. SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.StringData), True)
  749. else if (SrcIn.SystemID <> '') then
  750. ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
  751. end;
  752. if (SrcOut = nil) and (FSource = nil) then
  753. DoErrorPos(esFatal, 'No input source specified', NullLocation);
  754. end;
  755. procedure TXMLTextReader.StoreLocation(out Loc: TLocation);
  756. begin
  757. Loc.Line := FSource.FLineNo;
  758. Loc.LinePos := FSource.FBuf-FSource.LFPos;
  759. end;
  760. function TXMLTextReader.ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  761. var
  762. SrcURI: XMLString;
  763. Filename: string;
  764. Stream: TStream;
  765. fd: THandle;
  766. begin
  767. Source := nil;
  768. Result := False;
  769. if not ResolveRelativeURI(ABaseURI, ASystemID, SrcURI) then
  770. Exit;
  771. { TODO: alternative resolvers
  772. These may be 'internal' resolvers or a handler set by application.
  773. Internal resolvers should probably produce a TStream
  774. ( so that internal classes need not be exported ).
  775. External resolver will produce TXMLInputSource that should be converted.
  776. External resolver must NOT be called for root entity.
  777. External resolver can return nil, in which case we do the default }
  778. if URIToFilename(SrcURI, Filename) then
  779. begin
  780. fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
  781. if fd <> THandle(-1) then
  782. begin
  783. Stream := THandleOwnerStream.Create(fd);
  784. Source := TXMLStreamInputSource.Create(Stream, True);
  785. Source.SourceURI := SrcURI;
  786. end;
  787. end;
  788. Result := Assigned(Source);
  789. end;
  790. procedure TXMLTextReader.SetSource(ASource: TXMLCharSource);
  791. begin
  792. ASource.FParent := FSource;
  793. FSource := ASource;
  794. FSource.FReader := Self;
  795. FSource.FStartNesting := FNesting;
  796. end;
  797. procedure TXMLTextReader.FatalError(Expected: WideChar);
  798. begin
  799. // FIX: don't output what is found - anything may be found, including exploits...
  800. FatalError('Expected "%1s"', [string(Expected)]);
  801. end;
  802. procedure TXMLTextReader.FatalError(const descr: String; LineOffs: Integer);
  803. begin
  804. DoError(esFatal, descr, LineOffs);
  805. end;
  806. procedure TXMLTextReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
  807. begin
  808. DoError(esFatal, Format(descr, args), LineOffs);
  809. end;
  810. procedure TXMLTextReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
  811. begin
  812. if FValidate then
  813. DoError(esError, Format(Msg, Args), LineOffs);
  814. end;
  815. procedure TXMLTextReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
  816. var
  817. ws: XMLString;
  818. begin
  819. SetString(ws, FName.Buffer, FName.Length);
  820. ValidationError(Msg, [ws], LineOffs);
  821. end;
  822. procedure TXMLTextReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
  823. var
  824. Loc: TLocation;
  825. begin
  826. StoreLocation(Loc);
  827. if LineOffs >= 0 then
  828. begin
  829. Dec(Loc.LinePos, LineOffs);
  830. DoErrorPos(Severity, descr, Loc);
  831. end
  832. else
  833. DoErrorPos(Severity, descr, FTokenStart);
  834. end;
  835. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
  836. const args: array of const; const ErrPos: TLocation);
  837. begin
  838. DoErrorPos(Severity, Format(descr, args), ErrPos);
  839. end;
  840. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
  841. var
  842. E: EXMLReadError;
  843. srcuri: XMLString;
  844. begin
  845. if Assigned(FSource) then
  846. begin
  847. srcuri := FSource.FSourceURI;
  848. if (srcuri = '') and Assigned(FSource.FEntity) then
  849. srcuri := FSource.FEntity.FURI;
  850. E := EXMLReadError.Create(severity, descr, ErrPos.Line, ErrPos.LinePos, srcuri);
  851. end
  852. else
  853. E := EXMLReadError.Create(descr);
  854. CallErrorHandler(E);
  855. // No 'finally'! If user handler raises exception, control should not get here
  856. // and the exception will be freed in CallErrorHandler (below)
  857. E.Free;
  858. end;
  859. procedure TXMLTextReader.CheckMaxChars(ToAdd: Cardinal);
  860. var
  861. src: TXMLCharSource;
  862. total: Cardinal;
  863. begin
  864. Inc(FSource.FCharCount, ToAdd);
  865. if FMaxChars = 0 then
  866. Exit;
  867. src := FSource;
  868. total := 0;
  869. repeat
  870. Inc(total, src.FCharCount);
  871. if total > FMaxChars then
  872. FatalError('Exceeded character count limit');
  873. src := src.FParent;
  874. until src = nil;
  875. end;
  876. procedure TXMLTextReader.CallErrorHandler(E: EXMLReadError);
  877. begin
  878. try
  879. if Assigned(FOnError) then
  880. FOnError(E);
  881. if E.Severity = esFatal then
  882. raise E;
  883. except
  884. FReadState := rsError;
  885. if ExceptObject <> E then
  886. E.Free;
  887. raise;
  888. end;
  889. end;
  890. function TXMLTextReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
  891. begin
  892. Result := False;
  893. repeat
  894. Result := SkipS or Result;
  895. if FSource.FBuf >= FSource.FBufEnd then
  896. begin
  897. Result := True; // report whitespace upon exiting the PE
  898. if not ContextPop then
  899. Break;
  900. end
  901. else if FSource.FBuf^ = '%' then
  902. begin
  903. if (FState <> rsDTD) then
  904. Break;
  905. // This is the only case where look-ahead is needed
  906. if FSource.FBuf > FSource.FBufEnd-2 then
  907. FSource.Reload;
  908. if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[NamePages[hi(Word(FSource.FBuf[1]))]]) or
  909. ((FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
  910. begin
  911. StartPE;
  912. Result := True; // report whitespace upon entering the PE
  913. end
  914. else Break;
  915. end
  916. else
  917. Break;
  918. until False;
  919. end;
  920. procedure TXMLTextReader.ExpectWhitespace;
  921. begin
  922. if not SkipWhitespace then
  923. FatalError('Expected whitespace');
  924. end;
  925. function TXMLTextReader.SkipS(Required: Boolean): Boolean;
  926. var
  927. p: PWideChar;
  928. begin
  929. Result := False;
  930. repeat
  931. p := FSource.FBuf;
  932. repeat
  933. if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
  934. begin
  935. FSource.FBuf := p;
  936. FSource.NewLine;
  937. p := FSource.FBuf;
  938. end
  939. else if (p^ <> #32) and (p^ <> #9) then
  940. Break;
  941. Inc(p);
  942. Result := True;
  943. until False;
  944. FSource.FBuf := p;
  945. until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
  946. if (not Result) and Required then
  947. FatalError('Expected whitespace');
  948. end;
  949. procedure TXMLTextReader.ExpectString(const s: String);
  950. var
  951. I: Integer;
  952. begin
  953. for I := 1 to Length(s) do
  954. begin
  955. if FSource.FBuf^ <> WideChar(ord(s[i])) then
  956. FatalError('Expected "%s"', [s], i-1);
  957. FSource.NextChar;
  958. end;
  959. end;
  960. function TXMLTextReader.CheckForChar(c: WideChar): Boolean;
  961. begin
  962. Result := (FSource.FBuf^ = c);
  963. if Result then
  964. begin
  965. Inc(FSource.FBuf);
  966. if FSource.FBuf >= FSource.FBufEnd then
  967. FSource.Reload;
  968. end;
  969. end;
  970. procedure TXMLTextReader.SkipQuote(out Delim: WideChar; required: Boolean);
  971. begin
  972. Delim := #0;
  973. if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
  974. begin
  975. Delim := FSource.FBuf^;
  976. FSource.NextChar; // skip quote
  977. StoreLocation(FTokenStart);
  978. end
  979. else if required then
  980. FatalError('Expected single or double quote');
  981. end;
  982. const
  983. PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
  984. procedure TXMLTextReader.SetOptions(AValue: TXMLReaderSettings);
  985. begin
  986. FValidate := AValue.Validate;
  987. FPreserveWhitespace := AValue.PreserveWhitespace;
  988. FExpandEntities := AValue.ExpandEntities;
  989. FCDSectionsAsText := AValue.CDSectionsAsText;
  990. FIgnoreComments := AValue.IgnoreComments;
  991. FNamespaces := AValue.Namespaces;
  992. FDisallowDoctype := AValue.DisallowDoctype;
  993. FCanonical := AValue.CanonicalForm;
  994. FMaxChars := AValue.MaxChars;
  995. FOnError := AValue.OnError;
  996. SetFragmentMode(AValue.ConformanceLevel = clFragment);
  997. end;
  998. procedure TXMLTextReader.SetFragmentMode(aValue: Boolean);
  999. begin
  1000. FFragmentMode := aValue;
  1001. if FFragmentMode then
  1002. FState := rsRoot
  1003. else
  1004. FState := rsProlog;
  1005. end;
  1006. constructor TXMLTextReader.Create(ASrc: TXMLInputSource; ASettings: TXMLReaderSettings);
  1007. var
  1008. InputSrc: TXMLCharSource;
  1009. begin
  1010. SetNametable(ASettings.NameTable);
  1011. SetOptions(ASettings);
  1012. ConvertSource(ASrc, InputSrc);
  1013. FSource := InputSrc;
  1014. FSource.FReader := Self;
  1015. end;
  1016. constructor TXMLTextReader.Create(const uri: XMLString; ASettings: TXMLReaderSettings);
  1017. begin
  1018. SetNametable(ASettings.NameTable);
  1019. SetOptions(ASettings);
  1020. if ResolveResource(uri, '', '', FSource) then
  1021. FSource.FReader := Self
  1022. else
  1023. DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
  1024. end;
  1025. procedure TXMLTextReader.SetNametable(ANameTable: THashTable);
  1026. begin
  1027. if ANameTable = nil then
  1028. begin
  1029. ANameTable := THashTable.Create(256, True);
  1030. FNameTableOwned := True;
  1031. end;
  1032. FNameTable := ANameTable;
  1033. end;
  1034. constructor TXMLTextReader.Create(var AFile: Text; ANameTable: THashTable);
  1035. begin
  1036. SetNametable(ANameTable);
  1037. FSource := TXMLFileInputSource.Create(AFile);
  1038. FSource.FReader := Self;
  1039. end;
  1040. constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable);
  1041. begin
  1042. SetNametable(ANameTable);
  1043. FSource := TXMLStreamInputSource.Create(AStream, False);
  1044. FSource.SourceURI := ABaseUri;
  1045. FSource.FReader := Self;
  1046. end;
  1047. constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ASettings: TXMLReaderSettings); overload;
  1048. begin
  1049. SetNametable(ASettings.NameTable);
  1050. SetOptions(ASettings);
  1051. FSource := TXMLStreamInputSource.Create(AStream, False);
  1052. FSource.SourceURI := ABaseUri;
  1053. FSource.FReader := Self;
  1054. end;
  1055. constructor TXMLTextReader.Create(ASrc: TXMLCharSource; AParent: TXMLTextReader);
  1056. begin
  1057. FNameTable := AParent.FNameTable;
  1058. FSource := ASrc;
  1059. FSource.FReader := Self;
  1060. FValidate := AParent.FValidate;
  1061. FPreserveWhitespace := AParent.FPreserveWhitespace;
  1062. FExpandEntities := AParent.FExpandEntities;
  1063. FCDSectionsAsText := AParent.FCDSectionsAsText;
  1064. FIgnoreComments := AParent.FIgnoreComments;
  1065. FNamespaces := AParent.FNamespaces;
  1066. FDisallowDoctype := AParent.FDisallowDoctype;
  1067. FCanonical := AParent.FCanonical;
  1068. FMaxChars := AParent.FMaxChars;
  1069. FOnError := AParent.FOnError;
  1070. end;
  1071. destructor TXMLTextReader.Destroy;
  1072. var
  1073. cur: PNodeData;
  1074. begin
  1075. if FAttrCleanupFlag then
  1076. CleanupAttributes;
  1077. while Assigned(FFreeAttrChunk) do
  1078. begin
  1079. cur := FFreeAttrChunk;
  1080. FFreeAttrChunk := cur^.FNext;
  1081. Dispose(cur);
  1082. end;
  1083. if Assigned(FEntityValue.Buffer) then
  1084. FreeMem(FEntityValue.Buffer);
  1085. FreeMem(FName.Buffer);
  1086. FreeMem(FValue.Buffer);
  1087. if Assigned(FSource) then
  1088. while ContextPop(True) do; // clean input stack
  1089. FSource.Free;
  1090. FPEMap.Free;
  1091. ClearForwardRefs;
  1092. FNsAttHash.Free;
  1093. FNSHelper.Free;
  1094. FDocType.Release;
  1095. FIDMap.Free;
  1096. FForwardRefs.Free;
  1097. if FNameTableOwned then
  1098. FNameTable.Free;
  1099. inherited Destroy;
  1100. end;
  1101. procedure TXMLTextReader.AfterConstruction;
  1102. begin
  1103. BufAllocate(FName, 128);
  1104. BufAllocate(FValue, 512);
  1105. SetLength(FNodeStack, 16);
  1106. SetLength(FValidators, 16);
  1107. FNesting := 0;
  1108. FValidatorNesting := 0;
  1109. FCurrNode := @FNodeStack[0];
  1110. FCurrAttrIndex := -1;
  1111. FEmptyStr := FNameTable.FindOrAdd('');
  1112. if FNamespaces then
  1113. begin
  1114. FNSHelper := TNSSupport.Create(FNameTable);
  1115. FNsAttHash := TDblHashArray.Create;
  1116. FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
  1117. FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
  1118. FStdUri_xmlns := FNameTable.FindOrAdd(stduri_xmlns);
  1119. FStdUri_xml := FNameTable.FindOrAdd(stduri_xml);
  1120. end;
  1121. end;
  1122. function TXMLTextReader.CheckName(aFlags: TCheckNameFlags): Boolean;
  1123. var
  1124. p: PWideChar;
  1125. NameStartFlag: Boolean;
  1126. begin
  1127. p := FSource.FBuf;
  1128. FName.Length := 0;
  1129. FColonPos := -1;
  1130. NameStartFlag := not (cnToken in aFlags);
  1131. repeat
  1132. if NameStartFlag then
  1133. begin
  1134. if (Byte(p^) in NamingBitmap[NamePages[hi(Word(p^))]]) or
  1135. ((p^ = ':') and (not FNamespaces)) then
  1136. Inc(p)
  1137. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1138. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1139. Inc(p, 2)
  1140. else
  1141. begin
  1142. // here we come either when first char of name is bad (it may be a colon),
  1143. // or when a colon is not followed by a valid NameStartChar
  1144. FSource.FBuf := p;
  1145. Result := False;
  1146. Break;
  1147. end;
  1148. NameStartFlag := False;
  1149. end;
  1150. repeat
  1151. if (Byte(p^) in NamingBitmap[NamePages[$100+hi(Word(p^))]]) or
  1152. ((p^= ':') and ((cnToken in aFlags) or not FNamespaces)) then
  1153. Inc(p)
  1154. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1155. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1156. Inc(p,2)
  1157. else
  1158. Break;
  1159. until False;
  1160. if (p^ = ':') and (FColonPos < 0) then
  1161. begin
  1162. FColonPos := p-FSource.FBuf+FName.Length;
  1163. NameStartFlag := True;
  1164. Inc(p);
  1165. if p < FSource.FBufEnd then Continue;
  1166. end;
  1167. BufAppendChunk(FName, FSource.FBuf, p);
  1168. Result := (FName.Length > 0);
  1169. FSource.FBuf := p;
  1170. if (p < FSource.FBufEnd) or not FSource.Reload then
  1171. Break;
  1172. p := FSource.FBuf;
  1173. until False;
  1174. if not (Result or (cnOptional in aFlags)) then
  1175. RaiseNameNotFound;
  1176. end;
  1177. procedure TXMLTextReader.CheckNCName;
  1178. begin
  1179. if FNamespaces and (FColonPos <> -1) then
  1180. FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
  1181. end;
  1182. procedure TXMLTextReader.RaiseNameNotFound;
  1183. begin
  1184. if FColonPos <> -1 then
  1185. FatalError('Bad QName syntax, local part is missing')
  1186. else
  1187. // Coming at no cost, this allows more user-friendly error messages
  1188. with FSource do
  1189. if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
  1190. FatalError('Whitespace is not allowed here')
  1191. else
  1192. FatalError('Name starts with invalid character');
  1193. end;
  1194. function TXMLTextReader.ResolvePredefined: Boolean;
  1195. var
  1196. wc: WideChar;
  1197. begin
  1198. Result := False;
  1199. with FName do
  1200. begin
  1201. if (Length = 2) and (Buffer[1] = 't') then
  1202. begin
  1203. if Buffer[0] = 'l' then
  1204. wc := '<'
  1205. else if Buffer[0] = 'g' then
  1206. wc := '>'
  1207. else Exit;
  1208. end
  1209. else if Buffer[0] = 'a' then
  1210. begin
  1211. if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
  1212. wc := '&'
  1213. else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
  1214. (Buffer[3] = 's') then
  1215. wc := ''''
  1216. else Exit;
  1217. end
  1218. else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
  1219. (Buffer[2] = 'o') and (Buffer[3] ='t') then
  1220. wc := '"'
  1221. else
  1222. Exit;
  1223. end; // with
  1224. BufAppend(FValue, wc);
  1225. Result := True;
  1226. end;
  1227. function TXMLTextReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  1228. var
  1229. Code: Integer;
  1230. begin
  1231. FSource.NextChar; // skip '&'
  1232. Result := CheckForChar('#');
  1233. if Result then
  1234. begin
  1235. Code := 0;
  1236. if CheckForChar('x') then
  1237. repeat
  1238. case FSource.FBuf^ of
  1239. '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
  1240. 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
  1241. 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
  1242. else
  1243. Break;
  1244. end;
  1245. FSource.NextChar;
  1246. until Code > $10FFFF
  1247. else
  1248. repeat
  1249. case FSource.FBuf^ of
  1250. '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
  1251. else
  1252. Break;
  1253. end;
  1254. FSource.NextChar;
  1255. until Code > $10FFFF;
  1256. case Code of
  1257. $01..$08, $0B..$0C, $0E..$1F:
  1258. if FXML11 then
  1259. BufAppend(ToFill, WideChar(Code))
  1260. else
  1261. FatalError('Invalid character reference');
  1262. $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
  1263. BufAppend(ToFill, WideChar(Code));
  1264. $10000..$10FFFF:
  1265. begin
  1266. BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
  1267. BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
  1268. end;
  1269. else
  1270. FatalError('Invalid character reference');
  1271. end;
  1272. end
  1273. else CheckName;
  1274. ExpectChar(';');
  1275. end;
  1276. const
  1277. AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
  1278. GT_Delim: TSetOfChar = [#0, '>'];
  1279. { Parse attribute literal, producing plain string value in AttrData.FValueStr.
  1280. If entity references are encountered and FExpandEntities=False, also builds
  1281. a node chain starting from AttrData.FNext. Node chain is built only for the
  1282. first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
  1283. procedure TXMLTextReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
  1284. var
  1285. wc: WideChar;
  1286. Delim: WideChar;
  1287. ent: TEntityDecl;
  1288. start: TObject;
  1289. curr: PNodeData;
  1290. StartPos: Integer;
  1291. StartLoc: TLocation;
  1292. entName: PHashItem;
  1293. begin
  1294. SkipQuote(Delim);
  1295. AttrData^.FLoc2 := FTokenStart;
  1296. StartLoc := FTokenStart;
  1297. curr := AttrData;
  1298. FValue.Length := 0;
  1299. StartPos := 0;
  1300. start := FSource.FEntity;
  1301. repeat
  1302. wc := FSource.SkipUntil(FValue, AttrDelims);
  1303. if wc = '<' then
  1304. FatalError('Character ''<'' is not allowed in attribute value')
  1305. else if wc = '&' then
  1306. begin
  1307. if ParseRef(FValue) or ResolvePredefined then
  1308. Continue;
  1309. entName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1310. ent := EntityCheck(True);
  1311. if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
  1312. begin
  1313. if FValue.Length > StartPos then
  1314. begin
  1315. AllocAttributeValueChunk(curr, StartPos);
  1316. curr^.FLoc := StartLoc;
  1317. end;
  1318. AllocAttributeValueChunk(curr, FValue.Length);
  1319. curr^.FNodeType := ntEntityReference;
  1320. curr^.FQName := entName;
  1321. StoreLocation(StartLoc);
  1322. curr^.FLoc := StartLoc;
  1323. Dec(curr^.FLoc.LinePos, FName.Length+1);
  1324. end;
  1325. StartPos := FValue.Length;
  1326. if Assigned(ent) then
  1327. ContextPush(ent);
  1328. end
  1329. else if wc <> #0 then
  1330. begin
  1331. FSource.NextChar;
  1332. if (wc = Delim) and (FSource.FEntity = start) then
  1333. Break;
  1334. if (wc = #10) or (wc = #9) or (wc = #13) then
  1335. wc := #32;
  1336. BufAppend(FValue, wc);
  1337. end
  1338. else
  1339. begin
  1340. if (FSource.FEntity = start) or not ContextPop then // #0
  1341. FatalError('Literal has no closing quote', -1);
  1342. StartPos := FValue.Length;
  1343. end;
  1344. until False;
  1345. if Assigned(attrData^.FNext) then
  1346. begin
  1347. FAttrCleanupFlag := True;
  1348. if FValue.Length > StartPos then
  1349. begin
  1350. AllocAttributeValueChunk(curr, StartPos);
  1351. curr^.FLoc := StartLoc;
  1352. end;
  1353. end;
  1354. if nonCDATA then
  1355. BufNormalize(FValue, attrData^.FDenormalized)
  1356. else
  1357. attrData^.FDenormalized := False;
  1358. SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
  1359. end;
  1360. const
  1361. PrefixChar: array[Boolean] of string = ('', '%');
  1362. procedure TXMLTextReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  1363. begin
  1364. if AEntity.FOnStack then
  1365. FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
  1366. if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
  1367. begin
  1368. if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
  1369. begin
  1370. // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
  1371. ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
  1372. Src := nil;
  1373. Exit;
  1374. end;
  1375. end
  1376. else
  1377. begin
  1378. Src := TXMLCharSource.Create(AEntity.FReplacementText);
  1379. Src.FLineNo := AEntity.FStartLocation.Line;
  1380. Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
  1381. // needed in case of prefetched external PE
  1382. if AEntity.FSystemID <> '' then
  1383. Src.SourceURI := AEntity.FURI;
  1384. end;
  1385. AEntity.FOnStack := True;
  1386. Src.FEntity := AEntity;
  1387. end;
  1388. function TXMLTextReader.ContextPush(AEntity: TEntityDecl; DummySource: Boolean): Boolean;
  1389. var
  1390. Src: TXMLCharSource;
  1391. begin
  1392. Src := nil;
  1393. if Assigned(AEntity) then
  1394. EntityToSource(AEntity, Src);
  1395. if (Src = nil) and DummySource then
  1396. begin
  1397. Src := TXMLCharSource.Create('');
  1398. if FExpandEntities then
  1399. Src.Kind := skManualPop;
  1400. end;
  1401. Result := Assigned(Src);
  1402. if Result then
  1403. begin
  1404. SetSource(Src);
  1405. Src.Initialize;
  1406. end;
  1407. end;
  1408. function TXMLTextReader.ContextPop(Forced: Boolean): Boolean;
  1409. var
  1410. Src: TXMLCharSource;
  1411. Error: Boolean;
  1412. begin
  1413. Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
  1414. if Result then
  1415. begin
  1416. Src := FSource.FParent;
  1417. Error := False;
  1418. if Assigned(FSource.FEntity) then
  1419. begin
  1420. FSource.FEntity.FOnStack := False;
  1421. FSource.FEntity.FCharCount := FSource.FCharCount;
  1422. // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
  1423. Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
  1424. end;
  1425. FSource.Free;
  1426. FSource := Src;
  1427. // correct position of this error is after PE reference
  1428. if Error then
  1429. FatalError('Parameter entities must be properly nested');
  1430. end;
  1431. end;
  1432. function TXMLTextReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
  1433. var
  1434. RefName: XMLString;
  1435. cnt: Integer;
  1436. begin
  1437. Result := nil;
  1438. SetString(RefName, FName.Buffer, FName.Length);
  1439. cnt := FName.Length+2;
  1440. if Assigned(FDocType) then
  1441. Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1442. if Result = nil then
  1443. begin
  1444. if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
  1445. FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
  1446. else
  1447. ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
  1448. Exit;
  1449. end;
  1450. if FStandalone and Result.ExternallyDeclared then
  1451. FatalError('Standalone constraint violation', cnt);
  1452. if Result.FNotationName <> '' then
  1453. FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
  1454. if NoExternals and (Result.FSystemID <> '') then
  1455. FatalError('External entity reference is not allowed in attribute value', cnt);
  1456. if not Result.FResolved then
  1457. if Assigned(FOnEntity) then
  1458. FOnEntity(Self, Result);
  1459. // at this point we know the charcount of the entity being included
  1460. if Result.FCharCount >= cnt then
  1461. CheckMaxChars(Result.FCharCount - cnt);
  1462. end;
  1463. procedure TXMLTextReader.StartPE;
  1464. var
  1465. PEnt: TEntityDecl;
  1466. begin
  1467. FSource.NextChar; // skip '%'
  1468. CheckName;
  1469. ExpectChar(';');
  1470. if (FSource.Kind = skInternalSubset) and FInsideDecl then
  1471. FatalError('Parameter entity references cannot appear inside markup declarations in internal subset', FName.Length+2);
  1472. PEnt := nil;
  1473. if Assigned(FPEMap) then
  1474. PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1475. if PEnt = nil then
  1476. begin
  1477. ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
  1478. // cease processing declarations, unless document is standalone.
  1479. FDTDProcessed := FStandalone;
  1480. Exit;
  1481. end;
  1482. { cache an external PE so it's only fetched once }
  1483. if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
  1484. begin
  1485. FDTDProcessed := FStandalone;
  1486. Exit;
  1487. end;
  1488. CheckMaxChars(PEnt.FCharCount);
  1489. PEnt.FBetweenDecls := not FInsideDecl;
  1490. ContextPush(PEnt);
  1491. FHavePERefs := True;
  1492. end;
  1493. function TXMLTextReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
  1494. begin
  1495. Result := ContextPush(AEntity);
  1496. if Result then
  1497. try
  1498. FValue.Length := 0;
  1499. FSource.SkipUntil(FValue, [#0]);
  1500. SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
  1501. AEntity.FCharCount := FValue.Length;
  1502. AEntity.FStartLocation.Line := 1;
  1503. AEntity.FStartLocation.LinePos := 1;
  1504. AEntity.FURI := FSource.SourceURI; // replace base URI with absolute one
  1505. finally
  1506. ContextPop;
  1507. AEntity.FPrefetched := True;
  1508. FValue.Length := 0;
  1509. end;
  1510. end;
  1511. const
  1512. LiteralDelims: array[TLiteralType] of TSetOfChar = (
  1513. [#0, '''', '"'], // ltPlain
  1514. [#0, '''', '"', #13, #10], // ltPubid
  1515. [#0, '%', '&', '''', '"'] // ltEntity
  1516. );
  1517. function TXMLTextReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  1518. Required: Boolean): Boolean;
  1519. var
  1520. start: TObject;
  1521. wc, Delim: WideChar;
  1522. dummy: Boolean;
  1523. begin
  1524. SkipQuote(Delim, Required);
  1525. Result := (Delim <> #0);
  1526. if not Result then
  1527. Exit;
  1528. ToFill.Length := 0;
  1529. start := FSource.FEntity;
  1530. repeat
  1531. wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
  1532. if wc = '%' then { ltEntity only }
  1533. StartPE
  1534. else if wc = '&' then { ltEntity }
  1535. begin
  1536. if ParseRef(ToFill) then // charRefs always expanded
  1537. Continue;
  1538. BufAppend(ToFill, '&');
  1539. BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
  1540. BufAppend(ToFill, ';');
  1541. end
  1542. else if wc <> #0 then
  1543. begin
  1544. FSource.NextChar;
  1545. if (wc = #10) or (wc = #13) then
  1546. wc := #32
  1547. // terminating delimiter must be in the same context as the starting one
  1548. else if (wc = Delim) and (start = FSource.FEntity) then
  1549. Break;
  1550. BufAppend(ToFill, wc);
  1551. end
  1552. else if (FSource.FEntity = start) or not ContextPop then // #0
  1553. FatalError('Literal has no closing quote', -1);
  1554. until False;
  1555. if aType = ltPubid then
  1556. BufNormalize(ToFill, dummy);
  1557. end;
  1558. function TXMLTextReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  1559. var
  1560. wc: WideChar;
  1561. begin
  1562. Result := False;
  1563. StoreLocation(FTokenStart);
  1564. repeat
  1565. wc := FSource.SkipUntil(FValue, Delim);
  1566. if wc <> #0 then
  1567. begin
  1568. FSource.NextChar;
  1569. if (FValue.Length > 0) then
  1570. begin
  1571. if (FValue.Buffer[FValue.Length-1] = c1) then
  1572. begin
  1573. Dec(FValue.Length);
  1574. Result := True;
  1575. Exit;
  1576. end;
  1577. end;
  1578. BufAppend(FValue, wc);
  1579. end;
  1580. until wc = #0;
  1581. end;
  1582. procedure TXMLTextReader.ParseComment(discard: Boolean); // [15]
  1583. var
  1584. SaveLength: Integer;
  1585. begin
  1586. ExpectString('--');
  1587. SaveLength := FValue.Length;
  1588. if not SkipUntilSeq([#0, '-'], '-') then
  1589. FatalError('Unterminated comment', -1);
  1590. ExpectChar('>');
  1591. if not discard then
  1592. begin
  1593. FCurrNode := @FNodeStack[FNesting];
  1594. FCurrNode^.FNodeType := ntComment;
  1595. FCurrNode^.FQName := nil;
  1596. FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
  1597. FCurrNode^.FValueLength := FValue.Length-SaveLength;
  1598. end;
  1599. FValue.Length := SaveLength;
  1600. end;
  1601. procedure TXMLTextReader.ParsePI; // [16]
  1602. begin
  1603. FSource.NextChar; // skip '?'
  1604. CheckName;
  1605. CheckNCName;
  1606. with FName do
  1607. if (Length = 3) and
  1608. ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
  1609. ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
  1610. ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
  1611. begin
  1612. if not BufEquals(FName, 'xml') then
  1613. FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
  1614. else
  1615. FatalError('XML declaration is not allowed here', FName.Length);
  1616. end;
  1617. if FSource.FBuf^ <> '?' then
  1618. SkipS(True);
  1619. FValue.Length := 0;
  1620. if not SkipUntilSeq(GT_Delim, '?') then
  1621. FatalError('Unterminated processing instruction', -1);
  1622. SetNodeInfoWithValue(ntProcessingInstruction,
  1623. FNameTable.FindOrAdd(FName.Buffer, FName.Length));
  1624. end;
  1625. const
  1626. vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
  1627. procedure TXMLTextReader.ParseXmlOrTextDecl(TextDecl: Boolean);
  1628. var
  1629. Delim: WideChar;
  1630. buf: array[0..31] of WideChar;
  1631. I: Integer;
  1632. begin
  1633. SkipS(True);
  1634. // [24] VersionInfo: optional in TextDecl, required in XmlDecl
  1635. if (not TextDecl) or (FSource.FBuf^ = 'v') then
  1636. begin
  1637. ExpectString('version');
  1638. ExpectEq;
  1639. SkipQuote(Delim);
  1640. { !! Definition "VersionNum ::= '1.' [0-9]+" per XML 1.0 Fifth Edition
  1641. implies that version literal can have unlimited length. }
  1642. I := 0;
  1643. while (I < 3) and (FSource.FBuf^ <> Delim) do
  1644. begin
  1645. buf[I] := FSource.FBuf^;
  1646. Inc(I);
  1647. FSource.NextChar;
  1648. end;
  1649. if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
  1650. (buf[2] < '0') or (buf[2] > '9') then
  1651. FatalError('Illegal version number', -1);
  1652. ExpectChar(Delim);
  1653. FSource.FXMLVersion := vers[buf[2] = '1'];
  1654. if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
  1655. FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
  1656. if TextDecl or (FSource.FBuf^ <> '?') then
  1657. SkipS(True);
  1658. end;
  1659. // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
  1660. if TextDecl or (FSource.FBuf^ = 'e') then
  1661. begin
  1662. ExpectString('encoding');
  1663. ExpectEq;
  1664. SkipQuote(Delim);
  1665. I := 0;
  1666. while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
  1667. ((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
  1668. ((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
  1669. begin
  1670. buf[I] := FSource.FBuf^;
  1671. Inc(I);
  1672. FSource.NextChar;
  1673. end;
  1674. if not CheckForChar(Delim) then
  1675. FatalError('Illegal encoding name', i);
  1676. SetString(FSource.FXMLEncoding, buf, i);
  1677. if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
  1678. FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
  1679. if FSource.FBuf^ <> '?' then
  1680. SkipS(not TextDecl);
  1681. end;
  1682. // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
  1683. if (not TextDecl) and (FSource.FBuf^ = 's') then
  1684. begin
  1685. ExpectString('standalone');
  1686. ExpectEq;
  1687. SkipQuote(Delim);
  1688. if FSource.Matches('yes') then
  1689. FStandalone := True
  1690. else if not FSource.Matches('no') then
  1691. FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
  1692. ExpectChar(Delim);
  1693. SkipS;
  1694. end;
  1695. ExpectString('?>');
  1696. { Switch to 1.1 rules only after declaration is parsed completely. This is to
  1697. ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
  1698. if FSource.FXMLVersion = xmlVersion11 then
  1699. FXML11 := True;
  1700. end;
  1701. procedure TXMLTextReader.DTDReloadHook;
  1702. var
  1703. p: PWideChar;
  1704. begin
  1705. { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
  1706. We must further remove the CR chars and have only LF's left. }
  1707. p := FDTDStartPos;
  1708. while p < FSource.FBuf do
  1709. begin
  1710. while (p < FSource.FBuf) and (p^ <> #13) do
  1711. Inc(p);
  1712. BufAppendChunk(FIntSubset, FDTDStartPos, p);
  1713. if p^ = #13 then
  1714. Inc(p);
  1715. FDTDStartPos := p;
  1716. end;
  1717. FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
  1718. end;
  1719. procedure TXMLTextReader.ParseDoctypeDecl; // [28]
  1720. var
  1721. Src: TXMLCharSource;
  1722. DTDName: PHashItem;
  1723. Locs: array [0..2] of TLocation;
  1724. HasAtts: Boolean;
  1725. begin
  1726. if FState >= rsDTD then
  1727. FatalError('Markup declaration is not allowed here');
  1728. if FDisallowDoctype then
  1729. FatalError('Document type is prohibited by parser settings');
  1730. ExpectString('DOCTYPE');
  1731. SkipS(True);
  1732. FDocType := TDTDModel.Create(FNameTable);
  1733. FDTDProcessed := True; // assume success
  1734. FState := rsDTD;
  1735. CheckName;
  1736. SetString(FDocType.FName, FName.Buffer, FName.Length);
  1737. DTDName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1738. if SkipS then
  1739. begin
  1740. StoreLocation(Locs[0]);
  1741. HasAtts := ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, Locs[1], False);
  1742. if HasAtts then
  1743. Locs[2] := FTokenStart;
  1744. SkipS;
  1745. end;
  1746. if CheckForChar('[') then
  1747. begin
  1748. BufAllocate(FIntSubset, 256);
  1749. FSource.Kind := skInternalSubset;
  1750. try
  1751. FDTDStartPos := FSource.FBuf;
  1752. ParseMarkupDecl;
  1753. DTDReloadHook; // fetch last chunk
  1754. SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
  1755. finally
  1756. FreeMem(FIntSubset.Buffer);
  1757. FSource.Kind := skNone;
  1758. end;
  1759. ExpectChar(']');
  1760. SkipS;
  1761. end;
  1762. ExpectChar('>');
  1763. if (FDocType.FSystemID <> '') then
  1764. begin
  1765. if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SourceURI, Src) then
  1766. begin
  1767. SetSource(Src);
  1768. Src.Initialize;
  1769. try
  1770. Src.Kind := skManualPop;
  1771. ParseMarkupDecl;
  1772. finally
  1773. ContextPop(True);
  1774. end;
  1775. end
  1776. else
  1777. begin
  1778. ValidationError('Unable to resolve external DTD subset', []);
  1779. FDTDProcessed := FStandalone;
  1780. end;
  1781. end;
  1782. FState := rsAfterDTD;
  1783. FValue.Length := 0;
  1784. BufAppendString(FValue, FDocType.FInternalSubset);
  1785. SetNodeInfoWithValue(ntDocumentType, DTDName);
  1786. if HasAtts then
  1787. begin
  1788. if FDocType.FPublicID <> '' then
  1789. AddPseudoAttribute(FNameTable.FindOrAdd('PUBLIC'), FDocType.FPublicID, Locs[0], Locs[1]);
  1790. AddPseudoAttribute(FNameTable.FindOrAdd('SYSTEM'), FDocType.FSystemID, Locs[0], Locs[2]);
  1791. end;
  1792. end;
  1793. procedure TXMLTextReader.ExpectEq; // [25]
  1794. begin
  1795. if FSource.FBuf^ <> '=' then
  1796. SkipS;
  1797. if FSource.FBuf^ <> '=' then
  1798. FatalError('Expected "="');
  1799. FSource.NextChar;
  1800. SkipS;
  1801. end;
  1802. { DTD stuff }
  1803. procedure TXMLTextReader.CheckPENesting(aExpected: TObject);
  1804. begin
  1805. if FSource.FEntity <> aExpected then
  1806. ValidationError('Parameter entities must be properly nested', [], 0);
  1807. end;
  1808. function TXMLTextReader.ParseQuantity: TCPQuant;
  1809. begin
  1810. case FSource.FBuf^ of
  1811. '?': Result := cqZeroOrOnce;
  1812. '*': Result := cqZeroOrMore;
  1813. '+': Result := cqOnceOrMore;
  1814. else
  1815. Result := cqOnce;
  1816. Exit;
  1817. end;
  1818. FSource.NextChar;
  1819. end;
  1820. function TXMLTextReader.FindOrCreateElDef: TElementDecl;
  1821. var
  1822. p: PHashItem;
  1823. begin
  1824. CheckName;
  1825. p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1826. Result := TElementDecl(p^.Data);
  1827. if Result = nil then
  1828. begin
  1829. Result := TElementDecl.Create;
  1830. p^.Data := Result;
  1831. end;
  1832. end;
  1833. procedure TXMLTextReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
  1834. var
  1835. Delim: WideChar;
  1836. CurrentCP: TContentParticle;
  1837. begin
  1838. Delim := #0;
  1839. repeat
  1840. CurrentCP := CP.Add;
  1841. SkipWhitespace;
  1842. if CheckForChar('(') then
  1843. ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
  1844. else
  1845. CurrentCP.Def := FindOrCreateElDef;
  1846. CurrentCP.CPQuant := ParseQuantity;
  1847. SkipWhitespace;
  1848. if FSource.FBuf^ = ')' then
  1849. Break;
  1850. if Delim = #0 then
  1851. begin
  1852. if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
  1853. Delim := FSource.FBuf^
  1854. else
  1855. FatalError('Expected pipe or comma delimiter');
  1856. end
  1857. else
  1858. if FSource.FBuf^ <> Delim then
  1859. FatalError(Delim);
  1860. FSource.NextChar; // skip delimiter
  1861. until False;
  1862. CheckPENesting(MustEndIn);
  1863. FSource.NextChar;
  1864. if Delim = '|' then
  1865. CP.CPType := ctChoice
  1866. else
  1867. CP.CPType := ctSeq; // '(foo)' is a sequence!
  1868. end;
  1869. procedure TXMLTextReader.ParseElementDecl; // [45]
  1870. var
  1871. ElDef: TElementDecl;
  1872. CurrentEntity: TObject;
  1873. I: Integer;
  1874. CP: TContentParticle;
  1875. Typ: TElementContentType;
  1876. ExtDecl: Boolean;
  1877. begin
  1878. CP := nil;
  1879. Typ := ctUndeclared; // satisfy compiler
  1880. ExpectWhitespace;
  1881. ElDef := FindOrCreateElDef;
  1882. if ElDef.ContentType <> ctUndeclared then
  1883. ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
  1884. ExtDecl := FSource.Kind <> skInternalSubset;
  1885. ExpectWhitespace;
  1886. if FSource.Matches('EMPTY') then
  1887. Typ := ctEmpty
  1888. else if FSource.Matches('ANY') then
  1889. Typ := ctAny
  1890. else if CheckForChar('(') then
  1891. begin
  1892. CP := TContentParticle.Create;
  1893. try
  1894. CurrentEntity := FSource.FEntity;
  1895. SkipWhitespace;
  1896. if FSource.Matches('#PCDATA') then // Mixed section [51]
  1897. begin
  1898. SkipWhitespace;
  1899. Typ := ctMixed;
  1900. while FSource.FBuf^ <> ')' do
  1901. begin
  1902. ExpectChar('|');
  1903. SkipWhitespace;
  1904. with CP.Add do
  1905. begin
  1906. Def := FindOrCreateElDef;
  1907. for I := CP.ChildCount-2 downto 0 do
  1908. if Def = CP.Children[I].Def then
  1909. ValidationError('Duplicate token in mixed section', [], FName.Length);
  1910. end;
  1911. SkipWhitespace;
  1912. end;
  1913. CheckPENesting(CurrentEntity);
  1914. FSource.NextChar;
  1915. if (not CheckForChar('*')) and (CP.ChildCount > 0) then
  1916. FatalError(WideChar('*'));
  1917. CP.CPQuant := cqZeroOrMore;
  1918. CP.CPType := ctChoice;
  1919. end
  1920. else // Children section [47]
  1921. begin
  1922. Typ := ctChildren;
  1923. ExpectChoiceOrSeq(CP, CurrentEntity);
  1924. CP.CPQuant := ParseQuantity;
  1925. end;
  1926. except
  1927. CP.Free;
  1928. raise;
  1929. end;
  1930. end
  1931. else
  1932. FatalError('Invalid content specification');
  1933. if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
  1934. begin
  1935. ElDef.ExternallyDeclared := ExtDecl;
  1936. ElDef.ContentType := Typ;
  1937. ElDef.RootCP := CP;
  1938. end
  1939. else
  1940. CP.Free;
  1941. end;
  1942. procedure TXMLTextReader.ParseNotationDecl; // [82]
  1943. var
  1944. NameStr, SysID, PubID: XMLString;
  1945. Notation: TNotationDecl;
  1946. Entry: PHashItem;
  1947. Src: TXMLCharSource;
  1948. dummy: TLocation;
  1949. begin
  1950. Src := FSource;
  1951. ExpectWhitespace;
  1952. CheckName;
  1953. CheckNCName;
  1954. SetString(NameStr, FName.Buffer, FName.Length);
  1955. ExpectWhitespace;
  1956. if not ParseExternalID(SysID, PubID, dummy, True) then
  1957. FatalError('Expected external or public ID');
  1958. if FDTDProcessed then
  1959. begin
  1960. Entry := FDocType.Notations.FindOrAdd(NameStr);
  1961. if Entry^.Data = nil then
  1962. begin
  1963. Notation := TNotationDecl.Create;
  1964. Notation.FName := NameStr;
  1965. Notation.FPublicID := PubID;
  1966. Notation.FSystemID := SysID;
  1967. Notation.FURI := Src.SourceURI;
  1968. Entry^.Data := Notation;
  1969. end
  1970. else
  1971. ValidationError('Duplicate notation declaration: ''%s''', [NameStr]);
  1972. end;
  1973. end;
  1974. const
  1975. AttrDataTypeNames: array[TAttrDataType] of XMLString = (
  1976. 'CDATA',
  1977. 'ID',
  1978. 'IDREF',
  1979. 'IDREFS',
  1980. 'ENTITY',
  1981. 'ENTITIES',
  1982. 'NMTOKEN',
  1983. 'NMTOKENS',
  1984. 'NOTATION'
  1985. );
  1986. procedure TXMLTextReader.ParseAttlistDecl; // [52]
  1987. var
  1988. ElDef: TElementDecl;
  1989. AttDef: TAttributeDef;
  1990. dt: TAttrDataType;
  1991. Found, DiscardIt: Boolean;
  1992. Offsets: array [Boolean] of Integer;
  1993. attrName: PHashItem;
  1994. begin
  1995. ExpectWhitespace;
  1996. ElDef := FindOrCreateElDef;
  1997. SkipWhitespace;
  1998. while FSource.FBuf^ <> '>' do
  1999. begin
  2000. CheckName;
  2001. ExpectWhitespace;
  2002. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2003. AttDef := TAttributeDef.Create(attrName, FColonPos);
  2004. try
  2005. AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2006. // In case of duplicate declaration of the same attribute, we must discard it,
  2007. // not modifying ElDef, and suppressing certain validation errors.
  2008. DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
  2009. if CheckForChar('(') then // [59]
  2010. begin
  2011. AttDef.DataType := dtNmToken;
  2012. repeat
  2013. SkipWhitespace;
  2014. CheckName([cnToken]);
  2015. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2016. ValidationError('Duplicate token in enumerated attribute declaration', [], FName.Length);
  2017. SkipWhitespace;
  2018. until not CheckForChar('|');
  2019. ExpectChar(')');
  2020. ExpectWhitespace;
  2021. end
  2022. else
  2023. begin
  2024. StoreLocation(FTokenStart);
  2025. // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
  2026. for dt := dtNotation downto dtCData do
  2027. begin
  2028. Found := FSource.Matches(AttrDataTypeNames[dt]);
  2029. if Found then
  2030. Break;
  2031. end;
  2032. if Found and SkipWhitespace then
  2033. begin
  2034. AttDef.DataType := dt;
  2035. if (dt = dtId) and not DiscardIt then
  2036. begin
  2037. if Assigned(ElDef.IDAttr) then
  2038. ValidationError('Only one attribute of type ID is allowed per element',[])
  2039. else
  2040. ElDef.IDAttr := AttDef;
  2041. end
  2042. else if dt = dtNotation then // no test cases for these ?!
  2043. begin
  2044. if not DiscardIt then
  2045. begin
  2046. if Assigned(ElDef.NotationAttr) then
  2047. ValidationError('Only one attribute of type NOTATION is allowed per element',[])
  2048. else
  2049. ElDef.NotationAttr := AttDef;
  2050. if ElDef.ContentType = ctEmpty then
  2051. ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
  2052. end;
  2053. ExpectChar('(');
  2054. repeat
  2055. SkipWhitespace;
  2056. StoreLocation(FTokenStart);
  2057. CheckName;
  2058. CheckNCName;
  2059. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2060. ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
  2061. if (not DiscardIt) and FValidate and
  2062. (FDocType.Notations.Get(FName.Buffer,FName.Length)=nil) then
  2063. AddForwardRef(FName.Buffer, FName.Length);
  2064. SkipWhitespace;
  2065. until not CheckForChar('|');
  2066. ExpectChar(')');
  2067. ExpectWhitespace;
  2068. end;
  2069. end
  2070. else
  2071. begin
  2072. // don't report 'expected whitespace' if token does not match completely
  2073. Offsets[False] := 0;
  2074. Offsets[True] := Length(AttrDataTypeNames[dt]);
  2075. if Found and (FSource.FBuf^ < 'A') then
  2076. ExpectWhitespace
  2077. else
  2078. FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
  2079. end;
  2080. end;
  2081. StoreLocation(FTokenStart);
  2082. if FSource.Matches('#REQUIRED') then
  2083. AttDef.Default := adRequired
  2084. else if FSource.Matches('#IMPLIED') then
  2085. AttDef.Default := adImplied
  2086. else if FSource.Matches('#FIXED') then
  2087. begin
  2088. AttDef.Default := adFixed;
  2089. ExpectWhitespace;
  2090. end
  2091. else
  2092. AttDef.Default := adDefault;
  2093. if AttDef.Default in [adDefault, adFixed] then
  2094. begin
  2095. if AttDef.DataType = dtId then
  2096. ValidationError('An attribute of type ID cannot have a default value',[]);
  2097. // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
  2098. ExpectAttValue(AttDef.Data, dt <> dtCDATA);
  2099. if not AttDef.ValidateSyntax(AttDef.Data^.FValueStr, FNamespaces) then
  2100. ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
  2101. end;
  2102. if DiscardIt then
  2103. AttDef.Free
  2104. else
  2105. ElDef.AddAttrDef(AttDef);
  2106. except
  2107. AttDef.Free;
  2108. raise;
  2109. end;
  2110. SkipWhitespace;
  2111. end;
  2112. end;
  2113. procedure TXMLTextReader.ParseEntityDecl; // [70]
  2114. var
  2115. IsPE, Exists: Boolean;
  2116. Entity: TEntityDecl;
  2117. Map: THashTable;
  2118. Item: PHashItem;
  2119. dummy: TLocation;
  2120. begin
  2121. Entity := TEntityDecl.Create;
  2122. try
  2123. Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2124. Entity.FURI := FSource.SourceURI;
  2125. if not SkipWhitespace(True) then
  2126. FatalError('Expected whitespace');
  2127. IsPE := CheckForChar('%');
  2128. if IsPE then // [72]
  2129. begin
  2130. ExpectWhitespace;
  2131. if FPEMap = nil then
  2132. FPEMap := THashTable.Create(64, True);
  2133. Map := FPEMap;
  2134. end
  2135. else
  2136. Map := FDocType.Entities;
  2137. Entity.FIsPE := IsPE;
  2138. CheckName;
  2139. CheckNCName;
  2140. Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
  2141. ExpectWhitespace;
  2142. if FEntityValue.Buffer = nil then
  2143. BufAllocate(FEntityValue, 256);
  2144. if ParseLiteral(FEntityValue, ltEntity, False) then
  2145. begin
  2146. SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
  2147. Entity.FCharCount := FEntityValue.Length;
  2148. Entity.FStartLocation := FTokenStart;
  2149. end
  2150. else
  2151. begin
  2152. if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, dummy, False) then
  2153. FatalError('Expected entity value or external ID');
  2154. if not IsPE then // [76]
  2155. begin
  2156. if FSource.FBuf^ <> '>' then
  2157. ExpectWhitespace;
  2158. if FSource.Matches('NDATA') then
  2159. begin
  2160. ExpectWhitespace;
  2161. StoreLocation(FTokenStart); { needed for AddForwardRef }
  2162. CheckName;
  2163. SetString(Entity.FNotationName, FName.Buffer, FName.Length);
  2164. if FValidate and (FDocType.Notations.Get(FName.Buffer, FName.Length)=nil) then
  2165. AddForwardRef(FName.Buffer, FName.Length);
  2166. end;
  2167. end;
  2168. end;
  2169. except
  2170. Entity.Free;
  2171. raise;
  2172. end;
  2173. // Repeated declarations of same entity are legal but must be ignored
  2174. if FDTDProcessed and not Exists then
  2175. begin
  2176. Item^.Data := Entity;
  2177. Entity.FName := Item^.Key;
  2178. end
  2179. else
  2180. Entity.Free;
  2181. end;
  2182. procedure TXMLTextReader.ParseIgnoreSection;
  2183. var
  2184. IgnoreLoc: TLocation;
  2185. IgnoreLevel: Integer;
  2186. wc: WideChar;
  2187. begin
  2188. StoreLocation(IgnoreLoc);
  2189. IgnoreLevel := 1;
  2190. repeat
  2191. FValue.Length := 0;
  2192. wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
  2193. if FSource.Matches('<![') then
  2194. Inc(IgnoreLevel)
  2195. else if FSource.Matches(']]>') then
  2196. Dec(IgnoreLevel)
  2197. else if wc <> #0 then
  2198. FSource.NextChar
  2199. else // PE's aren't recognized in ignore section, cannot ContextPop()
  2200. DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
  2201. until IgnoreLevel=0;
  2202. end;
  2203. procedure TXMLTextReader.ParseMarkupDecl; // [29]
  2204. var
  2205. IncludeLevel: Integer;
  2206. CurrentEntity: TObject;
  2207. IncludeLoc: TLocation;
  2208. CondType: (ctUnknown, ctInclude, ctIgnore);
  2209. begin
  2210. IncludeLevel := 0;
  2211. repeat
  2212. SkipWhitespace;
  2213. if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
  2214. begin
  2215. ExpectString(']]>');
  2216. Dec(IncludeLevel);
  2217. Continue;
  2218. end;
  2219. if not CheckForChar('<') then
  2220. Break;
  2221. CurrentEntity := FSource.FEntity;
  2222. if FSource.FBuf^ = '?' then
  2223. begin
  2224. ParsePI;
  2225. end
  2226. else
  2227. begin
  2228. ExpectChar('!');
  2229. if FSource.FBuf^ = '-' then
  2230. ParseComment(True)
  2231. else if CheckForChar('[') then
  2232. begin
  2233. if FSource.Kind = skInternalSubset then
  2234. FatalError('Conditional sections are not allowed in internal subset', 1);
  2235. SkipWhitespace;
  2236. CondType := ctUnknown; // satisfy compiler
  2237. if FSource.Matches('INCLUDE') then
  2238. CondType := ctInclude
  2239. else if FSource.Matches('IGNORE') then
  2240. CondType := ctIgnore
  2241. else
  2242. FatalError('Expected "INCLUDE" or "IGNORE"');
  2243. SkipWhitespace;
  2244. CheckPENesting(CurrentEntity);
  2245. ExpectChar('[');
  2246. if CondType = ctInclude then
  2247. begin
  2248. if IncludeLevel = 0 then
  2249. StoreLocation(IncludeLoc);
  2250. Inc(IncludeLevel);
  2251. end
  2252. else if CondType = ctIgnore then
  2253. ParseIgnoreSection;
  2254. end
  2255. else
  2256. begin
  2257. FInsideDecl := True;
  2258. if FSource.Matches('ELEMENT') then
  2259. ParseElementDecl
  2260. else if FSource.Matches('ENTITY') then
  2261. ParseEntityDecl
  2262. else if FSource.Matches('ATTLIST') then
  2263. ParseAttlistDecl
  2264. else if FSource.Matches('NOTATION') then
  2265. ParseNotationDecl
  2266. else
  2267. FatalError('Illegal markup declaration');
  2268. SkipWhitespace;
  2269. CheckPENesting(CurrentEntity);
  2270. ExpectChar('>');
  2271. FInsideDecl := False;
  2272. end;
  2273. end;
  2274. until False;
  2275. if IncludeLevel > 0 then
  2276. DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
  2277. if FSource.FBuf < FSource.FBufEnd then
  2278. if (FSource.Kind <> skInternalSubset) or (FSource.FBuf^ <> ']') then
  2279. FatalError('Illegal character in DTD');
  2280. end;
  2281. procedure TXMLTextReader.ParseDTD;
  2282. begin
  2283. FSource.Initialize;
  2284. ParseMarkupDecl;
  2285. end;
  2286. procedure TXMLTextReader.Close;
  2287. begin
  2288. FReadState := rsClosed;
  2289. FTokenStart.Line := 0;
  2290. FTokenStart.LinePos := 0;
  2291. end;
  2292. function TXMLTextReader.GetAttributeCount: Integer;
  2293. begin
  2294. result := FAttrCount;
  2295. end;
  2296. function TXMLTextReader.GetAttribute(i: Integer): XMLString;
  2297. begin
  2298. if (i < 0) or (i >= FAttrCount) then
  2299. raise EArgumentOutOfRangeException.Create('index');
  2300. result := FNodeStack[FNesting+i+1].FValueStr;
  2301. end;
  2302. function TXMLTextReader.GetAttribute(const AName: XMLString): XMLString;
  2303. var
  2304. i: Integer;
  2305. p: PHashItem;
  2306. begin
  2307. p := FNameTable.Find(PWideChar(AName), Length(AName));
  2308. if Assigned(p) then
  2309. for i := 1 to FAttrCount do
  2310. if FNodeStack[FNesting+i].FQName = p then
  2311. begin
  2312. result := FNodeStack[FNesting+i].FValueStr;
  2313. Exit;
  2314. end;
  2315. result := '';
  2316. end;
  2317. function TXMLTextReader.GetAttribute(const aLocalName, nsuri: XMLString): XMLString;
  2318. var
  2319. i: Integer;
  2320. p: PWideChar;
  2321. p1: PHashItem;
  2322. node: PNodeData;
  2323. begin
  2324. p1 := FNameTable.Find(PWideChar(nsuri), Length(nsuri));
  2325. if Assigned(p1) then
  2326. for i := 1 to FAttrCount do
  2327. begin
  2328. node := @FNodeStack[FNesting+i];
  2329. if node^.FNsUri = p1 then
  2330. begin
  2331. P := PWideChar(node^.FQName^.Key);
  2332. if node^.FColonPos > 0 then
  2333. Inc(P, node^.FColonPos+1);
  2334. if (Length(node^.FQName^.Key)-node^.FColonPos-1 = Length(aLocalName)) and
  2335. CompareMem(P, PWideChar(aLocalName), Length(aLocalName)*sizeof(WideChar)) then
  2336. begin
  2337. result := node^.FValueStr;
  2338. Exit;
  2339. end;
  2340. end;
  2341. end;
  2342. result := '';
  2343. end;
  2344. function TXMLTextReader.GetDepth: Integer;
  2345. begin
  2346. result := FNesting;
  2347. if FCurrAttrIndex >= 0 then
  2348. Inc(result);
  2349. if FAttrReadState <> arsNone then
  2350. Inc(result);
  2351. end;
  2352. function TXMLTextReader.GetNameTable: THashTable;
  2353. begin
  2354. result := FNameTable;
  2355. end;
  2356. function TXMLTextReader.GetNodeType: TXmlNodeType;
  2357. begin
  2358. result := FCurrNode^.FNodeType;
  2359. end;
  2360. function TXMLTextReader.GetName: XMLString;
  2361. begin
  2362. if Assigned(FCurrNode^.FQName) then
  2363. result := FCurrNode^.FQName^.Key
  2364. else
  2365. result := '';
  2366. end;
  2367. function TXMLTextReader.GetIsDefault: Boolean;
  2368. begin
  2369. result := FCurrNode^.FIsDefault;
  2370. end;
  2371. function TXMLTextReader.GetBaseUri: XMLString;
  2372. begin
  2373. result := FSource.SourceURI;
  2374. end;
  2375. function TXMLTextReader.GetXmlVersion: TXMLVersion;
  2376. begin
  2377. result := FSource.FXMLVersion;
  2378. end;
  2379. function TXMLTextReader.GetXmlEncoding: XMLString;
  2380. begin
  2381. result := FSource.FXMLEncoding;
  2382. end;
  2383. { IXmlLineInfo methods }
  2384. function TXMLTextReader.GetHasLineInfo: Boolean;
  2385. begin
  2386. result := True;
  2387. end;
  2388. function TXMLTextReader.GetLineNumber: Integer;
  2389. begin
  2390. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2391. result := FCurrNode^.FLoc.Line
  2392. else
  2393. result := FTokenStart.Line;
  2394. end;
  2395. function TXMLTextReader.GetLinePosition: Integer;
  2396. begin
  2397. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2398. result := FCurrNode^.FLoc.LinePos
  2399. else
  2400. result := FTokenStart.LinePos;
  2401. end;
  2402. function TXMLTextReader.CurrentNodePtr: PPNodeData;
  2403. begin
  2404. result := @FCurrNode;
  2405. end;
  2406. function TXMLTextReader.LookupNamespace(const APrefix: XMLString): XMLString;
  2407. begin
  2408. if Assigned(FNSHelper) then
  2409. result := FNSHelper.LookupNamespace(APrefix)
  2410. else
  2411. result := '';
  2412. end;
  2413. function TXMLTextReader.MoveToFirstAttribute: Boolean;
  2414. begin
  2415. result := False;
  2416. if FAttrCount = 0 then
  2417. exit;
  2418. FCurrAttrIndex := 0;
  2419. if FAttrReadState <> arsNone then
  2420. CleanAttrReadState;
  2421. FCurrNode := @FNodeStack[FNesting+1];
  2422. result := True;
  2423. end;
  2424. function TXMLTextReader.MoveToNextAttribute: Boolean;
  2425. begin
  2426. result := False;
  2427. if FCurrAttrIndex+1 >= FAttrCount then
  2428. exit;
  2429. Inc(FCurrAttrIndex);
  2430. if FAttrReadState <> arsNone then
  2431. CleanAttrReadState;
  2432. FCurrNode := @FNodeStack[FNesting+1+FCurrAttrIndex];
  2433. result := True;
  2434. end;
  2435. function TXMLTextReader.MoveToElement: Boolean;
  2436. begin
  2437. result := False;
  2438. if FAttrReadState <> arsNone then
  2439. CleanAttrReadState
  2440. else if FCurrNode^.FNodeType <> ntAttribute then
  2441. exit;
  2442. FCurrNode := @FNodeStack[FNesting];
  2443. FCurrAttrIndex := -1;
  2444. result := True;
  2445. end;
  2446. function TXMLTextReader.ReadAttributeValue: Boolean;
  2447. var
  2448. attrNode: PNodeData;
  2449. begin
  2450. Result := False;
  2451. if FAttrReadState = arsNone then
  2452. begin
  2453. if (FReadState <> rsInteractive) or (FCurrAttrIndex < 0) then
  2454. Exit;
  2455. attrNode := @FNodeStack[FNesting+FCurrAttrIndex+1];
  2456. if attrNode^.FNext = nil then
  2457. begin
  2458. if attrNode^.FValueStr = '' then
  2459. Exit; { we don't want to expose empty textnodes }
  2460. FCurrNode := AllocNodeData(FNesting+FAttrCount+1);
  2461. FCurrNode^.FNodeType := ntText;
  2462. FCurrNode^.FValueStr := attrNode^.FValueStr;
  2463. FCurrNode^.FLoc := attrNode^.FLoc2;
  2464. end
  2465. else
  2466. FCurrNode := attrNode^.FNext;
  2467. FAttrReadState := arsText;
  2468. FAttrBaseSource := FSource;
  2469. Result := True;
  2470. end
  2471. else // already reading, advance to next chunk
  2472. begin
  2473. if FSource = FAttrBaseSource then
  2474. begin
  2475. Result := Assigned(FCurrNode^.FNext);
  2476. if Result then
  2477. FCurrNode := FCurrNode^.FNext;
  2478. end
  2479. else
  2480. begin
  2481. NextAttrValueChunk;
  2482. Result := True;
  2483. end;
  2484. end;
  2485. end;
  2486. procedure TXMLTextReader.NextAttrValueChunk;
  2487. var
  2488. wc: WideChar;
  2489. tok: TAttributeReadState;
  2490. begin
  2491. if FAttrReadState = arsPushEntity then
  2492. begin
  2493. Inc(FNesting);
  2494. { make sure that the location is available }
  2495. AllocNodeData(FNesting+FAttrCount+1);
  2496. FAttrReadState := arsText;
  2497. end;
  2498. FCurrNode := @FNodeStack[FNesting+FAttrCount+1];
  2499. StoreLocation(FCurrNode^.FLoc);
  2500. FValue.Length := 0;
  2501. if FAttrReadState = arsText then
  2502. repeat
  2503. wc := FSource.SkipUntil(FValue, [#0, '&', #9, #10, #13]);
  2504. if wc = '&' then
  2505. begin
  2506. if ParseRef(FValue) or ResolvePredefined then
  2507. Continue;
  2508. tok := arsEntity;
  2509. end
  2510. else if wc <> #0 then { #9,#10,#13 -> replace by #32 }
  2511. begin
  2512. FSource.NextChar;
  2513. BufAppend(FValue, #32);
  2514. Continue;
  2515. end
  2516. else // #0
  2517. tok := arsEntityEnd;
  2518. if FValue.Length <> 0 then
  2519. begin
  2520. FCurrNode^.FNodeType := ntText;
  2521. FCurrNode^.FQName := nil;
  2522. SetString(FCurrNode^.FValueStr, FValue.Buffer, FValue.Length);
  2523. FAttrReadState := tok;
  2524. Exit;
  2525. end;
  2526. Break;
  2527. until False
  2528. else
  2529. tok := FAttrReadState;
  2530. if tok = arsEntity then
  2531. begin
  2532. HandleEntityStart;
  2533. FAttrReadState := arsText;
  2534. end
  2535. else if tok = arsEntityEnd then
  2536. begin
  2537. HandleEntityEnd;
  2538. FAttrReadState := arsText;
  2539. end;
  2540. end;
  2541. procedure TXMLTextReader.CleanAttrReadState;
  2542. begin
  2543. while FSource <> FAttrBaseSource do
  2544. ContextPop(True);
  2545. FAttrReadState := arsNone;
  2546. end;
  2547. function TXMLTextReader.GetHasValue: Boolean;
  2548. begin
  2549. result := FCurrNode^.FNodeType in [ntAttribute,ntText,ntCDATA,
  2550. ntProcessingInstruction,ntComment,ntWhitespace,ntSignificantWhitespace,
  2551. ntDocumentType];
  2552. end;
  2553. function TXMLTextReader.GetValue: XMLString;
  2554. begin
  2555. if (FCurrAttrIndex>=0) or (FAttrReadState <> arsNone) then
  2556. result := FCurrNode^.FValueStr
  2557. else
  2558. SetString(result, FCurrNode^.FValueStart, FCurrNode^.FValueLength);
  2559. end;
  2560. function TXMLTextReader.GetPrefix: XMLString;
  2561. begin
  2562. if Assigned(FCurrNode^.FPrefix) then
  2563. result := FCurrNode^.FPrefix^.Key
  2564. else
  2565. result := '';
  2566. end;
  2567. function TXMLTextReader.GetLocalName: XMLString;
  2568. begin
  2569. if FNamespaces and Assigned(FCurrNode^.FQName) then
  2570. if FCurrNode^.FColonPos < 0 then
  2571. Result := FCurrNode^.FQName^.Key
  2572. else
  2573. Result := Copy(FCurrNode^.FQName^.Key, FCurrNode^.FColonPos+2, MaxInt)
  2574. else
  2575. Result := '';
  2576. end;
  2577. function TXMLTextReader.GetNamespaceUri: XMLString;
  2578. begin
  2579. if Assigned(FCurrNode^.FNSURI) then
  2580. result := FCurrNode^.FNSURI^.Key
  2581. else
  2582. result := '';
  2583. end;
  2584. procedure TXMLTextReader.SetEOFState;
  2585. begin
  2586. FCurrNode := @FNodeStack[0];
  2587. Finalize(FCurrNode^);
  2588. FillChar(FCurrNode^, sizeof(TNodeData), 0);
  2589. FReadState := rsEndOfFile;
  2590. end;
  2591. procedure TXMLTextReader.ValidateCurrentNode;
  2592. var
  2593. ElDef: TElementDecl;
  2594. AttDef: TAttributeDef;
  2595. attr: PNodeData;
  2596. i: Integer;
  2597. begin
  2598. case FCurrNode^.FNodeType of
  2599. ntElement:
  2600. begin
  2601. if (FNesting = 0) and (not FFragmentMode) then
  2602. begin
  2603. if Assigned(FDocType) then
  2604. begin
  2605. if FDocType.FName <> FCurrNode^.FQName^.Key then
  2606. DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
  2607. end
  2608. else
  2609. DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
  2610. end;
  2611. ElDef := TElementDecl(FCurrNode^.FQName^.Data);
  2612. if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
  2613. DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2614. if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
  2615. DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2616. PushVC(ElDef);
  2617. if ElDef = nil then
  2618. Exit;
  2619. { Validate attributes }
  2620. for i := 1 to FAttrCount do
  2621. begin
  2622. attr := @FNodeStack[FNesting+i];
  2623. AttDef := TAttributeDef(attr^.FTypeInfo);
  2624. if AttDef = nil then
  2625. DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
  2626. [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
  2627. else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
  2628. begin
  2629. if FStandalone and AttDef.ExternallyDeclared then
  2630. if attr^.FDenormalized then
  2631. DoErrorPos(esError, 'In a standalone document, externally defined attribute cannot cause value normalization', attr^.FLoc2)
  2632. else if i > FSpecifiedAttrs then
  2633. DoError(esError, 'In a standalone document, attribute cannot have a default value defined externally');
  2634. // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
  2635. if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
  2636. DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
  2637. if not AttDef.ValidateSyntax(attr^.FValueStr, FNamespaces) then
  2638. DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
  2639. ValidateAttrValue(AttDef, attr);
  2640. end;
  2641. end;
  2642. { Check presence of #REQUIRED attributes }
  2643. if ElDef.HasRequiredAtts then
  2644. for i := 0 to ElDef.AttrDefCount-1 do
  2645. begin
  2646. if FAttrDefIndex[i] = FAttrTag then
  2647. Continue;
  2648. AttDef := ElDef.AttrDefs[i];
  2649. if AttDef.Default = adRequired then
  2650. ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
  2651. [AttDef.Data^.FQName^.Key, FCurrNode^.FQName^.Key], 0)
  2652. end;
  2653. end;
  2654. ntEndElement:
  2655. begin
  2656. if FValidators[FValidatorNesting].Incomplete then
  2657. ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
  2658. if FValidatorNesting > 0 then
  2659. Dec(FValidatorNesting);
  2660. end;
  2661. ntText, ntSignificantWhitespace:
  2662. case FValidators[FValidatorNesting].FContentType of
  2663. ctChildren:
  2664. if FCurrNode^.FNodeType = ntText then
  2665. ValidationError('Character data is not allowed in element-only content',[])
  2666. else
  2667. begin
  2668. if FValidators[FValidatorNesting].FSaViolation then
  2669. ValidationError('Standalone constraint violation',[]);
  2670. FCurrNode^.FNodeType := ntWhitespace;
  2671. end;
  2672. ctEmpty:
  2673. ValidationError('Character data is not allowed in EMPTY elements', []);
  2674. end;
  2675. ntCDATA:
  2676. if FValidators[FValidatorNesting].FContentType = ctChildren then
  2677. ValidationError('CDATA sections are not allowed in element-only content',[]);
  2678. ntProcessingInstruction:
  2679. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2680. ValidationError('Processing instructions are not allowed within EMPTY elements', []);
  2681. ntComment:
  2682. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2683. ValidationError('Comments are not allowed within EMPTY elements', []);
  2684. ntDocumentType:
  2685. ValidateDTD;
  2686. end;
  2687. end;
  2688. procedure TXMLTextReader.HandleEntityStart;
  2689. begin
  2690. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  2691. FCurrNode^.FNodeType := ntEntityReference;
  2692. FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2693. FCurrNode^.FColonPos := -1;
  2694. FCurrNode^.FValueStart := nil;
  2695. FCurrNode^.FValueLength := 0;
  2696. FCurrNode^.FValueStr := '';
  2697. StoreLocation(FCurrNode^.FLoc);
  2698. { point past '&' to first char of entity name }
  2699. Dec(FCurrNode^.FLoc.LinePos, FName.Length+1);
  2700. end;
  2701. procedure TXMLTextReader.HandleEntityEnd;
  2702. begin
  2703. ContextPop(True);
  2704. if FNesting > 0 then Dec(FNesting);
  2705. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  2706. FCurrNode^.FNodeType := ntEndEntity;
  2707. { point to trailing ';' }
  2708. Inc(FCurrNode^.FLoc.LinePos, Length(FCurrNode^.FQName^.Key));
  2709. end;
  2710. procedure TXMLTextReader.ResolveEntity;
  2711. var
  2712. n: PNodeData;
  2713. ent: TEntityDecl;
  2714. begin
  2715. if FCurrNode^.FNodeType <> ntEntityReference then
  2716. raise EInvalidOperation.Create('Wrong node type');
  2717. if FAttrReadState <> arsNone then
  2718. begin
  2719. { copy the EntityReference node to the stack if not already there }
  2720. n := AllocNodeData(FNesting+FAttrCount+1);
  2721. if n <> FCurrNode then
  2722. n^ := FCurrNode^;
  2723. ent := nil;
  2724. if Assigned(FDocType) then
  2725. ent := FDocType.Entities.Get(PWideChar(n^.FQName^.Key),Length(n^.FQName^.Key)) as TEntityDecl;
  2726. ContextPush(ent, True);
  2727. FAttrReadState := arsPushEntity;
  2728. end
  2729. else
  2730. FNext := xtPushEntity;
  2731. end;
  2732. procedure TXMLTextReader.DoStartEntity;
  2733. begin
  2734. Inc(FNesting);
  2735. FCurrNode := AllocNodeData(FNesting);
  2736. ContextPush(FCurrEntity, True);
  2737. FNext := xtText;
  2738. end;
  2739. // The code below does the bulk of the parsing, and must be as fast as possible.
  2740. // To minimize CPU cache effects, methods from different classes are kept together
  2741. function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  2742. wsflag: PBoolean): WideChar;
  2743. var
  2744. old: PWideChar;
  2745. nonws: Boolean;
  2746. wc: WideChar;
  2747. begin
  2748. nonws := False;
  2749. repeat
  2750. old := FBuf;
  2751. repeat
  2752. wc := FBuf^;
  2753. if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
  2754. (wc = #$2028))) then
  2755. begin
  2756. // strictly this is needed only for 2-byte lineendings
  2757. BufAppendChunk(ToFill, old, FBuf);
  2758. NewLine;
  2759. old := FBuf;
  2760. wc := FBuf^
  2761. end
  2762. else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
  2763. (wc <> #9)) or (wc > #$FFFD) or
  2764. (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
  2765. FReader.FatalError('Invalid character');
  2766. if (wc < #255) and (Char(ord(wc)) in Delim) then
  2767. Break;
  2768. // the checks above filter away everything below #32 that isn't a whitespace
  2769. if wc > #32 then
  2770. nonws := True;
  2771. Inc(FBuf);
  2772. until False;
  2773. Result := wc;
  2774. BufAppendChunk(ToFill, old, FBuf);
  2775. until (Result <> #0) or (not Reload);
  2776. if Assigned(wsflag) then
  2777. wsflag^ := wsflag^ or nonws;
  2778. end;
  2779. const
  2780. TextDelims: array[Boolean] of TSetOfChar = (
  2781. [#0, '<', '&', '>'],
  2782. [#0, '>']
  2783. );
  2784. textNodeTypes: array[Boolean] of TXMLNodeType = (
  2785. ntSignificantWhitespace,
  2786. ntText
  2787. );
  2788. function TXMLTextReader.ReadTopLevel: Boolean;
  2789. var
  2790. tok: TXMLToken;
  2791. begin
  2792. if FNext = xtFakeLF then
  2793. begin
  2794. Result := SetupFakeLF(xtText);
  2795. Exit;
  2796. end;
  2797. StoreLocation(FTokenStart);
  2798. if FNext = xtText then
  2799. repeat
  2800. SkipS;
  2801. if FSource.FBuf^ = '<' then
  2802. begin
  2803. Inc(FSource.FBuf);
  2804. if FSource.FBufEnd < FSource.FBuf + 2 then
  2805. FSource.Reload;
  2806. if FSource.FBuf^ = '!' then
  2807. begin
  2808. Inc(FSource.FBuf);
  2809. if FSource.FBuf^ = '-' then
  2810. begin
  2811. if FIgnoreComments then
  2812. begin
  2813. ParseComment(True);
  2814. Continue;
  2815. end;
  2816. tok := xtComment;
  2817. end
  2818. else
  2819. tok := xtDoctype;
  2820. end
  2821. else if FSource.FBuf^ = '?' then
  2822. tok := xtPI
  2823. else
  2824. begin
  2825. CheckName;
  2826. tok := xtElement;
  2827. end;
  2828. end
  2829. else if FSource.FBuf >= FSource.FBufEnd then
  2830. begin
  2831. if FState < rsRoot then
  2832. FatalError('Root element is missing');
  2833. tok := xtEOF;
  2834. end
  2835. else
  2836. FatalError('Illegal at document level');
  2837. if FCanonical and (FState > rsRoot) and (tok <> xtEOF) then
  2838. begin
  2839. Result := SetupFakeLF(tok);
  2840. Exit;
  2841. end;
  2842. Break;
  2843. until False
  2844. else // FNext <> xtText
  2845. tok := FNext;
  2846. if FCanonical and (FState < rsRoot) and (tok <> xtDoctype) then
  2847. FNext := xtFakeLF
  2848. else
  2849. FNext := xtText;
  2850. case tok of
  2851. xtElement:
  2852. begin
  2853. if FState > rsRoot then
  2854. FatalError('Only one top-level element allowed', FName.Length)
  2855. else if FState < rsRoot then
  2856. begin
  2857. // dispose notation refs from DTD, if any
  2858. ClearForwardRefs;
  2859. FState := rsRoot;
  2860. end;
  2861. ParseStartTag;
  2862. end;
  2863. xtPI: ParsePI;
  2864. xtComment: ParseComment(False);
  2865. xtDoctype:
  2866. begin
  2867. ParseDoctypeDecl;
  2868. if FCanonical then
  2869. begin
  2870. // recurse, effectively ignoring the DTD
  2871. result := ReadTopLevel();
  2872. Exit;
  2873. end;
  2874. end;
  2875. xtEOF: SetEofState;
  2876. end;
  2877. Result := tok <> xtEOF;
  2878. end;
  2879. function TXMLTextReader.Read: Boolean;
  2880. var
  2881. nonWs: Boolean;
  2882. wc: WideChar;
  2883. InCDATA: Boolean;
  2884. tok: TXMLToken;
  2885. begin
  2886. if FReadState > rsInteractive then
  2887. begin
  2888. Result := False;
  2889. Exit;
  2890. end;
  2891. if FReadState = rsInitial then
  2892. begin
  2893. FReadState := rsInteractive;
  2894. FSource.Initialize;
  2895. FNext := xtText;
  2896. end;
  2897. if FAttrReadState <> arsNone then
  2898. CleanAttrReadState;
  2899. if FNext = xtPopEmptyElement then
  2900. begin
  2901. FNext := xtPopElement;
  2902. FCurrNode^.FNodeType := ntEndElement;
  2903. if FAttrCleanupFlag then
  2904. CleanupAttributes;
  2905. FAttrCount := 0;
  2906. FCurrAttrIndex := -1;
  2907. Result := True;
  2908. Exit;
  2909. end;
  2910. if FNext = xtPushElement then
  2911. begin
  2912. if FAttrCleanupFlag then
  2913. CleanupAttributes;
  2914. FAttrCount := 0;
  2915. Inc(FNesting);
  2916. FCurrAttrIndex := -1;
  2917. FNext := xtText;
  2918. end
  2919. else if FNext = xtPopElement then
  2920. PopElement
  2921. else if FNext = xtPushEntity then
  2922. DoStartEntity;
  2923. if FState <> rsRoot then
  2924. begin
  2925. Result := ReadTopLevel;
  2926. Exit;
  2927. end;
  2928. InCDATA := (FNext = xtCDSect);
  2929. StoreLocation(FTokenStart);
  2930. nonWs := False;
  2931. FValue.Length := 0;
  2932. if FNext in [xtCDSect, xtText] then
  2933. repeat
  2934. wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
  2935. if wc = '<' then
  2936. begin
  2937. Inc(FSource.FBuf);
  2938. if FSource.FBufEnd < FSource.FBuf + 2 then
  2939. FSource.Reload;
  2940. if FSource.FBuf^ = '/' then
  2941. tok := xtEndElement
  2942. else if CheckName([cnOptional]) then
  2943. tok := xtElement
  2944. else if FSource.FBuf^ = '!' then
  2945. begin
  2946. Inc(FSource.FBuf);
  2947. if FSource.FBuf^ = '[' then
  2948. begin
  2949. ExpectString('[CDATA[');
  2950. StoreLocation(FTokenStart);
  2951. InCDATA := True;
  2952. if FCDSectionsAsText or (FValue.Length = 0) then
  2953. Continue;
  2954. tok := xtCDSect;
  2955. end
  2956. else if FSource.FBuf^ = '-' then
  2957. begin
  2958. { Ignoring comments is tricky in validating mode; discarding a comment which
  2959. is the only child of an EMPTY element will make that element erroneously appear
  2960. as valid. Therefore, at this point we discard only comments which are preceded
  2961. by some text (since presence of text already renders an EMPTY element invalid).
  2962. Other comments should be reported to validation part and discarded there. }
  2963. if FIgnoreComments and (FValue.Length > 0) then
  2964. begin
  2965. ParseComment(True);
  2966. Continue;
  2967. end;
  2968. tok := xtComment;
  2969. end
  2970. else
  2971. tok := xtDoctype;
  2972. end
  2973. else if FSource.FBuf^ = '?' then
  2974. tok := xtPI
  2975. else
  2976. RaiseNameNotFound;
  2977. end
  2978. else if wc = #0 then
  2979. begin
  2980. if InCDATA then
  2981. FatalError('Unterminated CDATA section', -1);
  2982. if FNesting > FSource.FStartNesting then
  2983. FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting-1].FQName^.Key]);
  2984. if Assigned(FSource.FParent) then
  2985. begin
  2986. if FExpandEntities and ContextPop then
  2987. Continue
  2988. else
  2989. tok := xtEntityEnd;
  2990. end
  2991. else
  2992. tok := xtEOF;
  2993. end
  2994. else if wc = '>' then
  2995. begin
  2996. BufAppend(FValue, wc);
  2997. FSource.NextChar;
  2998. if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
  2999. (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
  3000. if InCData then // got a ']]>' separator
  3001. begin
  3002. Dec(FValue.Length, 3);
  3003. InCDATA := False;
  3004. if FCDSectionsAsText then
  3005. Continue;
  3006. SetNodeInfoWithValue(ntCDATA);
  3007. FNext := xtText;
  3008. Result := True;
  3009. Exit;
  3010. end
  3011. else
  3012. FatalError('Literal '']]>'' is not allowed in text', 3);
  3013. end
  3014. else if wc = '&' then
  3015. begin
  3016. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  3017. ValidationError('References are illegal in EMPTY elements', []);
  3018. if ParseRef(FValue) or ResolvePredefined then
  3019. begin
  3020. nonWs := True; // CharRef to whitespace is not considered whitespace
  3021. Continue;
  3022. end
  3023. else
  3024. begin
  3025. FCurrEntity := EntityCheck;
  3026. if Assigned(FCurrEntity) and FExpandEntities then
  3027. begin
  3028. ContextPush(FCurrEntity);
  3029. Continue;
  3030. end;
  3031. tok := xtEntity;
  3032. end;
  3033. end;
  3034. if FValue.Length <> 0 then
  3035. begin
  3036. SetNodeInfoWithValue(textNodeTypes[nonWs]);
  3037. FNext := tok;
  3038. Result := True;
  3039. Exit;
  3040. end;
  3041. Break;
  3042. until False
  3043. else // not (FNext in [xtText, xtCDSect])
  3044. tok := FNext;
  3045. FNext := xtText;
  3046. case tok of
  3047. xtEntity: HandleEntityStart;
  3048. xtEntityEnd: HandleEntityEnd;
  3049. xtElement: ParseStartTag;
  3050. xtEndElement: ParseEndTag;
  3051. xtPI: ParsePI;
  3052. xtDoctype: ParseDoctypeDecl;
  3053. xtComment: ParseComment(False);
  3054. xtEOF: SetEofState;
  3055. end;
  3056. Result := tok <> xtEOF;
  3057. end;
  3058. procedure TXMLCharSource.NextChar;
  3059. begin
  3060. Inc(FBuf);
  3061. if FBuf >= FBufEnd then
  3062. Reload;
  3063. end;
  3064. procedure TXMLTextReader.ExpectChar(wc: WideChar);
  3065. begin
  3066. if FSource.FBuf^ = wc then
  3067. FSource.NextChar
  3068. else
  3069. FatalError(wc);
  3070. end;
  3071. // Element name already in FNameBuffer
  3072. procedure TXMLTextReader.ParseStartTag; // [39] [40] [44]
  3073. var
  3074. ElDef: TElementDecl;
  3075. IsEmpty: Boolean;
  3076. ElName: PHashItem;
  3077. b: TBinding;
  3078. Len: Integer;
  3079. begin
  3080. ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3081. ElDef := TElementDecl(ElName^.Data);
  3082. if Assigned(ElDef) then
  3083. Len := ElDef.AttrDefCount+8 { overallocate a bit }
  3084. else
  3085. Len := 0;
  3086. // (re)initialize array of attribute definition tags
  3087. if (Len-8 > Length(FAttrDefIndex)) or (FAttrTag = 0) then
  3088. begin
  3089. SetLength(FAttrDefIndex, Len);
  3090. for Len := 0 to High(FAttrDefIndex) do
  3091. FAttrDefIndex[Len] := FAttrTag;
  3092. end;
  3093. // we're about to process a new set of attributes
  3094. {$push}{$r-,q-}
  3095. Dec(FAttrTag);
  3096. {$pop}
  3097. IsEmpty := False;
  3098. FAttrCount := 0;
  3099. FCurrAttrIndex := -1;
  3100. FPrefixedAttrs := 0;
  3101. FSpecifiedAttrs := 0;
  3102. FCurrNode := AllocNodeData(FNesting);
  3103. FCurrNode^.FQName := ElName;
  3104. FCurrNode^.FNodeType := ntElement;
  3105. FCurrNode^.FColonPos := FColonPos;
  3106. StoreLocation(FCurrNode^.FLoc);
  3107. Dec(FCurrNode^.FLoc.LinePos, FName.Length);
  3108. if FNamespaces then
  3109. begin
  3110. FNSHelper.PushScope;
  3111. if FColonPos > 0 then
  3112. FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3113. end;
  3114. while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
  3115. begin
  3116. SkipS(True);
  3117. if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
  3118. Break;
  3119. ParseAttribute(ElDef);
  3120. end;
  3121. if FSource.FBuf^ = '/' then
  3122. begin
  3123. IsEmpty := True;
  3124. FSource.NextChar;
  3125. end;
  3126. ExpectChar('>');
  3127. if Assigned(ElDef) and ElDef.NeedsDefaultPass then
  3128. ProcessDefaultAttributes(ElDef);
  3129. // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
  3130. FCurrNode := @FNodeStack[FNesting];
  3131. if FNamespaces then
  3132. begin
  3133. { Assign namespace URIs to prefixed attrs }
  3134. if FPrefixedAttrs <> 0 then
  3135. ProcessNamespaceAtts;
  3136. { Expand the element name }
  3137. if Assigned(FCurrNode^.FPrefix) then
  3138. begin
  3139. b := TBinding(FCurrNode^.FPrefix^.Data);
  3140. if not (Assigned(b) and Assigned(b.uri) and (b.uri^.Key <> '')) then
  3141. DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
  3142. FCurrNode^.FNsUri := b.uri;
  3143. end
  3144. else
  3145. begin
  3146. b := FNSHelper.DefaultNSBinding;
  3147. if Assigned(b) then
  3148. FCurrNode^.FNsUri := b.uri;
  3149. end;
  3150. end;
  3151. if not IsEmpty then
  3152. begin
  3153. if not FPreserveWhitespace then // critical for testsuite compliance
  3154. SkipS;
  3155. FNext := xtPushElement;
  3156. end
  3157. else
  3158. FNext := xtPopEmptyElement;
  3159. end;
  3160. procedure TXMLTextReader.ParseEndTag; // [42]
  3161. var
  3162. ElName: PHashItem;
  3163. begin
  3164. if FNesting <= FSource.FStartNesting then
  3165. FatalError('End-tag is not allowed here');
  3166. if FNesting > 0 then Dec(FNesting);
  3167. Inc(FSource.FBuf);
  3168. FCurrNode := @FNodeStack[FNesting]; // move off the possible child
  3169. FCurrNode^.FNodeType := ntEndElement;
  3170. StoreLocation(FTokenStart);
  3171. FCurrNode^.FLoc := FTokenStart;
  3172. ElName := FCurrNode^.FQName;
  3173. if not FSource.MatchesLong(ElName^.Key) then
  3174. FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], -1);
  3175. if FSource.FBuf^ = '>' then // this handles majority of cases
  3176. FSource.NextChar
  3177. else
  3178. begin // gives somewhat incorrect message for <a></aa>
  3179. SkipS;
  3180. ExpectChar('>');
  3181. end;
  3182. FNext := xtPopElement;
  3183. end;
  3184. procedure TXMLTextReader.ParseAttribute(ElDef: TElementDecl);
  3185. var
  3186. attrName: PHashItem;
  3187. attrData: PNodeData;
  3188. AttDef: TAttributeDef;
  3189. i: Integer;
  3190. begin
  3191. CheckName;
  3192. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3193. attrData := AllocAttributeData;
  3194. attrData^.FQName := attrName;
  3195. attrData^.FColonPos := FColonPos;
  3196. StoreLocation(attrData^.FLoc);
  3197. Dec(attrData^.FLoc.LinePos, FName.Length);
  3198. FSpecifiedAttrs := FAttrCount;
  3199. if Assigned(ElDef) then
  3200. begin
  3201. AttDef := ElDef.GetAttrDef(attrName);
  3202. // mark attribute as specified
  3203. if Assigned(AttDef) then
  3204. FAttrDefIndex[AttDef.Index] := FAttrTag;
  3205. end
  3206. else
  3207. AttDef := nil;
  3208. attrData^.FTypeInfo := AttDef;
  3209. // check for duplicates
  3210. for i := 1 to FAttrCount-1 do
  3211. if FNodeStack[FNesting+i].FQName = attrName then
  3212. FatalError('Duplicate attribute', FName.Length);
  3213. if FNamespaces then
  3214. begin
  3215. if ((FName.Length = 5) or (FColonPos = 5)) and
  3216. (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
  3217. (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
  3218. (FName.Buffer[4] = 's') then
  3219. begin
  3220. if FColonPos > 0 then
  3221. attrData^.FPrefix := FStdPrefix_xmlns;
  3222. attrData^.FNsUri := FStdUri_xmlns;
  3223. end
  3224. else if FColonPos > 0 then
  3225. begin
  3226. attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3227. Inc(FPrefixedAttrs);
  3228. end;
  3229. end;
  3230. ExpectEq;
  3231. ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
  3232. if Assigned(attrData^.FNsUri) then
  3233. begin
  3234. if (not AddBinding(attrData)) and FCanonical then
  3235. begin
  3236. CleanupAttribute(attrData);
  3237. Dec(FAttrCount);
  3238. Dec(FSpecifiedAttrs);
  3239. end;
  3240. end;
  3241. end;
  3242. procedure TXMLTextReader.AddForwardRef(Buf: PWideChar; Length: Integer);
  3243. var
  3244. w: PForwardRef;
  3245. begin
  3246. if FForwardRefs = nil then
  3247. FForwardRefs := TFPList.Create;
  3248. New(w);
  3249. SetString(w^.Value, Buf, Length);
  3250. w^.Loc := FTokenStart;
  3251. FForwardRefs.Add(w);
  3252. end;
  3253. procedure TXMLTextReader.ClearForwardRefs;
  3254. var
  3255. I: Integer;
  3256. begin
  3257. if Assigned(FForwardRefs) then
  3258. begin
  3259. for I := 0 to FForwardRefs.Count-1 do
  3260. Dispose(PForwardRef(FForwardRefs.List^[I]));
  3261. FForwardRefs.Clear;
  3262. end;
  3263. end;
  3264. procedure TXMLTextReader.ValidateIdRefs;
  3265. var
  3266. I: Integer;
  3267. begin
  3268. if Assigned(FForwardRefs) then
  3269. begin
  3270. for I := 0 to FForwardRefs.Count-1 do
  3271. with PForwardRef(FForwardRefs.List^[I])^ do
  3272. if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
  3273. DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
  3274. ClearForwardRefs;
  3275. end;
  3276. end;
  3277. procedure TXMLTextReader.ProcessDefaultAttributes(ElDef: TElementDecl);
  3278. var
  3279. I: Integer;
  3280. AttDef: TAttributeDef;
  3281. attrData: PNodeData;
  3282. begin
  3283. for I := 0 to ElDef.AttrDefCount-1 do
  3284. begin
  3285. if FAttrDefIndex[I] <> FAttrTag then // this one wasn't specified
  3286. begin
  3287. AttDef := ElDef.AttrDefs[I];
  3288. case AttDef.Default of
  3289. adDefault, adFixed: begin
  3290. attrData := AllocAttributeData;
  3291. attrData^ := AttDef.Data^;
  3292. if FCanonical then
  3293. attrData^.FIsDefault := False;
  3294. if FNamespaces then
  3295. begin
  3296. if AttDef.IsNamespaceDecl then
  3297. begin
  3298. if attrData^.FColonPos > 0 then
  3299. attrData^.FPrefix := FStdPrefix_xmlns;
  3300. attrData^.FNsUri := FStdUri_xmlns;
  3301. if (not AddBinding(attrData)) and FCanonical then
  3302. Dec(FAttrCount);
  3303. end
  3304. else if attrData^.FColonPos > 0 then
  3305. begin
  3306. attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
  3307. Inc(FPrefixedAttrs);
  3308. end
  3309. else
  3310. attrData^.FNsUri := FEmptyStr;
  3311. end;
  3312. end;
  3313. end;
  3314. end;
  3315. end;
  3316. end;
  3317. function TXMLTextReader.AddBinding(attrData: PNodeData): Boolean;
  3318. var
  3319. nsUri, Pfx: PHashItem;
  3320. begin
  3321. nsUri := FNameTable.FindOrAdd(attrData^.FValueStr);
  3322. if attrData^.FColonPos > 0 then
  3323. Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
  3324. else
  3325. Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
  3326. { 'xml' is allowed to be bound to the correct namespace }
  3327. if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
  3328. (Pfx = FStdPrefix_xmlns) or
  3329. (nsUri = FStduri_xmlns) then
  3330. begin
  3331. if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
  3332. DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
  3333. else
  3334. DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
  3335. end;
  3336. if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
  3337. DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
  3338. Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsUri);
  3339. if Result then
  3340. FNSHelper.BindPrefix(nsUri, Pfx);
  3341. end;
  3342. procedure TXMLTextReader.ProcessNamespaceAtts;
  3343. var
  3344. I, J: Integer;
  3345. Pfx, AttrName: PHashItem;
  3346. attrData: PNodeData;
  3347. b: TBinding;
  3348. begin
  3349. FNsAttHash.Init(FPrefixedAttrs);
  3350. for I := 1 to FAttrCount do
  3351. begin
  3352. attrData := @FNodeStack[FNesting+i];
  3353. if Assigned(attrData^.FNsUri) then
  3354. Continue;
  3355. if (attrData^.FColonPos < 1) then
  3356. begin
  3357. attrData^.FNsUri := FEmptyStr;
  3358. Continue;
  3359. end;
  3360. Pfx := attrData^.FPrefix;
  3361. b := TBinding(Pfx^.Data);
  3362. if not (Assigned(b) and Assigned (b.uri) and (b.uri^.Key <> '')) then
  3363. DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
  3364. { detect duplicates }
  3365. J := attrData^.FColonPos+1;
  3366. AttrName := attrData^.FQName;
  3367. if FNsAttHash.Locate(b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
  3368. DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
  3369. attrData^.FNsUri := b.uri;
  3370. end;
  3371. end;
  3372. function TXMLTextReader.ParseExternalID(out SysID, PubID: XMLString; // [75]
  3373. out PubIDLoc: TLocation; SysIdOptional: Boolean): Boolean;
  3374. var
  3375. I: Integer;
  3376. wc: WideChar;
  3377. begin
  3378. Result := False;
  3379. if FSource.Matches('SYSTEM') then
  3380. SysIdOptional := False
  3381. else if FSource.Matches('PUBLIC') then
  3382. begin
  3383. ExpectWhitespace;
  3384. ParseLiteral(FValue, ltPubid, True);
  3385. PubIDLoc := FTokenStart;
  3386. SetString(PubID, FValue.Buffer, FValue.Length);
  3387. for I := 1 to Length(PubID) do
  3388. begin
  3389. wc := PubID[I];
  3390. if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
  3391. FatalError('Illegal Public ID literal', -1);
  3392. end;
  3393. end
  3394. else
  3395. Exit;
  3396. if SysIdOptional then
  3397. SkipWhitespace
  3398. else
  3399. ExpectWhitespace;
  3400. if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
  3401. SetString(SysID, FValue.Buffer, FValue.Length);
  3402. Result := True;
  3403. end;
  3404. procedure TXMLTextReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  3405. var
  3406. L, StartPos, EndPos: Integer;
  3407. Entity: TEntityDecl;
  3408. begin
  3409. L := Length(attrData^.FValueStr);
  3410. case AttrDef.DataType of
  3411. dtId: begin
  3412. if not AddID(attrData) then
  3413. DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
  3414. end;
  3415. dtIdRef, dtIdRefs: begin
  3416. StartPos := 1;
  3417. while StartPos <= L do
  3418. begin
  3419. EndPos := StartPos;
  3420. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3421. Inc(EndPos);
  3422. if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
  3423. AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
  3424. StartPos := EndPos + 1;
  3425. end;
  3426. end;
  3427. dtEntity, dtEntities: begin
  3428. StartPos := 1;
  3429. while StartPos <= L do
  3430. begin
  3431. EndPos := StartPos;
  3432. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3433. Inc(EndPos);
  3434. Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
  3435. if (Entity = nil) or (Entity.FNotationName = '') then
  3436. ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
  3437. StartPos := EndPos + 1;
  3438. end;
  3439. end;
  3440. end;
  3441. end;
  3442. procedure TXMLTextReader.ValidateDTD;
  3443. var
  3444. I: Integer;
  3445. begin
  3446. if Assigned(FForwardRefs) then
  3447. begin
  3448. for I := 0 to FForwardRefs.Count-1 do
  3449. with PForwardRef(FForwardRefs[I])^ do
  3450. if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
  3451. DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
  3452. end;
  3453. end;
  3454. function TXMLTextReader.AddId(aNodeData: PNodeData): Boolean;
  3455. var
  3456. e: PHashItem;
  3457. begin
  3458. if FIDMap = nil then
  3459. FIDMap := THashTable.Create(256, False);
  3460. e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
  3461. Result := not Result;
  3462. if Result then
  3463. aNodeData^.FIDEntry := e;
  3464. end;
  3465. function TXMLTextReader.AllocAttributeData: PNodeData;
  3466. begin
  3467. Result := AllocNodeData(FNesting + FAttrCount + 1);
  3468. Result^.FNodeType := ntAttribute;
  3469. Result^.FIsDefault := False;
  3470. Inc(FAttrCount);
  3471. end;
  3472. procedure TXMLTextReader.AddPseudoAttribute(aName: PHashItem; const aValue: XMLString;
  3473. const nameLoc, valueLoc: TLocation);
  3474. begin
  3475. with AllocAttributeData^ do
  3476. begin
  3477. FQName := aName;
  3478. FColonPos := -1;
  3479. FValueStr := aValue;
  3480. FLoc := nameLoc;
  3481. FLoc2 := valueLoc;
  3482. end;
  3483. end;
  3484. function TXMLTextReader.AllocNodeData(AIndex: Integer): PNodeData;
  3485. begin
  3486. {make sure we have an extra slot to place child text/comment/etc}
  3487. if AIndex >= Length(FNodeStack)-1 then
  3488. SetLength(FNodeStack, AIndex * 2 + 2);
  3489. Result := @FNodeStack[AIndex];
  3490. Result^.FNext := nil;
  3491. Result^.FPrefix := nil;
  3492. Result^.FNsUri := nil;
  3493. Result^.FIDEntry := nil;
  3494. Result^.FValueStart := nil;
  3495. Result^.FValueLength := 0;
  3496. end;
  3497. procedure TXMLTextReader.AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  3498. var
  3499. chunk: PNodeData;
  3500. begin
  3501. { when parsing DTD, don't take ownership of allocated data }
  3502. chunk := FFreeAttrChunk;
  3503. if Assigned(chunk) and (FState <> rsDTD) then
  3504. begin
  3505. FFreeAttrChunk := chunk^.FNext;
  3506. chunk^.FNext := nil;
  3507. end
  3508. else { no free chunks, create a new one }
  3509. chunk := AllocMem(sizeof(TNodeData));
  3510. APrev^.FNext := chunk;
  3511. APrev := chunk;
  3512. { assume text node, for entity refs it is overridden later }
  3513. chunk^.FNodeType := ntText;
  3514. chunk^.FQName := nil;
  3515. chunk^.FColonPos := -1;
  3516. { without PWideChar typecast and in $T-, FPC treats '@' result as PAnsiChar... }
  3517. SetString(chunk^.FValueStr, PWideChar(@FValue.Buffer[Offset]), FValue.Length-Offset);
  3518. end;
  3519. procedure TXMLTextReader.CleanupAttributes;
  3520. var
  3521. i: Integer;
  3522. begin
  3523. {cleanup only specified attributes; default ones are owned by DTD}
  3524. for i := 1 to FSpecifiedAttrs do
  3525. CleanupAttribute(@FNodeStack[FNesting+i]);
  3526. FAttrCleanupFlag := False;
  3527. end;
  3528. procedure TXMLTextReader.CleanupAttribute(aNode: PNodeData);
  3529. var
  3530. chunk: PNodeData;
  3531. begin
  3532. if Assigned(aNode^.FNext) then
  3533. begin
  3534. chunk := aNode^.FNext;
  3535. while Assigned(chunk^.FNext) do
  3536. chunk := chunk^.FNext;
  3537. chunk^.FNext := FFreeAttrChunk;
  3538. FFreeAttrChunk := aNode^.FNext;
  3539. aNode^.FNext := nil;
  3540. end;
  3541. end;
  3542. procedure TXMLTextReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  3543. begin
  3544. FCurrNode := @FNodeStack[FNesting];
  3545. FCurrNode^.FNodeType := typ;
  3546. FCurrNode^.FQName := AName;
  3547. FCurrNode^.FColonPos := -1;
  3548. FCurrNode^.FValueStart := FValue.Buffer;
  3549. FCurrNode^.FValueLength := FValue.Length;
  3550. end;
  3551. function TXMLTextReader.SetupFakeLF(nextstate: TXMLToken): Boolean;
  3552. begin
  3553. FValue.Buffer[0] := #10;
  3554. FValue.Length := 1;
  3555. SetNodeInfoWithValue(ntWhitespace,nil);
  3556. FNext := nextstate;
  3557. Result := True;
  3558. end;
  3559. procedure TXMLTextReader.PushVC(aElDef: TElementDecl);
  3560. begin
  3561. Inc(FValidatorNesting);
  3562. if FValidatorNesting >= Length(FValidators) then
  3563. SetLength(FValidators, FValidatorNesting * 2);
  3564. with FValidators[FValidatorNesting] do
  3565. begin
  3566. FElementDef := aElDef;
  3567. FCurCP := nil;
  3568. FFailed := False;
  3569. FContentType := ctAny;
  3570. FSaViolation := False;
  3571. if Assigned(aElDef) then
  3572. begin
  3573. FContentType := aElDef.ContentType;
  3574. FSaViolation := FStandalone and aElDef.ExternallyDeclared;
  3575. end;
  3576. end;
  3577. end;
  3578. procedure TXMLTextReader.PopElement;
  3579. begin
  3580. if FNamespaces then
  3581. FNSHelper.PopScope;
  3582. if (FNesting = 0) and (not FFragmentMode) then
  3583. FState := rsEpilog;
  3584. FCurrNode := @FNodeStack[FNesting];
  3585. FNext := xtText;
  3586. end;
  3587. { TElementValidator }
  3588. function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
  3589. var
  3590. Next: TContentParticle;
  3591. begin
  3592. Result := True;
  3593. // if element is not declared, non-validity has been already reported, no need to report again...
  3594. if Assigned(Def) and Assigned(FElementDef) then
  3595. begin
  3596. case FElementDef.ContentType of
  3597. ctEmpty: Result := False;
  3598. ctChildren, ctMixed: begin
  3599. if FFailed then // if already detected a mismatch, don't waste time
  3600. Exit;
  3601. if FCurCP = nil then
  3602. Next := FElementDef.RootCP.FindFirst(Def)
  3603. else
  3604. Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
  3605. Result := Assigned(Next);
  3606. if Result then
  3607. FCurCP := Next
  3608. else
  3609. FFailed := True; // used to prevent extra error at the end of element
  3610. end;
  3611. // ctAny, ctUndeclared: returns True by default
  3612. end;
  3613. end;
  3614. end;
  3615. function TElementValidator.Incomplete: Boolean;
  3616. begin
  3617. if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
  3618. begin
  3619. if FCurCP <> nil then
  3620. Result := FCurCP.MoreRequired(0) { arg ignored here }
  3621. else
  3622. Result := FElementDef.RootCP.IsRequired;
  3623. end
  3624. else
  3625. Result := False;
  3626. end;
  3627. end.