xmlread.pp 123 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764277427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLRead;
  13. {$ifdef fpc}
  14. {$MODE objfpc}{$H+}
  15. {$endif}
  16. interface
  17. uses
  18. SysUtils, Classes, DOM, xmlutils;
  19. type
  20. TErrorSeverity = (esWarning, esError, esFatal);
  21. EXMLReadError = class(Exception)
  22. private
  23. FSeverity: TErrorSeverity;
  24. FErrorMessage: string;
  25. FLine: Integer;
  26. FLinePos: Integer;
  27. public
  28. property Severity: TErrorSeverity read FSeverity;
  29. property ErrorMessage: string read FErrorMessage;
  30. property Line: Integer read FLine;
  31. property LinePos: Integer read FLinePos;
  32. end;
  33. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  34. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
  35. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream); overload;
  36. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  37. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
  38. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
  39. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream); overload;
  40. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String); overload;
  41. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  42. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
  43. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream); overload;
  44. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  45. type
  46. TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
  47. TXMLReaderSettings = class(TObject)
  48. private
  49. FValidate: Boolean;
  50. FPreserveWhitespace: Boolean;
  51. FExpandEntities: Boolean;
  52. FIgnoreComments: Boolean;
  53. FCDSectionsAsText: Boolean;
  54. FNamespaces: Boolean;
  55. FDisallowDoctype: Boolean;
  56. FCanonical: Boolean;
  57. FMaxChars: Cardinal;
  58. FOnError: TXMLErrorEvent;
  59. function GetCanonical: Boolean;
  60. procedure SetCanonical(aValue: Boolean);
  61. public
  62. property Validate: Boolean read FValidate write FValidate;
  63. property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
  64. property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
  65. property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
  66. property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
  67. property Namespaces: Boolean read FNamespaces write FNamespaces;
  68. property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
  69. property MaxChars: Cardinal read FMaxChars write FMaxChars;
  70. property CanonicalForm: Boolean read GetCanonical write SetCanonical;
  71. property OnError: TXMLErrorEvent read FOnError write FOnError;
  72. end;
  73. TDOMParseOptions = TXMLReaderSettings;
  74. // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
  75. TXMLContextAction = (
  76. xaAppendAsChildren = 1,
  77. xaReplaceChildren,
  78. xaInsertBefore,
  79. xaInsertAfter,
  80. xaReplace);
  81. TXMLInputSource = class(TObject)
  82. private
  83. FStream: TStream;
  84. FStringData: string;
  85. FBaseURI: XMLString;
  86. FSystemID: XMLString;
  87. FPublicID: XMLString;
  88. // FEncoding: string;
  89. public
  90. constructor Create(AStream: TStream); overload;
  91. constructor Create(const AStringData: string); overload;
  92. property Stream: TStream read FStream;
  93. property StringData: string read FStringData;
  94. property BaseURI: XMLString read FBaseURI write FBaseURI;
  95. property SystemID: XMLString read FSystemID write FSystemID;
  96. property PublicID: XMLString read FPublicID write FPublicID;
  97. // property Encoding: string read FEncoding write FEncoding;
  98. end;
  99. TDOMParser = class(TObject)
  100. private
  101. FOptions: TDOMParseOptions;
  102. function GetOnError: TXMLErrorEvent;
  103. procedure SetOnError(value: TXMLErrorEvent);
  104. public
  105. constructor Create;
  106. destructor Destroy; override;
  107. procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  108. procedure ParseUri(const URI: XMLString; out ADoc: TXMLDocument);
  109. function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
  110. Action: TXMLContextAction): TDOMNode;
  111. property Options: TDOMParseOptions read FOptions;
  112. property OnError: TXMLErrorEvent read GetOnError write SetOnError;
  113. end;
  114. TDecoder = record
  115. Context: Pointer;
  116. Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  117. Cleanup: procedure(Context: Pointer); stdcall;
  118. end;
  119. TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
  120. procedure RegisterDecoder(Proc: TGetDecoderProc);
  121. // =======================================================
  122. implementation
  123. uses
  124. UriParser, dtdmodel, xmlreader;
  125. const
  126. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  127. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  128. '#', '@', '$', '_', '%'];
  129. type
  130. TDOMDocumentTypeEx = class(TDOMDocumentType);
  131. TDOMTopNodeEx = class(TDOMNode_TopLevel);
  132. TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
  133. TDOMEntityEx = class(TDOMEntity);
  134. TXMLTextReader = class;
  135. TXMLCharSource = class(TObject)
  136. private
  137. FBuf: PWideChar;
  138. FBufEnd: PWideChar;
  139. FReader: TXMLTextReader;
  140. FParent: TXMLCharSource;
  141. FEntity: TEntityDecl;
  142. FLineNo: Integer;
  143. LFPos: PWideChar;
  144. FXML11Rules: Boolean;
  145. FSourceURI: XMLString;
  146. FCharCount: Cardinal;
  147. FStartNesting: Integer;
  148. FXMLVersion: TXMLVersion;
  149. FXMLEncoding: XMLString;
  150. function GetSourceURI: XMLString;
  151. protected
  152. function Reload: Boolean; virtual;
  153. public
  154. Kind: TXMLSourceKind;
  155. constructor Create(const AData: XMLString);
  156. procedure NextChar;
  157. procedure NewLine; virtual;
  158. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  159. wsflag: PBoolean = nil): WideChar; virtual;
  160. procedure Initialize; virtual;
  161. function SetEncoding(const AEncoding: string): Boolean; virtual;
  162. function Matches(const arg: XMLString): Boolean;
  163. function MatchesLong(const arg: XMLString): Boolean;
  164. property SourceURI: XMLString read GetSourceURI write FSourceURI;
  165. end;
  166. TXMLDecodingSource = class(TXMLCharSource)
  167. private
  168. FCharBuf: PChar;
  169. FCharBufEnd: PChar;
  170. FBufStart: PWideChar;
  171. FDecoder: TDecoder;
  172. FHasBOM: Boolean;
  173. FFixedUCS2: string;
  174. FBufSize: Integer;
  175. procedure DecodingError(const Msg: string);
  176. protected
  177. function Reload: Boolean; override;
  178. procedure FetchData; virtual;
  179. public
  180. procedure AfterConstruction; override;
  181. destructor Destroy; override;
  182. function SetEncoding(const AEncoding: string): Boolean; override;
  183. procedure NewLine; override;
  184. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  185. wsflag: PBoolean = nil): WideChar; override;
  186. procedure Initialize; override;
  187. end;
  188. TXMLStreamInputSource = class(TXMLDecodingSource)
  189. private
  190. FAllocated: PChar;
  191. FStream: TStream;
  192. FCapacity: Integer;
  193. FOwnStream: Boolean;
  194. FEof: Boolean;
  195. public
  196. constructor Create(AStream: TStream; AOwnStream: Boolean);
  197. destructor Destroy; override;
  198. procedure FetchData; override;
  199. end;
  200. TXMLFileInputSource = class(TXMLDecodingSource)
  201. private
  202. FFile: ^Text;
  203. FString: string;
  204. FTmp: string;
  205. public
  206. constructor Create(var AFile: Text);
  207. procedure FetchData; override;
  208. end;
  209. PForwardRef = ^TForwardRef;
  210. TForwardRef = record
  211. Value: XMLString;
  212. Loc: TLocation;
  213. end;
  214. TElementValidator = object
  215. FElementDef: TElementDecl;
  216. FCurCP: TContentParticle;
  217. FFailed: Boolean;
  218. FSaViolation: Boolean;
  219. FContentType: TElementContentType; // =ctAny when FElementDef is nil
  220. function IsElementAllowed(Def: TElementDecl): Boolean;
  221. function Incomplete: Boolean;
  222. end;
  223. TNodeDataDynArray = array of TNodeData;
  224. TValidatorDynArray = array of TElementValidator;
  225. TXMLReadState = (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
  226. TCheckNameFlags = set of (cnOptional, cnToken);
  227. TXMLToken = (xtNone, xtEOF, xtText, xtElement, xtEndElement,
  228. xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
  229. xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity, xtFakeLF);
  230. TAttributeReadState = (arsNone, arsText, arsEntity, arsEntityEnd, arsPushEntity);
  231. TLiteralType = (ltPlain, ltPubid, ltEntity);
  232. TEntityEvent = procedure(Sender: TXMLTextReader; AEntity: TEntityDecl) of object;
  233. TXMLTextReader = class(TXMLReader, IXmlLineInfo)
  234. private
  235. FSource: TXMLCharSource;
  236. FNameTable: THashTable;
  237. FXML11: Boolean;
  238. FNameTableOwned: Boolean;
  239. FState: TXMLReadState;
  240. FHavePERefs: Boolean;
  241. FInsideDecl: Boolean;
  242. FValue: TWideCharBuf;
  243. FEntityValue: TWideCharBuf;
  244. FName: TWideCharBuf;
  245. FTokenStart: TLocation;
  246. FStandalone: Boolean;
  247. FDocType: TDTDModel;
  248. FPEMap: THashTable;
  249. FForwardRefs: TFPList;
  250. FDTDStartPos: PWideChar;
  251. FIntSubset: TWideCharBuf;
  252. FAttrTag: Cardinal;
  253. FDTDProcessed: Boolean;
  254. FFragmentMode: Boolean;
  255. FNext: TXMLToken;
  256. FCurrEntity: TEntityDecl;
  257. FIDMap: THashTable;
  258. FAttrDefIndex: array of Cardinal;
  259. FNSHelper: TNSSupport;
  260. FNsAttHash: TDblHashArray;
  261. FStdPrefix_xml: PHashItem;
  262. FStdPrefix_xmlns: PHashItem;
  263. FStdUri_xml: PHashItem;
  264. FStdUri_xmlns: PHashItem;
  265. FColonPos: Integer;
  266. FValidate: Boolean; // parsing options, copy of FCtrl.Options
  267. FPreserveWhitespace: Boolean;
  268. FExpandEntities: Boolean;
  269. FIgnoreComments: Boolean;
  270. FCDSectionsAsText: Boolean;
  271. FNamespaces: Boolean;
  272. FDisallowDoctype: Boolean;
  273. FCanonical: Boolean;
  274. FMaxChars: Cardinal;
  275. FOnError: TXMLErrorEvent;
  276. FCurrAttrIndex: Integer;
  277. FOnEntity: TEntityEvent;
  278. procedure CleanAttrReadState;
  279. procedure SetEOFState;
  280. procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
  281. procedure Initialize(ASource: TXMLCharSource);
  282. procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  283. function ContextPush(AEntity: TEntityDecl; DummySource: Boolean = False): Boolean;
  284. function ContextPop(Forced: Boolean = False): Boolean;
  285. function ParseQuantity: TCPQuant;
  286. procedure StoreLocation(out Loc: TLocation);
  287. procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  288. procedure AddForwardRef(Buf: PWideChar; Length: Integer);
  289. procedure ClearForwardRefs;
  290. procedure ValidateIdRefs;
  291. procedure CallErrorHandler(E: EXMLReadError);
  292. function FindOrCreateElDef: TElementDecl;
  293. function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  294. procedure CheckMaxChars(ToAdd: Cardinal);
  295. function AllocNodeData(AIndex: Integer): PNodeData;
  296. function AllocAttributeData: PNodeData;
  297. procedure AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  298. procedure CleanupAttribute(aNode: PNodeData);
  299. procedure CleanupAttributes;
  300. procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  301. function SetupFakeLF(nextstate: TXMLToken): Boolean;
  302. function AddId(aNodeData: PNodeData): Boolean;
  303. function QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  304. function _AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  305. function _Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  306. protected
  307. FNesting: Integer;
  308. FCurrNode: PNodeData;
  309. FAttrCount: Integer;
  310. FPrefixedAttrs: Integer;
  311. FSpecifiedAttrs: Integer;
  312. FNodeStack: TNodeDataDynArray;
  313. FValidatorNesting: Integer;
  314. FValidators: TValidatorDynArray;
  315. FFreeAttrChunk: PNodeData;
  316. FAttrCleanupFlag: Boolean;
  317. // ReadAttributeValue state
  318. FAttrReadState: TAttributeReadState;
  319. FAttrBaseSource: TObject;
  320. procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
  321. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  322. const ErrPos: TLocation); overload;
  323. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  324. const args: array of const; const ErrPos: TLocation); overload;
  325. procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
  326. procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
  327. procedure FatalError(Expected: WideChar); overload;
  328. function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
  329. function SkipS(required: Boolean = False): Boolean;
  330. procedure ExpectWhitespace;
  331. procedure ExpectString(const s: String);
  332. procedure ExpectChar(wc: WideChar);
  333. function CheckForChar(c: WideChar): Boolean;
  334. procedure RaiseNameNotFound;
  335. function CheckName(aFlags: TCheckNameFlags = []): Boolean;
  336. procedure CheckNCName;
  337. function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  338. Required: Boolean): Boolean;
  339. procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
  340. procedure ParseComment(discard: Boolean); // [15]
  341. procedure ParsePI; // [16]
  342. procedure ParseXmlOrTextDecl(TextDecl: Boolean);
  343. procedure ExpectEq;
  344. procedure ParseDoctypeDecl; // [28]
  345. procedure ParseMarkupDecl; // [29]
  346. procedure ParseIgnoreSection;
  347. procedure ParseStartTag; // [39]
  348. procedure ParseEndTag; // [42]
  349. procedure HandleEntityStart;
  350. procedure HandleEntityEnd;
  351. procedure DoStartEntity;
  352. procedure ParseAttribute(ElDef: TElementDecl);
  353. function ReadTopLevel: Boolean;
  354. procedure NextAttrValueChunk;
  355. function GetHasLineInfo: Boolean;
  356. function GetLineNumber: Integer;
  357. function GetLinePosition: Integer;
  358. public
  359. function Read: Boolean; override;
  360. function MoveToFirstAttribute: Boolean; override;
  361. function MoveToNextAttribute: Boolean; override;
  362. function MoveToElement: Boolean; override;
  363. function ReadAttributeValue: Boolean; override;
  364. procedure Close; override;
  365. procedure ResolveEntity; override;
  366. function GetAttribute(i: Integer): XMLString; override;
  367. function GetAttribute(const AName: XMLString): XMLString; override;
  368. function GetAttribute(const ALocalName, nsuri: XMLString): XMLString; override;
  369. function LookupNamespace(const APrefix: XMLString): XMLString; override;
  370. property LineNumber: Integer read GetLineNumber;
  371. property LinePosition: Integer read GetLinePosition;
  372. protected
  373. function GetDepth: Integer; override;
  374. function GetNodeType: TXmlNodeType; override;
  375. function GetName: XMLString; override;
  376. function GetValue: XMLString; override;
  377. function GetLocalName: XMLString; override;
  378. function GetPrefix: XMLString; override;
  379. function GetNamespaceUri: XMLString; override;
  380. function GetHasValue: Boolean; override;
  381. function GetAttributeCount: Integer; override;
  382. function GetBaseUri: XMLString; override;
  383. function GetIsDefault: Boolean; override;
  384. function ResolvePredefined: Boolean;
  385. function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
  386. function PrefetchEntity(AEntity: TEntityDecl): Boolean;
  387. procedure StartPE;
  388. function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  389. function ParseExternalID(out SysID, PubID: XMLString; // [75]
  390. SysIdOptional: Boolean): Boolean;
  391. procedure CheckPENesting(aExpected: TObject);
  392. procedure ParseEntityDecl;
  393. procedure ParseAttlistDecl;
  394. procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
  395. procedure ParseElementDecl;
  396. procedure ParseNotationDecl;
  397. function ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  398. procedure ProcessDefaultAttributes(ElDef: TElementDecl);
  399. procedure ProcessNamespaceAtts;
  400. function AddBinding(attrData: PNodeData): Boolean;
  401. procedure PushVC(aElDef: TElementDecl);
  402. procedure PopElement;
  403. procedure ValidateDTD;
  404. procedure ValidateCurrentNode;
  405. procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
  406. procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
  407. procedure DTDReloadHook;
  408. procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  409. procedure SetOptions(AValue: TXMLReaderSettings);
  410. procedure SetNametable(ANameTable: THashTable);
  411. public
  412. constructor Create(var AFile: Text; ANameTable: THashTable); overload;
  413. constructor Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable); overload;
  414. constructor Create(ASrc: TXMLCharSource; AParent: TXMLTextReader); overload;
  415. constructor Create(const uri: XMLString; ANameTable: THashTable; ASettings: TXMLReaderSettings); overload;
  416. constructor Create(ASrc: TXMLInputSource; ANameTable: THashTable; ASettings: TXMLReaderSettings); overload;
  417. destructor Destroy; override;
  418. procedure AfterConstruction; override;
  419. property OnEntity: TEntityEvent read FOnEntity write FOnEntity;
  420. end;
  421. TLoader = object
  422. doc: TDOMDocument;
  423. reader: TXMLTextReader;
  424. function DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
  425. function CreatePINode: TDOMNode;
  426. procedure ParseContent(cursor: TDOMNode_WithChildren);
  427. procedure ProcessXML(ADoc: TDOMDocument; AReader: TXMLTextReader);
  428. procedure ProcessFragment(AOwner: TDOMNode; AReader: TXMLTextReader);
  429. procedure ProcessDTD(ADoc: TDOMDocument; AReader: TXMLTextReader);
  430. procedure ProcessEntity(Sender: TXMLTextReader; AEntity: TEntityDecl);
  431. end;
  432. const
  433. NullLocation: TLocation = (Line: 0; LinePos: 0);
  434. { Decoders }
  435. var
  436. Decoders: array of TGetDecoderProc;
  437. procedure RegisterDecoder(Proc: TGetDecoderProc);
  438. var
  439. L: Integer;
  440. begin
  441. L := Length(Decoders);
  442. SetLength(Decoders, L+1);
  443. Decoders[L] := Proc;
  444. end;
  445. function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
  446. var
  447. I: Integer;
  448. begin
  449. Result := False;
  450. for I := 0 to High(Decoders) do
  451. if Decoders[I](AEncoding, Decoder) then
  452. begin
  453. Result := True;
  454. Exit;
  455. end;
  456. end;
  457. function Is_8859_1(const AEncoding: string): Boolean;
  458. begin
  459. Result := SameText(AEncoding, 'ISO-8859-1') or
  460. SameText(AEncoding, 'ISO_8859-1') or
  461. SameText(AEncoding, 'latin1') or
  462. SameText(AEncoding, 'iso-ir-100') or
  463. SameText(AEncoding, 'l1') or
  464. SameText(AEncoding, 'IBM819') or
  465. SameText(AEncoding, 'CP819') or
  466. SameText(AEncoding, 'csISOLatin1') or
  467. // This one is not in character-sets.txt, but was used in FPC documentation,
  468. // and still being used in fcl-registry package
  469. SameText(AEncoding, 'ISO8859-1');
  470. end;
  471. { TXMLReaderSettings }
  472. function TXMLReaderSettings.GetCanonical: Boolean;
  473. begin
  474. Result := FCanonical and FExpandEntities and FCDSectionsAsText and
  475. { (not normalizeCharacters) and } FNamespaces and
  476. { namespaceDeclarations and } FPreserveWhitespace;
  477. end;
  478. procedure TXMLReaderSettings.SetCanonical(aValue: Boolean);
  479. begin
  480. FCanonical := aValue;
  481. if aValue then
  482. begin
  483. FExpandEntities := True;
  484. FCDSectionsAsText := True;
  485. FNamespaces := True;
  486. FPreserveWhitespace := True;
  487. { normalizeCharacters := False; }
  488. { namespaceDeclarations := True; }
  489. { wellFormed := True; }
  490. end;
  491. end;
  492. { TXMLInputSource }
  493. constructor TXMLInputSource.Create(AStream: TStream);
  494. begin
  495. inherited Create;
  496. FStream := AStream;
  497. end;
  498. constructor TXMLInputSource.Create(const AStringData: string);
  499. begin
  500. inherited Create;
  501. FStringData := AStringData;
  502. end;
  503. { TDOMParser }
  504. constructor TDOMParser.Create;
  505. begin
  506. FOptions := TDOMParseOptions.Create;
  507. end;
  508. destructor TDOMParser.Destroy;
  509. begin
  510. FOptions.Free;
  511. inherited Destroy;
  512. end;
  513. function TDOMParser.GetOnError: TXMLErrorEvent;
  514. begin
  515. result := Options.OnError;
  516. end;
  517. procedure TDOMParser.SetOnError(value: TXMLErrorEvent);
  518. begin
  519. Options.OnError := value;
  520. end;
  521. procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  522. var
  523. Reader: TXMLTextReader;
  524. ldr: TLoader;
  525. begin
  526. ADoc := TXMLDocument.Create;
  527. Reader := TXMLTextReader.Create(Src, ADoc.Names, Options);
  528. try
  529. ldr.ProcessXML(ADoc, Reader);
  530. finally
  531. Reader.Free;
  532. end;
  533. end;
  534. procedure TDOMParser.ParseUri(const URI: XMLString; out ADoc: TXMLDocument);
  535. var
  536. Reader: TXMLTextReader;
  537. ldr: TLoader;
  538. begin
  539. ADoc := TXMLDocument.Create;
  540. Reader := TXMLTextReader.Create(URI, ADoc.Names, Options);
  541. try
  542. ldr.ProcessXML(ADoc, Reader)
  543. finally
  544. Reader.Free;
  545. end;
  546. end;
  547. function TDOMParser.ParseWithContext(Src: TXMLInputSource;
  548. Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
  549. var
  550. Frag: TDOMDocumentFragment;
  551. node: TDOMNode;
  552. reader: TXMLTextReader;
  553. ldr: TLoader;
  554. begin
  555. if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
  556. node := Context.ParentNode
  557. else
  558. node := Context;
  559. // TODO: replacing document isn't yet supported
  560. if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
  561. raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
  562. if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
  563. raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
  564. reader := TXMLTextReader.Create(Src, Context.OwnerDocument.Names, Options);
  565. try
  566. Frag := Context.OwnerDocument.CreateDocumentFragment;
  567. try
  568. ldr.ProcessFragment(Frag, reader);
  569. Result := Frag.FirstChild;
  570. case Action of
  571. xaAppendAsChildren: Context.AppendChild(Frag);
  572. xaReplaceChildren: begin
  573. Context.TextContent := ''; // removes children
  574. Context.ReplaceChild(Frag, Context.FirstChild);
  575. end;
  576. xaInsertBefore: node.InsertBefore(Frag, Context);
  577. xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
  578. xaReplace: node.ReplaceChild(Frag, Context);
  579. end;
  580. finally
  581. Frag.Free;
  582. end;
  583. finally
  584. reader.Free;
  585. end;
  586. end;
  587. { TXMLCharSource }
  588. constructor TXMLCharSource.Create(const AData: XMLString);
  589. begin
  590. inherited Create;
  591. FLineNo := 1;
  592. FBuf := PWideChar(AData);
  593. FBufEnd := FBuf + Length(AData);
  594. LFPos := FBuf-1;
  595. FCharCount := Length(AData);
  596. end;
  597. procedure TXMLCharSource.Initialize;
  598. begin
  599. end;
  600. function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
  601. begin
  602. Result := True; // always succeed
  603. end;
  604. function TXMLCharSource.GetSourceURI: XMLString;
  605. begin
  606. if FSourceURI <> '' then
  607. Result := FSourceURI
  608. else if Assigned(FParent) then
  609. Result := FParent.SourceURI
  610. else
  611. Result := '';
  612. end;
  613. function TXMLCharSource.Reload: Boolean;
  614. begin
  615. Result := False;
  616. end;
  617. procedure TXMLCharSource.NewLine;
  618. begin
  619. Inc(FLineNo);
  620. LFPos := FBuf;
  621. end;
  622. function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  623. wsflag: PBoolean): WideChar;
  624. var
  625. old: PWideChar;
  626. nonws: Boolean;
  627. begin
  628. old := FBuf;
  629. nonws := False;
  630. repeat
  631. if FBuf^ = #10 then
  632. NewLine;
  633. if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
  634. Break;
  635. if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
  636. nonws := True;
  637. Inc(FBuf);
  638. until False;
  639. Result := FBuf^;
  640. BufAppendChunk(ToFill, old, FBuf);
  641. if Assigned(wsflag) then
  642. wsflag^ := wsflag^ or nonws;
  643. end;
  644. function TXMLCharSource.Matches(const arg: XMLString): Boolean;
  645. begin
  646. Result := False;
  647. if (FBufEnd >= FBuf + Length(arg)) or Reload then
  648. Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
  649. if Result then
  650. begin
  651. Inc(FBuf, Length(arg));
  652. if FBuf >= FBufEnd then
  653. Reload;
  654. end;
  655. end;
  656. { Used to check element name in end-tags, difference from Matches is that
  657. buffer may be reloaded more than once. XML has no restriction on name
  658. length, so a name longer than input buffer may be encountered. }
  659. function TXMLCharSource.MatchesLong(const arg: XMLString): Boolean;
  660. var
  661. idx, len, chunk: Integer;
  662. begin
  663. Result := False;
  664. idx := 1;
  665. len := Length(arg);
  666. repeat
  667. if (FBuf >= FBufEnd) and not Reload then
  668. Exit;
  669. if FBufEnd >= FBuf + len then
  670. chunk := len
  671. else
  672. chunk := FBufEnd - FBuf;
  673. if not CompareMem(@arg[idx], FBuf, chunk*sizeof(WideChar)) then
  674. Exit;
  675. Inc(FBuf, chunk);
  676. Inc(idx,chunk);
  677. Dec(len,chunk);
  678. until len = 0;
  679. Result := True;
  680. if FBuf >= FBufEnd then
  681. Reload;
  682. end;
  683. { TXMLDecodingSource }
  684. procedure TXMLDecodingSource.AfterConstruction;
  685. begin
  686. inherited AfterConstruction;
  687. FBufStart := AllocMem(4096);
  688. FBuf := FBufStart;
  689. FBufEnd := FBuf;
  690. LFPos := FBuf-1;
  691. end;
  692. destructor TXMLDecodingSource.Destroy;
  693. begin
  694. FreeMem(FBufStart);
  695. if Assigned(FDecoder.Cleanup) then
  696. FDecoder.Cleanup(FDecoder.Context);
  697. inherited Destroy;
  698. end;
  699. procedure TXMLDecodingSource.FetchData;
  700. begin
  701. end;
  702. procedure TXMLDecodingSource.DecodingError(const Msg: string);
  703. begin
  704. // count line endings to obtain correct error location
  705. while FBuf < FBufEnd do
  706. begin
  707. if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
  708. begin
  709. if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
  710. ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
  711. Inc(FBuf);
  712. LFPos := FBuf;
  713. Inc(FLineNo);
  714. end;
  715. Inc(FBuf);
  716. end;
  717. FReader.FatalError(Msg);
  718. end;
  719. function TXMLDecodingSource.Reload: Boolean;
  720. var
  721. Remainder: PtrInt;
  722. r, inLeft: Cardinal;
  723. rslt: Integer;
  724. begin
  725. if Kind = skInternalSubset then
  726. FReader.DTDReloadHook;
  727. Remainder := FBufEnd - FBuf;
  728. if Remainder > 0 then
  729. Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
  730. Dec(LFPos, FBuf-FBufStart);
  731. FBuf := FBufStart;
  732. FBufEnd := FBufStart + Remainder;
  733. repeat
  734. inLeft := FCharBufEnd - FCharBuf;
  735. if inLeft < 4 then // may contain an incomplete char
  736. begin
  737. FetchData;
  738. inLeft := FCharBufEnd - FCharBuf;
  739. if inLeft <= 0 then
  740. Break;
  741. end;
  742. r := FBufStart + FBufSize - FBufEnd;
  743. if r = 0 then
  744. Break;
  745. rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
  746. { Sanity checks: r and inLeft must not increase. }
  747. if inLeft + FCharBuf <= FCharBufEnd then
  748. FCharBuf := FCharBufEnd - inLeft
  749. else
  750. DecodingError('Decoder error: input byte count out of bounds');
  751. if r + FBufEnd <= FBufStart + FBufSize then
  752. FBufEnd := FBufStart + FBufSize - r
  753. else
  754. DecodingError('Decoder error: output char count out of bounds');
  755. if rslt = 0 then
  756. Break
  757. else if rslt < 0 then
  758. DecodingError('Invalid character in input stream')
  759. else
  760. FReader.CheckMaxChars(rslt);
  761. until False;
  762. FBufEnd^ := #0;
  763. Result := FBuf < FBufEnd;
  764. end;
  765. const
  766. XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
  767. procedure TXMLDecodingSource.Initialize;
  768. begin
  769. inherited;
  770. FLineNo := 1;
  771. FDecoder.Decode := @Decode_UTF8;
  772. FFixedUCS2 := '';
  773. if FCharBufEnd-FCharBuf > 1 then
  774. begin
  775. if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
  776. begin
  777. FFixedUCS2 := 'UTF-16BE';
  778. FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  779. end
  780. else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
  781. begin
  782. FFixedUCS2 := 'UTF-16LE';
  783. FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  784. end;
  785. end;
  786. FBufSize := 6; // possible BOM and '<?xml'
  787. Reload;
  788. if FBuf^ = #$FEFF then
  789. begin
  790. FHasBOM := True;
  791. Inc(FBuf);
  792. end;
  793. LFPos := FBuf-1;
  794. if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
  795. begin
  796. FBufSize := 3; // don't decode past XML declaration
  797. Inc(FBuf, Length(XmlSign));
  798. FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
  799. end;
  800. FBufSize := 2047;
  801. if FReader.FXML11 then
  802. FXml11Rules := True;
  803. end;
  804. function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
  805. var
  806. NewDecoder: TDecoder;
  807. begin
  808. Result := True;
  809. if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
  810. Exit;
  811. if FFixedUCS2 <> '' then
  812. begin
  813. Result := SameText(AEncoding, FFixedUCS2) or
  814. SameText(AEncoding, 'UTF-16') or
  815. SameText(AEncoding, 'unicode');
  816. Exit;
  817. end;
  818. // TODO: must fail when a byte-based stream is labeled as word-based.
  819. // see rmt-e2e-61, it now fails but for a completely different reason.
  820. FillChar(NewDecoder, sizeof(TDecoder), 0);
  821. if Is_8859_1(AEncoding) then
  822. FDecoder.Decode := @Decode_8859_1
  823. else if FindDecoder(AEncoding, NewDecoder) then
  824. FDecoder := NewDecoder
  825. else
  826. Result := False;
  827. end;
  828. procedure TXMLDecodingSource.NewLine;
  829. begin
  830. case FBuf^ of
  831. #10: ;
  832. #13: begin
  833. // Reload trashes the buffer, it should be consumed beforehand
  834. if (FBufEnd >= FBuf+2) or Reload then
  835. begin
  836. if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
  837. Inc(FBuf);
  838. end;
  839. FBuf^ := #10;
  840. end;
  841. #$85, #$2028: if FXML11Rules then
  842. FBuf^ := #10
  843. else
  844. Exit;
  845. else
  846. Exit;
  847. end;
  848. Inc(FLineNo);
  849. LFPos := FBuf;
  850. end;
  851. { TXMLStreamInputSource }
  852. const
  853. Slack = 16;
  854. constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
  855. begin
  856. FStream := AStream;
  857. FCapacity := 4096;
  858. GetMem(FAllocated, FCapacity+Slack);
  859. FCharBuf := FAllocated+(Slack-4);
  860. FCharBufEnd := FCharBuf;
  861. FOwnStream := AOwnStream;
  862. FetchData;
  863. end;
  864. destructor TXMLStreamInputSource.Destroy;
  865. begin
  866. FreeMem(FAllocated);
  867. if FOwnStream then
  868. FStream.Free;
  869. inherited Destroy;
  870. end;
  871. procedure TXMLStreamInputSource.FetchData;
  872. var
  873. Remainder, BytesRead: Integer;
  874. OldBuf: PChar;
  875. begin
  876. Assert(FCharBufEnd - FCharBuf < Slack-4);
  877. if FEof then
  878. Exit;
  879. OldBuf := FCharBuf;
  880. Remainder := FCharBufEnd - FCharBuf;
  881. if Remainder < 0 then
  882. Remainder := 0;
  883. FCharBuf := FAllocated+Slack-4-Remainder;
  884. if Remainder > 0 then
  885. Move(OldBuf^, FCharBuf^, Remainder);
  886. BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
  887. if BytesRead < FCapacity then
  888. FEof := True;
  889. FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
  890. { Null-termination has been removed:
  891. 1) Built-in decoders don't need it because they respect the buffer length.
  892. 2) It was causing unaligned access errors on ARM CPUs.
  893. }
  894. //PWideChar(FCharBufEnd)^ := #0;
  895. end;
  896. { TXMLFileInputSource }
  897. constructor TXMLFileInputSource.Create(var AFile: Text);
  898. begin
  899. FFile := @AFile;
  900. SourceURI := FilenameToURI(TTextRec(AFile).Name);
  901. FetchData;
  902. end;
  903. procedure TXMLFileInputSource.FetchData;
  904. var
  905. Remainder: Integer;
  906. begin
  907. if not Eof(FFile^) then
  908. begin
  909. Remainder := FCharBufEnd - FCharBuf;
  910. if Remainder > 0 then
  911. SetString(FTmp, FCharBuf, Remainder);
  912. ReadLn(FFile^, FString);
  913. FString := FString + #10; // bad solution...
  914. if Remainder > 0 then
  915. Insert(FTmp, FString, 1);
  916. FCharBuf := PChar(FString);
  917. FCharBufEnd := FCharBuf + Length(FString);
  918. end;
  919. end;
  920. { helper that closes handle upon destruction }
  921. type
  922. THandleOwnerStream = class(THandleStream)
  923. public
  924. destructor Destroy; override;
  925. end;
  926. destructor THandleOwnerStream.Destroy;
  927. begin
  928. FileClose(Handle);
  929. inherited Destroy;
  930. end;
  931. { TXMLTextReader }
  932. function TXMLTextReader.QueryInterface(constref iid: TGUID; out obj): HRESULT; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  933. begin
  934. if GetInterface(iid,obj) then
  935. result := S_OK
  936. else
  937. result:= E_NOINTERFACE;
  938. end;
  939. function TXMLTextReader._AddRef: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  940. begin
  941. result := -1;
  942. end;
  943. function TXMLTextReader._Release: Longint; {$IFNDEF WINDOWS}cdecl{$ELSE}stdcall{$ENDIF};
  944. begin
  945. result := -1;
  946. end;
  947. procedure TXMLTextReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  948. begin
  949. SrcOut := nil;
  950. if Assigned(SrcIn) then
  951. begin
  952. if Assigned(SrcIn.FStream) then
  953. SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
  954. else if SrcIn.FStringData <> '' then
  955. SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
  956. else if (SrcIn.SystemID <> '') then
  957. ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
  958. end;
  959. if (SrcOut = nil) and (FSource = nil) then
  960. DoErrorPos(esFatal, 'No input source specified', NullLocation);
  961. end;
  962. procedure TXMLTextReader.StoreLocation(out Loc: TLocation);
  963. begin
  964. Loc.Line := FSource.FLineNo;
  965. Loc.LinePos := FSource.FBuf-FSource.LFPos;
  966. end;
  967. function TXMLTextReader.ResolveResource(const ASystemID, APublicID, ABaseURI: XMLString; out Source: TXMLCharSource): Boolean;
  968. var
  969. SrcURI: XMLString;
  970. Filename: string;
  971. Stream: TStream;
  972. fd: THandle;
  973. begin
  974. Source := nil;
  975. Result := False;
  976. if not ResolveRelativeURI(ABaseURI, ASystemID, SrcURI) then
  977. Exit;
  978. { TODO: alternative resolvers
  979. These may be 'internal' resolvers or a handler set by application.
  980. Internal resolvers should probably produce a TStream
  981. ( so that internal classes need not be exported ).
  982. External resolver will produce TXMLInputSource that should be converted.
  983. External resolver must NOT be called for root entity.
  984. External resolver can return nil, in which case we do the default }
  985. if URIToFilename(SrcURI, Filename) then
  986. begin
  987. fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
  988. if fd <> THandle(-1) then
  989. begin
  990. Stream := THandleOwnerStream.Create(fd);
  991. Source := TXMLStreamInputSource.Create(Stream, True);
  992. Source.SourceURI := SrcURI;
  993. end;
  994. end;
  995. Result := Assigned(Source);
  996. end;
  997. procedure TXMLTextReader.Initialize(ASource: TXMLCharSource);
  998. begin
  999. ASource.FParent := FSource;
  1000. FSource := ASource;
  1001. FSource.FReader := Self;
  1002. FSource.FStartNesting := FNesting;
  1003. FSource.Initialize;
  1004. end;
  1005. procedure TXMLTextReader.FatalError(Expected: WideChar);
  1006. begin
  1007. // FIX: don't output what is found - anything may be found, including exploits...
  1008. FatalError('Expected "%1s"', [string(Expected)]);
  1009. end;
  1010. procedure TXMLTextReader.FatalError(const descr: String; LineOffs: Integer);
  1011. begin
  1012. DoError(esFatal, descr, LineOffs);
  1013. end;
  1014. procedure TXMLTextReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
  1015. begin
  1016. DoError(esFatal, Format(descr, args), LineOffs);
  1017. end;
  1018. procedure TXMLTextReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
  1019. begin
  1020. if FValidate then
  1021. DoError(esError, Format(Msg, Args), LineOffs);
  1022. end;
  1023. procedure TXMLTextReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
  1024. var
  1025. ws: XMLString;
  1026. begin
  1027. SetString(ws, FName.Buffer, FName.Length);
  1028. ValidationError(Msg, [ws], LineOffs);
  1029. end;
  1030. procedure TXMLTextReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
  1031. var
  1032. Loc: TLocation;
  1033. begin
  1034. StoreLocation(Loc);
  1035. if LineOffs >= 0 then
  1036. begin
  1037. Dec(Loc.LinePos, LineOffs);
  1038. DoErrorPos(Severity, descr, Loc);
  1039. end
  1040. else
  1041. DoErrorPos(Severity, descr, FTokenStart);
  1042. end;
  1043. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
  1044. const args: array of const; const ErrPos: TLocation);
  1045. begin
  1046. DoErrorPos(Severity, Format(descr, args), ErrPos);
  1047. end;
  1048. procedure TXMLTextReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
  1049. var
  1050. E: EXMLReadError;
  1051. srcuri: XMLString;
  1052. begin
  1053. if Assigned(FSource) then
  1054. begin
  1055. srcuri := FSource.FSourceURI;
  1056. if (srcuri = '') and Assigned(FSource.FEntity) then
  1057. srcuri := FSource.FEntity.FURI;
  1058. E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [srcuri, ErrPos.Line, ErrPos.LinePos, descr]);
  1059. end
  1060. else
  1061. E := EXMLReadError.Create(descr);
  1062. E.FSeverity := Severity;
  1063. E.FErrorMessage := descr;
  1064. E.FLine := ErrPos.Line;
  1065. E.FLinePos := ErrPos.LinePos;
  1066. CallErrorHandler(E);
  1067. // No 'finally'! If user handler raises exception, control should not get here
  1068. // and the exception will be freed in CallErrorHandler (below)
  1069. E.Free;
  1070. end;
  1071. procedure TXMLTextReader.CheckMaxChars(ToAdd: Cardinal);
  1072. var
  1073. src: TXMLCharSource;
  1074. total: Cardinal;
  1075. begin
  1076. Inc(FSource.FCharCount, ToAdd);
  1077. if FMaxChars = 0 then
  1078. Exit;
  1079. src := FSource;
  1080. total := 0;
  1081. repeat
  1082. Inc(total, src.FCharCount);
  1083. if total > FMaxChars then
  1084. FatalError('Exceeded character count limit');
  1085. src := src.FParent;
  1086. until src = nil;
  1087. end;
  1088. procedure TXMLTextReader.CallErrorHandler(E: EXMLReadError);
  1089. begin
  1090. try
  1091. if Assigned(FOnError) then
  1092. FOnError(E);
  1093. if E.Severity = esFatal then
  1094. raise E;
  1095. except
  1096. FReadState := rsError;
  1097. if ExceptObject <> E then
  1098. E.Free;
  1099. raise;
  1100. end;
  1101. end;
  1102. function TXMLTextReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
  1103. begin
  1104. Result := False;
  1105. repeat
  1106. Result := SkipS or Result;
  1107. if FSource.FBuf^ = #0 then
  1108. begin
  1109. Result := True; // report whitespace upon exiting the PE
  1110. if not ContextPop then
  1111. Break;
  1112. end
  1113. else if FSource.FBuf^ = '%' then
  1114. begin
  1115. if (FState <> rsDTD) or ((FSource.Kind = skInternalSubset) and FInsideDecl) then
  1116. Break;
  1117. // This is the only case where look-ahead is needed
  1118. if FSource.FBuf > FSource.FBufEnd-2 then
  1119. FSource.Reload;
  1120. if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[NamePages[hi(Word(FSource.FBuf[1]))]]) or
  1121. ((FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
  1122. begin
  1123. Inc(FSource.FBuf); // skip '%'
  1124. CheckName;
  1125. ExpectChar(';');
  1126. StartPE;
  1127. Result := True; // report whitespace upon entering the PE
  1128. end
  1129. else Break;
  1130. end
  1131. else
  1132. Break;
  1133. until False;
  1134. end;
  1135. procedure TXMLTextReader.ExpectWhitespace;
  1136. begin
  1137. if not SkipWhitespace then
  1138. FatalError('Expected whitespace');
  1139. end;
  1140. function TXMLTextReader.SkipS(Required: Boolean): Boolean;
  1141. var
  1142. p: PWideChar;
  1143. begin
  1144. Result := False;
  1145. repeat
  1146. p := FSource.FBuf;
  1147. repeat
  1148. if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
  1149. begin
  1150. FSource.FBuf := p;
  1151. FSource.NewLine;
  1152. p := FSource.FBuf;
  1153. end
  1154. else if (p^ <> #32) and (p^ <> #9) then
  1155. Break;
  1156. Inc(p);
  1157. Result := True;
  1158. until False;
  1159. FSource.FBuf := p;
  1160. until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
  1161. if (not Result) and Required then
  1162. FatalError('Expected whitespace');
  1163. end;
  1164. procedure TXMLTextReader.ExpectString(const s: String);
  1165. var
  1166. I: Integer;
  1167. begin
  1168. for I := 1 to Length(s) do
  1169. begin
  1170. if FSource.FBuf^ <> WideChar(ord(s[i])) then
  1171. FatalError('Expected "%s"', [s], i-1);
  1172. FSource.NextChar;
  1173. end;
  1174. end;
  1175. function TXMLTextReader.CheckForChar(c: WideChar): Boolean;
  1176. begin
  1177. Result := (FSource.FBuf^ = c);
  1178. if Result then
  1179. begin
  1180. Inc(FSource.FBuf);
  1181. if FSource.FBuf >= FSource.FBufEnd then
  1182. FSource.Reload;
  1183. end;
  1184. end;
  1185. procedure TXMLTextReader.SkipQuote(out Delim: WideChar; required: Boolean);
  1186. begin
  1187. Delim := #0;
  1188. if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
  1189. begin
  1190. Delim := FSource.FBuf^;
  1191. FSource.NextChar; // skip quote
  1192. StoreLocation(FTokenStart);
  1193. end
  1194. else if required then
  1195. FatalError('Expected single or double quote');
  1196. end;
  1197. const
  1198. PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
  1199. procedure TXMLTextReader.SetOptions(AValue: TXMLReaderSettings);
  1200. begin
  1201. FValidate := AValue.Validate;
  1202. FPreserveWhitespace := AValue.PreserveWhitespace;
  1203. FExpandEntities := AValue.ExpandEntities;
  1204. FCDSectionsAsText := AValue.CDSectionsAsText;
  1205. FIgnoreComments := AValue.IgnoreComments;
  1206. FNamespaces := AValue.Namespaces;
  1207. FDisallowDoctype := AValue.DisallowDoctype;
  1208. FCanonical := AValue.CanonicalForm;
  1209. FMaxChars := AValue.MaxChars;
  1210. FOnError := AValue.OnError;
  1211. end;
  1212. constructor TXMLTextReader.Create(ASrc: TXMLInputSource; ANameTable: THashTable; ASettings: TXMLReaderSettings);
  1213. var
  1214. InputSrc: TXMLCharSource;
  1215. begin
  1216. Create;
  1217. SetOptions(ASettings);
  1218. FNameTable := ANameTable;
  1219. ConvertSource(ASrc, InputSrc);
  1220. FSource := InputSrc;
  1221. FSource.FReader := Self;
  1222. end;
  1223. constructor TXMLTextReader.Create(const uri: XMLString; ANameTable: THashTable; ASettings: TXMLReaderSettings);
  1224. begin
  1225. Create;
  1226. SetOptions(ASettings);
  1227. FNameTable := ANameTable;
  1228. if ResolveResource(uri, '', '', FSource) then
  1229. FSource.FReader := Self
  1230. else
  1231. DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
  1232. end;
  1233. procedure TXMLTextReader.SetNametable(ANameTable: THashTable);
  1234. begin
  1235. if ANameTable = nil then
  1236. begin
  1237. ANameTable := THashTable.Create(256, True);
  1238. FNameTableOwned := True;
  1239. end;
  1240. FNameTable := ANameTable;
  1241. end;
  1242. constructor TXMLTextReader.Create(var AFile: Text; ANameTable: THashTable);
  1243. begin
  1244. SetNametable(ANameTable);
  1245. FSource := TXMLFileInputSource.Create(AFile);
  1246. FSource.FReader := Self;
  1247. end;
  1248. constructor TXMLTextReader.Create(AStream: TStream; const ABaseUri: XMLString; ANameTable: THashTable);
  1249. begin
  1250. SetNametable(ANameTable);
  1251. FSource := TXMLStreamInputSource.Create(AStream, False);
  1252. FSource.SourceURI := ABaseUri;
  1253. FSource.FReader := Self;
  1254. end;
  1255. constructor TXMLTextReader.Create(ASrc: TXMLCharSource; AParent: TXMLTextReader);
  1256. begin
  1257. FNameTable := AParent.FNameTable;
  1258. FSource := ASrc;
  1259. FSource.FReader := Self;
  1260. FValidate := AParent.FValidate;
  1261. FPreserveWhitespace := AParent.FPreserveWhitespace;
  1262. FExpandEntities := AParent.FExpandEntities;
  1263. FCDSectionsAsText := AParent.FCDSectionsAsText;
  1264. FIgnoreComments := AParent.FIgnoreComments;
  1265. FNamespaces := AParent.FNamespaces;
  1266. FDisallowDoctype := AParent.FDisallowDoctype;
  1267. FCanonical := AParent.FCanonical;
  1268. FMaxChars := AParent.FMaxChars;
  1269. FOnError := AParent.FOnError;
  1270. end;
  1271. destructor TXMLTextReader.Destroy;
  1272. var
  1273. cur: PNodeData;
  1274. begin
  1275. if FAttrCleanupFlag then
  1276. CleanupAttributes;
  1277. while Assigned(FFreeAttrChunk) do
  1278. begin
  1279. cur := FFreeAttrChunk;
  1280. FFreeAttrChunk := cur^.FNext;
  1281. Dispose(cur);
  1282. end;
  1283. if Assigned(FEntityValue.Buffer) then
  1284. FreeMem(FEntityValue.Buffer);
  1285. FreeMem(FName.Buffer);
  1286. FreeMem(FValue.Buffer);
  1287. if Assigned(FSource) then
  1288. while ContextPop(True) do; // clean input stack
  1289. FSource.Free;
  1290. FPEMap.Free;
  1291. ClearForwardRefs;
  1292. FNsAttHash.Free;
  1293. FNSHelper.Free;
  1294. FDocType.Release;
  1295. FIDMap.Free;
  1296. FForwardRefs.Free;
  1297. if FNameTableOwned then
  1298. FNameTable.Free;
  1299. inherited Destroy;
  1300. end;
  1301. procedure TXMLTextReader.AfterConstruction;
  1302. begin
  1303. BufAllocate(FName, 128);
  1304. BufAllocate(FValue, 512);
  1305. SetLength(FNodeStack, 16);
  1306. SetLength(FValidators, 16);
  1307. FNesting := 0;
  1308. FValidatorNesting := 0;
  1309. FCurrNode := @FNodeStack[0];
  1310. FCurrAttrIndex := -1;
  1311. if FNamespaces then
  1312. begin
  1313. FNSHelper := TNSSupport.Create(FNameTable);
  1314. FNsAttHash := TDblHashArray.Create;
  1315. FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
  1316. FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
  1317. FStdUri_xmlns := FNameTable.FindOrAdd(stduri_xmlns);
  1318. FStdUri_xml := FNameTable.FindOrAdd(stduri_xml);
  1319. end;
  1320. end;
  1321. procedure TLoader.ProcessXML(ADoc: TDOMDocument; AReader: TXMLTextReader);
  1322. begin
  1323. doc := ADoc;
  1324. reader := AReader;
  1325. reader.OnEntity := @ProcessEntity;
  1326. doc.documentURI := reader.BaseURI;
  1327. reader.FState := rsProlog;
  1328. reader.FFragmentMode := False;
  1329. ParseContent(doc);
  1330. doc.XMLStandalone := reader.FStandalone;
  1331. if reader.FValidate then
  1332. reader.ValidateIdRefs;
  1333. doc.IDs := reader.FIDMap;
  1334. reader.FIDMap := nil;
  1335. end;
  1336. procedure TLoader.ProcessFragment(AOwner: TDOMNode; AReader: TXMLTextReader);
  1337. var
  1338. DoctypeNode: TDOMDocumentTypeEx;
  1339. begin
  1340. doc := AOwner.OwnerDocument;
  1341. reader := AReader;
  1342. reader.OnEntity := @ProcessEntity;
  1343. reader.FState := rsRoot;
  1344. reader.FFragmentMode := True;
  1345. reader.FXML11 := doc.XMLVersion = '1.1';
  1346. DoctypeNode := TDOMDocumentTypeEx(doc.DocType);
  1347. if Assigned(DoctypeNode) then
  1348. reader.FDocType := DocTypeNode.FModel.Reference;
  1349. ParseContent(aOwner as TDOMNode_WithChildren);
  1350. end;
  1351. procedure TLoader.ProcessEntity(Sender: TXMLTextReader; AEntity: TEntityDecl);
  1352. var
  1353. DoctypeNode: TDOMDocumentType;
  1354. Ent: TDOMEntityEx;
  1355. src: TXMLCharSource;
  1356. InnerReader: TXMLTextReader;
  1357. InnerLoader: TLoader;
  1358. begin
  1359. DoctypeNode := TDOMDocument(doc).DocType;
  1360. if DoctypeNode = nil then
  1361. Exit;
  1362. Ent := TDOMEntityEx(DocTypeNode.Entities.GetNamedItem(AEntity.FName));
  1363. if Ent = nil then
  1364. Exit;
  1365. Sender.EntityToSource(AEntity, Src);
  1366. if Src = nil then
  1367. Exit;
  1368. InnerReader := TXMLTextReader.Create(Src, Sender);
  1369. try
  1370. Ent.SetReadOnly(False);
  1371. InnerLoader.ProcessFragment(Ent, InnerReader);
  1372. AEntity.FResolved := True;
  1373. finally
  1374. InnerReader.Free;
  1375. AEntity.FOnStack := False;
  1376. Ent.SetReadOnly(True);
  1377. end;
  1378. end;
  1379. procedure TLoader.ParseContent(cursor: TDOMNode_WithChildren);
  1380. var
  1381. element: TDOMElement;
  1382. begin
  1383. if reader.ReadState = rsInitial then
  1384. begin
  1385. if not reader.Read then
  1386. Exit;
  1387. if cursor is TDOMNode_TopLevel then
  1388. begin
  1389. if reader.FSource.FXMLVersion <> xmlVersionUnknown then
  1390. TDOMTopNodeEx(cursor).FXMLVersion := reader.FSource.FXMLVersion;
  1391. TDOMTopNodeEx(cursor).FXMLEncoding := reader.FSource.FXMLEncoding;
  1392. end;
  1393. end;
  1394. with reader do
  1395. repeat
  1396. if FValidate then
  1397. ValidateCurrentNode;
  1398. case FCurrNode^.FNodeType of
  1399. ntText:
  1400. cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, False));
  1401. ntWhitespace, ntSignificantWhitespace:
  1402. if FPreserveWhitespace then
  1403. cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, FCurrNode^.FNodeType = ntWhitespace));
  1404. ntCDATA:
  1405. cursor.InternalAppend(DoCDSect(FValue.Buffer, FValue.Length));
  1406. ntProcessingInstruction:
  1407. cursor.InternalAppend(CreatePINode);
  1408. ntComment:
  1409. if not FIgnoreComments then
  1410. cursor.InternalAppend(doc.CreateCommentBuf(FCurrNode^.FValueStart, FCurrNode^.FValueLength));
  1411. ntElement:
  1412. begin
  1413. element := LoadElement(doc, FCurrNode, reader.FAttrCount);
  1414. cursor.InternalAppend(element);
  1415. cursor := element;
  1416. end;
  1417. ntEndElement:
  1418. cursor := TDOMNode_WithChildren(cursor.ParentNode);
  1419. ntDocumentType:
  1420. cursor.InternalAppend(TDOMDocumentType.Create(doc, FDocType));
  1421. ntEntityReference:
  1422. begin
  1423. cursor.InternalAppend(doc.CreateEntityReference(FCurrNode^.FQName^.Key));
  1424. { Seeing an entity reference while expanding means that the entity
  1425. fails to expand. }
  1426. if not FExpandEntities then
  1427. begin
  1428. { Make reader iterate through contents of the reference,
  1429. to ensure correct validation events and character counts. }
  1430. ResolveEntity;
  1431. while FCurrNode^.FNodeType <> ntEndEntity do
  1432. Read;
  1433. end;
  1434. end;
  1435. end;
  1436. until not Read;
  1437. end;
  1438. function TLoader.CreatePINode: TDOMNode;
  1439. var
  1440. NameStr, ValueStr: DOMString;
  1441. begin
  1442. SetString(NameStr, reader.FName.Buffer, reader.FName.Length);
  1443. SetString(ValueStr, reader.FValue.Buffer, reader.FValue.Length);
  1444. result := Doc.CreateProcessingInstruction(NameStr, ValueStr);
  1445. end;
  1446. function TLoader.DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
  1447. var
  1448. s: XMLString;
  1449. begin
  1450. SetString(s, ch, Count);
  1451. result := doc.CreateCDATASection(s);
  1452. end;
  1453. function TXMLTextReader.CheckName(aFlags: TCheckNameFlags): Boolean;
  1454. var
  1455. p: PWideChar;
  1456. NameStartFlag: Boolean;
  1457. begin
  1458. p := FSource.FBuf;
  1459. FName.Length := 0;
  1460. FColonPos := -1;
  1461. NameStartFlag := not (cnToken in aFlags);
  1462. repeat
  1463. if NameStartFlag then
  1464. begin
  1465. if (Byte(p^) in NamingBitmap[NamePages[hi(Word(p^))]]) or
  1466. ((p^ = ':') and (not FNamespaces)) then
  1467. Inc(p)
  1468. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1469. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1470. Inc(p, 2)
  1471. else
  1472. begin
  1473. // here we come either when first char of name is bad (it may be a colon),
  1474. // or when a colon is not followed by a valid NameStartChar
  1475. FSource.FBuf := p;
  1476. Result := False;
  1477. Break;
  1478. end;
  1479. NameStartFlag := False;
  1480. end;
  1481. repeat
  1482. if Byte(p^) in NamingBitmap[NamePages[$100+hi(Word(p^))]] then
  1483. Inc(p)
  1484. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1485. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1486. Inc(p,2)
  1487. else
  1488. Break;
  1489. until False;
  1490. if p^ = ':' then
  1491. begin
  1492. if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
  1493. begin
  1494. Inc(p);
  1495. if p^ <> #0 then Continue;
  1496. end
  1497. else if FColonPos = -1 then // this is the first colon, remember it
  1498. begin
  1499. FColonPos := p-FSource.FBuf+FName.Length;
  1500. NameStartFlag := True;
  1501. Inc(p);
  1502. if p^ <> #0 then Continue;
  1503. end;
  1504. end;
  1505. BufAppendChunk(FName, FSource.FBuf, p);
  1506. Result := (FName.Length > 0);
  1507. FSource.FBuf := p;
  1508. if (p^ <> #0) or not FSource.Reload then
  1509. Break;
  1510. p := FSource.FBuf;
  1511. until False;
  1512. if not (Result or (cnOptional in aFlags)) then
  1513. RaiseNameNotFound;
  1514. end;
  1515. procedure TXMLTextReader.CheckNCName;
  1516. begin
  1517. if FNamespaces and (FColonPos <> -1) then
  1518. FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
  1519. end;
  1520. procedure TXMLTextReader.RaiseNameNotFound;
  1521. begin
  1522. if FColonPos <> -1 then
  1523. FatalError('Bad QName syntax, local part is missing')
  1524. else
  1525. // Coming at no cost, this allows more user-friendly error messages
  1526. with FSource do
  1527. if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
  1528. FatalError('Whitespace is not allowed here')
  1529. else
  1530. FatalError('Name starts with invalid character');
  1531. end;
  1532. function TXMLTextReader.ResolvePredefined: Boolean;
  1533. var
  1534. wc: WideChar;
  1535. begin
  1536. Result := False;
  1537. with FName do
  1538. begin
  1539. if (Length = 2) and (Buffer[1] = 't') then
  1540. begin
  1541. if Buffer[0] = 'l' then
  1542. wc := '<'
  1543. else if Buffer[0] = 'g' then
  1544. wc := '>'
  1545. else Exit;
  1546. end
  1547. else if Buffer[0] = 'a' then
  1548. begin
  1549. if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
  1550. wc := '&'
  1551. else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
  1552. (Buffer[3] = 's') then
  1553. wc := ''''
  1554. else Exit;
  1555. end
  1556. else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
  1557. (Buffer[2] = 'o') and (Buffer[3] ='t') then
  1558. wc := '"'
  1559. else
  1560. Exit;
  1561. end; // with
  1562. BufAppend(FValue, wc);
  1563. Result := True;
  1564. end;
  1565. function TXMLTextReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  1566. var
  1567. Code: Integer;
  1568. begin
  1569. FSource.NextChar; // skip '&'
  1570. Result := CheckForChar('#');
  1571. if Result then
  1572. begin
  1573. Code := 0;
  1574. if CheckForChar('x') then
  1575. repeat
  1576. case FSource.FBuf^ of
  1577. '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
  1578. 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
  1579. 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
  1580. else
  1581. Break;
  1582. end;
  1583. FSource.NextChar;
  1584. until Code > $10FFFF
  1585. else
  1586. repeat
  1587. case FSource.FBuf^ of
  1588. '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
  1589. else
  1590. Break;
  1591. end;
  1592. FSource.NextChar;
  1593. until Code > $10FFFF;
  1594. case Code of
  1595. $01..$08, $0B..$0C, $0E..$1F:
  1596. if FXML11 then
  1597. BufAppend(ToFill, WideChar(Code))
  1598. else
  1599. FatalError('Invalid character reference');
  1600. $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
  1601. BufAppend(ToFill, WideChar(Code));
  1602. $10000..$10FFFF:
  1603. begin
  1604. BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
  1605. BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
  1606. end;
  1607. else
  1608. FatalError('Invalid character reference');
  1609. end;
  1610. end
  1611. else CheckName;
  1612. ExpectChar(';');
  1613. end;
  1614. const
  1615. AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
  1616. GT_Delim: TSetOfChar = [#0, '>'];
  1617. { Parse attribute literal, producing plain string value in AttrData.FValueStr.
  1618. If entity references are encountered and FExpandEntities=False, also builds
  1619. a node chain starting from AttrData.FNext. Node chain is built only for the
  1620. first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
  1621. procedure TXMLTextReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
  1622. var
  1623. wc: WideChar;
  1624. Delim: WideChar;
  1625. ent: TEntityDecl;
  1626. start: TObject;
  1627. curr: PNodeData;
  1628. StartPos: Integer;
  1629. StartLoc: TLocation;
  1630. entName: PHashItem;
  1631. begin
  1632. SkipQuote(Delim);
  1633. AttrData^.FLoc2 := FTokenStart;
  1634. StartLoc := FTokenStart;
  1635. curr := AttrData;
  1636. FValue.Length := 0;
  1637. StartPos := 0;
  1638. start := FSource.FEntity;
  1639. repeat
  1640. wc := FSource.SkipUntil(FValue, AttrDelims);
  1641. if wc = '<' then
  1642. FatalError('Character ''<'' is not allowed in attribute value')
  1643. else if wc = '&' then
  1644. begin
  1645. if ParseRef(FValue) or ResolvePredefined then
  1646. Continue;
  1647. entName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1648. ent := EntityCheck(True);
  1649. if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
  1650. begin
  1651. if FValue.Length > StartPos then
  1652. begin
  1653. AllocAttributeValueChunk(curr, StartPos);
  1654. curr^.FLoc := StartLoc;
  1655. end;
  1656. AllocAttributeValueChunk(curr, FValue.Length);
  1657. curr^.FNodeType := ntEntityReference;
  1658. curr^.FQName := entName;
  1659. StoreLocation(StartLoc);
  1660. curr^.FLoc := StartLoc;
  1661. Dec(curr^.FLoc.LinePos, FName.Length+1);
  1662. end;
  1663. StartPos := FValue.Length;
  1664. if Assigned(ent) then
  1665. ContextPush(ent);
  1666. end
  1667. else if wc <> #0 then
  1668. begin
  1669. FSource.NextChar;
  1670. if (wc = Delim) and (FSource.FEntity = start) then
  1671. Break;
  1672. if (wc = #10) or (wc = #9) or (wc = #13) then
  1673. wc := #32;
  1674. BufAppend(FValue, wc);
  1675. end
  1676. else
  1677. begin
  1678. if (FSource.FEntity = start) or not ContextPop then // #0
  1679. FatalError('Literal has no closing quote', -1);
  1680. StartPos := FValue.Length;
  1681. end;
  1682. until False;
  1683. if Assigned(attrData^.FNext) then
  1684. begin
  1685. FAttrCleanupFlag := True;
  1686. if FValue.Length > StartPos then
  1687. begin
  1688. AllocAttributeValueChunk(curr, StartPos);
  1689. curr^.FLoc := StartLoc;
  1690. end;
  1691. end;
  1692. if nonCDATA then
  1693. BufNormalize(FValue, attrData^.FDenormalized)
  1694. else
  1695. attrData^.FDenormalized := False;
  1696. SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
  1697. end;
  1698. const
  1699. PrefixChar: array[Boolean] of string = ('', '%');
  1700. procedure TXMLTextReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  1701. begin
  1702. if AEntity.FOnStack then
  1703. FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
  1704. if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
  1705. begin
  1706. if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
  1707. begin
  1708. // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
  1709. ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
  1710. Src := nil;
  1711. Exit;
  1712. end;
  1713. end
  1714. else
  1715. begin
  1716. Src := TXMLCharSource.Create(AEntity.FReplacementText);
  1717. Src.FLineNo := AEntity.FStartLocation.Line;
  1718. Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
  1719. // needed in case of prefetched external PE
  1720. if AEntity.FSystemID <> '' then
  1721. Src.SourceURI := AEntity.FURI;
  1722. end;
  1723. AEntity.FOnStack := True;
  1724. Src.FEntity := AEntity;
  1725. end;
  1726. function TXMLTextReader.ContextPush(AEntity: TEntityDecl; DummySource: Boolean): Boolean;
  1727. var
  1728. Src: TXMLCharSource;
  1729. begin
  1730. Src := nil;
  1731. if Assigned(AEntity) then
  1732. EntityToSource(AEntity, Src);
  1733. if (Src = nil) and DummySource then
  1734. begin
  1735. Src := TXMLCharSource.Create('');
  1736. if FExpandEntities then
  1737. Src.Kind := skManualPop;
  1738. end;
  1739. Result := Assigned(Src);
  1740. if Result then
  1741. Initialize(Src);
  1742. end;
  1743. function TXMLTextReader.ContextPop(Forced: Boolean): Boolean;
  1744. var
  1745. Src: TXMLCharSource;
  1746. Error: Boolean;
  1747. begin
  1748. Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
  1749. if Result then
  1750. begin
  1751. Src := FSource.FParent;
  1752. Error := False;
  1753. if Assigned(FSource.FEntity) then
  1754. begin
  1755. FSource.FEntity.FOnStack := False;
  1756. FSource.FEntity.FCharCount := FSource.FCharCount;
  1757. // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
  1758. Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
  1759. end;
  1760. FSource.Free;
  1761. FSource := Src;
  1762. // correct position of this error is after PE reference
  1763. if Error then
  1764. FatalError('Parameter entities must be properly nested');
  1765. end;
  1766. end;
  1767. function TXMLTextReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
  1768. var
  1769. RefName: XMLString;
  1770. cnt: Integer;
  1771. begin
  1772. Result := nil;
  1773. SetString(RefName, FName.Buffer, FName.Length);
  1774. cnt := FName.Length+2;
  1775. if Assigned(FDocType) then
  1776. Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1777. if Result = nil then
  1778. begin
  1779. if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
  1780. FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
  1781. else
  1782. ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
  1783. Exit;
  1784. end;
  1785. if FStandalone and Result.ExternallyDeclared then
  1786. FatalError('Standalone constraint violation', cnt);
  1787. if Result.FNotationName <> '' then
  1788. FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
  1789. if NoExternals and (Result.FSystemID <> '') then
  1790. FatalError('External entity reference is not allowed in attribute value', cnt);
  1791. if not Result.FResolved then
  1792. if Assigned(FOnEntity) then
  1793. FOnEntity(Self, Result);
  1794. // at this point we know the charcount of the entity being included
  1795. if Result.FCharCount >= cnt then
  1796. CheckMaxChars(Result.FCharCount - cnt);
  1797. end;
  1798. procedure TXMLTextReader.StartPE;
  1799. var
  1800. PEnt: TEntityDecl;
  1801. begin
  1802. PEnt := nil;
  1803. if Assigned(FPEMap) then
  1804. PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1805. if PEnt = nil then
  1806. begin
  1807. ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
  1808. // cease processing declarations, unless document is standalone.
  1809. FDTDProcessed := FStandalone;
  1810. Exit;
  1811. end;
  1812. { cache an external PE so it's only fetched once }
  1813. if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
  1814. begin
  1815. FDTDProcessed := FStandalone;
  1816. Exit;
  1817. end;
  1818. CheckMaxChars(PEnt.FCharCount);
  1819. PEnt.FBetweenDecls := not FInsideDecl;
  1820. ContextPush(PEnt);
  1821. FHavePERefs := True;
  1822. end;
  1823. function TXMLTextReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
  1824. begin
  1825. Result := ContextPush(AEntity);
  1826. if Result then
  1827. try
  1828. FValue.Length := 0;
  1829. FSource.SkipUntil(FValue, [#0]);
  1830. SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
  1831. AEntity.FCharCount := FValue.Length;
  1832. AEntity.FStartLocation.Line := 1;
  1833. AEntity.FStartLocation.LinePos := 1;
  1834. AEntity.FURI := FSource.SourceURI; // replace base URI with absolute one
  1835. finally
  1836. ContextPop;
  1837. AEntity.FPrefetched := True;
  1838. FValue.Length := 0;
  1839. end;
  1840. end;
  1841. const
  1842. LiteralDelims: array[TLiteralType] of TSetOfChar = (
  1843. [#0, '''', '"'], // ltPlain
  1844. [#0, '''', '"', #13, #10], // ltPubid
  1845. [#0, '%', '&', '''', '"'] // ltEntity
  1846. );
  1847. function TXMLTextReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  1848. Required: Boolean): Boolean;
  1849. var
  1850. start: TObject;
  1851. wc, Delim: WideChar;
  1852. dummy: Boolean;
  1853. begin
  1854. SkipQuote(Delim, Required);
  1855. Result := (Delim <> #0);
  1856. if not Result then
  1857. Exit;
  1858. ToFill.Length := 0;
  1859. start := FSource.FEntity;
  1860. repeat
  1861. wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
  1862. if wc = '%' then { ltEntity only }
  1863. begin
  1864. FSource.NextChar;
  1865. CheckName;
  1866. ExpectChar(';');
  1867. if FSource.Kind = skInternalSubset then
  1868. FatalError('PE reference not allowed here in internal subset', FName.Length+2);
  1869. StartPE;
  1870. end
  1871. else if wc = '&' then { ltEntity }
  1872. begin
  1873. if ParseRef(ToFill) then // charRefs always expanded
  1874. Continue;
  1875. BufAppend(ToFill, '&');
  1876. BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
  1877. BufAppend(ToFill, ';');
  1878. end
  1879. else if wc <> #0 then
  1880. begin
  1881. FSource.NextChar;
  1882. if (wc = #10) or (wc = #13) then
  1883. wc := #32
  1884. // terminating delimiter must be in the same context as the starting one
  1885. else if (wc = Delim) and (start = FSource.FEntity) then
  1886. Break;
  1887. BufAppend(ToFill, wc);
  1888. end
  1889. else if (FSource.FEntity = start) or not ContextPop then // #0
  1890. FatalError('Literal has no closing quote', -1);
  1891. until False;
  1892. if aType = ltPubid then
  1893. BufNormalize(ToFill, dummy);
  1894. end;
  1895. function TXMLTextReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar): Boolean;
  1896. var
  1897. wc: WideChar;
  1898. begin
  1899. Result := False;
  1900. StoreLocation(FTokenStart);
  1901. repeat
  1902. wc := FSource.SkipUntil(FValue, Delim);
  1903. if wc <> #0 then
  1904. begin
  1905. FSource.NextChar;
  1906. if (FValue.Length > 0) then
  1907. begin
  1908. if (FValue.Buffer[FValue.Length-1] = c1) then
  1909. begin
  1910. Dec(FValue.Length);
  1911. Result := True;
  1912. Exit;
  1913. end;
  1914. end;
  1915. BufAppend(FValue, wc);
  1916. end;
  1917. until wc = #0;
  1918. end;
  1919. procedure TXMLTextReader.ParseComment(discard: Boolean); // [15]
  1920. var
  1921. SaveLength: Integer;
  1922. begin
  1923. ExpectString('--');
  1924. SaveLength := FValue.Length;
  1925. if not SkipUntilSeq([#0, '-'], '-') then
  1926. FatalError('Unterminated comment', -1);
  1927. ExpectChar('>');
  1928. if not discard then
  1929. begin
  1930. FCurrNode := @FNodeStack[FNesting];
  1931. FCurrNode^.FNodeType := ntComment;
  1932. FCurrNode^.FQName := nil;
  1933. FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
  1934. FCurrNode^.FValueLength := FValue.Length-SaveLength;
  1935. end;
  1936. FValue.Length := SaveLength;
  1937. end;
  1938. procedure TXMLTextReader.ParsePI; // [16]
  1939. begin
  1940. FSource.NextChar; // skip '?'
  1941. CheckName;
  1942. CheckNCName;
  1943. with FName do
  1944. if (Length = 3) and
  1945. ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
  1946. ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
  1947. ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
  1948. begin
  1949. if not BufEquals(FName, 'xml') then
  1950. FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
  1951. else
  1952. FatalError('XML declaration is not allowed here', FName.Length);
  1953. end;
  1954. if FSource.FBuf^ <> '?' then
  1955. SkipS(True);
  1956. FValue.Length := 0;
  1957. if not SkipUntilSeq(GT_Delim, '?') then
  1958. FatalError('Unterminated processing instruction', -1);
  1959. SetNodeInfoWithValue(ntProcessingInstruction,
  1960. FNameTable.FindOrAdd(FName.Buffer, FName.Length));
  1961. end;
  1962. const
  1963. vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
  1964. procedure TXMLTextReader.ParseXmlOrTextDecl(TextDecl: Boolean);
  1965. var
  1966. Delim: WideChar;
  1967. buf: array[0..31] of WideChar;
  1968. I: Integer;
  1969. begin
  1970. SkipS(True);
  1971. // [24] VersionInfo: optional in TextDecl, required in XmlDecl
  1972. if (not TextDecl) or (FSource.FBuf^ = 'v') then
  1973. begin
  1974. ExpectString('version');
  1975. ExpectEq;
  1976. SkipQuote(Delim);
  1977. { !! Definition "VersionNum ::= '1.' [0-9]+" per XML 1.0 Fifth Edition
  1978. implies that version literal can have unlimited length. }
  1979. I := 0;
  1980. while (I < 3) and (FSource.FBuf^ <> Delim) do
  1981. begin
  1982. buf[I] := FSource.FBuf^;
  1983. Inc(I);
  1984. FSource.NextChar;
  1985. end;
  1986. if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
  1987. (buf[2] < '0') or (buf[2] > '9') then
  1988. FatalError('Illegal version number', -1);
  1989. ExpectChar(Delim);
  1990. FSource.FXMLVersion := vers[buf[2] = '1'];
  1991. if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
  1992. FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
  1993. if TextDecl or (FSource.FBuf^ <> '?') then
  1994. SkipS(True);
  1995. end;
  1996. // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
  1997. if TextDecl or (FSource.FBuf^ = 'e') then
  1998. begin
  1999. ExpectString('encoding');
  2000. ExpectEq;
  2001. SkipQuote(Delim);
  2002. I := 0;
  2003. while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
  2004. ((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
  2005. ((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
  2006. begin
  2007. buf[I] := FSource.FBuf^;
  2008. Inc(I);
  2009. FSource.NextChar;
  2010. end;
  2011. if not CheckForChar(Delim) then
  2012. FatalError('Illegal encoding name', i);
  2013. SetString(FSource.FXMLEncoding, buf, i);
  2014. if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
  2015. FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
  2016. if FSource.FBuf^ <> '?' then
  2017. SkipS(not TextDecl);
  2018. end;
  2019. // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
  2020. if (not TextDecl) and (FSource.FBuf^ = 's') then
  2021. begin
  2022. ExpectString('standalone');
  2023. ExpectEq;
  2024. SkipQuote(Delim);
  2025. if FSource.Matches('yes') then
  2026. FStandalone := True
  2027. else if not FSource.Matches('no') then
  2028. FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
  2029. ExpectChar(Delim);
  2030. SkipS;
  2031. end;
  2032. ExpectString('?>');
  2033. { Switch to 1.1 rules only after declaration is parsed completely. This is to
  2034. ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
  2035. if FSource.FXMLVersion = xmlVersion11 then
  2036. FXML11 := True;
  2037. end;
  2038. procedure TXMLTextReader.DTDReloadHook;
  2039. var
  2040. p: PWideChar;
  2041. begin
  2042. { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
  2043. We must further remove the CR chars and have only LF's left. }
  2044. p := FDTDStartPos;
  2045. while p < FSource.FBuf do
  2046. begin
  2047. while (p < FSource.FBuf) and (p^ <> #13) do
  2048. Inc(p);
  2049. BufAppendChunk(FIntSubset, FDTDStartPos, p);
  2050. if p^ = #13 then
  2051. Inc(p);
  2052. FDTDStartPos := p;
  2053. end;
  2054. FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
  2055. end;
  2056. procedure TXMLTextReader.ParseDoctypeDecl; // [28]
  2057. var
  2058. Src: TXMLCharSource;
  2059. DTDName: PHashItem;
  2060. begin
  2061. if FState >= rsDTD then
  2062. FatalError('Markup declaration is not allowed here');
  2063. if FDisallowDoctype then
  2064. FatalError('Document type is prohibited by parser settings');
  2065. ExpectString('DOCTYPE');
  2066. SkipS(True);
  2067. FDocType := TDTDModel.Create(FNameTable);
  2068. FDTDProcessed := True; // assume success
  2069. FState := rsDTD;
  2070. CheckName;
  2071. SetString(FDocType.FName, FName.Buffer, FName.Length);
  2072. DTDName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2073. SkipS(True);
  2074. ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
  2075. SkipS;
  2076. if CheckForChar('[') then
  2077. begin
  2078. BufAllocate(FIntSubset, 256);
  2079. FSource.Kind := skInternalSubset;
  2080. try
  2081. FDTDStartPos := FSource.FBuf;
  2082. ParseMarkupDecl;
  2083. DTDReloadHook; // fetch last chunk
  2084. SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
  2085. finally
  2086. FreeMem(FIntSubset.Buffer);
  2087. FSource.Kind := skNone;
  2088. end;
  2089. ExpectChar(']');
  2090. SkipS;
  2091. end;
  2092. ExpectChar('>');
  2093. if (FDocType.FSystemID <> '') then
  2094. begin
  2095. if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SourceURI, Src) then
  2096. begin
  2097. Initialize(Src);
  2098. try
  2099. Src.Kind := skManualPop;
  2100. ParseMarkupDecl;
  2101. finally
  2102. ContextPop(True);
  2103. end;
  2104. end
  2105. else
  2106. begin
  2107. ValidationError('Unable to resolve external DTD subset', []);
  2108. FDTDProcessed := FStandalone;
  2109. end;
  2110. end;
  2111. FState := rsAfterDTD;
  2112. FValue.Length := 0;
  2113. BufAppendString(FValue, FDocType.FInternalSubset);
  2114. SetNodeInfoWithValue(ntDocumentType, DTDName);
  2115. end;
  2116. procedure TXMLTextReader.ExpectEq; // [25]
  2117. begin
  2118. if FSource.FBuf^ <> '=' then
  2119. SkipS;
  2120. if FSource.FBuf^ <> '=' then
  2121. FatalError('Expected "="');
  2122. FSource.NextChar;
  2123. SkipS;
  2124. end;
  2125. { DTD stuff }
  2126. procedure TXMLTextReader.CheckPENesting(aExpected: TObject);
  2127. begin
  2128. if FSource.FEntity <> aExpected then
  2129. ValidationError('Parameter entities must be properly nested', [], 0);
  2130. end;
  2131. function TXMLTextReader.ParseQuantity: TCPQuant;
  2132. begin
  2133. case FSource.FBuf^ of
  2134. '?': Result := cqZeroOrOnce;
  2135. '*': Result := cqZeroOrMore;
  2136. '+': Result := cqOnceOrMore;
  2137. else
  2138. Result := cqOnce;
  2139. Exit;
  2140. end;
  2141. FSource.NextChar;
  2142. end;
  2143. function TXMLTextReader.FindOrCreateElDef: TElementDecl;
  2144. var
  2145. p: PHashItem;
  2146. begin
  2147. CheckName;
  2148. p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2149. Result := TElementDecl(p^.Data);
  2150. if Result = nil then
  2151. begin
  2152. Result := TElementDecl.Create;
  2153. p^.Data := Result;
  2154. end;
  2155. end;
  2156. procedure TXMLTextReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
  2157. var
  2158. Delim: WideChar;
  2159. CurrentCP: TContentParticle;
  2160. begin
  2161. Delim := #0;
  2162. repeat
  2163. CurrentCP := CP.Add;
  2164. SkipWhitespace;
  2165. if CheckForChar('(') then
  2166. ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
  2167. else
  2168. CurrentCP.Def := FindOrCreateElDef;
  2169. CurrentCP.CPQuant := ParseQuantity;
  2170. SkipWhitespace;
  2171. if FSource.FBuf^ = ')' then
  2172. Break;
  2173. if Delim = #0 then
  2174. begin
  2175. if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
  2176. Delim := FSource.FBuf^
  2177. else
  2178. FatalError('Expected pipe or comma delimiter');
  2179. end
  2180. else
  2181. if FSource.FBuf^ <> Delim then
  2182. FatalError(Delim);
  2183. FSource.NextChar; // skip delimiter
  2184. until False;
  2185. CheckPENesting(MustEndIn);
  2186. FSource.NextChar;
  2187. if Delim = '|' then
  2188. CP.CPType := ctChoice
  2189. else
  2190. CP.CPType := ctSeq; // '(foo)' is a sequence!
  2191. end;
  2192. procedure TXMLTextReader.ParseElementDecl; // [45]
  2193. var
  2194. ElDef: TElementDecl;
  2195. CurrentEntity: TObject;
  2196. I: Integer;
  2197. CP: TContentParticle;
  2198. Typ: TElementContentType;
  2199. ExtDecl: Boolean;
  2200. begin
  2201. CP := nil;
  2202. Typ := ctUndeclared; // satisfy compiler
  2203. ExpectWhitespace;
  2204. ElDef := FindOrCreateElDef;
  2205. if ElDef.ContentType <> ctUndeclared then
  2206. ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
  2207. ExtDecl := FSource.Kind <> skInternalSubset;
  2208. ExpectWhitespace;
  2209. if FSource.Matches('EMPTY') then
  2210. Typ := ctEmpty
  2211. else if FSource.Matches('ANY') then
  2212. Typ := ctAny
  2213. else if CheckForChar('(') then
  2214. begin
  2215. CP := TContentParticle.Create;
  2216. try
  2217. CurrentEntity := FSource.FEntity;
  2218. SkipWhitespace;
  2219. if FSource.Matches('#PCDATA') then // Mixed section [51]
  2220. begin
  2221. SkipWhitespace;
  2222. Typ := ctMixed;
  2223. while FSource.FBuf^ <> ')' do
  2224. begin
  2225. ExpectChar('|');
  2226. SkipWhitespace;
  2227. with CP.Add do
  2228. begin
  2229. Def := FindOrCreateElDef;
  2230. for I := CP.ChildCount-2 downto 0 do
  2231. if Def = CP.Children[I].Def then
  2232. ValidationError('Duplicate token in mixed section', [], FName.Length);
  2233. end;
  2234. SkipWhitespace;
  2235. end;
  2236. CheckPENesting(CurrentEntity);
  2237. FSource.NextChar;
  2238. if (not CheckForChar('*')) and (CP.ChildCount > 0) then
  2239. FatalError(WideChar('*'));
  2240. CP.CPQuant := cqZeroOrMore;
  2241. CP.CPType := ctChoice;
  2242. end
  2243. else // Children section [47]
  2244. begin
  2245. Typ := ctChildren;
  2246. ExpectChoiceOrSeq(CP, CurrentEntity);
  2247. CP.CPQuant := ParseQuantity;
  2248. end;
  2249. except
  2250. CP.Free;
  2251. raise;
  2252. end;
  2253. end
  2254. else
  2255. FatalError('Invalid content specification');
  2256. // SAX: DeclHandler.ElementDecl(name, model);
  2257. if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
  2258. begin
  2259. ElDef.ExternallyDeclared := ExtDecl;
  2260. ElDef.ContentType := Typ;
  2261. ElDef.RootCP := CP;
  2262. end
  2263. else
  2264. CP.Free;
  2265. end;
  2266. procedure TXMLTextReader.ParseNotationDecl; // [82]
  2267. var
  2268. NameStr, SysID, PubID: XMLString;
  2269. Notation: TNotationDecl;
  2270. Entry: PHashItem;
  2271. Src: TXMLCharSource;
  2272. begin
  2273. Src := FSource;
  2274. ExpectWhitespace;
  2275. CheckName;
  2276. CheckNCName;
  2277. SetString(NameStr, FName.Buffer, FName.Length);
  2278. ExpectWhitespace;
  2279. if not ParseExternalID(SysID, PubID, True) then
  2280. FatalError('Expected external or public ID');
  2281. if FDTDProcessed then
  2282. begin
  2283. Entry := FDocType.Notations.FindOrAdd(NameStr);
  2284. if Entry^.Data = nil then
  2285. begin
  2286. Notation := TNotationDecl.Create;
  2287. Notation.FName := NameStr;
  2288. Notation.FPublicID := PubID;
  2289. Notation.FSystemID := SysID;
  2290. Notation.FURI := Src.SourceURI;
  2291. Entry^.Data := Notation;
  2292. end
  2293. else
  2294. ValidationError('Duplicate notation declaration: ''%s''', [NameStr]);
  2295. end;
  2296. end;
  2297. const
  2298. AttrDataTypeNames: array[TAttrDataType] of XMLString = (
  2299. 'CDATA',
  2300. 'ID',
  2301. 'IDREF',
  2302. 'IDREFS',
  2303. 'ENTITY',
  2304. 'ENTITIES',
  2305. 'NMTOKEN',
  2306. 'NMTOKENS',
  2307. 'NOTATION'
  2308. );
  2309. procedure TXMLTextReader.ParseAttlistDecl; // [52]
  2310. var
  2311. ElDef: TElementDecl;
  2312. AttDef: TAttributeDef;
  2313. dt: TAttrDataType;
  2314. Found, DiscardIt: Boolean;
  2315. Offsets: array [Boolean] of Integer;
  2316. attrName: PHashItem;
  2317. begin
  2318. ExpectWhitespace;
  2319. ElDef := FindOrCreateElDef;
  2320. SkipWhitespace;
  2321. while FSource.FBuf^ <> '>' do
  2322. begin
  2323. CheckName;
  2324. ExpectWhitespace;
  2325. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2326. AttDef := TAttributeDef.Create(attrName, FColonPos);
  2327. try
  2328. AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2329. // In case of duplicate declaration of the same attribute, we must discard it,
  2330. // not modifying ElDef, and suppressing certain validation errors.
  2331. DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
  2332. if CheckForChar('(') then // [59]
  2333. begin
  2334. AttDef.DataType := dtNmToken;
  2335. repeat
  2336. SkipWhitespace;
  2337. CheckName([cnToken]);
  2338. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2339. ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
  2340. SkipWhitespace;
  2341. until not CheckForChar('|');
  2342. ExpectChar(')');
  2343. ExpectWhitespace;
  2344. end
  2345. else
  2346. begin
  2347. StoreLocation(FTokenStart);
  2348. // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
  2349. for dt := dtNotation downto dtCData do
  2350. begin
  2351. Found := FSource.Matches(AttrDataTypeNames[dt]);
  2352. if Found then
  2353. Break;
  2354. end;
  2355. if Found and SkipWhitespace then
  2356. begin
  2357. AttDef.DataType := dt;
  2358. if (dt = dtId) and not DiscardIt then
  2359. begin
  2360. if Assigned(ElDef.IDAttr) then
  2361. ValidationError('Only one attribute of type ID is allowed per element',[])
  2362. else
  2363. ElDef.IDAttr := AttDef;
  2364. end
  2365. else if dt = dtNotation then // no test cases for these ?!
  2366. begin
  2367. if not DiscardIt then
  2368. begin
  2369. if Assigned(ElDef.NotationAttr) then
  2370. ValidationError('Only one attribute of type NOTATION is allowed per element',[])
  2371. else
  2372. ElDef.NotationAttr := AttDef;
  2373. if ElDef.ContentType = ctEmpty then
  2374. ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
  2375. end;
  2376. ExpectChar('(');
  2377. repeat
  2378. SkipWhitespace;
  2379. StoreLocation(FTokenStart);
  2380. CheckName;
  2381. CheckNCName;
  2382. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2383. ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
  2384. if (not DiscardIt) and FValidate and
  2385. (FDocType.Notations.Get(FName.Buffer,FName.Length)=nil) then
  2386. AddForwardRef(FName.Buffer, FName.Length);
  2387. SkipWhitespace;
  2388. until not CheckForChar('|');
  2389. ExpectChar(')');
  2390. ExpectWhitespace;
  2391. end;
  2392. end
  2393. else
  2394. begin
  2395. // don't report 'expected whitespace' if token does not match completely
  2396. Offsets[False] := 0;
  2397. Offsets[True] := Length(AttrDataTypeNames[dt]);
  2398. if Found and (FSource.FBuf^ < 'A') then
  2399. ExpectWhitespace
  2400. else
  2401. FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
  2402. end;
  2403. end;
  2404. StoreLocation(FTokenStart);
  2405. if FSource.Matches('#REQUIRED') then
  2406. AttDef.Default := adRequired
  2407. else if FSource.Matches('#IMPLIED') then
  2408. AttDef.Default := adImplied
  2409. else if FSource.Matches('#FIXED') then
  2410. begin
  2411. AttDef.Default := adFixed;
  2412. ExpectWhitespace;
  2413. end
  2414. else
  2415. AttDef.Default := adDefault;
  2416. if AttDef.Default in [adDefault, adFixed] then
  2417. begin
  2418. if AttDef.DataType = dtId then
  2419. ValidationError('An attribute of type ID cannot have a default value',[]);
  2420. // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
  2421. ExpectAttValue(AttDef.Data, dt <> dtCDATA);
  2422. if not AttDef.ValidateSyntax(AttDef.Data^.FValueStr, FNamespaces) then
  2423. ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
  2424. end;
  2425. // SAX: DeclHandler.AttributeDecl(...)
  2426. if DiscardIt then
  2427. AttDef.Free
  2428. else
  2429. ElDef.AddAttrDef(AttDef);
  2430. except
  2431. AttDef.Free;
  2432. raise;
  2433. end;
  2434. SkipWhitespace;
  2435. end;
  2436. end;
  2437. procedure TXMLTextReader.ParseEntityDecl; // [70]
  2438. var
  2439. IsPE, Exists: Boolean;
  2440. Entity: TEntityDecl;
  2441. Map: THashTable;
  2442. Item: PHashItem;
  2443. Src: TXMLCharSource;
  2444. begin
  2445. Src := FSource;
  2446. if not SkipWhitespace(True) then
  2447. FatalError('Expected whitespace');
  2448. IsPE := CheckForChar('%');
  2449. if IsPE then // [72]
  2450. begin
  2451. ExpectWhitespace;
  2452. if FPEMap = nil then
  2453. FPEMap := THashTable.Create(64, True);
  2454. Map := FPEMap;
  2455. end
  2456. else
  2457. Map := FDocType.Entities;
  2458. Entity := TEntityDecl.Create;
  2459. try
  2460. Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2461. Entity.FIsPE := IsPE;
  2462. CheckName;
  2463. CheckNCName;
  2464. Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
  2465. ExpectWhitespace;
  2466. // remember where the entity is declared, use URI from the point where declaration
  2467. // was starting.
  2468. Entity.FURI := Src.SourceURI;
  2469. if FEntityValue.Buffer = nil then
  2470. BufAllocate(FEntityValue, 256);
  2471. if ParseLiteral(FEntityValue, ltEntity, False) then
  2472. begin
  2473. SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
  2474. Entity.FCharCount := FEntityValue.Length;
  2475. Entity.FStartLocation := FTokenStart;
  2476. end
  2477. else
  2478. begin
  2479. if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
  2480. FatalError('Expected entity value or external ID');
  2481. if not IsPE then // [76]
  2482. begin
  2483. if FSource.FBuf^ <> '>' then
  2484. ExpectWhitespace;
  2485. if FSource.Matches('NDATA') then
  2486. begin
  2487. ExpectWhitespace;
  2488. StoreLocation(FTokenStart); { needed for AddForwardRef }
  2489. CheckName;
  2490. SetString(Entity.FNotationName, FName.Buffer, FName.Length);
  2491. if FValidate and (FDocType.Notations.Get(FName.Buffer, FName.Length)=nil) then
  2492. AddForwardRef(FName.Buffer, FName.Length);
  2493. // SAX: DTDHandler.UnparsedEntityDecl(...);
  2494. end;
  2495. end;
  2496. end;
  2497. except
  2498. Entity.Free;
  2499. raise;
  2500. end;
  2501. // Repeated declarations of same entity are legal but must be ignored
  2502. if FDTDProcessed and not Exists then
  2503. begin
  2504. Item^.Data := Entity;
  2505. Entity.FName := Item^.Key;
  2506. end
  2507. else
  2508. Entity.Free;
  2509. end;
  2510. procedure TXMLTextReader.ParseIgnoreSection;
  2511. var
  2512. IgnoreLoc: TLocation;
  2513. IgnoreLevel: Integer;
  2514. wc: WideChar;
  2515. begin
  2516. StoreLocation(IgnoreLoc);
  2517. IgnoreLevel := 1;
  2518. repeat
  2519. FValue.Length := 0;
  2520. wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
  2521. if FSource.Matches('<![') then
  2522. Inc(IgnoreLevel)
  2523. else if FSource.Matches(']]>') then
  2524. Dec(IgnoreLevel)
  2525. else if wc <> #0 then
  2526. FSource.NextChar
  2527. else // PE's aren't recognized in ignore section, cannot ContextPop()
  2528. DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
  2529. until IgnoreLevel=0;
  2530. end;
  2531. procedure TXMLTextReader.ParseMarkupDecl; // [29]
  2532. var
  2533. IncludeLevel: Integer;
  2534. CurrentEntity: TObject;
  2535. IncludeLoc: TLocation;
  2536. CondType: (ctUnknown, ctInclude, ctIgnore);
  2537. begin
  2538. IncludeLevel := 0;
  2539. repeat
  2540. SkipWhitespace;
  2541. if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
  2542. begin
  2543. ExpectString(']]>');
  2544. Dec(IncludeLevel);
  2545. Continue;
  2546. end;
  2547. if not CheckForChar('<') then
  2548. Break;
  2549. CurrentEntity := FSource.FEntity;
  2550. if FSource.FBuf^ = '?' then
  2551. begin
  2552. ParsePI;
  2553. end
  2554. else
  2555. begin
  2556. ExpectChar('!');
  2557. if FSource.FBuf^ = '-' then
  2558. ParseComment(True)
  2559. else if CheckForChar('[') then
  2560. begin
  2561. if FSource.Kind = skInternalSubset then
  2562. FatalError('Conditional sections are not allowed in internal subset', 1);
  2563. SkipWhitespace;
  2564. CondType := ctUnknown; // satisfy compiler
  2565. if FSource.Matches('INCLUDE') then
  2566. CondType := ctInclude
  2567. else if FSource.Matches('IGNORE') then
  2568. CondType := ctIgnore
  2569. else
  2570. FatalError('Expected "INCLUDE" or "IGNORE"');
  2571. SkipWhitespace;
  2572. CheckPENesting(CurrentEntity);
  2573. ExpectChar('[');
  2574. if CondType = ctInclude then
  2575. begin
  2576. if IncludeLevel = 0 then
  2577. StoreLocation(IncludeLoc);
  2578. Inc(IncludeLevel);
  2579. end
  2580. else if CondType = ctIgnore then
  2581. ParseIgnoreSection;
  2582. end
  2583. else
  2584. begin
  2585. FInsideDecl := True;
  2586. if FSource.Matches('ELEMENT') then
  2587. ParseElementDecl
  2588. else if FSource.Matches('ENTITY') then
  2589. ParseEntityDecl
  2590. else if FSource.Matches('ATTLIST') then
  2591. ParseAttlistDecl
  2592. else if FSource.Matches('NOTATION') then
  2593. ParseNotationDecl
  2594. else
  2595. FatalError('Illegal markup declaration');
  2596. SkipWhitespace;
  2597. CheckPENesting(CurrentEntity);
  2598. ExpectChar('>');
  2599. FInsideDecl := False;
  2600. end;
  2601. end;
  2602. until False;
  2603. if IncludeLevel > 0 then
  2604. DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
  2605. if (FSource.Kind = skInternalSubset) and (FSource.FBuf^ = ']') then
  2606. Exit;
  2607. if FSource.FBuf^ <> #0 then
  2608. FatalError('Illegal character in DTD');
  2609. end;
  2610. procedure TLoader.ProcessDTD(ADoc: TDOMDocument; AReader: TXMLTextReader);
  2611. begin
  2612. AReader.FDocType := TDTDModel.Create(AReader.FNameTable);
  2613. // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
  2614. doc.AppendChild(TDOMDocumentType.Create(doc, AReader.FDocType));
  2615. AReader.FSource.Initialize;
  2616. AReader.ParseMarkupDecl;
  2617. end;
  2618. procedure TXMLTextReader.Close;
  2619. begin
  2620. FReadState := rsClosed;
  2621. FTokenStart.Line := 0;
  2622. FTokenStart.LinePos := 0;
  2623. end;
  2624. function TXMLTextReader.GetAttributeCount: Integer;
  2625. begin
  2626. result := FAttrCount;
  2627. end;
  2628. function TXMLTextReader.GetAttribute(i: Integer): XMLString;
  2629. begin
  2630. if (i < 0) or (i >= FAttrCount) then
  2631. raise EArgumentOutOfRangeException.Create('index');
  2632. result := FNodeStack[FNesting+i+1].FValueStr;
  2633. end;
  2634. function TXMLTextReader.GetAttribute(const AName: XMLString): XMLString;
  2635. var
  2636. i: Integer;
  2637. p: PHashItem;
  2638. begin
  2639. p := FNameTable.Find(PWideChar(AName), Length(AName));
  2640. if Assigned(p) then
  2641. for i := 1 to FAttrCount do
  2642. if FNodeStack[FNesting+i].FQName = p then
  2643. begin
  2644. result := FNodeStack[FNesting+i].FValueStr;
  2645. Exit;
  2646. end;
  2647. result := '';
  2648. end;
  2649. function TXMLTextReader.GetAttribute(const aLocalName, nsuri: XMLString): XMLString;
  2650. var
  2651. i: Integer;
  2652. p: PWideChar;
  2653. p1: PHashItem;
  2654. node: PNodeData;
  2655. begin
  2656. p1 := FNameTable.Find(PWideChar(nsuri), Length(nsuri));
  2657. if Assigned(p1) then
  2658. for i := 1 to FAttrCount do
  2659. begin
  2660. node := @FNodeStack[FNesting+i];
  2661. if node^.FNsUri = p1 then
  2662. begin
  2663. P := PWideChar(node^.FQName^.Key);
  2664. if node^.FColonPos > 0 then
  2665. Inc(P, node^.FColonPos+1);
  2666. if (Length(node^.FQName^.Key)-node^.FColonPos-1 = Length(aLocalName)) and
  2667. CompareMem(P, PWideChar(aLocalName), Length(aLocalName)*sizeof(WideChar)) then
  2668. begin
  2669. result := node^.FValueStr;
  2670. Exit;
  2671. end;
  2672. end;
  2673. end;
  2674. result := '';
  2675. end;
  2676. function TXMLTextReader.GetDepth: Integer;
  2677. begin
  2678. result := FNesting;
  2679. if FCurrAttrIndex >= 0 then
  2680. Inc(result);
  2681. if FAttrReadState <> arsNone then
  2682. Inc(result);
  2683. end;
  2684. function TXMLTextReader.GetNodeType: TXmlNodeType;
  2685. begin
  2686. result := FCurrNode^.FNodeType;
  2687. end;
  2688. function TXMLTextReader.GetName: XMLString;
  2689. begin
  2690. if Assigned(FCurrNode^.FQName) then
  2691. result := FCurrNode^.FQName^.Key
  2692. else
  2693. result := '';
  2694. end;
  2695. function TXMLTextReader.GetIsDefault: Boolean;
  2696. begin
  2697. result := FCurrNode^.FIsDefault;
  2698. end;
  2699. function TXMLTextReader.GetBaseUri: XMLString;
  2700. begin
  2701. result := FSource.SourceURI;
  2702. end;
  2703. { IXmlLineInfo methods }
  2704. function TXMLTextReader.GetHasLineInfo: Boolean;
  2705. begin
  2706. result := True;
  2707. end;
  2708. function TXMLTextReader.GetLineNumber: Integer;
  2709. begin
  2710. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2711. result := FCurrNode^.FLoc.Line
  2712. else
  2713. result := FTokenStart.Line;
  2714. end;
  2715. function TXMLTextReader.GetLinePosition: Integer;
  2716. begin
  2717. if (FCurrNode^.FNodeType in [ntElement,ntAttribute,ntEntityReference,ntEndEntity]) or (FAttrReadState <> arsNone) then
  2718. result := FCurrNode^.FLoc.LinePos
  2719. else
  2720. result := FTokenStart.LinePos;
  2721. end;
  2722. function TXMLTextReader.LookupNamespace(const APrefix: XMLString): XMLString;
  2723. begin
  2724. if Assigned(FNSHelper) then
  2725. result := FNSHelper.LookupNamespace(APrefix)
  2726. else
  2727. result := '';
  2728. end;
  2729. function TXMLTextReader.MoveToFirstAttribute: Boolean;
  2730. begin
  2731. result := False;
  2732. if FAttrCount = 0 then
  2733. exit;
  2734. FCurrAttrIndex := 0;
  2735. if FAttrReadState <> arsNone then
  2736. CleanAttrReadState;
  2737. FCurrNode := @FNodeStack[FNesting+1];
  2738. result := True;
  2739. end;
  2740. function TXMLTextReader.MoveToNextAttribute: Boolean;
  2741. begin
  2742. result := False;
  2743. if FCurrAttrIndex+1 >= FAttrCount then
  2744. exit;
  2745. Inc(FCurrAttrIndex);
  2746. if FAttrReadState <> arsNone then
  2747. CleanAttrReadState;
  2748. FCurrNode := @FNodeStack[FNesting+1+FCurrAttrIndex];
  2749. result := True;
  2750. end;
  2751. function TXMLTextReader.MoveToElement: Boolean;
  2752. begin
  2753. result := False;
  2754. if FAttrReadState <> arsNone then
  2755. CleanAttrReadState
  2756. else if FCurrNode^.FNodeType <> ntAttribute then
  2757. exit;
  2758. FCurrNode := @FNodeStack[FNesting];
  2759. FCurrAttrIndex := -1;
  2760. result := True;
  2761. end;
  2762. function TXMLTextReader.ReadAttributeValue: Boolean;
  2763. var
  2764. attrNode: PNodeData;
  2765. begin
  2766. Result := False;
  2767. if FAttrReadState = arsNone then
  2768. begin
  2769. if (FReadState <> rsInteractive) or (FCurrAttrIndex < 0) then
  2770. Exit;
  2771. attrNode := @FNodeStack[FNesting+FCurrAttrIndex+1];
  2772. if attrNode^.FNext = nil then
  2773. begin
  2774. if attrNode^.FValueStr = '' then
  2775. Exit; { we don't want to expose empty textnodes }
  2776. FCurrNode := AllocNodeData(FNesting+FAttrCount+1);
  2777. FCurrNode^.FNodeType := ntText;
  2778. FCurrNode^.FValueStr := attrNode^.FValueStr;
  2779. FCurrNode^.FLoc := attrNode^.FLoc2;
  2780. end
  2781. else
  2782. FCurrNode := attrNode^.FNext;
  2783. FAttrReadState := arsText;
  2784. FAttrBaseSource := FSource;
  2785. Result := True;
  2786. end
  2787. else // already reading, advance to next chunk
  2788. begin
  2789. if FSource = FAttrBaseSource then
  2790. begin
  2791. Result := Assigned(FCurrNode^.FNext);
  2792. if Result then
  2793. FCurrNode := FCurrNode^.FNext;
  2794. end
  2795. else
  2796. begin
  2797. NextAttrValueChunk;
  2798. Result := True;
  2799. end;
  2800. end;
  2801. end;
  2802. procedure TXMLTextReader.NextAttrValueChunk;
  2803. var
  2804. wc: WideChar;
  2805. tok: TAttributeReadState;
  2806. begin
  2807. if FAttrReadState = arsPushEntity then
  2808. begin
  2809. Inc(FNesting);
  2810. { make sure that the location is available }
  2811. AllocNodeData(FNesting+FAttrCount+1);
  2812. FAttrReadState := arsText;
  2813. end;
  2814. FCurrNode := @FNodeStack[FNesting+FAttrCount+1];
  2815. StoreLocation(FCurrNode^.FLoc);
  2816. FValue.Length := 0;
  2817. if FAttrReadState = arsText then
  2818. repeat
  2819. wc := FSource.SkipUntil(FValue, [#0, '&', #9, #10, #13]);
  2820. if wc = '&' then
  2821. begin
  2822. if ParseRef(FValue) or ResolvePredefined then
  2823. Continue;
  2824. tok := arsEntity;
  2825. end
  2826. else if wc <> #0 then { #9,#10,#13 -> replace by #32 }
  2827. begin
  2828. FSource.NextChar;
  2829. BufAppend(FValue, #32);
  2830. Continue;
  2831. end
  2832. else // #0
  2833. tok := arsEntityEnd;
  2834. if FValue.Length <> 0 then
  2835. begin
  2836. FCurrNode^.FNodeType := ntText;
  2837. FCurrNode^.FQName := nil;
  2838. SetString(FCurrNode^.FValueStr, FValue.Buffer, FValue.Length);
  2839. FAttrReadState := tok;
  2840. Exit;
  2841. end;
  2842. Break;
  2843. until False
  2844. else
  2845. tok := FAttrReadState;
  2846. if tok = arsEntity then
  2847. begin
  2848. HandleEntityStart;
  2849. FAttrReadState := arsText;
  2850. end
  2851. else if tok = arsEntityEnd then
  2852. begin
  2853. HandleEntityEnd;
  2854. FAttrReadState := arsText;
  2855. end;
  2856. end;
  2857. procedure TXMLTextReader.CleanAttrReadState;
  2858. begin
  2859. while FSource <> FAttrBaseSource do
  2860. ContextPop(True);
  2861. FAttrReadState := arsNone;
  2862. end;
  2863. function TXMLTextReader.GetHasValue: Boolean;
  2864. begin
  2865. result := FCurrNode^.FNodeType in [ntAttribute,ntText,ntCDATA,
  2866. ntProcessingInstruction,ntComment,ntWhitespace,ntSignificantWhitespace,
  2867. ntDocumentType];
  2868. end;
  2869. function TXMLTextReader.GetValue: XMLString;
  2870. begin
  2871. if (FCurrAttrIndex>=0) or (FAttrReadState <> arsNone) then
  2872. result := FCurrNode^.FValueStr
  2873. else
  2874. SetString(result, FCurrNode^.FValueStart, FCurrNode^.FValueLength);
  2875. end;
  2876. function TXMLTextReader.GetPrefix: XMLString;
  2877. begin
  2878. if Assigned(FCurrNode^.FPrefix) then
  2879. result := FCurrNode^.FPrefix^.Key
  2880. else
  2881. result := '';
  2882. end;
  2883. function TXMLTextReader.GetLocalName: XMLString;
  2884. begin
  2885. if FNamespaces and Assigned(FCurrNode^.FQName) then
  2886. if FColonPos < 0 then
  2887. Result := FCurrNode^.FQName^.Key
  2888. else
  2889. Result := Copy(FCurrNode^.FQName^.Key, FCurrNode^.FColonPos+2, MaxInt)
  2890. else
  2891. Result := '';
  2892. end;
  2893. function TXMLTextReader.GetNamespaceUri: XMLString;
  2894. begin
  2895. if Assigned(FCurrNode^.FNSURI) then
  2896. result := FCurrNode^.FNSURI^.Key
  2897. else
  2898. result := '';
  2899. end;
  2900. procedure TXMLTextReader.SetEOFState;
  2901. begin
  2902. FCurrNode := @FNodeStack[0];
  2903. Finalize(FCurrNode^);
  2904. FillChar(FCurrNode^, sizeof(TNodeData), 0);
  2905. FReadState := rsEndOfFile;
  2906. end;
  2907. procedure TXMLTextReader.ValidateCurrentNode;
  2908. var
  2909. ElDef: TElementDecl;
  2910. AttDef: TAttributeDef;
  2911. attr: PNodeData;
  2912. i: Integer;
  2913. begin
  2914. case FCurrNode^.FNodeType of
  2915. ntElement:
  2916. begin
  2917. if (FNesting = 0) and (not FFragmentMode) then
  2918. begin
  2919. if Assigned(FDocType) then
  2920. begin
  2921. if FDocType.FName <> FCurrNode^.FQName^.Key then
  2922. DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
  2923. end
  2924. else
  2925. DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
  2926. end;
  2927. ElDef := TElementDecl(FCurrNode^.FQName^.Data);
  2928. if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
  2929. DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2930. if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
  2931. DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2932. PushVC(ElDef);
  2933. if ElDef = nil then
  2934. Exit;
  2935. { Validate attributes }
  2936. for i := 1 to FAttrCount do
  2937. begin
  2938. attr := @FNodeStack[FNesting+i];
  2939. AttDef := TAttributeDef(attr^.FTypeInfo);
  2940. if AttDef = nil then
  2941. DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
  2942. [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
  2943. else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
  2944. begin
  2945. if FStandalone and AttDef.ExternallyDeclared then
  2946. if attr^.FDenormalized then
  2947. DoErrorPos(esError, 'In a standalone document, externally defined attribute cannot cause value normalization', attr^.FLoc2)
  2948. else if i > FSpecifiedAttrs then
  2949. DoError(esError, 'In a standalone document, attribute cannot have a default value defined externally');
  2950. // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
  2951. if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
  2952. DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
  2953. if not AttDef.ValidateSyntax(attr^.FValueStr, FNamespaces) then
  2954. DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
  2955. ValidateAttrValue(AttDef, attr);
  2956. end;
  2957. end;
  2958. { Check presence of #REQUIRED attributes }
  2959. if ElDef.HasRequiredAtts then
  2960. for i := 0 to ElDef.AttrDefCount-1 do
  2961. begin
  2962. if FAttrDefIndex[i] = FAttrTag then
  2963. Continue;
  2964. AttDef := ElDef.AttrDefs[i];
  2965. if AttDef.Default = adRequired then
  2966. ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
  2967. [AttDef.Data^.FQName^.Key, FCurrNode^.FQName^.Key], 0)
  2968. end;
  2969. end;
  2970. ntEndElement:
  2971. begin
  2972. if FValidators[FValidatorNesting].Incomplete then
  2973. ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
  2974. if FValidatorNesting > 0 then
  2975. Dec(FValidatorNesting);
  2976. end;
  2977. ntText, ntSignificantWhitespace:
  2978. case FValidators[FValidatorNesting].FContentType of
  2979. ctChildren:
  2980. if FCurrNode^.FNodeType = ntText then
  2981. ValidationError('Character data is not allowed in element-only content',[])
  2982. else
  2983. begin
  2984. if FValidators[FValidatorNesting].FSaViolation then
  2985. ValidationError('Standalone constraint violation',[]);
  2986. FCurrNode^.FNodeType := ntWhitespace;
  2987. end;
  2988. ctEmpty:
  2989. ValidationError('Character data is not allowed in EMPTY elements', []);
  2990. end;
  2991. ntCDATA:
  2992. if FValidators[FValidatorNesting].FContentType = ctChildren then
  2993. ValidationError('CDATA sections are not allowed in element-only content',[]);
  2994. ntProcessingInstruction:
  2995. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2996. ValidationError('Processing instructions are not allowed within EMPTY elements', []);
  2997. ntComment:
  2998. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2999. ValidationError('Comments are not allowed within EMPTY elements', []);
  3000. ntDocumentType:
  3001. ValidateDTD;
  3002. end;
  3003. end;
  3004. procedure TXMLTextReader.HandleEntityStart;
  3005. begin
  3006. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  3007. FCurrNode^.FNodeType := ntEntityReference;
  3008. FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3009. FCurrNode^.FColonPos := -1;
  3010. FCurrNode^.FValueStart := nil;
  3011. FCurrNode^.FValueLength := 0;
  3012. FCurrNode^.FValueStr := '';
  3013. StoreLocation(FCurrNode^.FLoc);
  3014. { point past '&' to first char of entity name }
  3015. Dec(FCurrNode^.FLoc.LinePos, FName.Length+1);
  3016. end;
  3017. procedure TXMLTextReader.HandleEntityEnd;
  3018. begin
  3019. ContextPop(True);
  3020. if FNesting > 0 then Dec(FNesting);
  3021. FCurrNode := @FNodeStack[FNesting+(FAttrCount+1)*ord(FAttrReadState<>arsNone)];
  3022. FCurrNode^.FNodeType := ntEndEntity;
  3023. { point to trailing ';' }
  3024. Inc(FCurrNode^.FLoc.LinePos, Length(FCurrNode^.FQName^.Key));
  3025. end;
  3026. procedure TXMLTextReader.ResolveEntity;
  3027. var
  3028. n: PNodeData;
  3029. ent: TEntityDecl;
  3030. begin
  3031. if FCurrNode^.FNodeType <> ntEntityReference then
  3032. raise EInvalidOperation.Create('Wrong node type');
  3033. if FAttrReadState <> arsNone then
  3034. begin
  3035. { copy the EntityReference node to the stack if not already there }
  3036. n := AllocNodeData(FNesting+FAttrCount+1);
  3037. if n <> FCurrNode then
  3038. n^ := FCurrNode^;
  3039. ent := nil;
  3040. if Assigned(FDocType) then
  3041. ent := FDocType.Entities.Get(PWideChar(n^.FQName^.Key),Length(n^.FQName^.Key)) as TEntityDecl;
  3042. ContextPush(ent, True);
  3043. FAttrReadState := arsPushEntity;
  3044. end
  3045. else
  3046. FNext := xtPushEntity;
  3047. end;
  3048. procedure TXMLTextReader.DoStartEntity;
  3049. begin
  3050. Inc(FNesting);
  3051. FCurrNode := AllocNodeData(FNesting);
  3052. ContextPush(FCurrEntity, True);
  3053. FNext := xtText;
  3054. end;
  3055. // The code below does the bulk of the parsing, and must be as fast as possible.
  3056. // To minimize CPU cache effects, methods from different classes are kept together
  3057. function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  3058. wsflag: PBoolean): WideChar;
  3059. var
  3060. old: PWideChar;
  3061. nonws: Boolean;
  3062. wc: WideChar;
  3063. begin
  3064. nonws := False;
  3065. repeat
  3066. old := FBuf;
  3067. repeat
  3068. wc := FBuf^;
  3069. if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
  3070. (wc = #$2028))) then
  3071. begin
  3072. // strictly this is needed only for 2-byte lineendings
  3073. BufAppendChunk(ToFill, old, FBuf);
  3074. NewLine;
  3075. old := FBuf;
  3076. wc := FBuf^
  3077. end
  3078. else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
  3079. (wc <> #9)) or (wc > #$FFFD) or
  3080. (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
  3081. FReader.FatalError('Invalid character');
  3082. if (wc < #255) and (Char(ord(wc)) in Delim) then
  3083. Break;
  3084. // the checks above filter away everything below #32 that isn't a whitespace
  3085. if wc > #32 then
  3086. nonws := True;
  3087. Inc(FBuf);
  3088. until False;
  3089. Result := wc;
  3090. BufAppendChunk(ToFill, old, FBuf);
  3091. until (Result <> #0) or (not Reload);
  3092. if Assigned(wsflag) then
  3093. wsflag^ := wsflag^ or nonws;
  3094. end;
  3095. const
  3096. TextDelims: array[Boolean] of TSetOfChar = (
  3097. [#0, '<', '&', '>'],
  3098. [#0, '>']
  3099. );
  3100. textNodeTypes: array[Boolean] of TXMLNodeType = (
  3101. ntSignificantWhitespace,
  3102. ntText
  3103. );
  3104. function TXMLTextReader.ReadTopLevel: Boolean;
  3105. var
  3106. tok: TXMLToken;
  3107. begin
  3108. if FNext = xtFakeLF then
  3109. begin
  3110. Result := SetupFakeLF(xtText);
  3111. Exit;
  3112. end;
  3113. StoreLocation(FTokenStart);
  3114. if FNext = xtText then
  3115. repeat
  3116. SkipS;
  3117. if FSource.FBuf^ = '<' then
  3118. begin
  3119. Inc(FSource.FBuf);
  3120. if FSource.FBufEnd < FSource.FBuf + 2 then
  3121. FSource.Reload;
  3122. if FSource.FBuf^ = '!' then
  3123. begin
  3124. Inc(FSource.FBuf);
  3125. if FSource.FBuf^ = '-' then
  3126. begin
  3127. if FIgnoreComments then
  3128. begin
  3129. ParseComment(True);
  3130. Continue;
  3131. end;
  3132. tok := xtComment;
  3133. end
  3134. else
  3135. tok := xtDoctype;
  3136. end
  3137. else if FSource.FBuf^ = '?' then
  3138. tok := xtPI
  3139. else
  3140. begin
  3141. CheckName;
  3142. tok := xtElement;
  3143. end;
  3144. end
  3145. else if FSource.FBuf^ = #0 then
  3146. begin
  3147. if FState < rsRoot then
  3148. FatalError('Root element is missing');
  3149. tok := xtEOF;
  3150. end
  3151. else
  3152. FatalError('Illegal at document level');
  3153. if FCanonical and (FState > rsRoot) and (tok <> xtEOF) then
  3154. begin
  3155. Result := SetupFakeLF(tok);
  3156. Exit;
  3157. end;
  3158. Break;
  3159. until False
  3160. else // FNext <> xtText
  3161. tok := FNext;
  3162. if FCanonical and (FState < rsRoot) and (tok <> xtDoctype) then
  3163. FNext := xtFakeLF
  3164. else
  3165. FNext := xtText;
  3166. case tok of
  3167. xtElement:
  3168. begin
  3169. if FState > rsRoot then
  3170. FatalError('Only one top-level element allowed', FName.Length)
  3171. else if FState < rsRoot then
  3172. begin
  3173. // dispose notation refs from DTD, if any
  3174. ClearForwardRefs;
  3175. FState := rsRoot;
  3176. end;
  3177. ParseStartTag;
  3178. end;
  3179. xtPI: ParsePI;
  3180. xtComment: ParseComment(False);
  3181. xtDoctype:
  3182. begin
  3183. ParseDoctypeDecl;
  3184. if FCanonical then
  3185. begin
  3186. // recurse, effectively ignoring the DTD
  3187. result := ReadTopLevel();
  3188. Exit;
  3189. end;
  3190. end;
  3191. xtEOF: SetEofState;
  3192. end;
  3193. Result := tok <> xtEOF;
  3194. end;
  3195. function TXMLTextReader.Read: Boolean;
  3196. var
  3197. nonWs: Boolean;
  3198. wc: WideChar;
  3199. InCDATA: Boolean;
  3200. tok: TXMLToken;
  3201. begin
  3202. if FReadState > rsInteractive then
  3203. begin
  3204. Result := False;
  3205. Exit;
  3206. end;
  3207. if FReadState = rsInitial then
  3208. begin
  3209. FReadState := rsInteractive;
  3210. FSource.Initialize;
  3211. FNext := xtText;
  3212. end;
  3213. if FAttrReadState <> arsNone then
  3214. CleanAttrReadState;
  3215. if FNext = xtPopEmptyElement then
  3216. begin
  3217. FNext := xtPopElement;
  3218. FCurrNode^.FNodeType := ntEndElement;
  3219. if FAttrCleanupFlag then
  3220. CleanupAttributes;
  3221. FAttrCount := 0;
  3222. FCurrAttrIndex := -1;
  3223. Result := True;
  3224. Exit;
  3225. end;
  3226. if FNext = xtPushElement then
  3227. begin
  3228. if FAttrCleanupFlag then
  3229. CleanupAttributes;
  3230. FAttrCount := 0;
  3231. Inc(FNesting);
  3232. FCurrAttrIndex := -1;
  3233. FNext := xtText;
  3234. end
  3235. else if FNext = xtPopElement then
  3236. PopElement
  3237. else if FNext = xtPushEntity then
  3238. DoStartEntity;
  3239. if FState <> rsRoot then
  3240. begin
  3241. Result := ReadTopLevel;
  3242. Exit;
  3243. end;
  3244. InCDATA := (FNext = xtCDSect);
  3245. StoreLocation(FTokenStart);
  3246. nonWs := False;
  3247. FValue.Length := 0;
  3248. if FNext in [xtCDSect, xtText] then
  3249. repeat
  3250. wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
  3251. if wc = '<' then
  3252. begin
  3253. Inc(FSource.FBuf);
  3254. if FSource.FBufEnd < FSource.FBuf + 2 then
  3255. FSource.Reload;
  3256. if FSource.FBuf^ = '/' then
  3257. tok := xtEndElement
  3258. else if CheckName([cnOptional]) then
  3259. tok := xtElement
  3260. else if FSource.FBuf^ = '!' then
  3261. begin
  3262. Inc(FSource.FBuf);
  3263. if FSource.FBuf^ = '[' then
  3264. begin
  3265. ExpectString('[CDATA[');
  3266. StoreLocation(FTokenStart);
  3267. InCDATA := True;
  3268. if FCDSectionsAsText or (FValue.Length = 0) then
  3269. Continue;
  3270. tok := xtCDSect;
  3271. end
  3272. else if FSource.FBuf^ = '-' then
  3273. begin
  3274. { Ignoring comments is tricky in validating mode; discarding a comment which
  3275. is the only child of an EMPTY element will make that element erroneously appear
  3276. as valid. Therefore, at this point we discard only comments which are preceded
  3277. by some text (since presence of text already renders an EMPTY element invalid).
  3278. Other comments should be reported to validation part and discarded there. }
  3279. if FIgnoreComments and (FValue.Length > 0) then
  3280. begin
  3281. ParseComment(True);
  3282. Continue;
  3283. end;
  3284. tok := xtComment;
  3285. end
  3286. else
  3287. tok := xtDoctype;
  3288. end
  3289. else if FSource.FBuf^ = '?' then
  3290. tok := xtPI
  3291. else
  3292. RaiseNameNotFound;
  3293. end
  3294. else if wc = #0 then
  3295. begin
  3296. if InCDATA then
  3297. FatalError('Unterminated CDATA section', -1);
  3298. if FNesting > FSource.FStartNesting then
  3299. FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting-1].FQName^.Key]);
  3300. if Assigned(FSource.FParent) then
  3301. begin
  3302. if FExpandEntities and ContextPop then
  3303. Continue
  3304. else
  3305. tok := xtEntityEnd;
  3306. end
  3307. else
  3308. tok := xtEOF;
  3309. end
  3310. else if wc = '>' then
  3311. begin
  3312. BufAppend(FValue, wc);
  3313. FSource.NextChar;
  3314. if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
  3315. (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
  3316. if InCData then // got a ']]>' separator
  3317. begin
  3318. Dec(FValue.Length, 3);
  3319. InCDATA := False;
  3320. if FCDSectionsAsText then
  3321. Continue;
  3322. SetNodeInfoWithValue(ntCDATA);
  3323. FNext := xtText;
  3324. Result := True;
  3325. Exit;
  3326. end
  3327. else
  3328. FatalError('Literal '']]>'' is not allowed in text', 3);
  3329. end
  3330. else if wc = '&' then
  3331. begin
  3332. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  3333. ValidationError('References are illegal in EMPTY elements', []);
  3334. if ParseRef(FValue) or ResolvePredefined then
  3335. begin
  3336. nonWs := True; // CharRef to whitespace is not considered whitespace
  3337. Continue;
  3338. end
  3339. else
  3340. begin
  3341. FCurrEntity := EntityCheck;
  3342. if Assigned(FCurrEntity) and FExpandEntities then
  3343. begin
  3344. ContextPush(FCurrEntity);
  3345. Continue;
  3346. end;
  3347. tok := xtEntity;
  3348. end;
  3349. end;
  3350. if FValue.Length <> 0 then
  3351. begin
  3352. SetNodeInfoWithValue(textNodeTypes[nonWs]);
  3353. FNext := tok;
  3354. Result := True;
  3355. Exit;
  3356. end;
  3357. Break;
  3358. until False
  3359. else // not (FNext in [xtText, xtCDSect])
  3360. tok := FNext;
  3361. FNext := xtText;
  3362. case tok of
  3363. xtEntity: HandleEntityStart;
  3364. xtEntityEnd: HandleEntityEnd;
  3365. xtElement: ParseStartTag;
  3366. xtEndElement: ParseEndTag;
  3367. xtPI: ParsePI;
  3368. xtDoctype: ParseDoctypeDecl;
  3369. xtComment: ParseComment(False);
  3370. xtEOF: SetEofState;
  3371. end;
  3372. Result := tok <> xtEOF;
  3373. end;
  3374. procedure TXMLCharSource.NextChar;
  3375. begin
  3376. Inc(FBuf);
  3377. if FBuf >= FBufEnd then
  3378. Reload;
  3379. end;
  3380. procedure TXMLTextReader.ExpectChar(wc: WideChar);
  3381. begin
  3382. if FSource.FBuf^ = wc then
  3383. FSource.NextChar
  3384. else
  3385. FatalError(wc);
  3386. end;
  3387. // Element name already in FNameBuffer
  3388. procedure TXMLTextReader.ParseStartTag; // [39] [40] [44]
  3389. var
  3390. ElDef: TElementDecl;
  3391. IsEmpty: Boolean;
  3392. ElName: PHashItem;
  3393. b: TBinding;
  3394. Len: Integer;
  3395. begin
  3396. ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3397. ElDef := TElementDecl(ElName^.Data);
  3398. if Assigned(ElDef) then
  3399. Len := ElDef.AttrDefCount+8 { overallocate a bit }
  3400. else
  3401. Len := 0;
  3402. // (re)initialize array of attribute definition tags
  3403. if (Len-8 > Length(FAttrDefIndex)) or (FAttrTag = 0) then
  3404. begin
  3405. SetLength(FAttrDefIndex, Len);
  3406. for Len := 0 to High(FAttrDefIndex) do
  3407. FAttrDefIndex[Len] := FAttrTag;
  3408. end;
  3409. // we're about to process a new set of attributes
  3410. {$push}{$r-,q-}
  3411. Dec(FAttrTag);
  3412. {$pop}
  3413. IsEmpty := False;
  3414. FAttrCount := 0;
  3415. FCurrAttrIndex := -1;
  3416. FPrefixedAttrs := 0;
  3417. FSpecifiedAttrs := 0;
  3418. FCurrNode := AllocNodeData(FNesting);
  3419. FCurrNode^.FQName := ElName;
  3420. FCurrNode^.FNodeType := ntElement;
  3421. FCurrNode^.FColonPos := FColonPos;
  3422. StoreLocation(FCurrNode^.FLoc);
  3423. Dec(FCurrNode^.FLoc.LinePos, FName.Length);
  3424. if FNamespaces then
  3425. begin
  3426. FNSHelper.PushScope;
  3427. if FColonPos > 0 then
  3428. FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3429. end;
  3430. while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
  3431. begin
  3432. SkipS(True);
  3433. if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
  3434. Break;
  3435. ParseAttribute(ElDef);
  3436. end;
  3437. if FSource.FBuf^ = '/' then
  3438. begin
  3439. IsEmpty := True;
  3440. FSource.NextChar;
  3441. end;
  3442. ExpectChar('>');
  3443. if Assigned(ElDef) and ElDef.NeedsDefaultPass then
  3444. ProcessDefaultAttributes(ElDef);
  3445. // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
  3446. FCurrNode := @FNodeStack[FNesting];
  3447. if FNamespaces then
  3448. begin
  3449. { Assign namespace URIs to prefixed attrs }
  3450. if FPrefixedAttrs <> 0 then
  3451. ProcessNamespaceAtts;
  3452. { Expand the element name }
  3453. if Assigned(FCurrNode^.FPrefix) then
  3454. begin
  3455. b := TBinding(FCurrNode^.FPrefix^.Data);
  3456. if not (Assigned(b) and Assigned(b.uri) and (b.uri^.Key <> '')) then
  3457. DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
  3458. FCurrNode^.FNsUri := b.uri;
  3459. end
  3460. else
  3461. begin
  3462. b := FNSHelper.DefaultNSBinding;
  3463. if Assigned(b) then
  3464. FCurrNode^.FNsUri := b.uri;
  3465. end;
  3466. end;
  3467. if not IsEmpty then
  3468. begin
  3469. if not FPreserveWhitespace then // critical for testsuite compliance
  3470. SkipS;
  3471. FNext := xtPushElement;
  3472. end
  3473. else
  3474. FNext := xtPopEmptyElement;
  3475. end;
  3476. procedure TXMLTextReader.ParseEndTag; // [42]
  3477. var
  3478. ElName: PHashItem;
  3479. begin
  3480. if FNesting <= FSource.FStartNesting then
  3481. FatalError('End-tag is not allowed here');
  3482. if FNesting > 0 then Dec(FNesting);
  3483. Inc(FSource.FBuf);
  3484. FCurrNode := @FNodeStack[FNesting]; // move off the possible child
  3485. FCurrNode^.FNodeType := ntEndElement;
  3486. StoreLocation(FTokenStart);
  3487. FCurrNode^.FLoc := FTokenStart;
  3488. ElName := FCurrNode^.FQName;
  3489. if not FSource.MatchesLong(ElName^.Key) then
  3490. FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], -1);
  3491. if FSource.FBuf^ = '>' then // this handles majority of cases
  3492. FSource.NextChar
  3493. else
  3494. begin // gives somewhat incorrect message for <a></aa>
  3495. SkipS;
  3496. ExpectChar('>');
  3497. end;
  3498. FNext := xtPopElement;
  3499. end;
  3500. procedure TXMLTextReader.ParseAttribute(ElDef: TElementDecl);
  3501. var
  3502. attrName: PHashItem;
  3503. attrData: PNodeData;
  3504. AttDef: TAttributeDef;
  3505. i: Integer;
  3506. begin
  3507. CheckName;
  3508. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  3509. attrData := AllocAttributeData;
  3510. attrData^.FQName := attrName;
  3511. attrData^.FColonPos := FColonPos;
  3512. StoreLocation(attrData^.FLoc);
  3513. Dec(attrData^.FLoc.LinePos, FName.Length);
  3514. FSpecifiedAttrs := FAttrCount;
  3515. if Assigned(ElDef) then
  3516. begin
  3517. AttDef := ElDef.GetAttrDef(attrName);
  3518. // mark attribute as specified
  3519. if Assigned(AttDef) then
  3520. FAttrDefIndex[AttDef.Index] := FAttrTag;
  3521. end
  3522. else
  3523. AttDef := nil;
  3524. attrData^.FTypeInfo := AttDef;
  3525. // check for duplicates
  3526. for i := 1 to FAttrCount-1 do
  3527. if FNodeStack[FNesting+i].FQName = attrName then
  3528. FatalError('Duplicate attribute', FName.Length);
  3529. if FNamespaces then
  3530. begin
  3531. if ((FName.Length = 5) or (FColonPos = 5)) and
  3532. (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
  3533. (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
  3534. (FName.Buffer[4] = 's') then
  3535. begin
  3536. if FColonPos > 0 then
  3537. attrData^.FPrefix := FStdPrefix_xmlns;
  3538. attrData^.FNsUri := FStdUri_xmlns;
  3539. end
  3540. else if FColonPos > 0 then
  3541. begin
  3542. attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  3543. Inc(FPrefixedAttrs);
  3544. end;
  3545. end;
  3546. ExpectEq;
  3547. ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
  3548. if Assigned(attrData^.FNsUri) then
  3549. begin
  3550. if (not AddBinding(attrData)) and FCanonical then
  3551. begin
  3552. CleanupAttribute(attrData);
  3553. Dec(FAttrCount);
  3554. Dec(FSpecifiedAttrs);
  3555. end;
  3556. end;
  3557. end;
  3558. procedure TXMLTextReader.AddForwardRef(Buf: PWideChar; Length: Integer);
  3559. var
  3560. w: PForwardRef;
  3561. begin
  3562. if FForwardRefs = nil then
  3563. FForwardRefs := TFPList.Create;
  3564. New(w);
  3565. SetString(w^.Value, Buf, Length);
  3566. w^.Loc := FTokenStart;
  3567. FForwardRefs.Add(w);
  3568. end;
  3569. procedure TXMLTextReader.ClearForwardRefs;
  3570. var
  3571. I: Integer;
  3572. begin
  3573. if Assigned(FForwardRefs) then
  3574. begin
  3575. for I := 0 to FForwardRefs.Count-1 do
  3576. Dispose(PForwardRef(FForwardRefs.List^[I]));
  3577. FForwardRefs.Clear;
  3578. end;
  3579. end;
  3580. procedure TXMLTextReader.ValidateIdRefs;
  3581. var
  3582. I: Integer;
  3583. begin
  3584. if Assigned(FForwardRefs) then
  3585. begin
  3586. for I := 0 to FForwardRefs.Count-1 do
  3587. with PForwardRef(FForwardRefs.List^[I])^ do
  3588. if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
  3589. DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
  3590. ClearForwardRefs;
  3591. end;
  3592. end;
  3593. procedure TXMLTextReader.ProcessDefaultAttributes(ElDef: TElementDecl);
  3594. var
  3595. I: Integer;
  3596. AttDef: TAttributeDef;
  3597. attrData: PNodeData;
  3598. begin
  3599. for I := 0 to ElDef.AttrDefCount-1 do
  3600. begin
  3601. if FAttrDefIndex[I] <> FAttrTag then // this one wasn't specified
  3602. begin
  3603. AttDef := ElDef.AttrDefs[I];
  3604. case AttDef.Default of
  3605. adDefault, adFixed: begin
  3606. attrData := AllocAttributeData;
  3607. attrData^ := AttDef.Data^;
  3608. if FCanonical then
  3609. attrData^.FIsDefault := False;
  3610. if FNamespaces then
  3611. begin
  3612. if AttDef.IsNamespaceDecl then
  3613. begin
  3614. if attrData^.FColonPos > 0 then
  3615. attrData^.FPrefix := FStdPrefix_xmlns;
  3616. attrData^.FNsUri := FStdUri_xmlns;
  3617. if (not AddBinding(attrData)) and FCanonical then
  3618. Dec(FAttrCount);
  3619. end
  3620. else if attrData^.FColonPos > 0 then
  3621. begin
  3622. attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
  3623. Inc(FPrefixedAttrs);
  3624. end;
  3625. end;
  3626. end;
  3627. end;
  3628. end;
  3629. end;
  3630. end;
  3631. function TXMLTextReader.AddBinding(attrData: PNodeData): Boolean;
  3632. var
  3633. nsUri, Pfx: PHashItem;
  3634. begin
  3635. nsUri := FNameTable.FindOrAdd(attrData^.FValueStr);
  3636. if attrData^.FColonPos > 0 then
  3637. Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
  3638. else
  3639. Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
  3640. { 'xml' is allowed to be bound to the correct namespace }
  3641. if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
  3642. (Pfx = FStdPrefix_xmlns) or
  3643. (nsUri = FStduri_xmlns) then
  3644. begin
  3645. if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
  3646. DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
  3647. else
  3648. DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
  3649. end;
  3650. if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
  3651. DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
  3652. Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsUri);
  3653. if Result then
  3654. FNSHelper.BindPrefix(nsUri, Pfx);
  3655. end;
  3656. procedure TXMLTextReader.ProcessNamespaceAtts;
  3657. var
  3658. I, J: Integer;
  3659. Pfx, AttrName: PHashItem;
  3660. attrData: PNodeData;
  3661. b: TBinding;
  3662. begin
  3663. FNsAttHash.Init(FPrefixedAttrs);
  3664. for I := 1 to FAttrCount do
  3665. begin
  3666. attrData := @FNodeStack[FNesting+i];
  3667. if (attrData^.FColonPos < 1) or Assigned(attrData^.FNsUri) then
  3668. Continue;
  3669. Pfx := attrData^.FPrefix;
  3670. b := TBinding(Pfx^.Data);
  3671. if not (Assigned(b) and Assigned (b.uri) and (b.uri^.Key <> '')) then
  3672. DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
  3673. { detect duplicates }
  3674. J := attrData^.FColonPos+1;
  3675. AttrName := attrData^.FQName;
  3676. if FNsAttHash.Locate(b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
  3677. DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
  3678. attrData^.FNsUri := b.uri;
  3679. end;
  3680. end;
  3681. function TXMLTextReader.ParseExternalID(out SysID, PubID: XMLString; // [75]
  3682. SysIdOptional: Boolean): Boolean;
  3683. var
  3684. I: Integer;
  3685. wc: WideChar;
  3686. begin
  3687. Result := False;
  3688. if FSource.Matches('SYSTEM') then
  3689. SysIdOptional := False
  3690. else if FSource.Matches('PUBLIC') then
  3691. begin
  3692. ExpectWhitespace;
  3693. ParseLiteral(FValue, ltPubid, True);
  3694. SetString(PubID, FValue.Buffer, FValue.Length);
  3695. for I := 1 to Length(PubID) do
  3696. begin
  3697. wc := PubID[I];
  3698. if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
  3699. FatalError('Illegal Public ID literal', -1);
  3700. end;
  3701. end
  3702. else
  3703. Exit;
  3704. if SysIdOptional then
  3705. SkipWhitespace
  3706. else
  3707. ExpectWhitespace;
  3708. if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
  3709. SetString(SysID, FValue.Buffer, FValue.Length);
  3710. Result := True;
  3711. end;
  3712. procedure TXMLTextReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  3713. var
  3714. L, StartPos, EndPos: Integer;
  3715. Entity: TEntityDecl;
  3716. begin
  3717. L := Length(attrData^.FValueStr);
  3718. case AttrDef.DataType of
  3719. dtId: begin
  3720. if not AddID(attrData) then
  3721. DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
  3722. end;
  3723. dtIdRef, dtIdRefs: begin
  3724. StartPos := 1;
  3725. while StartPos <= L do
  3726. begin
  3727. EndPos := StartPos;
  3728. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3729. Inc(EndPos);
  3730. if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
  3731. AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
  3732. StartPos := EndPos + 1;
  3733. end;
  3734. end;
  3735. dtEntity, dtEntities: begin
  3736. StartPos := 1;
  3737. while StartPos <= L do
  3738. begin
  3739. EndPos := StartPos;
  3740. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3741. Inc(EndPos);
  3742. Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
  3743. if (Entity = nil) or (Entity.FNotationName = '') then
  3744. ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
  3745. StartPos := EndPos + 1;
  3746. end;
  3747. end;
  3748. end;
  3749. end;
  3750. procedure TXMLTextReader.ValidateDTD;
  3751. var
  3752. I: Integer;
  3753. begin
  3754. if Assigned(FForwardRefs) then
  3755. begin
  3756. for I := 0 to FForwardRefs.Count-1 do
  3757. with PForwardRef(FForwardRefs[I])^ do
  3758. if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
  3759. DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
  3760. end;
  3761. end;
  3762. function TXMLTextReader.AddId(aNodeData: PNodeData): Boolean;
  3763. var
  3764. e: PHashItem;
  3765. begin
  3766. if FIDMap = nil then
  3767. FIDMap := THashTable.Create(256, False);
  3768. e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
  3769. Result := not Result;
  3770. if Result then
  3771. aNodeData^.FIDEntry := e;
  3772. end;
  3773. function TXMLTextReader.AllocAttributeData: PNodeData;
  3774. begin
  3775. Result := AllocNodeData(FNesting + FAttrCount + 1);
  3776. Result^.FNodeType := ntAttribute;
  3777. Result^.FIsDefault := False;
  3778. Inc(FAttrCount);
  3779. end;
  3780. function TXMLTextReader.AllocNodeData(AIndex: Integer): PNodeData;
  3781. begin
  3782. {make sure we have an extra slot to place child text/comment/etc}
  3783. if AIndex >= Length(FNodeStack)-1 then
  3784. SetLength(FNodeStack, AIndex * 2 + 2);
  3785. Result := @FNodeStack[AIndex];
  3786. Result^.FNext := nil;
  3787. Result^.FPrefix := nil;
  3788. Result^.FNsUri := nil;
  3789. Result^.FIDEntry := nil;
  3790. Result^.FValueStart := nil;
  3791. Result^.FValueLength := 0;
  3792. end;
  3793. procedure TXMLTextReader.AllocAttributeValueChunk(var APrev: PNodeData; Offset: Integer);
  3794. var
  3795. chunk: PNodeData;
  3796. begin
  3797. { when parsing DTD, don't take ownership of allocated data }
  3798. chunk := FFreeAttrChunk;
  3799. if Assigned(chunk) and (FState <> rsDTD) then
  3800. begin
  3801. FFreeAttrChunk := chunk^.FNext;
  3802. chunk^.FNext := nil;
  3803. end
  3804. else { no free chunks, create a new one }
  3805. chunk := AllocMem(sizeof(TNodeData));
  3806. APrev^.FNext := chunk;
  3807. APrev := chunk;
  3808. { assume text node, for entity refs it is overridden later }
  3809. chunk^.FNodeType := ntText;
  3810. chunk^.FQName := nil;
  3811. chunk^.FColonPos := -1;
  3812. { without PWideChar typecast and in $T-, FPC treats '@' result as PAnsiChar... }
  3813. SetString(chunk^.FValueStr, PWideChar(@FValue.Buffer[Offset]), FValue.Length-Offset);
  3814. end;
  3815. procedure TXMLTextReader.CleanupAttributes;
  3816. var
  3817. i: Integer;
  3818. begin
  3819. {cleanup only specified attributes; default ones are owned by DTD}
  3820. for i := 1 to FSpecifiedAttrs do
  3821. CleanupAttribute(@FNodeStack[FNesting+i]);
  3822. FAttrCleanupFlag := False;
  3823. end;
  3824. procedure TXMLTextReader.CleanupAttribute(aNode: PNodeData);
  3825. var
  3826. chunk: PNodeData;
  3827. begin
  3828. if Assigned(aNode^.FNext) then
  3829. begin
  3830. chunk := aNode^.FNext;
  3831. while Assigned(chunk^.FNext) do
  3832. chunk := chunk^.FNext;
  3833. chunk^.FNext := FFreeAttrChunk;
  3834. FFreeAttrChunk := aNode^.FNext;
  3835. aNode^.FNext := nil;
  3836. end;
  3837. end;
  3838. procedure TXMLTextReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  3839. begin
  3840. FCurrNode := @FNodeStack[FNesting];
  3841. FCurrNode^.FNodeType := typ;
  3842. FCurrNode^.FQName := AName;
  3843. FCurrNode^.FColonPos := -1;
  3844. FCurrNode^.FValueStart := FValue.Buffer;
  3845. FCurrNode^.FValueLength := FValue.Length;
  3846. end;
  3847. function TXMLTextReader.SetupFakeLF(nextstate: TXMLToken): Boolean;
  3848. begin
  3849. FValue.Buffer[0] := #10;
  3850. FValue.Length := 1;
  3851. SetNodeInfoWithValue(ntWhitespace,nil);
  3852. FNext := nextstate;
  3853. Result := True;
  3854. end;
  3855. procedure TXMLTextReader.PushVC(aElDef: TElementDecl);
  3856. begin
  3857. Inc(FValidatorNesting);
  3858. if FValidatorNesting >= Length(FValidators) then
  3859. SetLength(FValidators, FValidatorNesting * 2);
  3860. with FValidators[FValidatorNesting] do
  3861. begin
  3862. FElementDef := aElDef;
  3863. FCurCP := nil;
  3864. FFailed := False;
  3865. FContentType := ctAny;
  3866. FSaViolation := False;
  3867. if Assigned(aElDef) then
  3868. begin
  3869. FContentType := aElDef.ContentType;
  3870. FSaViolation := FStandalone and aElDef.ExternallyDeclared;
  3871. end;
  3872. end;
  3873. end;
  3874. procedure TXMLTextReader.PopElement;
  3875. begin
  3876. if FNamespaces then
  3877. FNSHelper.PopScope;
  3878. if (FNesting = 0) and (not FFragmentMode) then
  3879. FState := rsEpilog;
  3880. FCurrNode := @FNodeStack[FNesting];
  3881. FNext := xtText;
  3882. end;
  3883. { TElementValidator }
  3884. function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
  3885. var
  3886. Next: TContentParticle;
  3887. begin
  3888. Result := True;
  3889. // if element is not declared, non-validity has been already reported, no need to report again...
  3890. if Assigned(Def) and Assigned(FElementDef) then
  3891. begin
  3892. case FElementDef.ContentType of
  3893. ctEmpty: Result := False;
  3894. ctChildren, ctMixed: begin
  3895. if FFailed then // if already detected a mismatch, don't waste time
  3896. Exit;
  3897. if FCurCP = nil then
  3898. Next := FElementDef.RootCP.FindFirst(Def)
  3899. else
  3900. Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
  3901. Result := Assigned(Next);
  3902. if Result then
  3903. FCurCP := Next
  3904. else
  3905. FFailed := True; // used to prevent extra error at the end of element
  3906. end;
  3907. // ctAny, ctUndeclared: returns True by default
  3908. end;
  3909. end;
  3910. end;
  3911. function TElementValidator.Incomplete: Boolean;
  3912. begin
  3913. if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
  3914. begin
  3915. if FCurCP <> nil then
  3916. Result := FCurCP.MoreRequired(0) { arg ignored here }
  3917. else
  3918. Result := FElementDef.RootCP.IsRequired;
  3919. end
  3920. else
  3921. Result := False;
  3922. end;
  3923. { plain calls }
  3924. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
  3925. var
  3926. Reader: TXMLTextReader;
  3927. ldr: TLoader;
  3928. begin
  3929. ADoc := TXMLDocument.Create;
  3930. Reader := TXMLTextReader.Create(f, ADoc.Names);
  3931. try
  3932. ldr.ProcessXML(ADoc,Reader);
  3933. finally
  3934. Reader.Free;
  3935. end;
  3936. end;
  3937. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  3938. var
  3939. Reader: TXMLTextReader;
  3940. ldr: TLoader;
  3941. begin
  3942. ADoc := TXMLDocument.Create;
  3943. Reader := TXMLTextReader.Create(f, ABaseURI, ADoc.Names);
  3944. try
  3945. ldr.ProcessXML(ADoc, Reader);
  3946. finally
  3947. Reader.Free;
  3948. end;
  3949. end;
  3950. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream);
  3951. begin
  3952. ReadXMLFile(ADoc, f, 'stream:');
  3953. end;
  3954. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
  3955. var
  3956. FileStream: TStream;
  3957. begin
  3958. ADoc := nil;
  3959. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  3960. try
  3961. ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
  3962. finally
  3963. FileStream.Free;
  3964. end;
  3965. end;
  3966. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
  3967. var
  3968. Reader: TXMLTextReader;
  3969. ldr: TLoader;
  3970. begin
  3971. Reader := TXMLTextReader.Create(f, AParentNode.OwnerDocument.Names);
  3972. try
  3973. ldr.ProcessFragment(AParentNode, Reader);
  3974. finally
  3975. Reader.Free;
  3976. end;
  3977. end;
  3978. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String);
  3979. var
  3980. Reader: TXMLTextReader;
  3981. ldr: TLoader;
  3982. begin
  3983. Reader := TXMLTextReader.Create(f, ABaseURI, AParentNode.OwnerDocument.Names);
  3984. try
  3985. ldr.ProcessFragment(AParentNode, Reader);
  3986. finally
  3987. Reader.Free;
  3988. end;
  3989. end;
  3990. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream);
  3991. begin
  3992. ReadXMLFragment(AParentNode, f, 'stream:');
  3993. end;
  3994. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  3995. var
  3996. Stream: TStream;
  3997. begin
  3998. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  3999. try
  4000. ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
  4001. finally
  4002. Stream.Free;
  4003. end;
  4004. end;
  4005. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
  4006. var
  4007. Reader: TXMLTextReader;
  4008. ldr: TLoader;
  4009. begin
  4010. ADoc := TXMLDocument.Create;
  4011. Reader := TXMLTextReader.Create(f, ADoc.Names);
  4012. try
  4013. ldr.ProcessDTD(ADoc,Reader);
  4014. finally
  4015. Reader.Free;
  4016. end;
  4017. end;
  4018. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  4019. var
  4020. Reader: TXMLTextReader;
  4021. ldr: TLoader;
  4022. begin
  4023. ADoc := TXMLDocument.Create;
  4024. Reader := TXMLTextReader.Create(f, ABaseURI, ADoc.Names);
  4025. try
  4026. ldr.ProcessDTD(ADoc,Reader);
  4027. finally
  4028. Reader.Free;
  4029. end;
  4030. end;
  4031. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream);
  4032. begin
  4033. ReadDTDFile(ADoc, f, 'stream:');
  4034. end;
  4035. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
  4036. var
  4037. Stream: TStream;
  4038. begin
  4039. ADoc := nil;
  4040. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  4041. try
  4042. ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
  4043. finally
  4044. Stream.Free;
  4045. end;
  4046. end;
  4047. end.