xmlread.pp 108 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242324332443245324632473248324932503251325232533254325532563257325832593260326132623263326432653266326732683269327032713272327332743275327632773278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890
  1. {
  2. This file is part of the Free Component Library
  3. XML reading routines.
  4. Copyright (c) 1999-2000 by Sebastian Guenther, [email protected]
  5. Modified in 2006 by Sergei Gorelkin, [email protected]
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  12. unit XMLRead;
  13. {$ifdef fpc}
  14. {$MODE objfpc}{$H+}
  15. {$endif}
  16. interface
  17. uses
  18. SysUtils, Classes, DOM;
  19. type
  20. TErrorSeverity = (esWarning, esError, esFatal);
  21. EXMLReadError = class(Exception)
  22. private
  23. FSeverity: TErrorSeverity;
  24. FErrorMessage: string;
  25. FLine: Integer;
  26. FLinePos: Integer;
  27. public
  28. property Severity: TErrorSeverity read FSeverity;
  29. property ErrorMessage: string read FErrorMessage;
  30. property Line: Integer read FLine;
  31. property LinePos: Integer read FLinePos;
  32. end;
  33. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  34. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text); overload;
  35. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream); overload;
  36. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  37. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String); overload;
  38. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text); overload;
  39. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream); overload;
  40. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String); overload;
  41. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
  42. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
  43. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream); overload;
  44. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String); overload;
  45. type
  46. TDOMParseOptions = class(TObject)
  47. private
  48. FValidate: Boolean;
  49. FPreserveWhitespace: Boolean;
  50. FExpandEntities: Boolean;
  51. FIgnoreComments: Boolean;
  52. FCDSectionsAsText: Boolean;
  53. FResolveExternals: Boolean;
  54. FNamespaces: Boolean;
  55. FDisallowDoctype: Boolean;
  56. FCanonical: Boolean;
  57. FMaxChars: Cardinal;
  58. function GetCanonical: Boolean;
  59. procedure SetCanonical(aValue: Boolean);
  60. public
  61. property Validate: Boolean read FValidate write FValidate;
  62. property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
  63. property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
  64. property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
  65. property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
  66. property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
  67. property Namespaces: Boolean read FNamespaces write FNamespaces;
  68. property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
  69. property MaxChars: Cardinal read FMaxChars write FMaxChars;
  70. property CanonicalForm: Boolean read GetCanonical write SetCanonical;
  71. end;
  72. // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
  73. TXMLContextAction = (
  74. xaAppendAsChildren = 1,
  75. xaReplaceChildren,
  76. xaInsertBefore,
  77. xaInsertAfter,
  78. xaReplace);
  79. TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
  80. TXMLInputSource = class(TObject)
  81. private
  82. FStream: TStream;
  83. FStringData: string;
  84. FBaseURI: WideString;
  85. FSystemID: WideString;
  86. FPublicID: WideString;
  87. // FEncoding: string;
  88. public
  89. constructor Create(AStream: TStream); overload;
  90. constructor Create(const AStringData: string); overload;
  91. property Stream: TStream read FStream;
  92. property StringData: string read FStringData;
  93. property BaseURI: WideString read FBaseURI write FBaseURI;
  94. property SystemID: WideString read FSystemID write FSystemID;
  95. property PublicID: WideString read FPublicID write FPublicID;
  96. // property Encoding: string read FEncoding write FEncoding;
  97. end;
  98. TDOMParser = class(TObject)
  99. private
  100. FOptions: TDOMParseOptions;
  101. FOnError: TXMLErrorEvent;
  102. public
  103. constructor Create;
  104. destructor Destroy; override;
  105. procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  106. procedure ParseUri(const URI: WideString; out ADoc: TXMLDocument);
  107. function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
  108. Action: TXMLContextAction): TDOMNode;
  109. property Options: TDOMParseOptions read FOptions;
  110. property OnError: TXMLErrorEvent read FOnError write FOnError;
  111. end;
  112. TDecoder = record
  113. Context: Pointer;
  114. Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
  115. Cleanup: procedure(Context: Pointer); stdcall;
  116. end;
  117. TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
  118. procedure RegisterDecoder(Proc: TGetDecoderProc);
  119. // =======================================================
  120. implementation
  121. uses
  122. UriParser, xmlutils, dtdmodel;
  123. const
  124. PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
  125. '-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
  126. '#', '@', '$', '_', '%'];
  127. type
  128. TDOMDocumentTypeEx = class(TDOMDocumentType);
  129. TDOMTopNodeEx = class(TDOMNode_TopLevel);
  130. TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
  131. TLocation = xmlutils.TLocation;
  132. TDOMEntityEx = class(TDOMEntity);
  133. TXMLReader = class;
  134. TXMLCharSource = class(TObject)
  135. private
  136. FBuf: PWideChar;
  137. FBufEnd: PWideChar;
  138. FReader: TXMLReader;
  139. FParent: TXMLCharSource;
  140. FEntity: TEntityDecl;
  141. FLineNo: Integer;
  142. LFPos: PWideChar;
  143. FXML11Rules: Boolean;
  144. FSystemID: WideString;
  145. FCharCount: Cardinal;
  146. FStartNesting: Integer;
  147. FXMLVersion: TXMLVersion;
  148. FXMLEncoding: WideString;
  149. function GetSystemID: WideString;
  150. protected
  151. function Reload: Boolean; virtual;
  152. public
  153. Kind: TXMLSourceKind;
  154. constructor Create(const AData: WideString);
  155. procedure NextChar;
  156. procedure NewLine; virtual;
  157. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  158. wsflag: PBoolean = nil): WideChar; virtual;
  159. procedure Initialize; virtual;
  160. function SetEncoding(const AEncoding: string): Boolean; virtual;
  161. function Matches(const arg: WideString): Boolean;
  162. property SystemID: WideString read GetSystemID write FSystemID;
  163. end;
  164. TXMLDecodingSource = class(TXMLCharSource)
  165. private
  166. FCharBuf: PChar;
  167. FCharBufEnd: PChar;
  168. FBufStart: PWideChar;
  169. FDecoder: TDecoder;
  170. FHasBOM: Boolean;
  171. FFixedUCS2: string;
  172. FBufSize: Integer;
  173. procedure DecodingError(const Msg: string);
  174. protected
  175. function Reload: Boolean; override;
  176. procedure FetchData; virtual;
  177. public
  178. procedure AfterConstruction; override;
  179. destructor Destroy; override;
  180. function SetEncoding(const AEncoding: string): Boolean; override;
  181. procedure NewLine; override;
  182. function SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  183. wsflag: PBoolean = nil): WideChar; override;
  184. procedure Initialize; override;
  185. end;
  186. TXMLStreamInputSource = class(TXMLDecodingSource)
  187. private
  188. FAllocated: PChar;
  189. FStream: TStream;
  190. FCapacity: Integer;
  191. FOwnStream: Boolean;
  192. FEof: Boolean;
  193. public
  194. constructor Create(AStream: TStream; AOwnStream: Boolean);
  195. destructor Destroy; override;
  196. procedure FetchData; override;
  197. end;
  198. TXMLFileInputSource = class(TXMLDecodingSource)
  199. private
  200. FFile: ^Text;
  201. FString: string;
  202. FTmp: string;
  203. public
  204. constructor Create(var AFile: Text);
  205. procedure FetchData; override;
  206. end;
  207. PForwardRef = ^TForwardRef;
  208. TForwardRef = record
  209. Value: WideString;
  210. Loc: TLocation;
  211. end;
  212. TElementValidator = object
  213. FElementDef: TElementDecl;
  214. FCurCP: TContentParticle;
  215. FFailed: Boolean;
  216. FSaViolation: Boolean;
  217. FContentType: TElementContentType; // =ctAny when FElementDef is nil
  218. function IsElementAllowed(Def: TElementDecl): Boolean;
  219. function Incomplete: Boolean;
  220. end;
  221. TNodeDataDynArray = array of TNodeData;
  222. TValidatorDynArray = array of TElementValidator;
  223. TXMLReadState = (rsProlog, rsDTD, rsAfterDTD, rsRoot, rsEpilog);
  224. TCheckNameFlags = set of (cnOptional, cnToken);
  225. TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement,
  226. xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
  227. xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity);
  228. TLiteralType = (ltPlain, ltPubid, ltEntity);
  229. TXMLReader = class
  230. private
  231. FSource: TXMLCharSource;
  232. FNameTable: THashTable;
  233. FCtrl: TDOMParser;
  234. FXML11: Boolean;
  235. FState: TXMLReadState;
  236. FHavePERefs: Boolean;
  237. FInsideDecl: Boolean;
  238. FValue: TWideCharBuf;
  239. FEntityValue: TWideCharBuf;
  240. FName: TWideCharBuf;
  241. FTokenStart: TLocation;
  242. FStandalone: Boolean;
  243. FNamePages: PByteArray;
  244. FDocType: TDTDModel;
  245. FPEMap: THashTable;
  246. FForwardRefs: TFPList;
  247. FDTDStartPos: PWideChar;
  248. FIntSubset: TWideCharBuf;
  249. FAttrTag: Cardinal;
  250. FDTDProcessed: Boolean;
  251. FFragmentMode: Boolean;
  252. FNext: TXMLToken;
  253. FCurrEntity: TEntityDecl;
  254. FIDMap: THashTable;
  255. FNSHelper: TNSSupport;
  256. FNsAttHash: TDblHashArray;
  257. FStdPrefix_xml: PHashItem;
  258. FStdPrefix_xmlns: PHashItem;
  259. FStdUri_xml: PHashItem;
  260. FStdUri_xmlns: PHashItem;
  261. FColonPos: Integer;
  262. FValidate: Boolean; // parsing options, copy of FCtrl.Options
  263. FPreserveWhitespace: Boolean;
  264. FExpandEntities: Boolean;
  265. FIgnoreComments: Boolean;
  266. FCDSectionsAsText: Boolean;
  267. FResolveExternals: Boolean;
  268. FNamespaces: Boolean;
  269. FDisallowDoctype: Boolean;
  270. FCanonical: Boolean;
  271. FMaxChars: Cardinal;
  272. procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
  273. procedure Initialize(ASource: TXMLCharSource);
  274. procedure NSPrepare;
  275. procedure EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  276. function ContextPush(AEntity: TEntityDecl): Boolean;
  277. function ContextPop(Forced: Boolean = False): Boolean;
  278. procedure XML11_BuildTables;
  279. function ParseQuantity: TCPQuant;
  280. procedure StoreLocation(out Loc: TLocation);
  281. function ValidateAttrSyntax(AttrDef: TAttributeDef; const aValue: WideString): Boolean;
  282. procedure ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  283. procedure AddForwardRef(Buf: PWideChar; Length: Integer);
  284. procedure ClearForwardRefs;
  285. procedure ValidateIdRefs;
  286. procedure StandaloneError(LineOffs: Integer = 0);
  287. procedure CallErrorHandler(E: EXMLReadError);
  288. function FindOrCreateElDef: TElementDecl;
  289. function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
  290. procedure CheckMaxChars(ToAdd: Cardinal);
  291. function AllocNodeData(AIndex: Integer): PNodeData;
  292. function AllocAttributeData: PNodeData;
  293. function AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
  294. procedure CleanupAttribute(aNode: PNodeData);
  295. procedure CleanupAttributes;
  296. procedure SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  297. function AddId(aNodeData: PNodeData): Boolean;
  298. protected
  299. FNesting: Integer;
  300. FCurrNode: PNodeData;
  301. FAttrCount: Integer;
  302. FPrefixedAttrs: Integer;
  303. FSpecifiedAttrs: Integer;
  304. FNodeStack: TNodeDataDynArray;
  305. FValidatorNesting: Integer;
  306. FValidators: TValidatorDynArray;
  307. FAttrChunks: TFPList;
  308. FFreeAttrChunk: PNodeData;
  309. FAttrCleanupFlag: Boolean;
  310. procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
  311. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  312. const ErrPos: TLocation); overload;
  313. procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
  314. const args: array of const; const ErrPos: TLocation); overload;
  315. procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
  316. procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
  317. procedure FatalError(Expected: WideChar); overload;
  318. function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
  319. function SkipS(required: Boolean = False): Boolean;
  320. procedure ExpectWhitespace;
  321. procedure ExpectString(const s: String);
  322. procedure ExpectChar(wc: WideChar);
  323. function CheckForChar(c: WideChar): Boolean;
  324. procedure RaiseNameNotFound;
  325. function CheckName(aFlags: TCheckNameFlags = []): Boolean;
  326. procedure CheckNCName;
  327. function ExpectName: WideString; // [5]
  328. function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  329. Required: Boolean): Boolean;
  330. procedure ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean); // [10]
  331. procedure ParseComment(discard: Boolean); // [15]
  332. procedure ParsePI; // [16]
  333. function CreatePINode: TDOMNode;
  334. procedure ParseXmlOrTextDecl(TextDecl: Boolean);
  335. procedure ExpectEq;
  336. procedure ParseDoctypeDecl; // [28]
  337. procedure ParseMarkupDecl; // [29]
  338. procedure ParseStartTag; // [39]
  339. procedure ParseEndTag; // [42]
  340. function DoStartElement: TDOMElement;
  341. procedure HandleEntityStart;
  342. procedure HandleEntityEnd;
  343. procedure ResolveEntity;
  344. procedure DoStartEntity;
  345. procedure ParseAttribute(ElDef: TElementDecl);
  346. procedure ParseContent(parent: TDOMNode_WithChildren); // [43]
  347. function Read: Boolean;
  348. function ResolvePredefined: Boolean;
  349. function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
  350. procedure LoadEntity(AEntity: TEntityDecl);
  351. function PrefetchEntity(AEntity: TEntityDecl): Boolean;
  352. procedure StartPE;
  353. function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  354. function ParseExternalID(out SysID, PubID: WideString; // [75]
  355. SysIdOptional: Boolean): Boolean;
  356. procedure BadPENesting(S: TErrorSeverity = esError);
  357. procedure ParseEntityDecl;
  358. procedure ParseAttlistDecl;
  359. procedure ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject);
  360. procedure ParseElementDecl;
  361. procedure ParseNotationDecl;
  362. function ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
  363. procedure ProcessDefaultAttributes(ElDef: TElementDecl);
  364. procedure ProcessNamespaceAtts;
  365. function AddBinding(attrData: PNodeData): Boolean;
  366. procedure PushVC(aElDef: TElementDecl);
  367. procedure PopVC;
  368. procedure ValidateDTD;
  369. procedure ValidateCurrentNode;
  370. procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
  371. procedure ValidationErrorWithName(const Msg: string; LineOffs: Integer = -1);
  372. procedure DTDReloadHook;
  373. procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  374. function DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
  375. procedure DoNotationDecl(const aName, aPubID, aSysID: WideString);
  376. public
  377. doc: TDOMDocument;
  378. constructor Create; overload;
  379. constructor Create(AParser: TDOMParser); overload;
  380. destructor Destroy; override;
  381. procedure ProcessXML(ASource: TXMLCharSource); // [1]
  382. procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
  383. procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
  384. end;
  385. const
  386. NullLocation: TLocation = (Line: 0; LinePos: 0);
  387. { Decoders }
  388. var
  389. Decoders: array of TGetDecoderProc;
  390. procedure RegisterDecoder(Proc: TGetDecoderProc);
  391. var
  392. L: Integer;
  393. begin
  394. L := Length(Decoders);
  395. SetLength(Decoders, L+1);
  396. Decoders[L] := Proc;
  397. end;
  398. function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
  399. var
  400. I: Integer;
  401. begin
  402. Result := False;
  403. for I := 0 to High(Decoders) do
  404. if Decoders[I](AEncoding, Decoder) then
  405. begin
  406. Result := True;
  407. Exit;
  408. end;
  409. end;
  410. function Is_8859_1(const AEncoding: string): Boolean;
  411. begin
  412. Result := SameText(AEncoding, 'ISO-8859-1') or
  413. SameText(AEncoding, 'ISO_8859-1') or
  414. SameText(AEncoding, 'latin1') or
  415. SameText(AEncoding, 'iso-ir-100') or
  416. SameText(AEncoding, 'l1') or
  417. SameText(AEncoding, 'IBM819') or
  418. SameText(AEncoding, 'CP819') or
  419. SameText(AEncoding, 'csISOLatin1') or
  420. // This one is not in character-sets.txt, but was used in FPC documentation,
  421. // and still being used in fcl-registry package
  422. SameText(AEncoding, 'ISO8859-1');
  423. end;
  424. { TDOMParseOptions }
  425. function TDOMParseOptions.GetCanonical: Boolean;
  426. begin
  427. Result := FCanonical and FExpandEntities and FCDSectionsAsText and
  428. { (not normalizeCharacters) and } FNamespaces and
  429. { namespaceDeclarations and } FPreserveWhitespace;
  430. end;
  431. procedure TDOMParseOptions.SetCanonical(aValue: Boolean);
  432. begin
  433. FCanonical := aValue;
  434. if aValue then
  435. begin
  436. FExpandEntities := True;
  437. FCDSectionsAsText := True;
  438. FNamespaces := True;
  439. FPreserveWhitespace := True;
  440. { normalizeCharacters := False; }
  441. { namespaceDeclarations := True; }
  442. { wellFormed := True; }
  443. end;
  444. end;
  445. { TXMLInputSource }
  446. constructor TXMLInputSource.Create(AStream: TStream);
  447. begin
  448. inherited Create;
  449. FStream := AStream;
  450. end;
  451. constructor TXMLInputSource.Create(const AStringData: string);
  452. begin
  453. inherited Create;
  454. FStringData := AStringData;
  455. end;
  456. { TDOMParser }
  457. constructor TDOMParser.Create;
  458. begin
  459. FOptions := TDOMParseOptions.Create;
  460. end;
  461. destructor TDOMParser.Destroy;
  462. begin
  463. FOptions.Free;
  464. inherited Destroy;
  465. end;
  466. procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
  467. var
  468. InputSrc: TXMLCharSource;
  469. begin
  470. with TXMLReader.Create(Self) do
  471. try
  472. ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
  473. ProcessXML(InputSrc)
  474. finally
  475. ADoc := TXMLDocument(doc);
  476. Free;
  477. end;
  478. end;
  479. procedure TDOMParser.ParseUri(const URI: WideString; out ADoc: TXMLDocument);
  480. var
  481. Src: TXMLCharSource;
  482. begin
  483. ADoc := nil;
  484. with TXMLReader.Create(Self) do
  485. try
  486. if ResolveResource(URI, '', '', Src) then
  487. ProcessXML(Src)
  488. else
  489. DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
  490. finally
  491. ADoc := TXMLDocument(doc);
  492. Free;
  493. end;
  494. end;
  495. function TDOMParser.ParseWithContext(Src: TXMLInputSource;
  496. Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
  497. var
  498. InputSrc: TXMLCharSource;
  499. Frag: TDOMDocumentFragment;
  500. node: TDOMNode;
  501. begin
  502. if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
  503. node := Context.ParentNode
  504. else
  505. node := Context;
  506. // TODO: replacing document isn't yet supported
  507. if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
  508. raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
  509. if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
  510. raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
  511. with TXMLReader.Create(Self) do
  512. try
  513. ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
  514. Frag := Context.OwnerDocument.CreateDocumentFragment;
  515. try
  516. ProcessFragment(InputSrc, Frag);
  517. Result := Frag.FirstChild;
  518. case Action of
  519. xaAppendAsChildren: Context.AppendChild(Frag);
  520. xaReplaceChildren: begin
  521. Context.TextContent := ''; // removes children
  522. Context.ReplaceChild(Frag, Context.FirstChild);
  523. end;
  524. xaInsertBefore: node.InsertBefore(Frag, Context);
  525. xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
  526. xaReplace: node.ReplaceChild(Frag, Context);
  527. end;
  528. finally
  529. Frag.Free;
  530. end;
  531. finally
  532. Free;
  533. end;
  534. end;
  535. { TXMLCharSource }
  536. constructor TXMLCharSource.Create(const AData: WideString);
  537. begin
  538. inherited Create;
  539. FLineNo := 1;
  540. FBuf := PWideChar(AData);
  541. FBufEnd := FBuf + Length(AData);
  542. LFPos := FBuf-1;
  543. FCharCount := Length(AData);
  544. end;
  545. procedure TXMLCharSource.Initialize;
  546. begin
  547. end;
  548. function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
  549. begin
  550. Result := True; // always succeed
  551. end;
  552. function TXMLCharSource.GetSystemID: WideString;
  553. begin
  554. if FSystemID <> '' then
  555. Result := FSystemID
  556. else if Assigned(FParent) then
  557. Result := FParent.SystemID
  558. else
  559. Result := '';
  560. end;
  561. function TXMLCharSource.Reload: Boolean;
  562. begin
  563. Result := False;
  564. end;
  565. procedure TXMLCharSource.NewLine;
  566. begin
  567. Inc(FLineNo);
  568. LFPos := FBuf;
  569. end;
  570. function TXMLCharSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  571. wsflag: PBoolean): WideChar;
  572. var
  573. old: PWideChar;
  574. nonws: Boolean;
  575. begin
  576. old := FBuf;
  577. nonws := False;
  578. repeat
  579. if FBuf^ = #10 then
  580. NewLine;
  581. if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
  582. Break;
  583. if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
  584. nonws := True;
  585. Inc(FBuf);
  586. until False;
  587. Result := FBuf^;
  588. BufAppendChunk(ToFill, old, FBuf);
  589. if Assigned(wsflag) then
  590. wsflag^ := wsflag^ or nonws;
  591. end;
  592. function TXMLCharSource.Matches(const arg: WideString): Boolean;
  593. begin
  594. Result := False;
  595. if (FBufEnd >= FBuf + Length(arg)) or Reload then
  596. Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(WideChar));
  597. if Result then
  598. begin
  599. Inc(FBuf, Length(arg));
  600. if FBuf >= FBufEnd then
  601. Reload;
  602. end;
  603. end;
  604. { TXMLDecodingSource }
  605. procedure TXMLDecodingSource.AfterConstruction;
  606. begin
  607. inherited AfterConstruction;
  608. FBufStart := AllocMem(4096);
  609. FBuf := FBufStart;
  610. FBufEnd := FBuf;
  611. LFPos := FBuf-1;
  612. end;
  613. destructor TXMLDecodingSource.Destroy;
  614. begin
  615. FreeMem(FBufStart);
  616. if Assigned(FDecoder.Cleanup) then
  617. FDecoder.Cleanup(FDecoder.Context);
  618. inherited Destroy;
  619. end;
  620. procedure TXMLDecodingSource.FetchData;
  621. begin
  622. end;
  623. procedure TXMLDecodingSource.DecodingError(const Msg: string);
  624. begin
  625. // count line endings to obtain correct error location
  626. while FBuf < FBufEnd do
  627. begin
  628. if (FBuf^ = #10) or (FBuf^ = #13) or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) then
  629. begin
  630. if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
  631. ((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
  632. Inc(FBuf);
  633. LFPos := FBuf;
  634. Inc(FLineNo);
  635. end;
  636. Inc(FBuf);
  637. end;
  638. FReader.FatalError(Msg);
  639. end;
  640. function TXMLDecodingSource.Reload: Boolean;
  641. var
  642. Remainder: PtrInt;
  643. r, inLeft: Cardinal;
  644. rslt: Integer;
  645. begin
  646. if Kind = skInternalSubset then
  647. FReader.DTDReloadHook;
  648. Remainder := FBufEnd - FBuf;
  649. if Remainder > 0 then
  650. Move(FBuf^, FBufStart^, Remainder * sizeof(WideChar));
  651. Dec(LFPos, FBuf-FBufStart);
  652. FBuf := FBufStart;
  653. FBufEnd := FBufStart + Remainder;
  654. repeat
  655. inLeft := FCharBufEnd - FCharBuf;
  656. if inLeft < 4 then // may contain an incomplete char
  657. begin
  658. FetchData;
  659. inLeft := FCharBufEnd - FCharBuf;
  660. if inLeft <= 0 then
  661. Break;
  662. end;
  663. r := FBufStart + FBufSize - FBufEnd;
  664. if r = 0 then
  665. Break;
  666. rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
  667. { Sanity checks: r and inLeft must not increase. }
  668. if inLeft + FCharBuf <= FCharBufEnd then
  669. FCharBuf := FCharBufEnd - inLeft
  670. else
  671. DecodingError('Decoder error: input byte count out of bounds');
  672. if r + FBufEnd <= FBufStart + FBufSize then
  673. FBufEnd := FBufStart + FBufSize - r
  674. else
  675. DecodingError('Decoder error: output char count out of bounds');
  676. if rslt = 0 then
  677. Break
  678. else if rslt < 0 then
  679. DecodingError('Invalid character in input stream')
  680. else
  681. FReader.CheckMaxChars(rslt);
  682. until False;
  683. FBufEnd^ := #0;
  684. Result := FBuf < FBufEnd;
  685. end;
  686. const
  687. XmlSign: array [0..4] of WideChar = ('<', '?', 'x', 'm', 'l');
  688. procedure TXMLDecodingSource.Initialize;
  689. begin
  690. inherited;
  691. FLineNo := 1;
  692. FDecoder.Decode := @Decode_UTF8;
  693. FFixedUCS2 := '';
  694. if FCharBufEnd-FCharBuf > 1 then
  695. begin
  696. if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
  697. begin
  698. FFixedUCS2 := 'UTF-16BE';
  699. FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  700. end
  701. else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
  702. begin
  703. FFixedUCS2 := 'UTF-16LE';
  704. FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
  705. end;
  706. end;
  707. FBufSize := 6; // possible BOM and '<?xml'
  708. Reload;
  709. if FBuf^ = #$FEFF then
  710. begin
  711. FHasBOM := True;
  712. Inc(FBuf);
  713. end;
  714. LFPos := FBuf-1;
  715. if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
  716. begin
  717. FBufSize := 3; // don't decode past XML declaration
  718. Inc(FBuf, Length(XmlSign));
  719. FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
  720. end;
  721. FBufSize := 2047;
  722. if FReader.FXML11 then
  723. FReader.XML11_BuildTables;
  724. end;
  725. function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
  726. var
  727. NewDecoder: TDecoder;
  728. begin
  729. Result := True;
  730. if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
  731. Exit;
  732. if FFixedUCS2 <> '' then
  733. begin
  734. Result := SameText(AEncoding, FFixedUCS2) or
  735. SameText(AEncoding, 'UTF-16') or
  736. SameText(AEncoding, 'unicode');
  737. Exit;
  738. end;
  739. // TODO: must fail when a byte-based stream is labeled as word-based.
  740. // see rmt-e2e-61, it now fails but for a completely different reason.
  741. FillChar(NewDecoder, sizeof(TDecoder), 0);
  742. if Is_8859_1(AEncoding) then
  743. FDecoder.Decode := @Decode_8859_1
  744. else if FindDecoder(AEncoding, NewDecoder) then
  745. FDecoder := NewDecoder
  746. else
  747. Result := False;
  748. end;
  749. procedure TXMLDecodingSource.NewLine;
  750. begin
  751. case FBuf^ of
  752. #10: ;
  753. #13: begin
  754. // Reload trashes the buffer, it should be consumed beforehand
  755. if (FBufEnd >= FBuf+2) or Reload then
  756. begin
  757. if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
  758. Inc(FBuf);
  759. end;
  760. FBuf^ := #10;
  761. end;
  762. #$85, #$2028: if FXML11Rules then
  763. FBuf^ := #10
  764. else
  765. Exit;
  766. else
  767. Exit;
  768. end;
  769. Inc(FLineNo);
  770. LFPos := FBuf;
  771. end;
  772. { TXMLStreamInputSource }
  773. const
  774. Slack = 16;
  775. constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
  776. begin
  777. FStream := AStream;
  778. FCapacity := 4096;
  779. GetMem(FAllocated, FCapacity+Slack);
  780. FCharBuf := FAllocated+(Slack-4);
  781. FCharBufEnd := FCharBuf;
  782. FOwnStream := AOwnStream;
  783. FetchData;
  784. end;
  785. destructor TXMLStreamInputSource.Destroy;
  786. begin
  787. FreeMem(FAllocated);
  788. if FOwnStream then
  789. FStream.Free;
  790. inherited Destroy;
  791. end;
  792. procedure TXMLStreamInputSource.FetchData;
  793. var
  794. Remainder, BytesRead: Integer;
  795. OldBuf: PChar;
  796. begin
  797. Assert(FCharBufEnd - FCharBuf < Slack-4);
  798. if FEof then
  799. Exit;
  800. OldBuf := FCharBuf;
  801. Remainder := FCharBufEnd - FCharBuf;
  802. if Remainder < 0 then
  803. Remainder := 0;
  804. FCharBuf := FAllocated+Slack-4-Remainder;
  805. if Remainder > 0 then
  806. Move(OldBuf^, FCharBuf^, Remainder);
  807. BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
  808. if BytesRead < FCapacity then
  809. FEof := True;
  810. FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
  811. { Null-termination has been removed:
  812. 1) Built-in decoders don't need it because they respect the buffer length.
  813. 2) It was causing unaligned access errors on ARM CPUs.
  814. }
  815. //PWideChar(FCharBufEnd)^ := #0;
  816. end;
  817. { TXMLFileInputSource }
  818. constructor TXMLFileInputSource.Create(var AFile: Text);
  819. begin
  820. FFile := @AFile;
  821. SystemID := FilenameToURI(TTextRec(AFile).Name);
  822. FetchData;
  823. end;
  824. procedure TXMLFileInputSource.FetchData;
  825. var
  826. Remainder: Integer;
  827. begin
  828. if not Eof(FFile^) then
  829. begin
  830. Remainder := FCharBufEnd - FCharBuf;
  831. if Remainder > 0 then
  832. SetString(FTmp, FCharBuf, Remainder);
  833. ReadLn(FFile^, FString);
  834. FString := FString + #10; // bad solution...
  835. if Remainder > 0 then
  836. Insert(FTmp, FString, 1);
  837. FCharBuf := PChar(FString);
  838. FCharBufEnd := FCharBuf + Length(FString);
  839. end;
  840. end;
  841. { helper that closes handle upon destruction }
  842. type
  843. THandleOwnerStream = class(THandleStream)
  844. public
  845. destructor Destroy; override;
  846. end;
  847. destructor THandleOwnerStream.Destroy;
  848. begin
  849. if Handle >= 0 then FileClose(Handle);
  850. inherited Destroy;
  851. end;
  852. { TXMLReader }
  853. procedure TXMLReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
  854. begin
  855. SrcOut := nil;
  856. if Assigned(SrcIn) then
  857. begin
  858. if Assigned(SrcIn.FStream) then
  859. SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
  860. else if SrcIn.FStringData <> '' then
  861. SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
  862. else if (SrcIn.SystemID <> '') then
  863. ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
  864. end;
  865. if (SrcOut = nil) and (FSource = nil) then
  866. DoErrorPos(esFatal, 'No input source specified', NullLocation);
  867. end;
  868. procedure TXMLReader.StoreLocation(out Loc: TLocation);
  869. begin
  870. Loc.Line := FSource.FLineNo;
  871. Loc.LinePos := FSource.FBuf-FSource.LFPos;
  872. end;
  873. function TXMLReader.ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
  874. var
  875. AbsSysID: WideString;
  876. Filename: string;
  877. Stream: TStream;
  878. fd: THandle;
  879. begin
  880. Source := nil;
  881. Result := False;
  882. if not ResolveRelativeURI(ABaseURI, ASystemID, AbsSysID) then
  883. Exit;
  884. { TODO: alternative resolvers
  885. These may be 'internal' resolvers or a handler set by application.
  886. Internal resolvers should probably produce a TStream
  887. ( so that internal classes need not be exported ).
  888. External resolver will produce TXMLInputSource that should be converted.
  889. External resolver must NOT be called for root entity.
  890. External resolver can return nil, in which case we do the default }
  891. if URIToFilename(AbsSysID, Filename) then
  892. begin
  893. fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
  894. if fd <> THandle(-1) then
  895. begin
  896. Stream := THandleOwnerStream.Create(fd);
  897. Source := TXMLStreamInputSource.Create(Stream, True);
  898. Source.SystemID := AbsSysID; // <- Revisit: Really need absolute sysID?
  899. end;
  900. end;
  901. Result := Assigned(Source);
  902. end;
  903. procedure TXMLReader.Initialize(ASource: TXMLCharSource);
  904. begin
  905. ASource.FParent := FSource;
  906. FSource := ASource;
  907. FSource.FReader := Self;
  908. FSource.FStartNesting := FNesting;
  909. FSource.Initialize;
  910. end;
  911. procedure TXMLReader.FatalError(Expected: WideChar);
  912. begin
  913. // FIX: don't output what is found - anything may be found, including exploits...
  914. FatalError('Expected "%1s"', [string(Expected)]);
  915. end;
  916. procedure TXMLReader.FatalError(const descr: String; LineOffs: Integer);
  917. begin
  918. DoError(esFatal, descr, LineOffs);
  919. end;
  920. procedure TXMLReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
  921. begin
  922. DoError(esFatal, Format(descr, args), LineOffs);
  923. end;
  924. procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
  925. begin
  926. if FValidate then
  927. DoError(esError, Format(Msg, Args), LineOffs);
  928. end;
  929. procedure TXMLReader.ValidationErrorWithName(const Msg: string; LineOffs: Integer);
  930. var
  931. ws: WideString;
  932. begin
  933. SetString(ws, FName.Buffer, FName.Length);
  934. ValidationError(Msg, [ws], LineOffs);
  935. end;
  936. procedure TXMLReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
  937. var
  938. Loc: TLocation;
  939. begin
  940. StoreLocation(Loc);
  941. if LineOffs >= 0 then
  942. begin
  943. Dec(Loc.LinePos, LineOffs);
  944. DoErrorPos(Severity, descr, Loc);
  945. end
  946. else
  947. DoErrorPos(Severity, descr, FTokenStart);
  948. end;
  949. procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string;
  950. const args: array of const; const ErrPos: TLocation);
  951. begin
  952. DoErrorPos(Severity, Format(descr, args), ErrPos);
  953. end;
  954. procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
  955. var
  956. E: EXMLReadError;
  957. sysid: WideString;
  958. begin
  959. if Assigned(FSource) then
  960. begin
  961. sysid := FSource.FSystemID;
  962. if (sysid = '') and Assigned(FSource.FEntity) then
  963. sysid := FSource.FEntity.FURI;
  964. E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [sysid, ErrPos.Line, ErrPos.LinePos, descr]);
  965. end
  966. else
  967. E := EXMLReadError.Create(descr);
  968. E.FSeverity := Severity;
  969. E.FErrorMessage := descr;
  970. E.FLine := ErrPos.Line;
  971. E.FLinePos := ErrPos.LinePos;
  972. CallErrorHandler(E);
  973. // No 'finally'! If user handler raises exception, control should not get here
  974. // and the exception will be freed in CallErrorHandler (below)
  975. E.Free;
  976. end;
  977. procedure TXMLReader.CheckMaxChars(ToAdd: Cardinal);
  978. var
  979. src: TXMLCharSource;
  980. total: Cardinal;
  981. begin
  982. Inc(FSource.FCharCount, ToAdd);
  983. if FMaxChars = 0 then
  984. Exit;
  985. src := FSource;
  986. total := 0;
  987. repeat
  988. Inc(total, src.FCharCount);
  989. if total > FMaxChars then
  990. FatalError('Exceeded character count limit');
  991. src := src.FParent;
  992. until src = nil;
  993. end;
  994. procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
  995. begin
  996. try
  997. if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
  998. FCtrl.FOnError(E);
  999. if E.Severity = esFatal then
  1000. raise E;
  1001. except
  1002. if ExceptObject <> E then
  1003. E.Free;
  1004. raise;
  1005. end;
  1006. end;
  1007. function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
  1008. begin
  1009. Result := False;
  1010. repeat
  1011. Result := SkipS or Result;
  1012. if FSource.FBuf^ = #0 then
  1013. begin
  1014. Result := True; // report whitespace upon exiting the PE
  1015. if not ContextPop then
  1016. Break;
  1017. end
  1018. else if FSource.FBuf^ = '%' then
  1019. begin
  1020. if (FState <> rsDTD) or ((FSource.Kind = skInternalSubset) and FInsideDecl) then
  1021. Break;
  1022. // This is the only case where look-ahead is needed
  1023. if FSource.FBuf > FSource.FBufEnd-2 then
  1024. FSource.Reload;
  1025. if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[$100+hi(Word(FSource.FBuf[1]))]]) or
  1026. (FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
  1027. begin
  1028. Inc(FSource.FBuf); // skip '%'
  1029. CheckName;
  1030. ExpectChar(';');
  1031. StartPE;
  1032. Result := True; // report whitespace upon entering the PE
  1033. end
  1034. else Break;
  1035. end
  1036. else
  1037. Break;
  1038. until False;
  1039. end;
  1040. procedure TXMLReader.ExpectWhitespace;
  1041. begin
  1042. if not SkipWhitespace then
  1043. FatalError('Expected whitespace');
  1044. end;
  1045. function TXMLReader.SkipS(Required: Boolean): Boolean;
  1046. var
  1047. p: PWideChar;
  1048. begin
  1049. Result := False;
  1050. repeat
  1051. p := FSource.FBuf;
  1052. repeat
  1053. if (p^ = #10) or (p^ = #13) or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) then
  1054. begin
  1055. FSource.FBuf := p;
  1056. FSource.NewLine;
  1057. p := FSource.FBuf;
  1058. end
  1059. else if (p^ <> #32) and (p^ <> #9) then
  1060. Break;
  1061. Inc(p);
  1062. Result := True;
  1063. until False;
  1064. FSource.FBuf := p;
  1065. until (FSource.FBuf < FSource.FBufEnd) or (not FSource.Reload);
  1066. if (not Result) and Required then
  1067. FatalError('Expected whitespace');
  1068. end;
  1069. procedure TXMLReader.ExpectString(const s: String);
  1070. var
  1071. I: Integer;
  1072. begin
  1073. for I := 1 to Length(s) do
  1074. begin
  1075. if FSource.FBuf^ <> WideChar(ord(s[i])) then
  1076. FatalError('Expected "%s"', [s], i-1);
  1077. FSource.NextChar;
  1078. end;
  1079. end;
  1080. function TXMLReader.CheckForChar(c: WideChar): Boolean;
  1081. begin
  1082. Result := (FSource.FBuf^ = c);
  1083. if Result then
  1084. begin
  1085. Inc(FSource.FBuf);
  1086. if FSource.FBuf >= FSource.FBufEnd then
  1087. FSource.Reload;
  1088. end;
  1089. end;
  1090. procedure TXMLReader.SkipQuote(out Delim: WideChar; required: Boolean);
  1091. begin
  1092. Delim := #0;
  1093. if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
  1094. begin
  1095. Delim := FSource.FBuf^;
  1096. FSource.NextChar; // skip quote
  1097. StoreLocation(FTokenStart);
  1098. end
  1099. else if required then
  1100. FatalError('Expected single or double quote');
  1101. end;
  1102. const
  1103. PrefixDefault: array[0..4] of WideChar = ('x','m','l','n','s');
  1104. constructor TXMLReader.Create;
  1105. begin
  1106. inherited Create;
  1107. BufAllocate(FName, 128);
  1108. BufAllocate(FValue, 512);
  1109. FForwardRefs := TFPList.Create;
  1110. FAttrChunks := TFPList.Create;
  1111. // Set char rules to XML 1.0
  1112. FNamePages := @NamePages;
  1113. SetLength(FNodeStack, 16);
  1114. SetLength(FValidators, 16);
  1115. end;
  1116. constructor TXMLReader.Create(AParser: TDOMParser);
  1117. begin
  1118. Create;
  1119. FCtrl := AParser;
  1120. if FCtrl = nil then
  1121. Exit;
  1122. FValidate := FCtrl.Options.Validate;
  1123. FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
  1124. FExpandEntities := FCtrl.Options.ExpandEntities;
  1125. FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
  1126. FIgnoreComments := FCtrl.Options.IgnoreComments;
  1127. FResolveExternals := FCtrl.Options.ResolveExternals;
  1128. FNamespaces := FCtrl.Options.Namespaces;
  1129. FDisallowDoctype := FCtrl.Options.DisallowDoctype;
  1130. FCanonical := FCtrl.Options.CanonicalForm;
  1131. FMaxChars := FCtrl.Options.MaxChars;
  1132. end;
  1133. destructor TXMLReader.Destroy;
  1134. var
  1135. i: Integer;
  1136. begin
  1137. for i := FAttrChunks.Count-1 downto 0 do
  1138. Dispose(PNodeData(FAttrChunks.List^[i]));
  1139. if Assigned(FEntityValue.Buffer) then
  1140. FreeMem(FEntityValue.Buffer);
  1141. FreeMem(FName.Buffer);
  1142. FreeMem(FValue.Buffer);
  1143. if Assigned(FSource) then
  1144. while ContextPop(True) do; // clean input stack
  1145. FSource.Free;
  1146. FPEMap.Free;
  1147. ClearForwardRefs;
  1148. FNsAttHash.Free;
  1149. FNSHelper.Free;
  1150. FDocType.Release;
  1151. FIDMap.Free;
  1152. FForwardRefs.Free;
  1153. FAttrChunks.Free;
  1154. inherited Destroy;
  1155. end;
  1156. procedure TXMLReader.XML11_BuildTables;
  1157. begin
  1158. FNamePages := Xml11NamePages;
  1159. FXML11 := True;
  1160. FSource.FXml11Rules := True;
  1161. end;
  1162. { Must be executed after doc has been set.
  1163. After introducing own NameTable, merge this into constructor }
  1164. procedure TXMLReader.NSPrepare;
  1165. begin
  1166. if FNamespaces then
  1167. begin
  1168. FNSHelper := TNSSupport.Create;
  1169. FNsAttHash := TDblHashArray.Create;
  1170. FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
  1171. FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
  1172. FStdUri_xmlns := FNameTable.FindOrAdd(PWideChar(stduri_xmlns), Length(stduri_xmlns));
  1173. FStdUri_xml := FNameTable.FindOrAdd(PWideChar(stduri_xml), Length(stduri_xml));
  1174. end;
  1175. end;
  1176. procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
  1177. begin
  1178. doc := TXMLDocument.Create;
  1179. doc.documentURI := ASource.SystemID; // TODO: to be changed to URI or BaseURI
  1180. FNameTable := doc.Names;
  1181. FState := rsProlog;
  1182. FNesting := 0;
  1183. FValidatorNesting := 0;
  1184. FCurrNode := @FNodeStack[0];
  1185. FFragmentMode := False;
  1186. NSPrepare;
  1187. Initialize(ASource);
  1188. if FSource.FXMLVersion <> xmlVersionUnknown then
  1189. TDOMTopNodeEx(TDOMNode(doc)).FXMLVersion := FSource.FXMLVersion;
  1190. TDOMTopNodeEx(TDOMNode(doc)).FXMLEncoding := FSource.FXMLEncoding;
  1191. FNext := xtText;
  1192. ParseContent(doc);
  1193. if FState < rsRoot then
  1194. FatalError('Root element is missing');
  1195. if FValidate then
  1196. ValidateIdRefs;
  1197. doc.IDs := FIDMap;
  1198. FIDMap := nil;
  1199. end;
  1200. procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
  1201. var
  1202. DoctypeNode: TDOMDocumentTypeEx;
  1203. begin
  1204. doc := AOwner.OwnerDocument;
  1205. FNameTable := doc.Names;
  1206. FState := rsRoot;
  1207. FNesting := 0;
  1208. FValidatorNesting := 0;
  1209. FCurrNode := @FNodeStack[0];
  1210. FFragmentMode := True;
  1211. FXML11 := doc.XMLVersion = '1.1';
  1212. NSPrepare;
  1213. Initialize(ASource);
  1214. { Get doctype from the owner's document, but only if it is not already assigned
  1215. (It is set directly when parsing children of an Entity, see LoadEntity procedure) }
  1216. if FDocType = nil then
  1217. begin
  1218. DoctypeNode := TDOMDocumentTypeEx(doc.DocType);
  1219. if Assigned(DoctypeNode) then
  1220. FDocType := DocTypeNode.FModel.Reference;
  1221. end;
  1222. if AOwner is TDOMEntity then
  1223. begin
  1224. TDOMTopNodeEx(AOwner).FXMLVersion := FSource.FXMLVersion;
  1225. TDOMTopNodeEx(AOwner).FXMLEncoding := FSource.FXMLEncoding;
  1226. end;
  1227. FNext := xtText;
  1228. ParseContent(aOwner as TDOMNode_WithChildren);
  1229. end;
  1230. function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
  1231. var
  1232. p: PWideChar;
  1233. NameStartFlag: Boolean;
  1234. begin
  1235. p := FSource.FBuf;
  1236. FName.Length := 0;
  1237. FColonPos := -1;
  1238. NameStartFlag := not (cnToken in aFlags);
  1239. repeat
  1240. if NameStartFlag then
  1241. begin
  1242. if (Byte(p^) in NamingBitmap[FNamePages^[hi(Word(p^))]]) or
  1243. ((p^ = ':') and (not FNamespaces)) then
  1244. Inc(p)
  1245. else if FXML11 and ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1246. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1247. Inc(p, 2)
  1248. else
  1249. begin
  1250. // here we come either when first char of name is bad (it may be a colon),
  1251. // or when a colon is not followed by a valid NameStartChar
  1252. FSource.FBuf := p;
  1253. Result := False;
  1254. Break;
  1255. end;
  1256. NameStartFlag := False;
  1257. end;
  1258. if FXML11 then
  1259. repeat
  1260. if Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] then
  1261. Inc(p)
  1262. else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
  1263. (p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
  1264. Inc(p,2)
  1265. else
  1266. Break;
  1267. until False
  1268. else
  1269. while Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] do
  1270. Inc(p);
  1271. if p^ = ':' then
  1272. begin
  1273. if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
  1274. begin
  1275. Inc(p);
  1276. if p^ <> #0 then Continue;
  1277. end
  1278. else if FColonPos = -1 then // this is the first colon, remember it
  1279. begin
  1280. FColonPos := p-FSource.FBuf+FName.Length;
  1281. NameStartFlag := True;
  1282. Inc(p);
  1283. if p^ <> #0 then Continue;
  1284. end;
  1285. end;
  1286. BufAppendChunk(FName, FSource.FBuf, p);
  1287. Result := (FName.Length > 0);
  1288. FSource.FBuf := p;
  1289. if (p^ <> #0) or not FSource.Reload then
  1290. Break;
  1291. p := FSource.FBuf;
  1292. until False;
  1293. if not (Result or (cnOptional in aFlags)) then
  1294. RaiseNameNotFound;
  1295. end;
  1296. procedure TXMLReader.CheckNCName;
  1297. begin
  1298. if FNamespaces and (FColonPos <> -1) then
  1299. FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
  1300. end;
  1301. procedure TXMLReader.RaiseNameNotFound;
  1302. begin
  1303. if FColonPos <> -1 then
  1304. FatalError('Bad QName syntax, local part is missing')
  1305. else
  1306. // Coming at no cost, this allows more user-friendly error messages
  1307. with FSource do
  1308. if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
  1309. FatalError('Whitespace is not allowed here')
  1310. else
  1311. FatalError('Name starts with invalid character');
  1312. end;
  1313. function TXMLReader.ExpectName: WideString;
  1314. begin
  1315. CheckName;
  1316. SetString(Result, FName.Buffer, FName.Length);
  1317. end;
  1318. function TXMLReader.ResolvePredefined: Boolean;
  1319. var
  1320. wc: WideChar;
  1321. begin
  1322. Result := False;
  1323. with FName do
  1324. begin
  1325. if (Length = 2) and (Buffer[1] = 't') then
  1326. begin
  1327. if Buffer[0] = 'l' then
  1328. wc := '<'
  1329. else if Buffer[0] = 'g' then
  1330. wc := '>'
  1331. else Exit;
  1332. end
  1333. else if Buffer[0] = 'a' then
  1334. begin
  1335. if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
  1336. wc := '&'
  1337. else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
  1338. (Buffer[3] = 's') then
  1339. wc := ''''
  1340. else Exit;
  1341. end
  1342. else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
  1343. (Buffer[2] = 'o') and (Buffer[3] ='t') then
  1344. wc := '"'
  1345. else
  1346. Exit;
  1347. end; // with
  1348. BufAppend(FValue, wc);
  1349. Result := True;
  1350. end;
  1351. function TXMLReader.ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
  1352. var
  1353. Code: Integer;
  1354. begin
  1355. FSource.NextChar; // skip '&'
  1356. Result := CheckForChar('#');
  1357. if Result then
  1358. begin
  1359. Code := 0;
  1360. if CheckForChar('x') then
  1361. repeat
  1362. case FSource.FBuf^ of
  1363. '0'..'9': Code := Code * 16 + Ord(FSource.FBuf^) - Ord('0');
  1364. 'a'..'f': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
  1365. 'A'..'F': Code := Code * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
  1366. else
  1367. Break;
  1368. end;
  1369. FSource.NextChar;
  1370. until Code > $10FFFF
  1371. else
  1372. repeat
  1373. case FSource.FBuf^ of
  1374. '0'..'9': Code := Code * 10 + Ord(FSource.FBuf^) - Ord('0');
  1375. else
  1376. Break;
  1377. end;
  1378. FSource.NextChar;
  1379. until Code > $10FFFF;
  1380. case Code of
  1381. $01..$08, $0B..$0C, $0E..$1F:
  1382. if FXML11 then
  1383. BufAppend(ToFill, WideChar(Code))
  1384. else
  1385. FatalError('Invalid character reference');
  1386. $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
  1387. BufAppend(ToFill, WideChar(Code));
  1388. $10000..$10FFFF:
  1389. begin
  1390. BufAppend(ToFill, WideChar($D7C0 + (Code shr 10)));
  1391. BufAppend(ToFill, WideChar($DC00 xor (Code and $3FF)));
  1392. end;
  1393. else
  1394. FatalError('Invalid character reference');
  1395. end;
  1396. end
  1397. else CheckName;
  1398. ExpectChar(';');
  1399. end;
  1400. const
  1401. AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
  1402. GT_Delim: TSetOfChar = [#0, '>'];
  1403. { Parse attribute literal, producing plain string value in AttrData.FValueStr.
  1404. If entity references are encountered and FExpandEntities=False, also builds
  1405. a node chain starting from AttrData.FNext. Node chain is built only for the
  1406. first level. If NonCDATA=True, additionally normalizes whitespace in string value. }
  1407. procedure TXMLReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean);
  1408. var
  1409. wc: WideChar;
  1410. Delim: WideChar;
  1411. ent: TEntityDecl;
  1412. start: TObject;
  1413. curr: PNodeData;
  1414. StartPos: Integer;
  1415. begin
  1416. SkipQuote(Delim);
  1417. curr := AttrData;
  1418. FValue.Length := 0;
  1419. StartPos := 0;
  1420. start := FSource.FEntity;
  1421. repeat
  1422. wc := FSource.SkipUntil(FValue, AttrDelims);
  1423. if wc = '<' then
  1424. FatalError('Character ''<'' is not allowed in attribute value')
  1425. else if wc = '&' then
  1426. begin
  1427. if ParseRef(FValue) or ResolvePredefined then
  1428. Continue;
  1429. ent := EntityCheck(True);
  1430. if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
  1431. begin
  1432. if FValue.Length > StartPos then
  1433. begin
  1434. curr := AllocAttributeValueChunk(curr);
  1435. curr^.FNodeType := ntText;
  1436. // without PWideChar typecast and in {$T-}, FPC treats '@' result as PAnsiChar...
  1437. SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
  1438. end;
  1439. curr := AllocAttributeValueChunk(curr);
  1440. curr^.FNodeType := ntEntityReference;
  1441. // TODO: this probably should be placed to 'name'
  1442. if ent = nil then
  1443. SetString(curr^.FValueStr, FName.Buffer, FName.Length)
  1444. else
  1445. curr^.FValueStr := ent.FName;
  1446. end;
  1447. StartPos := FValue.Length;
  1448. if Assigned(ent) then
  1449. ContextPush(ent);
  1450. end
  1451. else if wc <> #0 then
  1452. begin
  1453. FSource.NextChar;
  1454. if (wc = Delim) and (FSource.FEntity = start) then
  1455. Break;
  1456. if (wc = #10) or (wc = #9) or (wc = #13) then
  1457. wc := #32;
  1458. BufAppend(FValue, wc);
  1459. end
  1460. else
  1461. begin
  1462. if (FSource.FEntity = start) or not ContextPop then // #0
  1463. FatalError('Literal has no closing quote', -1);
  1464. StartPos := FValue.Length;
  1465. end;
  1466. until False;
  1467. if Assigned(attrData^.FNext) then
  1468. begin
  1469. FAttrCleanupFlag := True;
  1470. if FValue.Length > StartPos then
  1471. begin
  1472. curr := AllocAttributeValueChunk(curr);
  1473. curr^.FNodeType := ntText;
  1474. SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
  1475. end;
  1476. end;
  1477. if nonCDATA then
  1478. BufNormalize(FValue, attrData^.FDenormalized)
  1479. else
  1480. attrData^.FDenormalized := False;
  1481. SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
  1482. end;
  1483. const
  1484. PrefixChar: array[Boolean] of string = ('', '%');
  1485. procedure TXMLReader.EntityToSource(AEntity: TEntityDecl; out Src: TXMLCharSource);
  1486. begin
  1487. if AEntity.FOnStack then
  1488. FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
  1489. if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
  1490. begin
  1491. if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
  1492. begin
  1493. // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
  1494. ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
  1495. Src := nil;
  1496. Exit;
  1497. end;
  1498. end
  1499. else
  1500. begin
  1501. Src := TXMLCharSource.Create(AEntity.FReplacementText);
  1502. Src.FLineNo := AEntity.FStartLocation.Line;
  1503. Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
  1504. // needed in case of prefetched external PE
  1505. if AEntity.FSystemID <> '' then
  1506. Src.SystemID := AEntity.FURI;
  1507. end;
  1508. AEntity.FOnStack := True;
  1509. Src.FEntity := AEntity;
  1510. end;
  1511. function TXMLReader.ContextPush(AEntity: TEntityDecl): Boolean;
  1512. var
  1513. Src: TXMLCharSource;
  1514. begin
  1515. EntityToSource(AEntity, Src);
  1516. Result := Assigned(Src);
  1517. if Result then
  1518. Initialize(Src);
  1519. end;
  1520. function TXMLReader.ContextPop(Forced: Boolean): Boolean;
  1521. var
  1522. Src: TXMLCharSource;
  1523. Error: Boolean;
  1524. begin
  1525. Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
  1526. if Result then
  1527. begin
  1528. Src := FSource.FParent;
  1529. Error := False;
  1530. if Assigned(FSource.FEntity) then
  1531. begin
  1532. FSource.FEntity.FOnStack := False;
  1533. FSource.FEntity.FCharCount := FSource.FCharCount;
  1534. // [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
  1535. Error := FSource.FEntity.FBetweenDecls and FInsideDecl;
  1536. end;
  1537. FSource.Free;
  1538. FSource := Src;
  1539. // correct position of this error is after PE reference
  1540. if Error then
  1541. BadPENesting(esFatal);
  1542. end;
  1543. end;
  1544. function TXMLReader.EntityCheck(NoExternals: Boolean): TEntityDecl;
  1545. var
  1546. RefName: WideString;
  1547. cnt: Integer;
  1548. begin
  1549. Result := nil;
  1550. SetString(RefName, FName.Buffer, FName.Length);
  1551. cnt := FName.Length+2;
  1552. if Assigned(FDocType) then
  1553. Result := FDocType.Entities.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1554. if Result = nil then
  1555. begin
  1556. if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.FSystemID <> '')) then
  1557. FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
  1558. else
  1559. ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
  1560. Exit;
  1561. end;
  1562. if FStandalone and Result.ExternallyDeclared then
  1563. FatalError('Standalone constraint violation', cnt);
  1564. if Result.FNotationName <> '' then
  1565. FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
  1566. if NoExternals and (Result.FSystemID <> '') then
  1567. FatalError('External entity reference is not allowed in attribute value', cnt);
  1568. if not Result.FResolved then
  1569. LoadEntity(Result);
  1570. // at this point we know the charcount of the entity being included
  1571. if Result.FCharCount >= cnt then
  1572. CheckMaxChars(Result.FCharCount - cnt);
  1573. end;
  1574. procedure TXMLReader.StartPE;
  1575. var
  1576. PEnt: TEntityDecl;
  1577. begin
  1578. PEnt := nil;
  1579. if Assigned(FPEMap) then
  1580. PEnt := FPEMap.Get(FName.Buffer, FName.Length) as TEntityDecl;
  1581. if PEnt = nil then
  1582. begin
  1583. ValidationErrorWithName('Undefined parameter entity ''%s'' referenced', FName.Length+2);
  1584. // cease processing declarations, unless document is standalone.
  1585. FDTDProcessed := FStandalone;
  1586. Exit;
  1587. end;
  1588. { cache an external PE so it's only fetched once }
  1589. if (PEnt.FSystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
  1590. begin
  1591. FDTDProcessed := FStandalone;
  1592. Exit;
  1593. end;
  1594. CheckMaxChars(PEnt.FCharCount);
  1595. PEnt.FBetweenDecls := not FInsideDecl;
  1596. ContextPush(PEnt);
  1597. FHavePERefs := True;
  1598. end;
  1599. function TXMLReader.PrefetchEntity(AEntity: TEntityDecl): Boolean;
  1600. begin
  1601. Result := ContextPush(AEntity);
  1602. if Result then
  1603. try
  1604. FValue.Length := 0;
  1605. FSource.SkipUntil(FValue, [#0]);
  1606. SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
  1607. AEntity.FCharCount := FValue.Length;
  1608. AEntity.FStartLocation.Line := 1;
  1609. AEntity.FStartLocation.LinePos := 1;
  1610. AEntity.FURI := FSource.SystemID; // replace base URI with absolute one
  1611. finally
  1612. ContextPop;
  1613. AEntity.FPrefetched := True;
  1614. FValue.Length := 0;
  1615. end;
  1616. end;
  1617. const
  1618. LiteralDelims: array[TLiteralType] of TSetOfChar = (
  1619. [#0, '''', '"'], // ltPlain
  1620. [#0, '''', '"', #13, #10], // ltPubid
  1621. [#0, '%', '&', '''', '"'] // ltEntity
  1622. );
  1623. function TXMLReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
  1624. Required: Boolean): Boolean;
  1625. var
  1626. start: TObject;
  1627. wc, Delim: WideChar;
  1628. dummy: Boolean;
  1629. begin
  1630. SkipQuote(Delim, Required);
  1631. Result := (Delim <> #0);
  1632. if not Result then
  1633. Exit;
  1634. ToFill.Length := 0;
  1635. start := FSource.FEntity;
  1636. repeat
  1637. wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
  1638. if wc = '%' then { ltEntity only }
  1639. begin
  1640. FSource.NextChar;
  1641. CheckName;
  1642. ExpectChar(';');
  1643. if FSource.Kind = skInternalSubset then
  1644. FatalError('PE reference not allowed here in internal subset', FName.Length+2);
  1645. StartPE;
  1646. end
  1647. else if wc = '&' then { ltEntity }
  1648. begin
  1649. if ParseRef(ToFill) then // charRefs always expanded
  1650. Continue;
  1651. BufAppend(ToFill, '&');
  1652. BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
  1653. BufAppend(ToFill, ';');
  1654. end
  1655. else if wc <> #0 then
  1656. begin
  1657. FSource.NextChar;
  1658. if (wc = #10) or (wc = #13) then
  1659. wc := #32
  1660. // terminating delimiter must be in the same context as the starting one
  1661. else if (wc = Delim) and (start = FSource.FEntity) then
  1662. Break;
  1663. BufAppend(ToFill, wc);
  1664. end
  1665. else if (FSource.FEntity = start) or not ContextPop then // #0
  1666. FatalError('Literal has no closing quote', -1);
  1667. until False;
  1668. if aType = ltPubid then
  1669. BufNormalize(ToFill, dummy);
  1670. end;
  1671. function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
  1672. var
  1673. wc: WideChar;
  1674. begin
  1675. Result := False;
  1676. StoreLocation(FTokenStart);
  1677. repeat
  1678. wc := FSource.SkipUntil(FValue, Delim);
  1679. if wc <> #0 then
  1680. begin
  1681. FSource.NextChar;
  1682. if (FValue.Length > ord(c2 <> #0)) then
  1683. begin
  1684. if (FValue.Buffer[FValue.Length-1] = c1) and
  1685. ((c2 = #0) or ((c2 <> #0) and (FValue.Buffer[FValue.Length-2] = c2))) then
  1686. begin
  1687. Dec(FValue.Length, ord(c2 <> #0) + 1);
  1688. Result := True;
  1689. Exit;
  1690. end;
  1691. end;
  1692. BufAppend(FValue, wc);
  1693. end;
  1694. until wc = #0;
  1695. end;
  1696. procedure TXMLReader.ParseComment(discard: Boolean); // [15]
  1697. var
  1698. SaveLength: Integer;
  1699. begin
  1700. ExpectString('--');
  1701. SaveLength := FValue.Length;
  1702. if not SkipUntilSeq([#0, '-'], '-') then
  1703. FatalError('Unterminated comment', -1);
  1704. ExpectChar('>');
  1705. if not discard then
  1706. begin
  1707. FCurrNode := @FNodeStack[FNesting+1];
  1708. FCurrNode^.FNodeType := ntComment;
  1709. FCurrNode^.FQName := nil;
  1710. FCurrNode^.FValueStart := @FValue.Buffer[SaveLength];
  1711. FCurrNode^.FValueLength := FValue.Length-SaveLength;
  1712. end;
  1713. FValue.Length := SaveLength;
  1714. end;
  1715. procedure TXMLReader.ParsePI; // [16]
  1716. begin
  1717. FSource.NextChar; // skip '?'
  1718. CheckName;
  1719. CheckNCName;
  1720. with FName do
  1721. if (Length = 3) and
  1722. ((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
  1723. ((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
  1724. ((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
  1725. begin
  1726. if not BufEquals(FName, 'xml') then
  1727. FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
  1728. else
  1729. FatalError('XML declaration is not allowed here', FName.Length);
  1730. end;
  1731. if FSource.FBuf^ <> '?' then
  1732. SkipS(True);
  1733. FValue.Length := 0;
  1734. if not SkipUntilSeq(GT_Delim, '?') then
  1735. FatalError('Unterminated processing instruction', -1);
  1736. SetNodeInfoWithValue(ntProcessingInstruction,
  1737. FNameTable.FindOrAdd(FName.Buffer, FName.Length));
  1738. end;
  1739. function TXMLReader.CreatePINode: TDOMNode;
  1740. var
  1741. NameStr, ValueStr: WideString;
  1742. begin
  1743. SetString(NameStr, FName.Buffer, FName.Length);
  1744. SetString(ValueStr, FValue.Buffer, FValue.Length);
  1745. result := Doc.CreateProcessingInstruction(NameStr, ValueStr);
  1746. end;
  1747. const
  1748. vers: array[Boolean] of TXMLVersion = (xmlVersion10, xmlVersion11);
  1749. procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
  1750. var
  1751. Delim: WideChar;
  1752. buf: array[0..31] of WideChar;
  1753. I: Integer;
  1754. begin
  1755. SkipS(True);
  1756. // [24] VersionInfo: optional in TextDecl, required in XmlDecl
  1757. if (not TextDecl) or (FSource.FBuf^ = 'v') then
  1758. begin
  1759. ExpectString('version');
  1760. ExpectEq;
  1761. SkipQuote(Delim);
  1762. I := 0;
  1763. while (I < 3) and (FSource.FBuf^ <> Delim) do
  1764. begin
  1765. buf[I] := FSource.FBuf^;
  1766. Inc(I);
  1767. FSource.NextChar;
  1768. end;
  1769. if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
  1770. ((buf[2] <> '0') and (buf[2] <> '1')) then
  1771. FatalError('Illegal version number', -1);
  1772. ExpectChar(Delim);
  1773. FSource.FXMLVersion := vers[buf[2] = '1'];
  1774. if TextDecl and (FSource.FXMLVersion = xmlVersion11) and not FXML11 then
  1775. FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
  1776. if TextDecl or (FSource.FBuf^ <> '?') then
  1777. SkipS(True);
  1778. end;
  1779. // [80] EncodingDecl: required in TextDecl, optional in XmlDecl
  1780. if TextDecl or (FSource.FBuf^ = 'e') then
  1781. begin
  1782. ExpectString('encoding');
  1783. ExpectEq;
  1784. SkipQuote(Delim);
  1785. I := 0;
  1786. while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
  1787. ((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
  1788. ((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
  1789. begin
  1790. buf[I] := FSource.FBuf^;
  1791. Inc(I);
  1792. FSource.NextChar;
  1793. end;
  1794. if not CheckForChar(Delim) then
  1795. FatalError('Illegal encoding name', i);
  1796. SetString(FSource.FXMLEncoding, buf, i);
  1797. if not FSource.SetEncoding(FSource.FXMLEncoding) then // <-- Wide2Ansi conversion here
  1798. FatalError('Encoding ''%s'' is not supported', [FSource.FXMLEncoding], i+1);
  1799. if FSource.FBuf^ <> '?' then
  1800. SkipS(not TextDecl);
  1801. end;
  1802. // [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
  1803. if (not TextDecl) and (FSource.FBuf^ = 's') then
  1804. begin
  1805. ExpectString('standalone');
  1806. ExpectEq;
  1807. SkipQuote(Delim);
  1808. if FSource.Matches('yes') then
  1809. FStandalone := True
  1810. else if not FSource.Matches('no') then
  1811. FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
  1812. ExpectChar(Delim);
  1813. SkipS;
  1814. end;
  1815. ExpectString('?>');
  1816. { Switch to 1.1 rules only after declaration is parsed completely. This is to
  1817. ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
  1818. if FSource.FXMLVersion = xmlVersion11 then
  1819. FXML11 := True;
  1820. end;
  1821. procedure TXMLReader.DTDReloadHook;
  1822. var
  1823. p: PWideChar;
  1824. begin
  1825. { FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
  1826. We must further remove the CR chars and have only LF's left. }
  1827. p := FDTDStartPos;
  1828. while p < FSource.FBuf do
  1829. begin
  1830. while (p < FSource.FBuf) and (p^ <> #13) do
  1831. Inc(p);
  1832. BufAppendChunk(FIntSubset, FDTDStartPos, p);
  1833. if p^ = #13 then
  1834. Inc(p);
  1835. FDTDStartPos := p;
  1836. end;
  1837. FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
  1838. end;
  1839. procedure TXMLReader.ParseDoctypeDecl; // [28]
  1840. var
  1841. Src: TXMLCharSource;
  1842. begin
  1843. if FState >= rsDTD then
  1844. FatalError('Markup declaration is not allowed here');
  1845. if FDisallowDoctype then
  1846. FatalError('Document type is prohibited by parser settings');
  1847. ExpectString('DOCTYPE');
  1848. SkipS(True);
  1849. FDocType := TDTDModel.Create(FNameTable);
  1850. FDTDProcessed := True; // assume success
  1851. FState := rsDTD;
  1852. FDocType.FName := ExpectName;
  1853. SkipS(True);
  1854. ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
  1855. SkipS;
  1856. if CheckForChar('[') then
  1857. begin
  1858. BufAllocate(FIntSubset, 256);
  1859. FSource.Kind := skInternalSubset;
  1860. try
  1861. FDTDStartPos := FSource.FBuf;
  1862. ParseMarkupDecl;
  1863. DTDReloadHook; // fetch last chunk
  1864. SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
  1865. finally
  1866. FreeMem(FIntSubset.Buffer);
  1867. FSource.Kind := skNone;
  1868. end;
  1869. ExpectChar(']');
  1870. SkipS;
  1871. end;
  1872. ExpectChar('>');
  1873. if (FDocType.FSystemID <> '') then
  1874. begin
  1875. if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SystemID, Src) then
  1876. begin
  1877. Initialize(Src);
  1878. try
  1879. Src.Kind := skManualPop;
  1880. ParseMarkupDecl;
  1881. finally
  1882. ContextPop(True);
  1883. end;
  1884. end
  1885. else
  1886. begin
  1887. ValidationError('Unable to resolve external DTD subset', []);
  1888. FDTDProcessed := FStandalone;
  1889. end;
  1890. end;
  1891. FState := rsAfterDTD;
  1892. FCurrNode^.FNodeType := ntDocumentType;
  1893. end;
  1894. procedure TXMLReader.ExpectEq; // [25]
  1895. begin
  1896. if FSource.FBuf^ <> '=' then
  1897. SkipS;
  1898. if FSource.FBuf^ <> '=' then
  1899. FatalError('Expected "="');
  1900. FSource.NextChar;
  1901. SkipS;
  1902. end;
  1903. { DTD stuff }
  1904. procedure TXMLReader.BadPENesting(S: TErrorSeverity);
  1905. begin
  1906. if (S = esFatal) or FValidate then
  1907. DoError(S, 'Parameter entities must be properly nested');
  1908. end;
  1909. procedure TXMLReader.StandaloneError(LineOffs: Integer);
  1910. begin
  1911. ValidationError('Standalone constriant violation', [], LineOffs);
  1912. end;
  1913. function TXMLReader.ParseQuantity: TCPQuant;
  1914. begin
  1915. case FSource.FBuf^ of
  1916. '?': Result := cqZeroOrOnce;
  1917. '*': Result := cqZeroOrMore;
  1918. '+': Result := cqOnceOrMore;
  1919. else
  1920. Result := cqOnce;
  1921. Exit;
  1922. end;
  1923. FSource.NextChar;
  1924. end;
  1925. function TXMLReader.FindOrCreateElDef: TElementDecl;
  1926. var
  1927. p: PHashItem;
  1928. begin
  1929. CheckName;
  1930. p := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  1931. Result := TElementDecl(p^.Data);
  1932. if Result = nil then
  1933. begin
  1934. Result := TElementDecl.Create;
  1935. p^.Data := Result;
  1936. end;
  1937. end;
  1938. procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle; MustEndIn: TObject); // [49], [50]
  1939. var
  1940. Delim: WideChar;
  1941. CurrentCP: TContentParticle;
  1942. begin
  1943. Delim := #0;
  1944. repeat
  1945. CurrentCP := CP.Add;
  1946. SkipWhitespace;
  1947. if CheckForChar('(') then
  1948. ExpectChoiceOrSeq(CurrentCP, FSource.FEntity)
  1949. else
  1950. CurrentCP.Def := FindOrCreateElDef;
  1951. CurrentCP.CPQuant := ParseQuantity;
  1952. SkipWhitespace;
  1953. if FSource.FBuf^ = ')' then
  1954. Break;
  1955. if Delim = #0 then
  1956. begin
  1957. if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
  1958. Delim := FSource.FBuf^
  1959. else
  1960. FatalError('Expected pipe or comma delimiter');
  1961. end
  1962. else
  1963. if FSource.FBuf^ <> Delim then
  1964. FatalError(Delim);
  1965. FSource.NextChar; // skip delimiter
  1966. until False;
  1967. if MustEndIn <> FSource.FEntity then
  1968. BadPENesting;
  1969. FSource.NextChar;
  1970. if Delim = '|' then
  1971. CP.CPType := ctChoice
  1972. else
  1973. CP.CPType := ctSeq; // '(foo)' is a sequence!
  1974. end;
  1975. procedure TXMLReader.ParseElementDecl; // [45]
  1976. var
  1977. ElDef: TElementDecl;
  1978. CurrentEntity: TObject;
  1979. I: Integer;
  1980. CP: TContentParticle;
  1981. Typ: TElementContentType;
  1982. ExtDecl: Boolean;
  1983. begin
  1984. CP := nil;
  1985. Typ := ctUndeclared; // satisfy compiler
  1986. ExpectWhitespace;
  1987. ElDef := FindOrCreateElDef;
  1988. if ElDef.ContentType <> ctUndeclared then
  1989. ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
  1990. ExtDecl := FSource.Kind <> skInternalSubset;
  1991. ExpectWhitespace;
  1992. if FSource.Matches('EMPTY') then
  1993. Typ := ctEmpty
  1994. else if FSource.Matches('ANY') then
  1995. Typ := ctAny
  1996. else if CheckForChar('(') then
  1997. begin
  1998. CP := TContentParticle.Create;
  1999. try
  2000. CurrentEntity := FSource.FEntity;
  2001. SkipWhitespace;
  2002. if FSource.Matches('#PCDATA') then // Mixed section [51]
  2003. begin
  2004. SkipWhitespace;
  2005. Typ := ctMixed;
  2006. while FSource.FBuf^ <> ')' do
  2007. begin
  2008. ExpectChar('|');
  2009. SkipWhitespace;
  2010. with CP.Add do
  2011. begin
  2012. Def := FindOrCreateElDef;
  2013. for I := CP.ChildCount-2 downto 0 do
  2014. if Def = CP.Children[I].Def then
  2015. ValidationError('Duplicate token in mixed section', [], FName.Length);
  2016. end;
  2017. SkipWhitespace;
  2018. end;
  2019. if CurrentEntity <> FSource.FEntity then
  2020. BadPENesting;
  2021. FSource.NextChar;
  2022. if (not CheckForChar('*')) and (CP.ChildCount > 0) then
  2023. FatalError(WideChar('*'));
  2024. CP.CPQuant := cqZeroOrMore;
  2025. CP.CPType := ctChoice;
  2026. end
  2027. else // Children section [47]
  2028. begin
  2029. Typ := ctChildren;
  2030. ExpectChoiceOrSeq(CP, CurrentEntity);
  2031. CP.CPQuant := ParseQuantity;
  2032. end;
  2033. except
  2034. CP.Free;
  2035. raise;
  2036. end;
  2037. end
  2038. else
  2039. FatalError('Invalid content specification');
  2040. // SAX: DeclHandler.ElementDecl(name, model);
  2041. if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
  2042. begin
  2043. ElDef.ExternallyDeclared := ExtDecl;
  2044. ElDef.ContentType := Typ;
  2045. ElDef.RootCP := CP;
  2046. end
  2047. else
  2048. CP.Free;
  2049. end;
  2050. procedure TXMLReader.ParseNotationDecl; // [82]
  2051. var
  2052. NameStr, SysID, PubID: WideString;
  2053. begin
  2054. ExpectWhitespace;
  2055. NameStr := ExpectName;
  2056. CheckNCName;
  2057. ExpectWhitespace;
  2058. if not ParseExternalID(SysID, PubID, True) then
  2059. FatalError('Expected external or public ID');
  2060. if FDTDProcessed then
  2061. DoNotationDecl(NameStr, PubID, SysID);
  2062. end;
  2063. const
  2064. AttrDataTypeNames: array[TAttrDataType] of WideString = (
  2065. 'CDATA',
  2066. 'ID',
  2067. 'IDREF',
  2068. 'IDREFS',
  2069. 'ENTITY',
  2070. 'ENTITIES',
  2071. 'NMTOKEN',
  2072. 'NMTOKENS',
  2073. 'NOTATION'
  2074. );
  2075. procedure TXMLReader.ParseAttlistDecl; // [52]
  2076. var
  2077. ElDef: TElementDecl;
  2078. AttDef: TAttributeDef;
  2079. dt: TAttrDataType;
  2080. Found, DiscardIt: Boolean;
  2081. Offsets: array [Boolean] of Integer;
  2082. attrName: PHashItem;
  2083. begin
  2084. ExpectWhitespace;
  2085. ElDef := FindOrCreateElDef;
  2086. SkipWhitespace;
  2087. while FSource.FBuf^ <> '>' do
  2088. begin
  2089. CheckName;
  2090. ExpectWhitespace;
  2091. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2092. AttDef := TAttributeDef.Create(attrName, FColonPos);
  2093. try
  2094. AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2095. // In case of duplicate declaration of the same attribute, we must discard it,
  2096. // not modifying ElDef, and suppressing certain validation errors.
  2097. DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
  2098. if CheckForChar('(') then // [59]
  2099. begin
  2100. AttDef.DataType := dtNmToken;
  2101. repeat
  2102. SkipWhitespace;
  2103. CheckName([cnToken]);
  2104. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2105. ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
  2106. SkipWhitespace;
  2107. until not CheckForChar('|');
  2108. ExpectChar(')');
  2109. ExpectWhitespace;
  2110. end
  2111. else
  2112. begin
  2113. StoreLocation(FTokenStart);
  2114. // search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
  2115. for dt := dtNotation downto dtCData do
  2116. begin
  2117. Found := FSource.Matches(AttrDataTypeNames[dt]);
  2118. if Found then
  2119. Break;
  2120. end;
  2121. if Found and SkipWhitespace then
  2122. begin
  2123. AttDef.DataType := dt;
  2124. if (dt = dtId) and not DiscardIt then
  2125. begin
  2126. if Assigned(ElDef.IDAttr) then
  2127. ValidationError('Only one attribute of type ID is allowed per element',[])
  2128. else
  2129. ElDef.IDAttr := AttDef;
  2130. end
  2131. else if dt = dtNotation then // no test cases for these ?!
  2132. begin
  2133. if not DiscardIt then
  2134. begin
  2135. if Assigned(ElDef.NotationAttr) then
  2136. ValidationError('Only one attribute of type NOTATION is allowed per element',[])
  2137. else
  2138. ElDef.NotationAttr := AttDef;
  2139. if ElDef.ContentType = ctEmpty then
  2140. ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
  2141. end;
  2142. ExpectChar('(');
  2143. repeat
  2144. SkipWhitespace;
  2145. StoreLocation(FTokenStart);
  2146. CheckName;
  2147. CheckNCName;
  2148. if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
  2149. ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
  2150. if (not DiscardIt) and FValidate then
  2151. AddForwardRef(FName.Buffer, FName.Length);
  2152. SkipWhitespace;
  2153. until not CheckForChar('|');
  2154. ExpectChar(')');
  2155. ExpectWhitespace;
  2156. end;
  2157. end
  2158. else
  2159. begin
  2160. // don't report 'expected whitespace' if token does not match completely
  2161. Offsets[False] := 0;
  2162. Offsets[True] := Length(AttrDataTypeNames[dt]);
  2163. if Found and (FSource.FBuf^ < 'A') then
  2164. ExpectWhitespace
  2165. else
  2166. FatalError('Illegal attribute type for ''%s''', [attrName^.Key], Offsets[Found]);
  2167. end;
  2168. end;
  2169. StoreLocation(FTokenStart);
  2170. if FSource.Matches('#REQUIRED') then
  2171. AttDef.Default := adRequired
  2172. else if FSource.Matches('#IMPLIED') then
  2173. AttDef.Default := adImplied
  2174. else if FSource.Matches('#FIXED') then
  2175. begin
  2176. AttDef.Default := adFixed;
  2177. ExpectWhitespace;
  2178. end
  2179. else
  2180. AttDef.Default := adDefault;
  2181. if AttDef.Default in [adDefault, adFixed] then
  2182. begin
  2183. if AttDef.DataType = dtId then
  2184. ValidationError('An attribute of type ID cannot have a default value',[]);
  2185. // See comments to valid-sa-094: PE expansion should be disabled in AttDef.
  2186. ExpectAttValue(AttDef.Data, dt <> dtCDATA);
  2187. if not ValidateAttrSyntax(AttDef, AttDef.Data^.FValueStr) then
  2188. ValidationError('Default value for attribute ''%s'' has wrong syntax', [attrName^.Key]);
  2189. end;
  2190. // SAX: DeclHandler.AttributeDecl(...)
  2191. if DiscardIt then
  2192. AttDef.Free
  2193. else
  2194. ElDef.AddAttrDef(AttDef);
  2195. except
  2196. AttDef.Free;
  2197. raise;
  2198. end;
  2199. SkipWhitespace;
  2200. end;
  2201. end;
  2202. procedure TXMLReader.ParseEntityDecl; // [70]
  2203. var
  2204. IsPE, Exists: Boolean;
  2205. Entity: TEntityDecl;
  2206. Map: THashTable;
  2207. Item: PHashItem;
  2208. begin
  2209. if not SkipWhitespace(True) then
  2210. FatalError('Expected whitespace');
  2211. IsPE := CheckForChar('%');
  2212. if IsPE then // [72]
  2213. begin
  2214. ExpectWhitespace;
  2215. if FPEMap = nil then
  2216. FPEMap := THashTable.Create(64, True);
  2217. Map := FPEMap;
  2218. end
  2219. else
  2220. Map := FDocType.Entities;
  2221. Entity := TEntityDecl.Create;
  2222. try
  2223. Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
  2224. Entity.FIsPE := IsPE;
  2225. CheckName;
  2226. CheckNCName;
  2227. Item := Map.FindOrAdd(FName.Buffer, FName.Length, Exists);
  2228. ExpectWhitespace;
  2229. // remember where the entity is declared
  2230. Entity.FURI := FSource.SystemID;
  2231. if FEntityValue.Buffer = nil then
  2232. BufAllocate(FEntityValue, 256);
  2233. if ParseLiteral(FEntityValue, ltEntity, False) then
  2234. begin
  2235. SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
  2236. Entity.FCharCount := FEntityValue.Length;
  2237. Entity.FStartLocation := FTokenStart;
  2238. end
  2239. else
  2240. begin
  2241. if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
  2242. FatalError('Expected entity value or external ID');
  2243. if not IsPE then // [76]
  2244. begin
  2245. if FSource.FBuf^ <> '>' then
  2246. ExpectWhitespace;
  2247. if FSource.Matches('NDATA') then
  2248. begin
  2249. ExpectWhitespace;
  2250. StoreLocation(FTokenStart);
  2251. Entity.FNotationName := ExpectName;
  2252. if FValidate then
  2253. AddForwardRef(FName.Buffer, FName.Length);
  2254. // SAX: DTDHandler.UnparsedEntityDecl(...);
  2255. end;
  2256. end;
  2257. end;
  2258. except
  2259. Entity.Free;
  2260. raise;
  2261. end;
  2262. // Repeated declarations of same entity are legal but must be ignored
  2263. if FDTDProcessed and not Exists then
  2264. begin
  2265. Item^.Data := Entity;
  2266. Entity.FName := Item^.Key;
  2267. end
  2268. else
  2269. Entity.Free;
  2270. end;
  2271. procedure TXMLReader.ParseMarkupDecl; // [29]
  2272. var
  2273. IncludeLevel: Integer;
  2274. IgnoreLevel: Integer;
  2275. CurrentEntity: TObject;
  2276. IncludeLoc: TLocation;
  2277. IgnoreLoc: TLocation;
  2278. wc: WideChar;
  2279. CondType: (ctUnknown, ctInclude, ctIgnore);
  2280. begin
  2281. IncludeLevel := 0;
  2282. IgnoreLevel := 0;
  2283. repeat
  2284. SkipWhitespace;
  2285. if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
  2286. begin
  2287. ExpectString(']]>');
  2288. Dec(IncludeLevel);
  2289. Continue;
  2290. end;
  2291. if not CheckForChar('<') then
  2292. Break;
  2293. CurrentEntity := FSource.FEntity;
  2294. if FSource.FBuf^ = '?' then
  2295. begin
  2296. ParsePI;
  2297. doc.AppendChild(CreatePINode);
  2298. end
  2299. else
  2300. begin
  2301. ExpectChar('!');
  2302. if FSource.FBuf^ = '-' then
  2303. ParseComment(True)
  2304. else if CheckForChar('[') then
  2305. begin
  2306. if FSource.Kind = skInternalSubset then
  2307. FatalError('Conditional sections are not allowed in internal subset', 1);
  2308. SkipWhitespace;
  2309. CondType := ctUnknown; // satisfy compiler
  2310. if FSource.Matches('INCLUDE') then
  2311. CondType := ctInclude
  2312. else if FSource.Matches('IGNORE') then
  2313. CondType := ctIgnore
  2314. else
  2315. FatalError('Expected "INCLUDE" or "IGNORE"');
  2316. SkipWhitespace;
  2317. if CurrentEntity <> FSource.FEntity then
  2318. BadPENesting;
  2319. ExpectChar('[');
  2320. if CondType = ctInclude then
  2321. begin
  2322. if IncludeLevel = 0 then
  2323. StoreLocation(IncludeLoc);
  2324. Inc(IncludeLevel);
  2325. end
  2326. else if CondType = ctIgnore then
  2327. begin
  2328. StoreLocation(IgnoreLoc);
  2329. IgnoreLevel := 1;
  2330. repeat
  2331. FValue.Length := 0;
  2332. wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
  2333. if FSource.Matches('<![') then
  2334. Inc(IgnoreLevel)
  2335. else if FSource.Matches(']]>') then
  2336. Dec(IgnoreLevel)
  2337. else if wc <> #0 then
  2338. FSource.NextChar
  2339. else // PE's aren't recognized in ignore section, cannot ContextPop()
  2340. DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
  2341. until IgnoreLevel=0;
  2342. end;
  2343. end
  2344. else
  2345. begin
  2346. FInsideDecl := True;
  2347. if FSource.Matches('ELEMENT') then
  2348. ParseElementDecl
  2349. else if FSource.Matches('ENTITY') then
  2350. ParseEntityDecl
  2351. else if FSource.Matches('ATTLIST') then
  2352. ParseAttlistDecl
  2353. else if FSource.Matches('NOTATION') then
  2354. ParseNotationDecl
  2355. else
  2356. FatalError('Illegal markup declaration');
  2357. SkipWhitespace;
  2358. if CurrentEntity <> FSource.FEntity then
  2359. BadPENesting;
  2360. ExpectChar('>');
  2361. FInsideDecl := False;
  2362. end;
  2363. end;
  2364. until False;
  2365. if IncludeLevel > 0 then
  2366. DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
  2367. if (FSource.Kind = skInternalSubset) and (FSource.FBuf^ = ']') then
  2368. Exit;
  2369. if FSource.FBuf^ <> #0 then
  2370. FatalError('Illegal character in DTD');
  2371. end;
  2372. procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
  2373. begin
  2374. doc := TXMLDocument.Create;
  2375. FNameTable := doc.Names;
  2376. FDocType := TDTDModel.Create(FNameTable);
  2377. // TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
  2378. doc.AppendChild(TDOMDocumentType.Create(doc, FDocType));
  2379. NSPrepare;
  2380. Initialize(ASource);
  2381. ParseMarkupDecl;
  2382. end;
  2383. procedure TXMLReader.LoadEntity(AEntity: TEntityDecl);
  2384. var
  2385. InnerReader: TXMLReader;
  2386. Src: TXMLCharSource;
  2387. Ent: TDOMEntityEx;
  2388. DoctypeNode: TDOMDocumentType;
  2389. begin
  2390. DoctypeNode := doc.DocType;
  2391. if DoctypeNode = nil then
  2392. Exit;
  2393. Ent := TDOMEntityEx(DocTypeNode.Entities.GetNamedItem(AEntity.FName));
  2394. if Ent = nil then
  2395. Exit;
  2396. InnerReader := TXMLReader.Create(FCtrl);
  2397. try
  2398. InnerReader.FAttrTag := FAttrTag;
  2399. InnerReader.FDocType := FDocType.Reference;
  2400. EntityToSource(AEntity, Src);
  2401. Ent.SetReadOnly(False);
  2402. if Assigned(Src) then
  2403. InnerReader.ProcessFragment(Src, Ent);
  2404. AEntity.FResolved := True;
  2405. finally
  2406. FAttrTag := InnerReader.FAttrTag;
  2407. InnerReader.Free;
  2408. AEntity.FOnStack := False;
  2409. Ent.SetReadOnly(True);
  2410. end;
  2411. end;
  2412. procedure TXMLReader.ValidateCurrentNode;
  2413. var
  2414. ElDef: TElementDecl;
  2415. AttDef: TAttributeDef;
  2416. attr: PNodeData;
  2417. i: Integer;
  2418. begin
  2419. case FCurrNode^.FNodeType of
  2420. ntElement:
  2421. begin
  2422. if (FNesting = 1) and (not FFragmentMode) then
  2423. begin
  2424. if Assigned(FDocType) then
  2425. begin
  2426. if FDocType.FName <> FCurrNode^.FQName^.Key then
  2427. DoErrorPos(esError, 'Root element name does not match DTD', FCurrNode^.FLoc);
  2428. end
  2429. else
  2430. DoErrorPos(esError, 'Missing DTD', FCurrNode^.FLoc);
  2431. end;
  2432. ElDef := TElementDecl(FCurrNode^.FQName^.Data);
  2433. if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
  2434. DoErrorPos(esError, 'Using undeclared element ''%s''',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2435. if not FValidators[FValidatorNesting].IsElementAllowed(ElDef) then
  2436. DoErrorPos(esError, 'Element ''%s'' is not allowed in this context',[FCurrNode^.FQName^.Key], FCurrNode^.FLoc);
  2437. PushVC(ElDef);
  2438. { Validate attributes }
  2439. for i := 1 to FAttrCount do
  2440. begin
  2441. attr := @FNodeStack[FNesting+i];
  2442. AttDef := TAttributeDef(attr^.FTypeInfo);
  2443. if AttDef = nil then
  2444. DoErrorPos(esError, 'Using undeclared attribute ''%s'' on element ''%s''',
  2445. [attr^.FQName^.Key, FCurrNode^.FQName^.Key], attr^.FLoc)
  2446. else if ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
  2447. begin
  2448. if FStandalone and AttDef.ExternallyDeclared then
  2449. { TODO: perhaps should use different and more descriptive messages }
  2450. if attr^.FDenormalized then
  2451. DoErrorPos(esError, 'Standalone constraint violation', attr^.FLoc2)
  2452. else if i > FSpecifiedAttrs then
  2453. DoError(esError, 'Standalone constraint violation');
  2454. // TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
  2455. if (AttDef.Default = adFixed) and (AttDef.Data^.FValueStr <> attr^.FValueStr) then
  2456. DoErrorPos(esError, 'Value of attribute ''%s'' does not match its #FIXED default',[attr^.FQName^.Key], attr^.FLoc2);
  2457. if not ValidateAttrSyntax(AttDef, attr^.FValueStr) then
  2458. DoErrorPos(esError, 'Attribute ''%s'' type mismatch', [attr^.FQName^.Key], attr^.FLoc2);
  2459. ValidateAttrValue(AttDef, attr);
  2460. end;
  2461. end;
  2462. end;
  2463. ntEndElement:
  2464. begin
  2465. if FValidators[FValidatorNesting].Incomplete then
  2466. ValidationError('Element ''%s'' is missing required sub-elements', [FCurrNode^.FQName^.Key], -1);
  2467. if FValidatorNesting > 0 then
  2468. Dec(FValidatorNesting);
  2469. end;
  2470. ntText, ntSignificantWhitespace:
  2471. case FValidators[FValidatorNesting].FContentType of
  2472. ctChildren:
  2473. if FCurrNode^.FNodeType = ntText then
  2474. ValidationError('Character data is not allowed in element-only content',[])
  2475. else
  2476. begin
  2477. if FValidators[FValidatorNesting].FSaViolation then
  2478. StandaloneError(-1);
  2479. FCurrNode^.FNodeType := ntWhitespace;
  2480. end;
  2481. ctEmpty:
  2482. ValidationError('Character data is not allowed in EMPTY elements', []);
  2483. end;
  2484. ntCDATA:
  2485. if FValidators[FValidatorNesting].FContentType = ctChildren then
  2486. ValidationError('CDATA sections are not allowed in element-only content',[]);
  2487. ntProcessingInstruction:
  2488. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2489. ValidationError('Processing instructions are not allowed within EMPTY elements', []);
  2490. ntComment:
  2491. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2492. ValidationError('Comments are not allowed within EMPTY elements', []);
  2493. ntDocumentType:
  2494. ValidateDTD;
  2495. end;
  2496. end;
  2497. procedure TXMLReader.HandleEntityStart;
  2498. begin
  2499. { FNesting+1 is available due to overallocation in AllocNodeData() }
  2500. FCurrNode := @FNodeStack[FNesting+1];
  2501. FCurrNode^.FNodeType := ntEntityReference;
  2502. FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2503. FCurrNode^.FValueStart := nil;
  2504. FCurrNode^.FValueLength := 0;
  2505. end;
  2506. procedure TXMLReader.HandleEntityEnd;
  2507. begin
  2508. ContextPop(True);
  2509. if FNesting > 0 then Dec(FNesting);
  2510. FCurrNode := @FNodeStack[FNesting+1];
  2511. FCurrNode^.FNodeType := ntEndEntity;
  2512. // TODO: other properties of FCurrNode
  2513. FNext := xtText;
  2514. end;
  2515. procedure TXMLReader.ResolveEntity;
  2516. begin
  2517. if FCurrNode^.FNodeType <> ntEntityReference then
  2518. raise EInvalidOperation.Create('Wrong node type');
  2519. {... here must actually call EntityCheck, but it's called in main loop}
  2520. FNext := xtPushEntity;
  2521. end;
  2522. procedure TXMLReader.DoStartEntity;
  2523. var
  2524. src: TXMLCharSource;
  2525. begin
  2526. Inc(FNesting);
  2527. FCurrNode := AllocNodeData(FNesting);
  2528. if Assigned(FCurrEntity) then
  2529. ContextPush(FCurrEntity)
  2530. else
  2531. begin
  2532. // Undefined entity -- use a dummy inputsource, in order to get a matching EndEntity event
  2533. src := TXMLCharSource.Create('');
  2534. src.Kind := skManualPop;
  2535. Initialize(src);
  2536. end;
  2537. FNext := xtText;
  2538. end;
  2539. function TXMLReader.DoStartElement: TDOMElement;
  2540. var
  2541. Attr: TDOMAttr;
  2542. i: Integer;
  2543. begin
  2544. with FCurrNode^.FQName^ do
  2545. Result := doc.CreateElementBuf(PWideChar(Key), Length(Key));
  2546. if Assigned(FCurrNode^.FNsUri) then
  2547. Result.SetNSI(FCurrNode^.FNsUri^.Key, FCurrNode^.FColonPos+1);
  2548. for i := 1 to FAttrCount do
  2549. begin
  2550. Attr := LoadAttribute(doc, @FNodeStack[FNesting+i]);
  2551. Result.SetAttributeNode(Attr);
  2552. // Attach element to ID map entry if necessary
  2553. if Assigned(FNodeStack[FNesting+i].FIDEntry) then
  2554. FNodeStack[FNesting+i].FIDEntry^.Data := Result;
  2555. end;
  2556. end;
  2557. // The code below does the bulk of the parsing, and must be as fast as possible.
  2558. // To minimize CPU cache effects, methods from different classes are kept together
  2559. function TXMLDecodingSource.SkipUntil(var ToFill: TWideCharBuf; const Delim: TSetOfChar;
  2560. wsflag: PBoolean): WideChar;
  2561. var
  2562. old: PWideChar;
  2563. nonws: Boolean;
  2564. wc: WideChar;
  2565. begin
  2566. nonws := False;
  2567. repeat
  2568. old := FBuf;
  2569. repeat
  2570. wc := FBuf^;
  2571. if (wc = #10) or (wc = #13) or (FXML11Rules and ((wc = #$85) or
  2572. (wc = #$2028))) then
  2573. begin
  2574. // strictly this is needed only for 2-byte lineendings
  2575. BufAppendChunk(ToFill, old, FBuf);
  2576. NewLine;
  2577. old := FBuf;
  2578. wc := FBuf^
  2579. end
  2580. else if ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and
  2581. (wc <> #9)) or (wc > #$FFFD) or
  2582. (FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) then
  2583. FReader.FatalError('Invalid character');
  2584. if (wc < #255) and (Char(ord(wc)) in Delim) then
  2585. Break;
  2586. // the checks above filter away everything below #32 that isn't a whitespace
  2587. if wc > #32 then
  2588. nonws := True;
  2589. Inc(FBuf);
  2590. until False;
  2591. Result := wc;
  2592. BufAppendChunk(ToFill, old, FBuf);
  2593. until (Result <> #0) or (not Reload);
  2594. if Assigned(wsflag) then
  2595. wsflag^ := wsflag^ or nonws;
  2596. end;
  2597. const
  2598. TextDelims: array[Boolean] of TSetOfChar = (
  2599. [#0, '<', '&', '>'],
  2600. [#0, '>']
  2601. );
  2602. textNodeTypes: array[Boolean] of TXMLNodeType = (
  2603. ntSignificantWhitespace,
  2604. ntText
  2605. );
  2606. procedure TXMLReader.ParseContent(parent: TDOMNode_WithChildren);
  2607. var
  2608. cursor: TDOMNode_WithChildren;
  2609. element: TDOMElement;
  2610. begin
  2611. cursor := parent;
  2612. while Read do
  2613. begin
  2614. if FValidate then
  2615. ValidateCurrentNode;
  2616. case FCurrNode^.FNodeType of
  2617. ntText:
  2618. cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, False));
  2619. ntWhitespace, ntSignificantWhitespace:
  2620. if FPreserveWhitespace then
  2621. cursor.InternalAppend(doc.CreateTextNodeBuf(FValue.Buffer, FValue.Length, FCurrNode^.FNodeType = ntWhitespace));
  2622. ntCDATA:
  2623. cursor.InternalAppend(DoCDSect(FValue.Buffer, FValue.Length));
  2624. ntProcessingInstruction:
  2625. cursor.InternalAppend(CreatePINode);
  2626. ntComment:
  2627. if not FIgnoreComments then
  2628. cursor.InternalAppend(doc.CreateCommentBuf(FCurrNode^.FValueStart, FCurrNode^.FValueLength));
  2629. ntElement:
  2630. begin
  2631. element := DoStartElement;
  2632. cursor.InternalAppend(element);
  2633. cursor := element;
  2634. end;
  2635. ntEndElement:
  2636. cursor := TDOMNode_WithChildren(cursor.ParentNode);
  2637. ntDocumentType:
  2638. if not FCanonical then
  2639. cursor.InternalAppend(TDOMDocumentType.Create(doc, FDocType));
  2640. ntEntityReference:
  2641. cursor.InternalAppend(doc.CreateEntityReference(FCurrNode^.FQName^.Key));
  2642. end;
  2643. end;
  2644. end;
  2645. function TXMLReader.Read: Boolean;
  2646. var
  2647. nonWs: Boolean;
  2648. wc: WideChar;
  2649. InCDATA: Boolean;
  2650. tok: TXMLToken;
  2651. begin
  2652. if FNext = xtPopEmptyElement then
  2653. begin
  2654. FNext := xtPopElement;
  2655. FCurrNode^.FNodeType := ntEndElement;
  2656. if FAttrCleanupFlag then
  2657. CleanupAttributes;
  2658. FAttrCount := 0;
  2659. Result := True;
  2660. Exit;
  2661. end;
  2662. if FNext = xtPushElement then
  2663. begin
  2664. if FAttrCleanupFlag then
  2665. CleanupAttributes;
  2666. FAttrCount := 0;
  2667. FNext := xtText;
  2668. end
  2669. else if FNext = xtPopElement then
  2670. begin
  2671. if FNamespaces then
  2672. FNSHelper.EndElement;
  2673. PopVC;
  2674. end
  2675. else if FNext = xtPushEntity then
  2676. DoStartEntity;
  2677. InCDATA := (FNext = xtCDSect);
  2678. StoreLocation(FTokenStart);
  2679. nonWs := False;
  2680. FValue.Length := 0;
  2681. if FNext in [xtCDSect, xtText] then
  2682. repeat
  2683. wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
  2684. if wc = '<' then
  2685. begin
  2686. Inc(FSource.FBuf);
  2687. if FSource.FBufEnd < FSource.FBuf + 2 then
  2688. FSource.Reload;
  2689. if FSource.FBuf^ = '/' then
  2690. tok := xtEndElement
  2691. else if CheckName([cnOptional]) then
  2692. tok := xtElement
  2693. else if FSource.FBuf^ = '!' then
  2694. begin
  2695. Inc(FSource.FBuf);
  2696. if FSource.FBuf^ = '[' then
  2697. begin
  2698. ExpectString('[CDATA[');
  2699. if FState <> rsRoot then
  2700. FatalError('Illegal at document level');
  2701. StoreLocation(FTokenStart);
  2702. InCDATA := True;
  2703. if FCDSectionsAsText or (FValue.Length = 0) then
  2704. Continue;
  2705. tok := xtCDSect;
  2706. end
  2707. else if FSource.FBuf^ = '-' then
  2708. begin
  2709. { Ignoring comments is tricky in validating mode; discarding a comment which
  2710. is the only child of an EMPTY element will make that element erroneously appear
  2711. as valid. Therefore, at this point we discard only comments which are preceded
  2712. by some text (since presence of text already renders an EMPTY element invalid).
  2713. Other comments should be reported to validation part and discarded there. }
  2714. if FIgnoreComments and (FValue.Length > 0) then
  2715. begin
  2716. ParseComment(True);
  2717. Continue;
  2718. end;
  2719. tok := xtComment;
  2720. end
  2721. else
  2722. tok := xtDoctype;
  2723. end
  2724. else if FSource.FBuf^ = '?' then
  2725. tok := xtPI
  2726. else
  2727. RaiseNameNotFound;
  2728. end
  2729. else if wc = #0 then
  2730. begin
  2731. if InCDATA then
  2732. FatalError('Unterminated CDATA section', -1);
  2733. if FNesting > FSource.FStartNesting then
  2734. FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting].FQName^.Key]);
  2735. if Assigned(FSource.FParent) then
  2736. begin
  2737. if FExpandEntities and ContextPop then
  2738. Continue
  2739. else
  2740. tok := xtEntityEnd;
  2741. end
  2742. else
  2743. tok := xtEOF;
  2744. end
  2745. else if wc = '>' then
  2746. begin
  2747. BufAppend(FValue, wc);
  2748. FSource.NextChar;
  2749. if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
  2750. (FValue.Buffer[FValue.Length-3] <> ']') then Continue;
  2751. if InCData then // got a ']]>' separator
  2752. begin
  2753. Dec(FValue.Length, 3);
  2754. InCDATA := False;
  2755. if FCDSectionsAsText then
  2756. Continue;
  2757. SetNodeInfoWithValue(ntCDATA);
  2758. FNext := xtText;
  2759. Result := True;
  2760. Exit;
  2761. end
  2762. else
  2763. FatalError('Literal '']]>'' is not allowed in text', 3);
  2764. end
  2765. else if wc = '&' then
  2766. begin
  2767. if FState <> rsRoot then
  2768. FatalError('Illegal at document level');
  2769. if FValidators[FValidatorNesting].FContentType = ctEmpty then
  2770. ValidationError('References are illegal in EMPTY elements', []);
  2771. if ParseRef(FValue) or ResolvePredefined then
  2772. begin
  2773. nonWs := True; // CharRef to whitespace is not considered whitespace
  2774. Continue;
  2775. end
  2776. else
  2777. begin
  2778. FCurrEntity := EntityCheck;
  2779. if Assigned(FCurrEntity) and FExpandEntities then
  2780. begin
  2781. ContextPush(FCurrEntity);
  2782. Continue;
  2783. end;
  2784. tok := xtEntity;
  2785. end;
  2786. end;
  2787. if FValue.Length <> 0 then
  2788. begin
  2789. if FState <> rsRoot then
  2790. if nonWs then
  2791. FatalError('Illegal at document level', -1)
  2792. else
  2793. Break;
  2794. SetNodeInfoWithValue(textNodeTypes[nonWs]);
  2795. FNext := tok;
  2796. Result := True;
  2797. Exit;
  2798. end;
  2799. Break;
  2800. until False
  2801. else // not (FNext in [xtText, xtCDSect])
  2802. tok := FNext;
  2803. FNext := xtText;
  2804. case tok of
  2805. xtEntity: HandleEntityStart;
  2806. xtEntityEnd: HandleEntityEnd;
  2807. xtElement: ParseStartTag;
  2808. xtEndElement: ParseEndTag;
  2809. xtPI: ParsePI;
  2810. xtDoctype: ParseDoctypeDecl;
  2811. xtComment: ParseComment(False);
  2812. end;
  2813. Result := tok <> xtEOF;
  2814. end;
  2815. procedure TXMLCharSource.NextChar;
  2816. begin
  2817. Inc(FBuf);
  2818. if FBuf >= FBufEnd then
  2819. Reload;
  2820. end;
  2821. procedure TXMLReader.ExpectChar(wc: WideChar);
  2822. begin
  2823. if FSource.FBuf^ = wc then
  2824. FSource.NextChar
  2825. else
  2826. FatalError(wc);
  2827. end;
  2828. // Element name already in FNameBuffer
  2829. procedure TXMLReader.ParseStartTag; // [39] [40] [44]
  2830. var
  2831. ElDef: TElementDecl;
  2832. IsEmpty: Boolean;
  2833. ElName: PHashItem;
  2834. b: TBinding;
  2835. begin
  2836. if FState > rsRoot then
  2837. FatalError('Only one top-level element allowed', FName.Length)
  2838. else if FState < rsRoot then
  2839. begin
  2840. // dispose notation refs from DTD, if any
  2841. ClearForwardRefs;
  2842. FState := rsRoot;
  2843. end;
  2844. // we're about to process a new set of attributes
  2845. Inc(FAttrTag);
  2846. // Get hash entry for element name
  2847. ElName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2848. // Find declaration for this element
  2849. ElDef := TElementDecl(ElName^.Data);
  2850. IsEmpty := False;
  2851. FAttrCount := 0;
  2852. FPrefixedAttrs := 0;
  2853. FSpecifiedAttrs := 0;
  2854. Inc(FNesting);
  2855. FCurrNode := AllocNodeData(FNesting);
  2856. FCurrNode^.FQName := ElName;
  2857. FCurrNode^.FNodeType := ntElement;
  2858. FCurrNode^.FColonPos := FColonPos;
  2859. StoreLocation(FCurrNode^.FLoc);
  2860. Dec(FCurrNode^.FLoc.LinePos, FName.Length);
  2861. if FNamespaces then
  2862. begin
  2863. FNSHelper.StartElement;
  2864. if FColonPos > 0 then
  2865. FCurrNode^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  2866. end;
  2867. while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
  2868. begin
  2869. SkipS(True);
  2870. if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
  2871. Break;
  2872. ParseAttribute(ElDef);
  2873. end;
  2874. if FSource.FBuf^ = '/' then
  2875. begin
  2876. IsEmpty := True;
  2877. FSource.NextChar;
  2878. end;
  2879. ExpectChar('>');
  2880. if Assigned(ElDef) and ElDef.NeedsDefaultPass then
  2881. ProcessDefaultAttributes(ElDef);
  2882. // Adding attributes might have reallocated FNodeStack, so restore FCurrNode once again
  2883. FCurrNode := @FNodeStack[FNesting];
  2884. if FNamespaces then
  2885. begin
  2886. { Assign namespace URIs to prefixed attrs }
  2887. if FPrefixedAttrs <> 0 then
  2888. ProcessNamespaceAtts;
  2889. { Expand the element name }
  2890. if Assigned(FCurrNode^.FPrefix) then
  2891. begin
  2892. b := TBinding(FCurrNode^.FPrefix^.Data);
  2893. if not (Assigned(b) and (b.uri <> '')) then
  2894. DoErrorPos(esFatal, 'Unbound element name prefix "%s"', [FCurrNode^.FPrefix^.Key],FCurrNode^.FLoc);
  2895. FCurrNode^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
  2896. end
  2897. else
  2898. begin
  2899. b := FNSHelper.DefaultNSBinding;
  2900. if Assigned(b) then
  2901. FCurrNode^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
  2902. end;
  2903. end;
  2904. if not IsEmpty then
  2905. begin
  2906. if not FPreserveWhitespace then // critical for testsuite compliance
  2907. SkipS;
  2908. FNext := xtPushElement;
  2909. end
  2910. else
  2911. FNext := xtPopEmptyElement;
  2912. end;
  2913. procedure TXMLReader.ParseEndTag; // [42]
  2914. var
  2915. ElName: PHashItem;
  2916. begin
  2917. if FNesting <= FSource.FStartNesting then
  2918. FatalError('End-tag is not allowed here');
  2919. Inc(FSource.FBuf);
  2920. FCurrNode := @FNodeStack[FNesting]; // move off the possible child
  2921. FCurrNode^.FNodeType := ntEndElement;
  2922. ElName := FCurrNode^.FQName;
  2923. CheckName;
  2924. if not BufEquals(FName, ElName^.Key) then
  2925. FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
  2926. if FSource.FBuf^ = '>' then // this handles majority of cases
  2927. FSource.NextChar
  2928. else
  2929. begin
  2930. SkipS;
  2931. ExpectChar('>');
  2932. end;
  2933. Inc(FTokenStart.LinePos, 2); // move over '</' chars
  2934. FNext := xtPopElement;
  2935. end;
  2936. procedure TXMLReader.ParseAttribute(ElDef: TElementDecl);
  2937. var
  2938. attrName: PHashItem;
  2939. attrData: PNodeData;
  2940. AttDef: TAttributeDef;
  2941. i: Integer;
  2942. begin
  2943. CheckName;
  2944. attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
  2945. attrData := AllocAttributeData;
  2946. attrData^.FQName := attrName;
  2947. attrData^.FColonPos := FColonPos;
  2948. StoreLocation(attrData^.FLoc);
  2949. Dec(attrData^.FLoc.LinePos, FName.Length);
  2950. FSpecifiedAttrs := FAttrCount;
  2951. if Assigned(ElDef) then
  2952. begin
  2953. AttDef := ElDef.GetAttrDef(attrName);
  2954. if Assigned(AttDef) then
  2955. AttDef.Tag := FAttrTag; // indicates that this one is specified
  2956. end
  2957. else
  2958. AttDef := nil;
  2959. attrData^.FTypeInfo := AttDef;
  2960. // check for duplicates
  2961. for i := 1 to FAttrCount-1 do
  2962. if FNodeStack[FNesting+i].FQName = attrName then
  2963. FatalError('Duplicate attribute', FName.Length);
  2964. if FNamespaces then
  2965. begin
  2966. if ((FName.Length = 5) or (FColonPos = 5)) and
  2967. (FName.Buffer[0] = 'x') and (FName.Buffer[1] = 'm') and
  2968. (FName.Buffer[2] = 'l') and (FName.Buffer[3] = 'n') and
  2969. (FName.Buffer[4] = 's') then
  2970. begin
  2971. if FColonPos > 0 then
  2972. attrData^.FPrefix := FStdPrefix_xmlns;
  2973. attrData^.FNsUri := FStdUri_xmlns;
  2974. end
  2975. else if FColonPos > 0 then
  2976. begin
  2977. attrData^.FPrefix := FNSHelper.GetPrefix(FName.Buffer, FColonPos);
  2978. Inc(FPrefixedAttrs);
  2979. end;
  2980. end;
  2981. ExpectEq;
  2982. ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
  2983. attrData^.FLoc2 := FTokenStart;
  2984. if Assigned(attrData^.FNsUri) then
  2985. begin
  2986. if (not AddBinding(attrData)) and FCanonical then
  2987. begin
  2988. CleanupAttribute(attrData);
  2989. Dec(FAttrCount);
  2990. Dec(FSpecifiedAttrs);
  2991. end;
  2992. end;
  2993. end;
  2994. procedure TXMLReader.AddForwardRef(Buf: PWideChar; Length: Integer);
  2995. var
  2996. w: PForwardRef;
  2997. begin
  2998. New(w);
  2999. SetString(w^.Value, Buf, Length);
  3000. w^.Loc := FTokenStart;
  3001. FForwardRefs.Add(w);
  3002. end;
  3003. procedure TXMLReader.ClearForwardRefs;
  3004. var
  3005. I: Integer;
  3006. begin
  3007. for I := 0 to FForwardRefs.Count-1 do
  3008. Dispose(PForwardRef(FForwardRefs.List^[I]));
  3009. FForwardRefs.Clear;
  3010. end;
  3011. procedure TXMLReader.ValidateIdRefs;
  3012. var
  3013. I: Integer;
  3014. begin
  3015. for I := 0 to FForwardRefs.Count-1 do
  3016. with PForwardRef(FForwardRefs.List^[I])^ do
  3017. if (FIDMap = nil) or (FIDMap.Find(PWideChar(Value), Length(Value)) = nil) then
  3018. DoErrorPos(esError, 'The ID ''%s'' does not match any element', [Value], Loc);
  3019. ClearForwardRefs;
  3020. end;
  3021. procedure TXMLReader.ProcessDefaultAttributes(ElDef: TElementDecl);
  3022. var
  3023. I: Integer;
  3024. AttDef: TAttributeDef;
  3025. attrData: PNodeData;
  3026. begin
  3027. for I := 0 to ElDef.AttrDefCount-1 do
  3028. begin
  3029. AttDef := ElDef.AttrDefs[I];
  3030. if AttDef.Tag <> FAttrTag then // this one wasn't specified
  3031. begin
  3032. case AttDef.Default of
  3033. adDefault, adFixed: begin
  3034. attrData := AllocAttributeData;
  3035. attrData^ := AttDef.Data^;
  3036. if FCanonical then
  3037. attrData^.FIsDefault := False;
  3038. if FNamespaces then
  3039. begin
  3040. if AttDef.IsNamespaceDecl then
  3041. begin
  3042. if attrData^.FColonPos > 0 then
  3043. attrData^.FPrefix := FStdPrefix_xmlns;
  3044. attrData^.FNsUri := FStdUri_xmlns;
  3045. if (not AddBinding(attrData)) and FCanonical then
  3046. Dec(FAttrCount);
  3047. end
  3048. else if attrData^.FColonPos > 0 then
  3049. begin
  3050. attrData^.FPrefix := FNSHelper.GetPrefix(PWideChar(attrData^.FQName^.Key), attrData^.FColonPos);
  3051. Inc(FPrefixedAttrs);
  3052. end;
  3053. end;
  3054. end;
  3055. adRequired:
  3056. ValidationError('Required attribute ''%s'' of element ''%s'' is missing',
  3057. [AttDef.Data^.FQName^.Key, FNodeStack[FNesting].FQName^.Key], 0)
  3058. end;
  3059. end;
  3060. end;
  3061. end;
  3062. function TXMLReader.AddBinding(attrData: PNodeData): Boolean;
  3063. var
  3064. nsUri, Pfx: PHashItem;
  3065. begin
  3066. nsUri := FNameTable.FindOrAdd(PWideChar(attrData^.FValueStr), Length(attrData^.FValueStr));
  3067. if attrData^.FColonPos > 0 then
  3068. Pfx := FNSHelper.GetPrefix(@attrData^.FQName^.key[7], Length(attrData^.FQName^.key)-6)
  3069. else
  3070. Pfx := FNSHelper.GetPrefix(nil, 0); { will return the default prefix }
  3071. { 'xml' is allowed to be bound to the correct namespace }
  3072. if ((nsUri = FStduri_xml) <> (Pfx = FStdPrefix_xml)) or
  3073. (Pfx = FStdPrefix_xmlns) or
  3074. (nsUri = FStduri_xmlns) then
  3075. begin
  3076. if (Pfx = FStdPrefix_xml) or (Pfx = FStdPrefix_xmlns) then
  3077. DoErrorPos(esFatal, 'Illegal usage of reserved prefix ''%s''', [Pfx^.Key], attrData^.FLoc)
  3078. else
  3079. DoErrorPos(esFatal, 'Illegal usage of reserved namespace URI ''%s''', [attrData^.FValueStr], attrData^.FLoc2);
  3080. end;
  3081. if (attrData^.FValueStr = '') and not (FXML11 or (Pfx^.Key = '')) then
  3082. DoErrorPos(esFatal, 'Illegal undefining of namespace', attrData^.FLoc2);
  3083. Result := (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> attrData^.FValueStr);
  3084. if Result then
  3085. FNSHelper.BindPrefix(attrData^.FValueStr, Pfx);
  3086. end;
  3087. procedure TXMLReader.ProcessNamespaceAtts;
  3088. var
  3089. I, J: Integer;
  3090. Pfx, AttrName: PHashItem;
  3091. attrData: PNodeData;
  3092. b: TBinding;
  3093. begin
  3094. FNsAttHash.Init(FPrefixedAttrs);
  3095. for I := 1 to FAttrCount do
  3096. begin
  3097. attrData := @FNodeStack[FNesting+i];
  3098. if (attrData^.FColonPos < 1) or Assigned(attrData^.FNsUri) then
  3099. Continue;
  3100. Pfx := attrData^.FPrefix;
  3101. b := TBinding(Pfx^.Data);
  3102. if not (Assigned(b) and (b.uri <> '')) then
  3103. DoErrorPos(esFatal, 'Unbound attribute name prefix "%s"', [Pfx^.Key], attrData^.FLoc);
  3104. { detect duplicates }
  3105. J := attrData^.FColonPos+1;
  3106. AttrName := attrData^.FQName;
  3107. if FNsAttHash.Locate(@b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
  3108. DoErrorPos(esFatal, 'Duplicate prefixed attribute', attrData^.FLoc);
  3109. attrData^.FNsUri := FNameTable.FindOrAdd(PWideChar(b.uri), Length(b.uri));
  3110. end;
  3111. end;
  3112. function TXMLReader.ParseExternalID(out SysID, PubID: WideString; // [75]
  3113. SysIdOptional: Boolean): Boolean;
  3114. var
  3115. I: Integer;
  3116. wc: WideChar;
  3117. begin
  3118. Result := False;
  3119. if FSource.Matches('SYSTEM') then
  3120. SysIdOptional := False
  3121. else if FSource.Matches('PUBLIC') then
  3122. begin
  3123. ExpectWhitespace;
  3124. ParseLiteral(FValue, ltPubid, True);
  3125. SetString(PubID, FValue.Buffer, FValue.Length);
  3126. for I := 1 to Length(PubID) do
  3127. begin
  3128. wc := PubID[I];
  3129. if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
  3130. FatalError('Illegal Public ID literal', -1);
  3131. end;
  3132. end
  3133. else
  3134. Exit;
  3135. if SysIdOptional then
  3136. SkipWhitespace
  3137. else
  3138. ExpectWhitespace;
  3139. if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
  3140. SetString(SysID, FValue.Buffer, FValue.Length);
  3141. Result := True;
  3142. end;
  3143. function TXMLReader.ValidateAttrSyntax(AttrDef: TAttributeDef; const aValue: WideString): Boolean;
  3144. begin
  3145. case AttrDef.DataType of
  3146. dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11) and
  3147. ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
  3148. dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11) and
  3149. ((not FNamespaces) or (Pos(WideChar(':'), aValue) = 0));
  3150. dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
  3151. dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
  3152. // IsXmlName() not necessary - enum is never empty and contains valid names
  3153. dtNotation: Result := AttrDef.HasEnumToken(aValue);
  3154. else
  3155. Result := True;
  3156. end;
  3157. end;
  3158. procedure TXMLReader.ValidateAttrValue(AttrDef: TAttributeDef; attrData: PNodeData);
  3159. var
  3160. L, StartPos, EndPos: Integer;
  3161. Entity: TEntityDecl;
  3162. begin
  3163. L := Length(attrData^.FValueStr);
  3164. case AttrDef.DataType of
  3165. dtId: begin
  3166. if not AddID(attrData) then
  3167. DoErrorPos(esError, 'The ID ''%s'' is not unique', [attrData^.FValueStr], attrData^.FLoc2);
  3168. end;
  3169. dtIdRef, dtIdRefs: begin
  3170. StartPos := 1;
  3171. while StartPos <= L do
  3172. begin
  3173. EndPos := StartPos;
  3174. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3175. Inc(EndPos);
  3176. if (FIDMap = nil) or (FIDMap.Find(@attrData^.FValueStr[StartPos], EndPos-StartPos) = nil) then
  3177. AddForwardRef(@attrData^.FValueStr[StartPos], EndPos-StartPos);
  3178. StartPos := EndPos + 1;
  3179. end;
  3180. end;
  3181. dtEntity, dtEntities: begin
  3182. StartPos := 1;
  3183. while StartPos <= L do
  3184. begin
  3185. EndPos := StartPos;
  3186. while (EndPos <= L) and (attrData^.FValueStr[EndPos] <> #32) do
  3187. Inc(EndPos);
  3188. Entity := TEntityDecl(FDocType.Entities.Get(@attrData^.FValueStr[StartPos], EndPos-StartPos));
  3189. if (Entity = nil) or (Entity.FNotationName = '') then
  3190. ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
  3191. StartPos := EndPos + 1;
  3192. end;
  3193. end;
  3194. end;
  3195. end;
  3196. procedure TXMLReader.ValidateDTD;
  3197. var
  3198. I: Integer;
  3199. begin
  3200. for I := 0 to FForwardRefs.Count-1 do
  3201. with PForwardRef(FForwardRefs[I])^ do
  3202. if FDocType.Notations.Get(PWideChar(Value), Length(Value)) = nil then
  3203. DoErrorPos(esError, 'Notation ''%s'' is not declared', [Value], Loc);
  3204. end;
  3205. function TXMLReader.DoCDSect(ch: PWideChar; Count: Integer): TDOMNode;
  3206. var
  3207. s: WideString;
  3208. begin
  3209. Assert(not FCDSectionsAsText, 'Should not be called when CDSectionsAsText=True');
  3210. SetString(s, ch, Count);
  3211. result := doc.CreateCDATASection(s);
  3212. end;
  3213. procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: WideString);
  3214. var
  3215. Notation: TNotationDecl;
  3216. Entry: PHashItem;
  3217. begin
  3218. Entry := FDocType.Notations.FindOrAdd(PWideChar(aName), Length(aName));
  3219. if Entry^.Data = nil then
  3220. begin
  3221. Notation := TNotationDecl.Create;
  3222. Notation.FName := aName;
  3223. Notation.FPublicID := aPubID;
  3224. Notation.FSystemID := aSysID;
  3225. Entry^.Data := Notation;
  3226. end
  3227. else
  3228. ValidationError('Duplicate notation declaration: ''%s''', [aName]);
  3229. end;
  3230. function TXMLReader.AddId(aNodeData: PNodeData): Boolean;
  3231. var
  3232. e: PHashItem;
  3233. begin
  3234. if FIDMap = nil then
  3235. FIDMap := THashTable.Create(256, False);
  3236. e := FIDMap.FindOrAdd(PWideChar(aNodeData^.FValueStr), Length(aNodeData^.FValueStr), Result);
  3237. Result := not Result;
  3238. if Result then
  3239. aNodeData^.FIDEntry := e;
  3240. end;
  3241. function TXMLReader.AllocAttributeData: PNodeData;
  3242. begin
  3243. Result := AllocNodeData(FNesting + FAttrCount + 1);
  3244. Result^.FNodeType := ntAttribute;
  3245. Result^.FIsDefault := False;
  3246. Inc(FAttrCount);
  3247. end;
  3248. function TXMLReader.AllocNodeData(AIndex: Integer): PNodeData;
  3249. begin
  3250. {make sure we have an extra slot to place child text/comment/etc}
  3251. if AIndex >= Length(FNodeStack)-1 then
  3252. SetLength(FNodeStack, AIndex * 2 + 2);
  3253. Result := @FNodeStack[AIndex];
  3254. Result^.FPrefix := nil;
  3255. Result^.FNsUri := nil;
  3256. Result^.FIDEntry := nil;
  3257. end;
  3258. function TXMLReader.AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
  3259. begin
  3260. { when parsing DTD, don't take ownership of allocated data }
  3261. if FState = rsDTD then
  3262. begin
  3263. New(result);
  3264. FillChar(result^, sizeof(TNodeData), 0);
  3265. end
  3266. else
  3267. begin
  3268. result := FFreeAttrChunk;
  3269. if Assigned(result) then
  3270. begin
  3271. FFreeAttrChunk := result^.FNext;
  3272. result^.FNext := nil;
  3273. end
  3274. else { no free chunks, create a new one }
  3275. begin
  3276. New(result);
  3277. FillChar(result^, sizeof(TNodeData), 0);
  3278. FAttrChunks.Add(result);
  3279. end;
  3280. end;
  3281. APrev^.FNext := result;
  3282. end;
  3283. procedure TXMLReader.CleanupAttributes;
  3284. var
  3285. i: Integer;
  3286. begin
  3287. {cleanup only specified attributes; default ones are owned by DTD}
  3288. for i := 1 to FSpecifiedAttrs do
  3289. CleanupAttribute(@FNodeStack[FNesting+i]);
  3290. FAttrCleanupFlag := False;
  3291. end;
  3292. procedure TXMLReader.CleanupAttribute(aNode: PNodeData);
  3293. var
  3294. chunk, tmp: PNodeData;
  3295. begin
  3296. chunk := aNode^.FNext;
  3297. while Assigned(chunk) do
  3298. begin
  3299. tmp := chunk^.FNext;
  3300. chunk^.FNext := FFreeAttrChunk;
  3301. FFreeAttrChunk := chunk;
  3302. chunk := tmp;
  3303. end;
  3304. aNode^.FNext := nil;
  3305. end;
  3306. procedure TXMLReader.SetNodeInfoWithValue(typ: TXMLNodeType; AName: PHashItem = nil);
  3307. begin
  3308. {FNesting+1 is available due to overallocation in AllocNodeData() }
  3309. FCurrNode := @FNodeStack[FNesting+1];
  3310. FCurrNode^.FNodeType := typ;
  3311. FCurrNode^.FQName := AName;
  3312. FCurrNode^.FValueStart := FValue.Buffer;
  3313. FCurrNode^.FValueLength := FValue.Length;
  3314. end;
  3315. procedure TXMLReader.PushVC(aElDef: TElementDecl);
  3316. begin
  3317. Inc(FValidatorNesting);
  3318. if FValidatorNesting >= Length(FValidators) then
  3319. SetLength(FValidators, FValidatorNesting * 2);
  3320. with FValidators[FValidatorNesting] do
  3321. begin
  3322. FElementDef := aElDef;
  3323. FCurCP := nil;
  3324. FFailed := False;
  3325. FContentType := ctAny;
  3326. FSaViolation := False;
  3327. if Assigned(aElDef) then
  3328. begin
  3329. FContentType := aElDef.ContentType;
  3330. FSaViolation := FStandalone and aElDef.ExternallyDeclared;
  3331. end;
  3332. end;
  3333. end;
  3334. procedure TXMLReader.PopVC;
  3335. begin
  3336. if (FNesting = 1) and (not FFragmentMode) then
  3337. FState := rsEpilog;
  3338. if FNesting > 0 then Dec(FNesting);
  3339. FCurrNode := @FNodeStack[FNesting];
  3340. FNext := xtText;
  3341. end;
  3342. { TElementValidator }
  3343. function TElementValidator.IsElementAllowed(Def: TElementDecl): Boolean;
  3344. var
  3345. Next: TContentParticle;
  3346. begin
  3347. Result := True;
  3348. // if element is not declared, non-validity has been already reported, no need to report again...
  3349. if Assigned(Def) and Assigned(FElementDef) then
  3350. begin
  3351. case FElementDef.ContentType of
  3352. ctEmpty: Result := False;
  3353. ctChildren, ctMixed: begin
  3354. if FFailed then // if already detected a mismatch, don't waste time
  3355. Exit;
  3356. if FCurCP = nil then
  3357. Next := FElementDef.RootCP.FindFirst(Def)
  3358. else
  3359. Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
  3360. Result := Assigned(Next);
  3361. if Result then
  3362. FCurCP := Next
  3363. else
  3364. FFailed := True; // used to prevent extra error at the end of element
  3365. end;
  3366. // ctAny, ctUndeclared: returns True by default
  3367. end;
  3368. end;
  3369. end;
  3370. function TElementValidator.Incomplete: Boolean;
  3371. begin
  3372. if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
  3373. begin
  3374. if FCurCP <> nil then
  3375. Result := FCurCP.MoreRequired(0) { arg ignored here }
  3376. else
  3377. Result := FElementDef.RootCP.IsRequired;
  3378. end
  3379. else
  3380. Result := False;
  3381. end;
  3382. { plain calls }
  3383. procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text);
  3384. var
  3385. Reader: TXMLReader;
  3386. Src: TXMLCharSource;
  3387. begin
  3388. ADoc := nil;
  3389. Src := TXMLFileInputSource.Create(f);
  3390. Reader := TXMLReader.Create;
  3391. try
  3392. Reader.ProcessXML(Src);
  3393. finally
  3394. ADoc := TXMLDocument(Reader.Doc);
  3395. Reader.Free;
  3396. end;
  3397. end;
  3398. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  3399. var
  3400. Reader: TXMLReader;
  3401. Src: TXMLCharSource;
  3402. begin
  3403. ADoc := nil;
  3404. Reader := TXMLReader.Create;
  3405. try
  3406. Src := TXMLStreamInputSource.Create(f, False);
  3407. Src.SystemID := ABaseURI;
  3408. Reader.ProcessXML(Src);
  3409. finally
  3410. ADoc := TXMLDocument(Reader.doc);
  3411. Reader.Free;
  3412. end;
  3413. end;
  3414. procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream);
  3415. begin
  3416. ReadXMLFile(ADoc, f, 'stream:');
  3417. end;
  3418. procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String);
  3419. var
  3420. FileStream: TStream;
  3421. begin
  3422. ADoc := nil;
  3423. FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  3424. try
  3425. ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename));
  3426. finally
  3427. FileStream.Free;
  3428. end;
  3429. end;
  3430. procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text);
  3431. var
  3432. Reader: TXMLReader;
  3433. Src: TXMLCharSource;
  3434. begin
  3435. Reader := TXMLReader.Create;
  3436. try
  3437. Src := TXMLFileInputSource.Create(f);
  3438. Reader.ProcessFragment(Src, AParentNode);
  3439. finally
  3440. Reader.Free;
  3441. end;
  3442. end;
  3443. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream; const ABaseURI: String);
  3444. var
  3445. Reader: TXMLReader;
  3446. Src: TXMLCharSource;
  3447. begin
  3448. Reader := TXMLReader.Create;
  3449. try
  3450. Src := TXMLStreamInputSource.Create(f, False);
  3451. Src.SystemID := ABaseURI;
  3452. Reader.ProcessFragment(Src, AParentNode);
  3453. finally
  3454. Reader.Free;
  3455. end;
  3456. end;
  3457. procedure ReadXMLFragment(AParentNode: TDOMNode; f: TStream);
  3458. begin
  3459. ReadXMLFragment(AParentNode, f, 'stream:');
  3460. end;
  3461. procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String);
  3462. var
  3463. Stream: TStream;
  3464. begin
  3465. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  3466. try
  3467. ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename));
  3468. finally
  3469. Stream.Free;
  3470. end;
  3471. end;
  3472. procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
  3473. var
  3474. Reader: TXMLReader;
  3475. Src: TXMLCharSource;
  3476. begin
  3477. ADoc := nil;
  3478. Reader := TXMLReader.Create;
  3479. try
  3480. Src := TXMLFileInputSource.Create(f);
  3481. Reader.ProcessDTD(Src);
  3482. finally
  3483. ADoc := TXMLDocument(Reader.doc);
  3484. Reader.Free;
  3485. end;
  3486. end;
  3487. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String);
  3488. var
  3489. Reader: TXMLReader;
  3490. Src: TXMLCharSource;
  3491. begin
  3492. ADoc := nil;
  3493. Reader := TXMLReader.Create;
  3494. try
  3495. Src := TXMLStreamInputSource.Create(f, False);
  3496. Src.SystemID := ABaseURI;
  3497. Reader.ProcessDTD(Src);
  3498. finally
  3499. ADoc := TXMLDocument(Reader.doc);
  3500. Reader.Free;
  3501. end;
  3502. end;
  3503. procedure ReadDTDFile(out ADoc: TXMLDocument; f: TStream);
  3504. begin
  3505. ReadDTDFile(ADoc, f, 'stream:');
  3506. end;
  3507. procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
  3508. var
  3509. Stream: TStream;
  3510. begin
  3511. ADoc := nil;
  3512. Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
  3513. try
  3514. ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
  3515. finally
  3516. Stream.Free;
  3517. end;
  3518. end;
  3519. end.