libxmlparser.pas 107 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688
  1. (**
  2. ===============================================================================================
  3. Name : LibXmlParser
  4. ===============================================================================================
  5. Project : All Projects
  6. ===============================================================================================
  7. Subject : Progressive XML Parser for all types of XML Files
  8. ===============================================================================================
  9. Author : Stefan Heymann
  10. Eschenweg 3
  11. 72076 Tübingen
  12. GERMANY
  13. E-Mail: [email protected]
  14. URL: www.destructor.de
  15. ===============================================================================================
  16. Source, Legals ("Licence")
  17. --------------------------
  18. The official site to get this parser is http://www.destructor.de/
  19. Usage and Distribution of this Source Code is ruled by the
  20. "Destructor.de Source code Licence" (DSL) which comes with this file or
  21. can be downloaded at http://www.destructor.de/
  22. IN SHORT: Usage and distribution of this source code is free.
  23. You use it completely on your own risk.
  24. Postcardware
  25. ------------
  26. If you like this code, please send a postcard of your city to my above address.
  27. ===============================================================================================
  28. !!! All parts of this code which are not finished or not conforming exactly to
  29. the XmlSpec are marked with three exclamation marks
  30. -!- Parts where the parser may be able to detect errors in the document's syntax are
  31. marked with the dash-exlamation mark-dash sequence.
  32. ===============================================================================================
  33. Terminology:
  34. ------------
  35. - Start: Start of a buffer part
  36. - Final: End (last character) of a buffer part
  37. - DTD: Document Type Definition
  38. - DTDc: Document Type Declaration
  39. - XMLSpec: The current W3C XML Recommendation (version 1.0 as of 1998-02-10), Chapter No.
  40. - Cur*: Fields concerning the "Current" part passed back by the "Scan" method
  41. ===============================================================================================
  42. Scanning the XML document
  43. -------------------------
  44. - Create TXmlParser Instance MyXml := TXmlParser.Create;
  45. - Load XML Document MyXml.LoadFromFile (Filename);
  46. - Start Scanning MyXml.StartScan;
  47. - Scan Loop WHILE MyXml.Scan DO
  48. - Test for Part Type CASE MyXml.CurPartType OF
  49. - Handle Parts ... : ;;;
  50. - Handle Parts ... : ;;;
  51. - Handle Parts ... : ;;;
  52. END;
  53. - Destroy MyXml.Free;
  54. ===============================================================================================
  55. Loading the XML document
  56. ------------------------
  57. You can load the XML document from a file with the "LoadFromFile" method.
  58. It is beyond the scope of this parser to perform HTTP or FTP accesses. If you want your
  59. application to handle such requests (URLs), you can load the XML via HTTP or FTP or whatever
  60. protocol and hand over the data buffer using the "LoadFromBuffer" or "SetBuffer" method.
  61. "LoadFromBuffer" loads the internal buffer of TXmlParser with the given null-terminated
  62. string, thereby creating a copy of that buffer.
  63. "SetBuffer" just takes the pointer to another buffer, which means that the given
  64. buffer pointer must be valid while the document is accessed via TXmlParser.
  65. ===============================================================================================
  66. Encodings:
  67. ----------
  68. This XML parser kind of "understands" the following encodings:
  69. - UTF-8
  70. - ISO-8859-1
  71. - Windows-1252
  72. Any flavor of multi-byte characters (and this includes UTF-16) is not supported. Sorry.
  73. Every string which has to be passed to the application passes the virtual method
  74. "TranslateEncoding" which translates the string from the current encoding (stored in
  75. "CurEncoding") into the encoding the application wishes to receive.
  76. The "TranslateEncoding" method that is built into TXmlParser assumes that the application
  77. wants to receive Windows ANSI (Windows-1252, about the same as ISO-8859-1) and is able
  78. to convert UTF-8 and ISO-8859-1 encodings.
  79. For other source and target encodings, you will have to override "TranslateEncoding".
  80. ===============================================================================================
  81. Buffer Handling
  82. ---------------
  83. - The document must be loaded completely into a piece of RAM
  84. - All character positions are referenced by PChar pointers
  85. - The TXmlParser instance can either "own" the buffer itself (then, FBufferSize is > 0)
  86. or reference the buffer of another instance or object (then, FBuffersize is 0 and
  87. FBuffer is not NIL)
  88. - The Property DocBuffer passes back a pointer to the first byte of the document. If there
  89. is no document stored (FBuffer is NIL), the DocBuffer returns a pointer to a NULL character.
  90. ===============================================================================================
  91. Whitespace Handling
  92. -------------------
  93. The TXmlParser property "PackSpaces" determines how Whitespace is returned in Text Content:
  94. While PackSpaces is true, all leading and trailing whitespace characters are trimmed of, all
  95. Whitespace is converted to Space #x20 characters and contiguous Whitespace characters are
  96. compressed to one.
  97. If the "Scan" method reports a ptContent part, the application can get the original text
  98. with all whitespace characters by extracting the characters from "CurStart" to "CurFinal".
  99. If the application detects an xml:space attribute, it can set "PackSpaces" accordingly or
  100. use CurStart/CurFinal.
  101. Please note that TXmlParser does _not_ normalize Line Breaks to single LineFeed characters
  102. as the XmlSpec requires (XmlSpec 2.11).
  103. The xml:space attribute is not handled by TXmlParser. This is on behalf of the application.
  104. ===============================================================================================
  105. Non-XML-Conforming
  106. ------------------
  107. TXmlParser does not conform 100 % exactly to the XmlSpec:
  108. - UTF-16 is not supported (XmlSpec 2.2)
  109. (Workaround: Convert UTF-16 to UTF-8 and hand the buffer over to TXmlParser)
  110. - As the parser only works with single byte strings, all Unicode characters > 255
  111. can currently not be handled correctly.
  112. - Line breaks are not normalized to single Linefeed #x0A characters (XmlSpec 2.11)
  113. (Workaround: The Application can access the text contents on its own [CurStart, CurFinal],
  114. thereby applying every normalization it wishes to)
  115. - The attribute value normalization does not work exactly as defined in the
  116. Second Edition of the XML 1.0 specification.
  117. - See also the code parts marked with three consecutive exclamation marks. These are
  118. parts which are not finished in the current code release.
  119. This list may be incomplete, so it may grow if I get to know any other points.
  120. As work on the parser proceeds, this list may also shrink.
  121. ===============================================================================================
  122. Things Todo
  123. -----------
  124. - Introduce a new event/callback which is called when there is an unresolvable
  125. entity or character reference
  126. - Support Unicode
  127. - Use Streams instead of reading the whole XML into memory
  128. ===============================================================================================
  129. Change History, Version numbers
  130. -------------------------------
  131. The Date is given in ISO Year-Month-Day (YYYY-MM-DD) order.
  132. Versions are counted from 1.0.0 beginning with the version from 2000-03-16.
  133. Unreleased versions don't get a version number.
  134. Date Author Version Changes
  135. -----------------------------------------------------------------------------------------------
  136. 2000-03-16 HeySt 1.0.0 Start
  137. 2000-03-28 HeySt 1.0.1 Initial Publishing of TXmlParser on the destructor.de Web Site
  138. 2000-03-30 HeySt 1.0.2 TXmlParser.AnalyzeCData: Call "TranslateEncoding" for CurContent
  139. 2000-03-31 HeySt 1.0.3 Deleted the StrPosE function (was not needed anyway)
  140. 2000-04-04 HeySt 1.0.4 TDtdElementRec modified: Start/Final for all Elements;
  141. Should be backwards compatible.
  142. AnalyzeDtdc: Set CurPartType to ptDtdc
  143. 2000-04-23 HeySt 1.0.5 New class TObjectList. Eliminated reference to the Delphi 5
  144. "Contnrs" unit so LibXmlParser is Delphi 4 compatible.
  145. 2000-07-03 HeySt 1.0.6 TNvpNode: Added Constructor
  146. 2000-07-11 HeySt 1.0.7 Removed "Windows" from USES clause
  147. Added three-exclamation-mark comments for Utf8ToAnsi/AnsiToUtf8
  148. Added three-exclamation-mark comments for CHR function calls
  149. 2000-07-23 HeySt 1.0.8 TXmlParser.Clear: CurAttr.Clear; EntityStack.Clear;
  150. (This was not a bug; just defensive programming)
  151. 2000-07-29 HeySt 1.0.9 TNvpList: Added methods: Node(Index), Value(Index), Name(Index);
  152. 2000-10-07 HeySt Introduced Conditional Defines
  153. Uses Contnrs unit and its TObjectList class again for
  154. Delphi 5 and newer versions
  155. 2001-01-30 HeySt Introduced Version Numbering
  156. Made LoadFromFile and LoadFromBuffer BOOLEAN functions
  157. Introduced FileMode parameter for LoadFromFile
  158. BugFix: TAttrList.Analyze: Must add CWhitespace to ExtractName call
  159. Comments worked over
  160. 2001-02-28 HeySt 1.0.10 Completely worked over and tested the UTF-8 functions
  161. Fixed a bug in TXmlParser.Scan which caused it to start over when it
  162. was called after the end of scanning, resulting in an endless loop
  163. TEntityStack is now a TObjectList instead of TList
  164. 2001-07-03 HeySt 1.0.11 Updated Compiler Version IFDEFs for Kylix
  165. 2001-07-11 HeySt 1.0.12 New TCustomXmlScanner component (taken over from LibXmlComps.pas)
  166. 2001-07-14 HeySt 1.0.13 Bugfix TCustomXmlScanner.FOnTranslateEncoding
  167. 2001-10-22 HeySt Don't clear CurName anymore when the parser finds a CDATA section.
  168. 2001-12-03 HeySt 1.0.14 TObjectList.Clear: Make call to INHERITED method (fixes a memory leak)
  169. 2001-12-05 HeySt 1.0.15 TObjectList.Clear: removed call to INHERITED method
  170. TObjectList.Destroy: Inserted SetCapacity call.
  171. Reduces need for frequent re-allocation of pointer buffer
  172. Dedicated to my father, Theodor Heymann
  173. 2002-06-26 HeySt 1.0.16 TXmlParser.Scan: Fixed a bug with PIs whose name is beginning
  174. with 'xml'. Thanks to Uwe Kamm for submitting this bug.
  175. The CurEncoding property is now always in uppercase letters (the XML
  176. spec wants it to be treated case independently so when it's uppercase
  177. comparisons are faster)
  178. 2002-03-04 HeySt 1.0.17 Included an IFDEF for Delphi 7 (VER150) and Kylix
  179. There is a new symbol HAS_CONTNRS_UNIT which is used now to
  180. distinguish between IDEs which come with the Contnrs unit and
  181. those that don't.
  182. *)
  183. UNIT libxmlparser;
  184. {$I jedi-sdl.inc}
  185. INTERFACE
  186. USES
  187. SysUtils, Classes,
  188. (*$IFDEF HAS_CONTNRS_UNIT *) // The Contnrs Unit was introduced in Delphi 5
  189. Contnrs,
  190. (*$ENDIF*)
  191. Math;
  192. CONST
  193. CVersion = '1.0.17'; // This variable will be updated for every release
  194. // (I hope, I won't forget to do it everytime ...)
  195. TYPE
  196. TPartType = // --- Document Part Types
  197. (ptNone, // Nothing
  198. ptXmlProlog, // XML Prolog XmlSpec 2.8 / 4.3.1
  199. ptComment, // Comment XmlSpec 2.5
  200. ptPI, // Processing Instruction XmlSpec 2.6
  201. ptDtdc, // Document Type Declaration XmlSpec 2.8
  202. ptStartTag, // Start Tag XmlSpec 3.1
  203. ptEmptyTag, // Empty-Element Tag XmlSpec 3.1
  204. ptEndTag, // End Tag XmlSpec 3.1
  205. ptContent, // Text Content between Tags
  206. ptCData); // CDATA Section XmlSpec 2.7
  207. TDtdElemType = // --- DTD Elements
  208. (deElement, // !ELEMENT declaration
  209. deAttList, // !ATTLIST declaration
  210. deEntity, // !ENTITY declaration
  211. deNotation, // !NOTATION declaration
  212. dePI, // PI in DTD
  213. deComment, // Comment in DTD
  214. deError); // Error found in the DTD
  215. TYPE
  216. TAttrList = CLASS;
  217. TEntityStack = CLASS;
  218. TNvpList = CLASS;
  219. TElemDef = CLASS;
  220. TElemList = CLASS;
  221. TEntityDef = CLASS;
  222. TNotationDef = CLASS;
  223. TDtdElementRec = RECORD // --- This Record is returned by the DTD parser callback function
  224. Start, Final : PChar; // Start/End of the Element's Declaration
  225. CASE ElementType : TDtdElemType OF // Type of the Element
  226. deElement, // <!ELEMENT>
  227. deAttList : (ElemDef : TElemDef); // <!ATTLIST>
  228. deEntity : (EntityDef : TEntityDef); // <!ENTITY>
  229. deNotation : (NotationDef : TNotationDef); // <!NOTATION>
  230. dePI : (Target : PChar; // <?PI ?>
  231. Content : PChar;
  232. AttrList : TAttrList);
  233. deError : (Pos : PChar); // Error
  234. // deComment : ((No additional fields here)); // <!-- Comment -->
  235. END;
  236. TXmlParser = CLASS // --- Internal Properties and Methods
  237. PROTECTED
  238. FBuffer : PChar; // NIL if there is no buffer available
  239. FBufferSize : INTEGER; // 0 if the buffer is not owned by the Document instance
  240. FSource : STRING; // Name of Source of document. Filename for Documents loaded with LoadFromFile
  241. FXmlVersion : STRING; // XML version from Document header. Default is '1.0'
  242. FEncoding : STRING; // Encoding from Document header. Default is 'UTF-8'
  243. FStandalone : BOOLEAN; // Standalone declaration from Document header. Default is 'yes'
  244. FRootName : STRING; // Name of the Root Element (= DTD name)
  245. FDtdcFinal : PChar; // Pointer to the '>' character terminating the DTD declaration
  246. FNormalize : BOOLEAN; // If true: Pack Whitespace and don't return empty contents
  247. EntityStack : TEntityStack; // Entity Stack for Parameter and General Entities
  248. FCurEncoding : STRING; // Current Encoding during parsing (always uppercase)
  249. PROCEDURE AnalyzeProlog; // Analyze XML Prolog or Text Declaration
  250. PROCEDURE AnalyzeComment (Start : PChar; VAR Final : PChar); // Analyze Comments
  251. PROCEDURE AnalyzePI (Start : PChar; VAR Final : PChar); // Analyze Processing Instructions (PI)
  252. PROCEDURE AnalyzeDtdc; // Analyze Document Type Declaration
  253. PROCEDURE AnalyzeDtdElements (Start : PChar; VAR Final : PChar); // Analyze DTD declarations
  254. PROCEDURE AnalyzeTag; // Analyze Start/End/Empty-Element Tags
  255. PROCEDURE AnalyzeCData; // Analyze CDATA Sections
  256. PROCEDURE AnalyzeText (VAR IsDone : BOOLEAN); // Analyze Text Content between Tags
  257. PROCEDURE AnalyzeElementDecl (Start : PChar; VAR Final : PChar);
  258. PROCEDURE AnalyzeAttListDecl (Start : PChar; VAR Final : PChar);
  259. PROCEDURE AnalyzeEntityDecl (Start : PChar; VAR Final : PChar);
  260. PROCEDURE AnalyzeNotationDecl (Start : PChar; VAR Final : PChar);
  261. PROCEDURE PushPE (VAR Start : PChar);
  262. PROCEDURE ReplaceCharacterEntities (VAR Str : STRING);
  263. PROCEDURE ReplaceParameterEntities (VAR Str : STRING);
  264. PROCEDURE ReplaceGeneralEntities (VAR Str : STRING);
  265. FUNCTION GetDocBuffer : PChar; // Returns FBuffer or a pointer to a NUL char if Buffer is empty
  266. PUBLIC // --- Document Properties
  267. PROPERTY XmlVersion : STRING READ FXmlVersion; // XML version from the Document Prolog
  268. PROPERTY Encoding : STRING READ FEncoding; // Document Encoding from Prolog
  269. PROPERTY Standalone : BOOLEAN READ FStandalone; // Standalone Declaration from Prolog
  270. PROPERTY RootName : STRING READ FRootName; // Name of the Root Element
  271. PROPERTY Normalize : BOOLEAN READ FNormalize WRITE FNormalize; // True if Content is to be normalized
  272. PROPERTY Source : STRING READ FSource; // Name of Document Source (Filename)
  273. PROPERTY DocBuffer : PChar READ GetDocBuffer; // Returns document buffer
  274. PUBLIC // --- DTD Objects
  275. Elements : TElemList; // Elements: List of TElemDef (contains Attribute Definitions)
  276. Entities : TNvpList; // General Entities: List of TEntityDef
  277. ParEntities : TNvpList; // Parameter Entities: List of TEntityDef
  278. Notations : TNvpList; // Notations: List of TNotationDef
  279. PUBLIC
  280. CONSTRUCTOR Create;
  281. DESTRUCTOR Destroy; OVERRIDE;
  282. // --- Document Handling
  283. FUNCTION LoadFromFile (Filename : STRING;
  284. FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN;
  285. // Loads Document from given file
  286. FUNCTION LoadFromBuffer (Buffer : PChar) : BOOLEAN; // Loads Document from another buffer
  287. PROCEDURE SetBuffer (Buffer : PChar); // References another buffer
  288. PROCEDURE Clear; // Clear Document
  289. PUBLIC
  290. // --- Scanning through the document
  291. CurPartType : TPartType; // Current Type
  292. CurName : STRING; // Current Name
  293. CurContent : STRING; // Current Normalized Content
  294. CurStart : PChar; // Current First character
  295. CurFinal : PChar; // Current Last character
  296. CurAttr : TAttrList; // Current Attribute List
  297. PROPERTY CurEncoding : STRING READ FCurEncoding; // Current Encoding
  298. PROCEDURE StartScan;
  299. FUNCTION Scan : BOOLEAN;
  300. // --- Events / Callbacks
  301. FUNCTION LoadExternalEntity (SystemId, PublicId,
  302. Notation : STRING) : TXmlParser; VIRTUAL;
  303. FUNCTION TranslateEncoding (CONST Source : STRING) : STRING; VIRTUAL;
  304. PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); VIRTUAL;
  305. END;
  306. TValueType = // --- Attribute Value Type
  307. (vtNormal, // Normal specified Attribute
  308. vtImplied, // #IMPLIED attribute value
  309. vtFixed, // #FIXED attribute value
  310. vtDefault); // Attribute value from default value in !ATTLIST declaration
  311. TAttrDefault = // --- Attribute Default Type
  312. (adDefault, // Normal default value
  313. adRequired, // #REQUIRED attribute
  314. adImplied, // #IMPLIED attribute
  315. adFixed); // #FIXED attribute
  316. TAttrType = // --- Type of attribute
  317. (atUnknown, // Unknown type
  318. atCData, // Character data only
  319. atID, // ID
  320. atIdRef, // ID Reference
  321. atIdRefs, // Several ID References, separated by Whitespace
  322. atEntity, // Name of an unparsed Entity
  323. atEntities, // Several unparsed Entity names, separated by Whitespace
  324. atNmToken, // Name Token
  325. atNmTokens, // Several Name Tokens, separated by Whitespace
  326. atNotation, // A selection of Notation names (Unparsed Entity)
  327. atEnumeration); // Enumeration
  328. TElemType = // --- Element content type
  329. (etEmpty, // Element is always empty
  330. etAny, // Element can have any mixture of PCDATA and any elements
  331. etChildren, // Element must contain only elements
  332. etMixed); // Mixed PCDATA and elements
  333. (*$IFDEF HAS_CONTNRS_UNIT *)
  334. TObjectList = Contnrs.TObjectList; // Re-Export this identifier
  335. (*$ELSE *)
  336. TObjectList = CLASS (TList)
  337. DESTRUCTOR Destroy; OVERRIDE;
  338. PROCEDURE Delete (Index : INTEGER);
  339. PROCEDURE Clear; OVERRIDE;
  340. END;
  341. (*$ENDIF *)
  342. TNvpNode = CLASS // Name-Value Pair Node
  343. Name : STRING;
  344. Value : STRING;
  345. CONSTRUCTOR Create (TheName : STRING = ''; TheValue : STRING = '');
  346. END;
  347. TNvpList = CLASS (TObjectList) // Name-Value Pair List
  348. PROCEDURE Add (Node : TNvpNode);
  349. FUNCTION Node (Name : STRING) : TNvpNode; OVERLOAD;
  350. FUNCTION Node (Index : INTEGER) : TNvpNode; OVERLOAD;
  351. FUNCTION Value (Name : STRING) : STRING; OVERLOAD;
  352. FUNCTION Value (Index : INTEGER) : STRING; OVERLOAD;
  353. FUNCTION Name (Index : INTEGER) : STRING;
  354. END;
  355. TAttr = CLASS (TNvpNode) // Attribute of a Start-Tag or Empty-Element-Tag
  356. ValueType : TValueType;
  357. AttrType : TAttrType;
  358. END;
  359. TAttrList = CLASS (TNvpList) // List of Attributes
  360. PROCEDURE Analyze (Start : PChar; VAR Final : PChar);
  361. END;
  362. TEntityStack = CLASS (TObjectList) // Stack where current position is stored before parsing entities
  363. PROTECTED
  364. Owner : TXmlParser;
  365. PUBLIC
  366. CONSTRUCTOR Create (TheOwner : TXmlParser);
  367. PROCEDURE Push (LastPos : PChar); OVERLOAD;
  368. PROCEDURE Push (Instance : TObject; LastPos : PChar); OVERLOAD;
  369. FUNCTION Pop : PChar; // Returns next char or NIL if EOF is reached. Frees Instance.
  370. END;
  371. TAttrDef = CLASS (TNvpNode) // Represents a <!ATTLIST Definition. "Value" is the default value
  372. TypeDef : STRING; // Type definition from the DTD
  373. Notations : STRING; // Notation List, separated by pipe symbols '|'
  374. AttrType : TAttrType; // Attribute Type
  375. DefaultType : TAttrDefault; // Default Type
  376. END;
  377. TElemDef = CLASS (TNvpList) // Represents a <!ELEMENT Definition. Is a list of TAttrDef-Nodes
  378. Name : STRING; // Element name
  379. ElemType : TElemType; // Element type
  380. Definition : STRING; // Element definition from DTD
  381. END;
  382. TElemList = CLASS (TObjectList) // List of TElemDef nodes
  383. FUNCTION Node (Name : STRING) : TElemDef;
  384. PROCEDURE Add (Node : TElemDef);
  385. END;
  386. TEntityDef = CLASS (TNvpNode) // Represents a <!ENTITY Definition.
  387. SystemId : STRING;
  388. PublicId : STRING;
  389. NotationName : STRING;
  390. END;
  391. TNotationDef = CLASS (TNvpNode) // Represents a <!NOTATION Definition. Value is the System ID
  392. PublicId : STRING;
  393. END;
  394. TCharset = SET OF CHAR;
  395. CONST
  396. CWhitespace = [#32, #9, #13, #10]; // Whitespace characters (XmlSpec 2.3)
  397. CLetter = [#$41..#$5A, #$61..#$7A, #$C0..#$D6, #$D8..#$F6, #$F8..#$FF];
  398. CDigit = [#$30..#$39];
  399. CNameChar = CLetter + CDigit + ['.', '-', '_', ':', #$B7];
  400. CNameStart = CLetter + ['_', ':'];
  401. CQuoteChar = ['"', ''''];
  402. CPubidChar = [#32, ^M, ^J, #9, 'a'..'z', 'A'..'Z', '0'..'9',
  403. '-', '''', '(', ')', '+', ',', '.', '/', ':',
  404. '=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];
  405. CDStart = '<![CDATA[';
  406. CDEnd = ']]>';
  407. // --- Name Constants for the above enumeration types
  408. CPartType_Name : ARRAY [TPartType] OF STRING =
  409. ('', 'XML Prolog', 'Comment', 'PI',
  410. 'DTD Declaration', 'Start Tag', 'Empty Tag', 'End Tag',
  411. 'Text', 'CDATA');
  412. CValueType_Name : ARRAY [TValueType] OF STRING = ('Normal', 'Implied', 'Fixed', 'Default');
  413. CAttrDefault_Name : ARRAY [TAttrDefault] OF STRING = ('Default', 'Required', 'Implied', 'Fixed');
  414. CElemType_Name : ARRAY [TElemType] OF STRING = ('Empty', 'Any', 'Childs only', 'Mixed');
  415. CAttrType_Name : ARRAY [TAttrType] OF STRING = ('Unknown', 'CDATA',
  416. 'ID', 'IDREF', 'IDREFS',
  417. 'ENTITY', 'ENTITIES',
  418. 'NMTOKEN', 'NMTOKENS',
  419. 'Notation', 'Enumeration');
  420. FUNCTION ConvertWs (Source: STRING; PackWs: BOOLEAN) : STRING; // Convert WS to spaces #x20
  421. PROCEDURE SetStringSF (VAR S : STRING; BufferStart, BufferFinal : PChar); // SetString by Start/Final of buffer
  422. FUNCTION StrSFPas (Start, Finish : PChar) : STRING; // Convert buffer part to Pascal string
  423. FUNCTION TrimWs (Source : STRING) : STRING; // Trim Whitespace
  424. FUNCTION AnsiToUtf8 (Source : ANSISTRING) : STRING; // Convert Win-1252 to UTF-8
  425. FUNCTION Utf8ToAnsi (Source : STRING; UnknownChar : CHAR = '¿') : ANSISTRING; // Convert UTF-8 to Win-1252
  426. (*
  427. ===============================================================================================
  428. TCustomXmlScanner event based component wrapper for TXmlParser
  429. ===============================================================================================
  430. *)
  431. TYPE
  432. TCustomXmlScanner = CLASS;
  433. TXmlPrologEvent = PROCEDURE (Sender : TObject; XmlVersion, Encoding: STRING; Standalone : BOOLEAN) OF OBJECT;
  434. TCommentEvent = PROCEDURE (Sender : TObject; Comment : STRING) OF OBJECT;
  435. TPIEvent = PROCEDURE (Sender : TObject; Target, Content: STRING; Attributes : TAttrList) OF OBJECT;
  436. TDtdEvent = PROCEDURE (Sender : TObject; RootElementName : STRING) OF OBJECT;
  437. TStartTagEvent = PROCEDURE (Sender : TObject; TagName : STRING; Attributes : TAttrList) OF OBJECT;
  438. TEndTagEvent = PROCEDURE (Sender : TObject; TagName : STRING) OF OBJECT;
  439. TContentEvent = PROCEDURE (Sender : TObject; Content : STRING) OF OBJECT;
  440. TElementEvent = PROCEDURE (Sender : TObject; ElemDef : TElemDef) OF OBJECT;
  441. TEntityEvent = PROCEDURE (Sender : TObject; EntityDef : TEntityDef) OF OBJECT;
  442. TNotationEvent = PROCEDURE (Sender : TObject; NotationDef : TNotationDef) OF OBJECT;
  443. TErrorEvent = PROCEDURE (Sender : TObject; ErrorPos : PChar) OF OBJECT;
  444. TExternalEvent = PROCEDURE (Sender : TObject; SystemId, PublicId, NotationId : STRING;
  445. VAR Result : TXmlParser) OF OBJECT;
  446. TEncodingEvent = FUNCTION (Sender : TObject; CurrentEncoding, Source : STRING) : STRING OF OBJECT;
  447. TCustomXmlScanner = CLASS (TComponent)
  448. PROTECTED
  449. FXmlParser : TXmlParser;
  450. FOnXmlProlog : TXmlPrologEvent;
  451. FOnComment : TCommentEvent;
  452. FOnPI : TPIEvent;
  453. FOnDtdRead : TDtdEvent;
  454. FOnStartTag : TStartTagEvent;
  455. FOnEmptyTag : TStartTagEvent;
  456. FOnEndTag : TEndTagEvent;
  457. FOnContent : TContentEvent;
  458. FOnCData : TContentEvent;
  459. FOnElement : TElementEvent;
  460. FOnAttList : TElementEvent;
  461. FOnEntity : TEntityEvent;
  462. FOnNotation : TNotationEvent;
  463. FOnDtdError : TErrorEvent;
  464. FOnLoadExternal : TExternalEvent;
  465. FOnTranslateEncoding : TEncodingEvent;
  466. FStopParser : BOOLEAN;
  467. FUNCTION GetNormalize : BOOLEAN;
  468. PROCEDURE SetNormalize (Value : BOOLEAN);
  469. PROCEDURE WhenXmlProlog(XmlVersion, Encoding: STRING; Standalone : BOOLEAN); VIRTUAL;
  470. PROCEDURE WhenComment (Comment : STRING); VIRTUAL;
  471. PROCEDURE WhenPI (Target, Content: STRING; Attributes : TAttrList); VIRTUAL;
  472. PROCEDURE WhenDtdRead (RootElementName : STRING); VIRTUAL;
  473. PROCEDURE WhenStartTag (TagName : STRING; Attributes : TAttrList); VIRTUAL;
  474. PROCEDURE WhenEmptyTag (TagName : STRING; Attributes : TAttrList); VIRTUAL;
  475. PROCEDURE WhenEndTag (TagName : STRING); VIRTUAL;
  476. PROCEDURE WhenContent (Content : STRING); VIRTUAL;
  477. PROCEDURE WhenCData (Content : STRING); VIRTUAL;
  478. PROCEDURE WhenElement (ElemDef : TElemDef); VIRTUAL;
  479. PROCEDURE WhenAttList (ElemDef : TElemDef); VIRTUAL;
  480. PROCEDURE WhenEntity (EntityDef : TEntityDef); VIRTUAL;
  481. PROCEDURE WhenNotation (NotationDef : TNotationDef); VIRTUAL;
  482. PROCEDURE WhenDtdError (ErrorPos : PChar); VIRTUAL;
  483. PUBLIC
  484. CONSTRUCTOR Create (AOwner: TComponent); OVERRIDE;
  485. DESTRUCTOR Destroy; OVERRIDE;
  486. PROCEDURE LoadFromFile (Filename : TFilename); // Load XML Document from file
  487. PROCEDURE LoadFromBuffer (Buffer : PChar); // Load XML Document from buffer
  488. PROCEDURE SetBuffer (Buffer : PChar); // Refer to Buffer
  489. FUNCTION GetFilename : TFilename;
  490. PROCEDURE Execute; // Perform scanning
  491. PROTECTED
  492. PROPERTY XmlParser : TXmlParser READ FXmlParser;
  493. PROPERTY StopParser : BOOLEAN READ FStopParser WRITE FStopParser;
  494. PROPERTY Filename : TFilename READ GetFilename WRITE LoadFromFile;
  495. PROPERTY Normalize : BOOLEAN READ GetNormalize WRITE SetNormalize;
  496. PROPERTY OnXmlProlog : TXmlPrologEvent READ FOnXmlProlog WRITE FOnXmlProlog;
  497. PROPERTY OnComment : TCommentEvent READ FOnComment WRITE FOnComment;
  498. PROPERTY OnPI : TPIEvent READ FOnPI WRITE FOnPI;
  499. PROPERTY OnDtdRead : TDtdEvent READ FOnDtdRead WRITE FOnDtdRead;
  500. PROPERTY OnStartTag : TStartTagEvent READ FOnStartTag WRITE FOnStartTag;
  501. PROPERTY OnEmptyTag : TStartTagEvent READ FOnEmptyTag WRITE FOnEmptyTag;
  502. PROPERTY OnEndTag : TEndTagEvent READ FOnEndTag WRITE FOnEndTag;
  503. PROPERTY OnContent : TContentEvent READ FOnContent WRITE FOnContent;
  504. PROPERTY OnCData : TContentEvent READ FOnCData WRITE FOnCData;
  505. PROPERTY OnElement : TElementEvent READ FOnElement WRITE FOnElement;
  506. PROPERTY OnAttList : TElementEvent READ FOnAttList WRITE FOnAttList;
  507. PROPERTY OnEntity : TEntityEvent READ FOnEntity WRITE FOnEntity;
  508. PROPERTY OnNotation : TNotationEvent READ FOnNotation WRITE FOnNotation;
  509. PROPERTY OnDtdError : TErrorEvent READ FOnDtdError WRITE FOnDtdError;
  510. PROPERTY OnLoadExternal : TExternalEvent READ FOnLoadExternal WRITE FOnLoadExternal;
  511. PROPERTY OnTranslateEncoding : TEncodingEvent READ FOnTranslateEncoding WRITE FOnTranslateEncoding;
  512. END;
  513. (*
  514. ===============================================================================================
  515. IMPLEMENTATION
  516. ===============================================================================================
  517. *)
  518. IMPLEMENTATION
  519. (*
  520. ===============================================================================================
  521. Unicode and UTF-8 stuff
  522. ===============================================================================================
  523. *)
  524. CONST
  525. // --- Character Translation Table for Unicode <-> Win-1252
  526. WIN1252_UNICODE : ARRAY [$00..$FF] OF WORD = (
  527. $0000, $0001, $0002, $0003, $0004, $0005, $0006, $0007, $0008, $0009,
  528. $000A, $000B, $000C, $000D, $000E, $000F, $0010, $0011, $0012, $0013,
  529. $0014, $0015, $0016, $0017, $0018, $0019, $001A, $001B, $001C, $001D,
  530. $001E, $001F, $0020, $0021, $0022, $0023, $0024, $0025, $0026, $0027,
  531. $0028, $0029, $002A, $002B, $002C, $002D, $002E, $002F, $0030, $0031,
  532. $0032, $0033, $0034, $0035, $0036, $0037, $0038, $0039, $003A, $003B,
  533. $003C, $003D, $003E, $003F, $0040, $0041, $0042, $0043, $0044, $0045,
  534. $0046, $0047, $0048, $0049, $004A, $004B, $004C, $004D, $004E, $004F,
  535. $0050, $0051, $0052, $0053, $0054, $0055, $0056, $0057, $0058, $0059,
  536. $005A, $005B, $005C, $005D, $005E, $005F, $0060, $0061, $0062, $0063,
  537. $0064, $0065, $0066, $0067, $0068, $0069, $006A, $006B, $006C, $006D,
  538. $006E, $006F, $0070, $0071, $0072, $0073, $0074, $0075, $0076, $0077,
  539. $0078, $0079, $007A, $007B, $007C, $007D, $007E, $007F,
  540. $20AC, $0081, $201A, $0192, $201E, $2026, $2020, $2021, $02C6, $2030,
  541. $0160, $2039, $0152, $008D, $017D, $008F, $0090, $2018, $2019, $201C,
  542. $201D, $2022, $2013, $2014, $02DC, $2122, $0161, $203A, $0153, $009D,
  543. $017E, $0178, $00A0, $00A1, $00A2, $00A3, $00A4, $00A5, $00A6, $00A7,
  544. $00A8, $00A9, $00AA, $00AB, $00AC, $00AD, $00AE, $00AF, $00B0, $00B1,
  545. $00B2, $00B3, $00B4, $00B5, $00B6, $00B7, $00B8, $00B9, $00BA, $00BB,
  546. $00BC, $00BD, $00BE, $00BF, $00C0, $00C1, $00C2, $00C3, $00C4, $00C5,
  547. $00C6, $00C7, $00C8, $00C9, $00CA, $00CB, $00CC, $00CD, $00CE, $00CF,
  548. $00D0, $00D1, $00D2, $00D3, $00D4, $00D5, $00D6, $00D7, $00D8, $00D9,
  549. $00DA, $00DB, $00DC, $00DD, $00DE, $00DF, $00E0, $00E1, $00E2, $00E3,
  550. $00E4, $00E5, $00E6, $00E7, $00E8, $00E9, $00EA, $00EB, $00EC, $00ED,
  551. $00EE, $00EF, $00F0, $00F1, $00F2, $00F3, $00F4, $00F5, $00F6, $00F7,
  552. $00F8, $00F9, $00FA, $00FB, $00FC, $00FD, $00FE, $00FF);
  553. (* UTF-8 (somewhat simplified)
  554. -----
  555. Character Range Byte sequence
  556. --------------- -------------------------- (x=Bits from original character)
  557. $0000..$007F 0xxxxxxx
  558. $0080..$07FF 110xxxxx 10xxxxxx
  559. $8000..$FFFF 1110xxxx 10xxxxxx 10xxxxxx
  560. Example
  561. --------
  562. Transforming the Unicode character U+00E4 LATIN SMALL LETTER A WITH DIAERESIS ("ä"):
  563. ISO-8859-1, Decimal 228
  564. Win1252, Hex $E4
  565. ANSI Bin 1110 0100
  566. abcd efgh
  567. UTF-8 Binary 1100xxab 10cdefgh
  568. Binary 11000011 10100100
  569. Hex $C3 $A4
  570. Decimal 195 164
  571. ANSI Ã ¤ *)
  572. FUNCTION AnsiToUtf8 (Source : ANSISTRING) : STRING;
  573. (* Converts the given Windows ANSI (Win1252) String to UTF-8. *)
  574. VAR
  575. I : INTEGER; // Loop counter
  576. U : WORD; // Current Unicode value
  577. Len : INTEGER; // Current real length of "Result" string
  578. BEGIN
  579. SetLength (Result, Length (Source) * 3); // Worst case
  580. Len := 0;
  581. FOR I := 1 TO Length (Source) DO BEGIN
  582. U := WIN1252_UNICODE [ORD (Source [I])];
  583. CASE U OF
  584. $0000..$007F : BEGIN
  585. INC (Len);
  586. Result [Len] := CHR (U);
  587. END;
  588. $0080..$07FF : BEGIN
  589. INC (Len);
  590. Result [Len] := CHR ($C0 OR (U SHR 6));
  591. INC (Len);
  592. Result [Len] := CHR ($80 OR (U AND $3F));
  593. END;
  594. $0800..$FFFF : BEGIN
  595. INC (Len);
  596. Result [Len] := CHR ($E0 OR (U SHR 12));
  597. INC (Len);
  598. Result [Len] := CHR ($80 OR ((U SHR 6) AND $3F));
  599. INC (Len);
  600. Result [Len] := CHR ($80 OR (U AND $3F));
  601. END;
  602. END;
  603. END;
  604. SetLength (Result, Len);
  605. END;
  606. FUNCTION Utf8ToAnsi (Source : STRING; UnknownChar : CHAR = '¿') : ANSISTRING;
  607. (* Converts the given UTF-8 String to Windows ANSI (Win-1252).
  608. If a character can not be converted, the "UnknownChar" is inserted. *)
  609. VAR
  610. SourceLen : INTEGER; // Length of Source string
  611. I, K : INTEGER;
  612. A : BYTE; // Current ANSI character value
  613. U : WORD;
  614. Ch : CHAR; // Dest char
  615. Len : INTEGER; // Current real length of "Result" string
  616. BEGIN
  617. SourceLen := Length (Source);
  618. SetLength (Result, SourceLen); // Enough room to live
  619. Len := 0;
  620. I := 1;
  621. WHILE I <= SourceLen DO BEGIN
  622. A := ORD (Source [I]);
  623. IF A < $80 THEN BEGIN // Range $0000..$007F
  624. INC (Len);
  625. Result [Len] := Source [I];
  626. INC (I);
  627. END
  628. ELSE BEGIN // Determine U, Inc I
  629. IF (A AND $E0 = $C0) AND (I < SourceLen) THEN BEGIN // Range $0080..$07FF
  630. U := (WORD (A AND $1F) SHL 6) OR (ORD (Source [I+1]) AND $3F);
  631. INC (I, 2);
  632. END
  633. ELSE IF (A AND $F0 = $E0) AND (I < SourceLen-1) THEN BEGIN // Range $0800..$FFFF
  634. U := (WORD (A AND $0F) SHL 12) OR
  635. (WORD (ORD (Source [I+1]) AND $3F) SHL 6) OR
  636. ( ORD (Source [I+2]) AND $3F);
  637. INC (I, 3);
  638. END
  639. ELSE BEGIN // Unknown/unsupported
  640. INC (I);
  641. FOR K := 7 DOWNTO 0 DO
  642. IF A AND (1 SHL K) = 0 THEN BEGIN
  643. INC (I, (A SHR (K+1))-1);
  644. BREAK;
  645. END;
  646. U := WIN1252_UNICODE [ORD (UnknownChar)];
  647. END;
  648. Ch := UnknownChar; // Retrieve ANSI char
  649. FOR A := $00 TO $FF DO
  650. IF WIN1252_UNICODE [A] = U THEN BEGIN
  651. Ch := CHR (A);
  652. BREAK;
  653. END;
  654. INC (Len);
  655. Result [Len] := Ch;
  656. END;
  657. END;
  658. SetLength (Result, Len);
  659. END;
  660. (*
  661. ===============================================================================================
  662. "Special" Helper Functions
  663. Don't ask me why. But including these functions makes the parser *DRAMATICALLY* faster
  664. on my K6-233 machine. You can test it yourself just by commenting them out.
  665. They do exactly the same as the Assembler routines defined in SysUtils.
  666. (This is where you can see how great the Delphi compiler really is. The compiled code is
  667. faster than hand-coded assembler!)
  668. ===============================================================================================
  669. --> Just move this line below the StrScan function --> *)
  670. FUNCTION StrPos (CONST Str, SearchStr : PChar) : PChar;
  671. // Same functionality as SysUtils.StrPos
  672. VAR
  673. First : CHAR;
  674. Len : INTEGER;
  675. BEGIN
  676. First := SearchStr^;
  677. Len := StrLen (SearchStr);
  678. Result := Str;
  679. REPEAT
  680. IF Result^ = First THEN
  681. IF StrLComp (Result, SearchStr, Len) = 0 THEN BREAK;
  682. IF Result^ = #0 THEN BEGIN
  683. Result := NIL;
  684. BREAK;
  685. END;
  686. INC (Result);
  687. UNTIL FALSE;
  688. END;
  689. FUNCTION StrScan (CONST Start : PChar; CONST Ch : CHAR) : PChar;
  690. // Same functionality as SysUtils.StrScan
  691. BEGIN
  692. Result := Start;
  693. WHILE Result^ <> Ch DO BEGIN
  694. IF Result^ = #0 THEN BEGIN
  695. Result := NIL;
  696. EXIT;
  697. END;
  698. INC (Result);
  699. END;
  700. END;
  701. (*
  702. ===============================================================================================
  703. Helper Functions
  704. ===============================================================================================
  705. *)
  706. FUNCTION DelChars (Source : STRING; CharsToDelete : TCharset) : STRING;
  707. // Delete all "CharsToDelete" from the string
  708. VAR
  709. I : INTEGER;
  710. BEGIN
  711. Result := Source;
  712. FOR I := Length (Result) DOWNTO 1 DO
  713. IF Result [I] IN CharsToDelete THEN
  714. Delete (Result, I, 1);
  715. END;
  716. FUNCTION TrimWs (Source : STRING) : STRING;
  717. // Trimms off Whitespace characters from both ends of the string
  718. VAR
  719. I : INTEGER;
  720. BEGIN
  721. // --- Trim Left
  722. I := 1;
  723. WHILE (I <= Length (Source)) AND (Source [I] IN CWhitespace) DO
  724. INC (I);
  725. Result := Copy (Source, I, MaxInt);
  726. // --- Trim Right
  727. I := Length (Result);
  728. WHILE (I > 1) AND (Result [I] IN CWhitespace) DO
  729. DEC (I);
  730. Delete (Result, I+1, Length (Result)-I);
  731. END;
  732. FUNCTION ConvertWs (Source: STRING; PackWs: BOOLEAN) : STRING;
  733. // Converts all Whitespace characters to the Space #x20 character
  734. // If "PackWs" is true, contiguous Whitespace characters are packed to one
  735. VAR
  736. I : INTEGER;
  737. BEGIN
  738. Result := Source;
  739. FOR I := Length (Result) DOWNTO 1 DO
  740. IF (Result [I] IN CWhitespace) THEN
  741. IF PackWs AND (I > 1) AND (Result [I-1] IN CWhitespace)
  742. THEN Delete (Result, I, 1)
  743. ELSE Result [I] := #32;
  744. END;
  745. PROCEDURE SetStringSF (VAR S : STRING; BufferStart, BufferFinal : PChar);
  746. BEGIN
  747. SetString (S, BufferStart, BufferFinal-BufferStart+1);
  748. END;
  749. FUNCTION StrLPas (Start : PChar; Len : INTEGER) : STRING;
  750. BEGIN
  751. SetString (Result, Start, Len);
  752. END;
  753. FUNCTION StrSFPas (Start, Finish : PChar) : STRING;
  754. BEGIN
  755. SetString (Result, Start, Finish-Start+1);
  756. END;
  757. FUNCTION StrScanE (CONST Source : PChar; CONST CharToScanFor : CHAR) : PChar;
  758. // If "CharToScanFor" is not found, StrScanE returns the last char of the
  759. // buffer instead of NIL
  760. BEGIN
  761. Result := StrScan (Source, CharToScanFor);
  762. IF Result = NIL THEN
  763. Result := StrEnd (Source)-1;
  764. END;
  765. PROCEDURE ExtractName (Start : PChar; Terminators : TCharset; VAR Final : PChar);
  766. (* Extracts the complete Name beginning at "Start".
  767. It is assumed that the name is contained in Markup, so the '>' character is
  768. always a Termination.
  769. Start: IN Pointer to first char of name. Is always considered to be valid
  770. Terminators: IN Characters which terminate the name
  771. Final: OUT Pointer to last char of name *)
  772. BEGIN
  773. Final := Start+1;
  774. Include (Terminators, #0);
  775. Include (Terminators, '>');
  776. WHILE NOT (Final^ IN Terminators) DO
  777. INC (Final);
  778. DEC (Final);
  779. END;
  780. PROCEDURE ExtractQuote (Start : PChar; VAR Content : STRING; VAR Final : PChar);
  781. (* Extract a string which is contained in single or double Quotes.
  782. Start: IN Pointer to opening quote
  783. Content: OUT The quoted string
  784. Final: OUT Pointer to closing quote *)
  785. BEGIN
  786. Final := StrScan (Start+1, Start^);
  787. IF Final = NIL THEN BEGIN
  788. Final := StrEnd (Start+1)-1;
  789. SetString (Content, Start+1, Final-Start);
  790. END
  791. ELSE
  792. SetString (Content, Start+1, Final-1-Start);
  793. END;
  794. (*
  795. ===============================================================================================
  796. TEntityStackNode
  797. This Node is pushed to the "Entity Stack" whenever the parser parses entity replacement text.
  798. The "Instance" field holds the Instance pointer of an External Entity buffer. When it is
  799. popped, the Instance is freed.
  800. The "Encoding" field holds the name of the Encoding. External Parsed Entities may have
  801. another encoding as the document entity (XmlSpec 4.3.3). So when there is an "<?xml" PI
  802. found in the stream (= Text Declaration at the beginning of external parsed entities), the
  803. Encoding found there is used for the External Entity (is assigned to TXmlParser.CurEncoding)
  804. Default Encoding is for the Document Entity is UTF-8. It is assumed that External Entities
  805. have the same Encoding as the Document Entity, unless they carry a Text Declaration.
  806. ===============================================================================================
  807. *)
  808. TYPE
  809. TEntityStackNode = CLASS
  810. Instance : TObject;
  811. Encoding : STRING;
  812. LastPos : PChar;
  813. END;
  814. (*
  815. ===============================================================================================
  816. TEntityStack
  817. For nesting of Entities.
  818. When there is an entity reference found in the data stream, the corresponding entity
  819. definition is searched and the current position is pushed to this stack.
  820. From then on, the program scans the entitiy replacement text as if it were normal content.
  821. When the parser reaches the end of an entity, the current position is popped off the
  822. stack again.
  823. ===============================================================================================
  824. *)
  825. CONSTRUCTOR TEntityStack.Create (TheOwner : TXmlParser);
  826. BEGIN
  827. INHERITED Create;
  828. Owner := TheOwner;
  829. END;
  830. PROCEDURE TEntityStack.Push (LastPos : PChar);
  831. BEGIN
  832. Push (NIL, LastPos);
  833. END;
  834. PROCEDURE TEntityStack.Push (Instance : TObject; LastPos : PChar);
  835. VAR
  836. ESN : TEntityStackNode;
  837. BEGIN
  838. ESN := TEntityStackNode.Create;
  839. ESN.Instance := Instance;
  840. ESN.Encoding := Owner.FCurEncoding; // Save current Encoding
  841. ESN.LastPos := LastPos;
  842. Add (ESN);
  843. END;
  844. FUNCTION TEntityStack.Pop : PChar;
  845. VAR
  846. ESN : TEntityStackNode;
  847. BEGIN
  848. IF Count > 0 THEN BEGIN
  849. ESN := TEntityStackNode (Items [Count-1]);
  850. Result := ESN.LastPos;
  851. IF ESN.Instance <> NIL THEN
  852. ESN.Instance.Free;
  853. IF ESN.Encoding <> '' THEN
  854. Owner.FCurEncoding := ESN.Encoding; // Restore current Encoding
  855. Delete (Count-1);
  856. END
  857. ELSE
  858. Result := NIL;
  859. END;
  860. (*
  861. ===============================================================================================
  862. TExternalID
  863. -----------
  864. XmlSpec 4.2.2: ExternalID ::= 'SYSTEM' S SystemLiteral |
  865. 'PUBLIC' S PubidLiteral S SystemLiteral
  866. XmlSpec 4.7: PublicID ::= 'PUBLIC' S PubidLiteral
  867. SystemLiteral and PubidLiteral are quoted
  868. ===============================================================================================
  869. *)
  870. TYPE
  871. TExternalID = CLASS
  872. PublicId : STRING;
  873. SystemId : STRING;
  874. Final : PChar;
  875. CONSTRUCTOR Create (Start : PChar);
  876. END;
  877. CONSTRUCTOR TExternalID.Create (Start : PChar);
  878. BEGIN
  879. INHERITED Create;
  880. Final := Start;
  881. IF StrLComp (Start, 'SYSTEM', 6) = 0 THEN BEGIN
  882. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  883. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  884. ExtractQuote (Final, SystemID, Final);
  885. END
  886. ELSE IF StrLComp (Start, 'PUBLIC', 6) = 0 THEN BEGIN
  887. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  888. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  889. ExtractQuote (Final, PublicID, Final);
  890. INC (Final);
  891. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  892. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  893. ExtractQuote (Final, SystemID, Final);
  894. END;
  895. END;
  896. (*
  897. ===============================================================================================
  898. TXmlParser
  899. ===============================================================================================
  900. *)
  901. CONSTRUCTOR TXmlParser.Create;
  902. BEGIN
  903. INHERITED Create;
  904. FBuffer := NIL;
  905. FBufferSize := 0;
  906. Elements := TElemList.Create;
  907. Entities := TNvpList.Create;
  908. ParEntities := TNvpList.Create;
  909. Notations := TNvpList.Create;
  910. CurAttr := TAttrList.Create;
  911. EntityStack := TEntityStack.Create (Self);
  912. Clear;
  913. END;
  914. DESTRUCTOR TXmlParser.Destroy;
  915. BEGIN
  916. Clear;
  917. Elements.Free;
  918. Entities.Free;
  919. ParEntities.Free;
  920. Notations.Free;
  921. CurAttr.Free;
  922. EntityStack.Free;
  923. INHERITED Destroy;
  924. END;
  925. PROCEDURE TXmlParser.Clear;
  926. // Free Buffer and clear all object attributes
  927. BEGIN
  928. IF (FBufferSize > 0) AND (FBuffer <> NIL) THEN
  929. FreeMem (FBuffer);
  930. FBuffer := NIL;
  931. FBufferSize := 0;
  932. FSource := '';
  933. FXmlVersion := '';
  934. FEncoding := '';
  935. FStandalone := FALSE;
  936. FRootName := '';
  937. FDtdcFinal := NIL;
  938. FNormalize := TRUE;
  939. Elements.Clear;
  940. Entities.Clear;
  941. ParEntities.Clear;
  942. Notations.Clear;
  943. CurAttr.Clear;
  944. EntityStack.Clear;
  945. END;
  946. FUNCTION TXmlParser.LoadFromFile (Filename : STRING; FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN;
  947. // Loads Document from given file
  948. // Returns TRUE if successful
  949. VAR
  950. f : FILE;
  951. ReadIn : INTEGER;
  952. OldFileMode : INTEGER;
  953. BEGIN
  954. Result := FALSE;
  955. Clear;
  956. // --- Open File
  957. OldFileMode := SYSTEM.FileMode;
  958. TRY
  959. SYSTEM.FileMode := FileMode;
  960. TRY
  961. AssignFile (f, Filename);
  962. Reset (f, 1);
  963. EXCEPT
  964. EXIT;
  965. END;
  966. TRY
  967. // --- Allocate Memory
  968. TRY
  969. FBufferSize := Filesize (f) + 1;
  970. GetMem (FBuffer, FBufferSize);
  971. EXCEPT
  972. Clear;
  973. EXIT;
  974. END;
  975. // --- Read File
  976. TRY
  977. BlockRead (f, FBuffer^, FBufferSize, ReadIn);
  978. (FBuffer+ReadIn)^ := #0; // NULL termination
  979. EXCEPT
  980. Clear;
  981. EXIT;
  982. END;
  983. FINALLY
  984. CloseFile (f);
  985. END;
  986. FSource := Filename;
  987. Result := TRUE;
  988. FINALLY
  989. SYSTEM.FileMode := OldFileMode;
  990. END;
  991. END;
  992. FUNCTION TXmlParser.LoadFromBuffer (Buffer : PChar) : BOOLEAN;
  993. // Loads Document from another buffer
  994. // Returns TRUE if successful
  995. // The "Source" property becomes '<MEM>' if successful
  996. BEGIN
  997. Result := FALSE;
  998. Clear;
  999. FBufferSize := StrLen (Buffer) + 1;
  1000. TRY
  1001. GetMem (FBuffer, FBufferSize);
  1002. EXCEPT
  1003. Clear;
  1004. EXIT;
  1005. END;
  1006. StrCopy (FBuffer, Buffer);
  1007. FSource := '<MEM>';
  1008. Result := TRUE;
  1009. END;
  1010. PROCEDURE TXmlParser.SetBuffer (Buffer : PChar); // References another buffer
  1011. BEGIN
  1012. Clear;
  1013. FBuffer := Buffer;
  1014. FBufferSize := 0;
  1015. FSource := '<REFERENCE>';
  1016. END;
  1017. //-----------------------------------------------------------------------------------------------
  1018. // Scanning through the document
  1019. //-----------------------------------------------------------------------------------------------
  1020. PROCEDURE TXmlParser.StartScan;
  1021. BEGIN
  1022. CurPartType := ptNone;
  1023. CurName := '';
  1024. CurContent := '';
  1025. CurStart := NIL;
  1026. CurFinal := NIL;
  1027. CurAttr.Clear;
  1028. EntityStack.Clear;
  1029. END;
  1030. FUNCTION TXmlParser.Scan : BOOLEAN;
  1031. // Scans the next Part
  1032. // Returns TRUE if a part could be found, FALSE if there is no part any more
  1033. //
  1034. // "IsDone" can be set to FALSE by AnalyzeText in order to go to the next part
  1035. // if there is no Content due to normalization
  1036. VAR
  1037. IsDone : BOOLEAN;
  1038. BEGIN
  1039. REPEAT
  1040. IsDone := TRUE;
  1041. // --- Start of next Part
  1042. IF CurStart = NIL
  1043. THEN CurStart := DocBuffer
  1044. ELSE CurStart := CurFinal+1;
  1045. CurFinal := CurStart;
  1046. // --- End of Document of Pop off a new part from the Entity stack?
  1047. IF CurStart^ = #0 THEN
  1048. CurStart := EntityStack.Pop;
  1049. // --- No Document or End Of Document: Terminate Scan
  1050. IF (CurStart = NIL) OR (CurStart^ = #0) THEN BEGIN
  1051. CurStart := StrEnd (DocBuffer);
  1052. CurFinal := CurStart-1;
  1053. EntityStack.Clear;
  1054. Result := FALSE;
  1055. EXIT;
  1056. END;
  1057. IF (StrLComp (CurStart, '<?xml', 5) = 0) AND
  1058. ((CurStart+5)^ IN CWhitespace) THEN AnalyzeProlog // XML Declaration, Text Declaration
  1059. ELSE IF StrLComp (CurStart, '<?', 2) = 0 THEN AnalyzePI (CurStart, CurFinal) // PI
  1060. ELSE IF StrLComp (CurStart, '<!--', 4) = 0 THEN AnalyzeComment (CurStart, CurFinal) // Comment
  1061. ELSE IF StrLComp (CurStart, '<!DOCTYPE', 9) = 0 THEN AnalyzeDtdc // DTDc
  1062. ELSE IF StrLComp (CurStart, CDStart, Length (CDStart)) = 0 THEN AnalyzeCdata // CDATA Section
  1063. ELSE IF StrLComp (CurStart, '<', 1) = 0 THEN AnalyzeTag // Start-Tag, End-Tag, Empty-Element-Tag
  1064. ELSE AnalyzeText (IsDone); // Text Content
  1065. UNTIL IsDone;
  1066. Result := TRUE;
  1067. END;
  1068. PROCEDURE TXmlParser.AnalyzeProlog;
  1069. // Analyze XML Prolog or Text Declaration
  1070. VAR
  1071. F : PChar;
  1072. BEGIN
  1073. CurAttr.Analyze (CurStart+5, F);
  1074. IF EntityStack.Count = 0 THEN BEGIN
  1075. FXmlVersion := CurAttr.Value ('version');
  1076. FEncoding := CurAttr.Value ('encoding');
  1077. FStandalone := CurAttr.Value ('standalone') = 'yes';
  1078. END;
  1079. CurFinal := StrPos (F, '?>');
  1080. IF CurFinal <> NIL
  1081. THEN INC (CurFinal)
  1082. ELSE CurFinal := StrEnd (CurStart)-1;
  1083. FCurEncoding := AnsiUpperCase (CurAttr.Value ('encoding'));
  1084. IF FCurEncoding = '' THEN
  1085. FCurEncoding := 'UTF-8'; // Default XML Encoding is UTF-8
  1086. CurPartType := ptXmlProlog;
  1087. CurName := '';
  1088. CurContent := '';
  1089. END;
  1090. PROCEDURE TXmlParser.AnalyzeComment (Start : PChar; VAR Final : PChar);
  1091. // Analyze Comments
  1092. BEGIN
  1093. Final := StrPos (Start+4, '-->');
  1094. IF Final = NIL
  1095. THEN Final := StrEnd (Start)-1
  1096. ELSE INC (Final, 2);
  1097. CurPartType := ptComment;
  1098. END;
  1099. PROCEDURE TXmlParser.AnalyzePI (Start : PChar; VAR Final : PChar);
  1100. // Analyze Processing Instructions (PI)
  1101. // This is also called for Character
  1102. VAR
  1103. F : PChar;
  1104. BEGIN
  1105. CurPartType := ptPI;
  1106. Final := StrPos (Start+2, '?>');
  1107. IF Final = NIL
  1108. THEN Final := StrEnd (Start)-1
  1109. ELSE INC (Final);
  1110. ExtractName (Start+2, CWhitespace + ['?', '>'], F);
  1111. SetStringSF (CurName, Start+2, F);
  1112. SetStringSF (CurContent, F+1, Final-2);
  1113. CurAttr.Analyze (F+1, F);
  1114. END;
  1115. PROCEDURE TXmlParser.AnalyzeDtdc;
  1116. (* Analyze Document Type Declaration
  1117. doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  1118. markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
  1119. PEReference ::= '%' Name ';'
  1120. elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1121. AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1122. EntityDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
  1123. '<!ENTITY' S '%' S Name S PEDef S? '>'
  1124. NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  1125. PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char* )))? '?>'
  1126. Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' *)
  1127. TYPE
  1128. TPhase = (phName, phDtd, phInternal, phFinishing);
  1129. VAR
  1130. Phase : TPhase;
  1131. F : PChar;
  1132. ExternalID : TExternalID;
  1133. ExternalDTD : TXmlParser;
  1134. DER : TDtdElementRec;
  1135. BEGIN
  1136. DER.Start := CurStart;
  1137. EntityStack.Clear; // Clear stack for Parameter Entities
  1138. CurPartType := ptDtdc;
  1139. // --- Don't read DTDc twice
  1140. IF FDtdcFinal <> NIL THEN BEGIN
  1141. CurFinal := FDtdcFinal;
  1142. EXIT;
  1143. END;
  1144. // --- Scan DTDc
  1145. CurFinal := CurStart + 9; // First char after '<!DOCTYPE'
  1146. Phase := phName;
  1147. REPEAT
  1148. CASE CurFinal^ OF
  1149. '%' : BEGIN
  1150. PushPE (CurFinal);
  1151. CONTINUE;
  1152. END;
  1153. #0 : IF EntityStack.Count = 0 THEN
  1154. BREAK
  1155. ELSE BEGIN
  1156. CurFinal := EntityStack.Pop;
  1157. CONTINUE;
  1158. END;
  1159. '[' : BEGIN
  1160. Phase := phInternal;
  1161. AnalyzeDtdElements (CurFinal+1, CurFinal);
  1162. CONTINUE;
  1163. END;
  1164. ']' : Phase := phFinishing;
  1165. '>' : BREAK;
  1166. ELSE IF NOT (CurFinal^ IN CWhitespace) THEN BEGIN
  1167. CASE Phase OF
  1168. phName : IF (CurFinal^ IN CNameStart) THEN BEGIN
  1169. ExtractName (CurFinal, CWhitespace + ['[', '>'], F);
  1170. SetStringSF (FRootName, CurFinal, F);
  1171. CurFinal := F;
  1172. Phase := phDtd;
  1173. END;
  1174. phDtd : IF (StrLComp (CurFinal, 'SYSTEM', 6) = 0) OR
  1175. (StrLComp (CurFinal, 'PUBLIC', 6) = 0) THEN BEGIN
  1176. ExternalID := TExternalID.Create (CurFinal);
  1177. ExternalDTD := LoadExternalEntity (ExternalId.SystemId, ExternalID.PublicId, '');
  1178. F := StrPos (ExternalDtd.DocBuffer, '<!');
  1179. IF F <> NIL THEN
  1180. AnalyzeDtdElements (F, F);
  1181. ExternalDTD.Free;
  1182. CurFinal := ExternalID.Final;
  1183. ExternalID.Free;
  1184. END;
  1185. ELSE BEGIN
  1186. DER.ElementType := deError;
  1187. DER.Pos := CurFinal;
  1188. DER.Final := CurFinal;
  1189. DtdElementFound (DER);
  1190. END;
  1191. END;
  1192. END;
  1193. END;
  1194. INC (CurFinal);
  1195. UNTIL FALSE;
  1196. CurPartType := ptDtdc;
  1197. CurName := '';
  1198. CurContent := '';
  1199. // It is an error in the document if "EntityStack" is not empty now
  1200. IF EntityStack.Count > 0 THEN BEGIN
  1201. DER.ElementType := deError;
  1202. DER.Final := CurFinal;
  1203. DER.Pos := CurFinal;
  1204. DtdElementFound (DER);
  1205. END;
  1206. EntityStack.Clear; // Clear stack for General Entities
  1207. FDtdcFinal := CurFinal;
  1208. END;
  1209. PROCEDURE TXmlParser.AnalyzeDtdElements (Start : PChar; VAR Final : PChar);
  1210. // Analyze the "Elements" of a DTD contained in the external or
  1211. // internal DTD subset.
  1212. VAR
  1213. DER : TDtdElementRec;
  1214. BEGIN
  1215. Final := Start;
  1216. REPEAT
  1217. CASE Final^ OF
  1218. '%' : BEGIN
  1219. PushPE (Final);
  1220. CONTINUE;
  1221. END;
  1222. #0 : IF EntityStack.Count = 0 THEN
  1223. BREAK
  1224. ELSE BEGIN
  1225. CurFinal := EntityStack.Pop;
  1226. CONTINUE;
  1227. END;
  1228. ']',
  1229. '>' : BREAK;
  1230. '<' : IF StrLComp (Final, '<!ELEMENT', 9) = 0 THEN AnalyzeElementDecl (Final, Final)
  1231. ELSE IF StrLComp (Final, '<!ATTLIST', 9) = 0 THEN AnalyzeAttListDecl (Final, Final)
  1232. ELSE IF StrLComp (Final, '<!ENTITY', 8) = 0 THEN AnalyzeEntityDecl (Final, Final)
  1233. ELSE IF StrLComp (Final, '<!NOTATION', 10) = 0 THEN AnalyzeNotationDecl (Final, Final)
  1234. ELSE IF StrLComp (Final, '<?', 2) = 0 THEN BEGIN // PI in DTD
  1235. DER.ElementType := dePI;
  1236. DER.Start := Final;
  1237. AnalyzePI (Final, Final);
  1238. DER.Target := PChar (CurName);
  1239. DER.Content := PChar (CurContent);
  1240. DER.AttrList := CurAttr;
  1241. DER.Final := Final;
  1242. DtdElementFound (DER);
  1243. END
  1244. ELSE IF StrLComp (Final, '<!--', 4) = 0 THEN BEGIN // Comment in DTD
  1245. DER.ElementType := deComment;
  1246. DER.Start := Final;
  1247. AnalyzeComment (Final, Final);
  1248. DER.Final := Final;
  1249. DtdElementFound (DER);
  1250. END
  1251. ELSE BEGIN
  1252. DER.ElementType := deError;
  1253. DER.Start := Final;
  1254. DER.Pos := Final;
  1255. DER.Final := Final;
  1256. DtdElementFound (DER);
  1257. END;
  1258. END;
  1259. INC (Final);
  1260. UNTIL FALSE;
  1261. END;
  1262. PROCEDURE TXmlParser.AnalyzeTag;
  1263. // Analyze Tags
  1264. VAR
  1265. S, F : PChar;
  1266. Attr : TAttr;
  1267. ElemDef : TElemDef;
  1268. AttrDef : TAttrDef;
  1269. I : INTEGER;
  1270. BEGIN
  1271. CurPartType := ptStartTag;
  1272. S := CurStart+1;
  1273. IF S^ = '/' THEN BEGIN
  1274. CurPartType := ptEndTag;
  1275. INC (S);
  1276. END;
  1277. ExtractName (S, CWhitespace + ['/'], F);
  1278. SetStringSF (CurName, S, F);
  1279. CurAttr.Analyze (F+1, CurFinal);
  1280. IF CurFinal^ = '/' THEN BEGIN
  1281. CurPartType := ptEmptyTag;
  1282. END;
  1283. CurFinal := StrScanE (CurFinal, '>');
  1284. // --- Set Default Attribute values for nonexistent attributes
  1285. IF (CurPartType = ptStartTag) OR (CurPartType = ptEmptyTag) THEN BEGIN
  1286. ElemDef := Elements.Node (CurName);
  1287. IF ElemDef <> NIL THEN BEGIN
  1288. FOR I := 0 TO ElemDef.Count-1 DO BEGIN
  1289. AttrDef := TAttrDef (ElemDef [I]);
  1290. Attr := TAttr (CurAttr.Node (AttrDef.Name));
  1291. IF (Attr = NIL) AND (AttrDef.Value <> '') THEN BEGIN
  1292. Attr := TAttr.Create (AttrDef.Name, AttrDef.Value);
  1293. Attr.ValueType := vtDefault;
  1294. CurAttr.Add (Attr);
  1295. END;
  1296. IF Attr <> NIL THEN BEGIN
  1297. CASE AttrDef.DefaultType OF
  1298. adDefault : ;
  1299. adRequired : ; // -!- It is an error in the document if "Attr.Value" is an empty string
  1300. adImplied : Attr.ValueType := vtImplied;
  1301. adFixed : BEGIN
  1302. Attr.ValueType := vtFixed;
  1303. Attr.Value := AttrDef.Value;
  1304. END;
  1305. END;
  1306. Attr.AttrType := AttrDef.AttrType;
  1307. END;
  1308. END;
  1309. END;
  1310. // --- Normalize Attribute Values. XmlSpec:
  1311. // - a character reference is processed by appending the referenced character to the attribute value
  1312. // - an entity reference is processed by recursively processing the replacement text of the entity
  1313. // - a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20 to the normalized value,
  1314. // except that only a single #x20 is appended for a "#xD#xA" sequence that is part of an external
  1315. // parsed entity or the literal entity value of an internal parsed entity
  1316. // - other characters are processed by appending them to the normalized value
  1317. // If the declared value is not CDATA, then the XML processor must further process the
  1318. // normalized attribute value by discarding any leading and trailing space (#x20) characters,
  1319. // and by replacing sequences of space (#x20) characters by a single space (#x20) character.
  1320. // All attributes for which no declaration has been read should be treated by a
  1321. // non-validating parser as if declared CDATA.
  1322. // !!! The XML 1.0 SE specification is somewhat different here
  1323. // This code does not conform exactly to this specification
  1324. FOR I := 0 TO CurAttr.Count-1 DO
  1325. WITH TAttr (CurAttr [I]) DO BEGIN
  1326. ReplaceGeneralEntities (Value);
  1327. ReplaceCharacterEntities (Value);
  1328. IF (AttrType <> atCData) AND (AttrType <> atUnknown)
  1329. THEN Value := TranslateEncoding (TrimWs (ConvertWs (Value, TRUE)))
  1330. ELSE Value := TranslateEncoding (ConvertWs (Value, FALSE));
  1331. END;
  1332. END;
  1333. END;
  1334. PROCEDURE TXmlParser.AnalyzeCData;
  1335. // Analyze CDATA Sections
  1336. BEGIN
  1337. CurPartType := ptCData;
  1338. CurFinal := StrPos (CurStart, CDEnd);
  1339. IF CurFinal = NIL THEN BEGIN
  1340. CurFinal := StrEnd (CurStart)-1;
  1341. CurContent := TranslateEncoding (StrPas (CurStart+Length (CDStart)));
  1342. END
  1343. ELSE BEGIN
  1344. SetStringSF (CurContent, CurStart+Length (CDStart), CurFinal-1);
  1345. INC (CurFinal, Length (CDEnd)-1);
  1346. CurContent := TranslateEncoding (CurContent);
  1347. END;
  1348. END;
  1349. PROCEDURE TXmlParser.AnalyzeText (VAR IsDone : BOOLEAN);
  1350. (* Analyzes Text Content between Tags. CurFinal will point to the last content character.
  1351. Content ends at a '<' character or at the end of the document.
  1352. Entity References and Character Entity references are resolved.
  1353. If PackSpaces is TRUE, contiguous Whitespace Characters will be compressed to
  1354. one Space #x20 character, Whitespace at the beginning and end of content will
  1355. be trimmed off and content which is or becomes empty is not returned to
  1356. the application (in this case, "IsDone" is set to FALSE which causes the
  1357. Scan method to proceed directly to the next part. *)
  1358. PROCEDURE ProcessEntity;
  1359. (* Is called if there is an ampsersand '&' character found in the document.
  1360. IN "CurFinal" points to the ampersand
  1361. OUT "CurFinal" points to the first character after the semi-colon ';' *)
  1362. VAR
  1363. P : PChar;
  1364. Name : STRING;
  1365. EntityDef : TEntityDef;
  1366. ExternalEntity : TXmlParser;
  1367. BEGIN
  1368. P := StrScan (CurFinal , ';');
  1369. IF P <> NIL THEN BEGIN
  1370. SetStringSF (Name, CurFinal+1, P-1);
  1371. // Is it a Character Entity?
  1372. IF (CurFinal+1)^ = '#' THEN BEGIN
  1373. IF UpCase ((CurFinal+2)^) = 'X' // !!! Can't use "CHR" for Unicode characters > 255:
  1374. THEN CurContent := CurContent + CHR (StrToIntDef ('$'+Copy (Name, 3, MaxInt), 32))
  1375. ELSE CurContent := CurContent + CHR (StrToIntDef (Copy (Name, 2, MaxInt), 32));
  1376. CurFinal := P+1;
  1377. EXIT;
  1378. END
  1379. // Is it a Predefined Entity?
  1380. ELSE IF Name = 'lt' THEN BEGIN CurContent := CurContent + '<'; CurFinal := P+1; EXIT; END
  1381. ELSE IF Name = 'gt' THEN BEGIN CurContent := CurContent + '>'; CurFinal := P+1; EXIT; END
  1382. ELSE IF Name = 'amp' THEN BEGIN CurContent := CurContent + '&'; CurFinal := P+1; EXIT; END
  1383. ELSE IF Name = 'apos' THEN BEGIN CurContent := CurContent + ''''; CurFinal := P+1; EXIT; END
  1384. ELSE IF Name = 'quot' THEN BEGIN CurContent := CurContent + '"'; CurFinal := P+1; EXIT; END;
  1385. // Replace with Entity from DTD
  1386. EntityDef := TEntityDef (Entities.Node (Name));
  1387. IF EntityDef <> NIL THEN BEGIN
  1388. IF EntityDef.Value <> '' THEN BEGIN
  1389. EntityStack.Push (P+1);
  1390. CurFinal := PChar (EntityDef.Value);
  1391. END
  1392. ELSE BEGIN
  1393. ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName);
  1394. EntityStack.Push (ExternalEntity, P+1);
  1395. CurFinal := ExternalEntity.DocBuffer;
  1396. END;
  1397. END
  1398. ELSE BEGIN
  1399. CurContent := CurContent + Name;
  1400. CurFinal := P+1;
  1401. END;
  1402. END
  1403. ELSE BEGIN
  1404. INC (CurFinal);
  1405. END;
  1406. END;
  1407. VAR
  1408. C : INTEGER;
  1409. BEGIN
  1410. CurFinal := CurStart;
  1411. CurPartType := ptContent;
  1412. CurContent := '';
  1413. C := 0;
  1414. REPEAT
  1415. CASE CurFinal^ OF
  1416. '&' : BEGIN
  1417. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1418. C := 0;
  1419. ProcessEntity;
  1420. CONTINUE;
  1421. END;
  1422. #0 : BEGIN
  1423. IF EntityStack.Count = 0 THEN
  1424. BREAK
  1425. ELSE BEGIN
  1426. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1427. C := 0;
  1428. CurFinal := EntityStack.Pop;
  1429. CONTINUE;
  1430. END;
  1431. END;
  1432. '<' : BREAK;
  1433. ELSE INC (C);
  1434. END;
  1435. INC (CurFinal);
  1436. UNTIL FALSE;
  1437. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1438. DEC (CurFinal);
  1439. IF FNormalize THEN BEGIN
  1440. CurContent := ConvertWs (TrimWs (CurContent), TRUE);
  1441. IsDone := CurContent <> ''; // IsDone will only get FALSE if PackSpaces is TRUE
  1442. END;
  1443. END;
  1444. PROCEDURE TXmlParser.AnalyzeElementDecl (Start : PChar; VAR Final : PChar);
  1445. (* Parse <!ELEMENT declaration starting at "Start"
  1446. Final must point to the terminating '>' character
  1447. XmlSpec 3.2:
  1448. elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1449. contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  1450. Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
  1451. '(' S? '#PCDATA' S? ')'
  1452. children ::= (choice | seq) ('?' | '*' | '+')?
  1453. choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  1454. cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  1455. seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  1456. More simply:
  1457. contentspec ::= EMPTY
  1458. ANY
  1459. '(#PCDATA)'
  1460. '(#PCDATA | A | B)*'
  1461. '(A, B, C)'
  1462. '(A | B | C)'
  1463. '(A?, B*, C+),
  1464. '(A, (B | C | D)* )' *)
  1465. VAR
  1466. Element : TElemDef;
  1467. Elem2 : TElemDef;
  1468. F : PChar;
  1469. DER : TDtdElementRec;
  1470. BEGIN
  1471. Element := TElemDef.Create;
  1472. Final := Start + 9;
  1473. DER.Start := Start;
  1474. REPEAT
  1475. IF Final^ = '>' THEN BREAK;
  1476. IF (Final^ IN CNameStart) AND (Element.Name = '') THEN BEGIN
  1477. ExtractName (Final, CWhitespace, F);
  1478. SetStringSF (Element.Name, Final, F);
  1479. Final := F;
  1480. F := StrScan (Final+1, '>');
  1481. IF F = NIL THEN BEGIN
  1482. Element.Definition := STRING (Final);
  1483. Final := StrEnd (Final);
  1484. BREAK;
  1485. END
  1486. ELSE BEGIN
  1487. SetStringSF (Element.Definition, Final+1, F-1);
  1488. Final := F;
  1489. BREAK;
  1490. END;
  1491. END;
  1492. INC (Final);
  1493. UNTIL FALSE;
  1494. Element.Definition := DelChars (Element.Definition, CWhitespace);
  1495. ReplaceParameterEntities (Element.Definition);
  1496. IF Element.Definition = 'EMPTY' THEN Element.ElemType := etEmpty
  1497. ELSE IF Element.Definition = 'ANY' THEN Element.ElemType := etAny
  1498. ELSE IF Copy (Element.Definition, 1, 8) = '(#PCDATA' THEN Element.ElemType := etMixed
  1499. ELSE IF Copy (Element.Definition, 1, 1) = '(' THEN Element.ElemType := etChildren
  1500. ELSE Element.ElemType := etAny;
  1501. Elem2 := Elements.Node (Element.Name);
  1502. IF Elem2 <> NIL THEN
  1503. Elements.Delete (Elements.IndexOf (Elem2));
  1504. Elements.Add (Element);
  1505. Final := StrScanE (Final, '>');
  1506. DER.ElementType := deElement;
  1507. DER.ElemDef := Element;
  1508. DER.Final := Final;
  1509. DtdElementFound (DER);
  1510. END;
  1511. PROCEDURE TXmlParser.AnalyzeAttListDecl (Start : PChar; VAR Final : PChar);
  1512. (* Parse <!ATTLIST declaration starting at "Start"
  1513. Final must point to the terminating '>' character
  1514. XmlSpec 3.3:
  1515. AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1516. AttDef ::= S Name S AttType S DefaultDecl
  1517. AttType ::= StringType | TokenizedType | EnumeratedType
  1518. StringType ::= 'CDATA'
  1519. TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
  1520. EnumeratedType ::= NotationType | Enumeration
  1521. NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  1522. Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  1523. DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  1524. AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
  1525. Examples:
  1526. <!ATTLIST address
  1527. A1 CDATA "Default"
  1528. A2 ID #REQUIRED
  1529. A3 IDREF #IMPLIED
  1530. A4 IDREFS #IMPLIED
  1531. A5 ENTITY #FIXED "&at;&#252;"
  1532. A6 ENTITIES #REQUIRED
  1533. A7 NOTATION (WMF | DXF) "WMF"
  1534. A8 (A | B | C) #REQUIRED> *)
  1535. TYPE
  1536. TPhase = (phElementName, phName, phType, phNotationContent, phDefault);
  1537. VAR
  1538. Phase : TPhase;
  1539. F : PChar;
  1540. ElementName : STRING;
  1541. ElemDef : TElemDef;
  1542. AttrDef : TAttrDef;
  1543. AttrDef2 : TAttrDef;
  1544. Strg : STRING;
  1545. DER : TDtdElementRec;
  1546. BEGIN
  1547. Final := Start + 9; // The character after <!ATTLIST
  1548. Phase := phElementName;
  1549. DER.Start := Start;
  1550. AttrDef := NIL;
  1551. ElemDef := NIL;
  1552. REPEAT
  1553. IF NOT (Final^ IN CWhitespace) THEN
  1554. CASE Final^ OF
  1555. '%' : BEGIN
  1556. PushPE (Final);
  1557. CONTINUE;
  1558. END;
  1559. #0 : IF EntityStack.Count = 0 THEN
  1560. BREAK
  1561. ELSE BEGIN
  1562. Final := EntityStack.Pop;
  1563. CONTINUE;
  1564. END;
  1565. '>' : BREAK;
  1566. ELSE CASE Phase OF
  1567. phElementName : BEGIN
  1568. ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F);
  1569. SetStringSF (ElementName, Final, F);
  1570. Final := F;
  1571. ElemDef := Elements.Node (ElementName);
  1572. IF ElemDef = NIL THEN BEGIN
  1573. ElemDef := TElemDef.Create;
  1574. ElemDef.Name := ElementName;
  1575. ElemDef.Definition := 'ANY';
  1576. ElemDef.ElemType := etAny;
  1577. Elements.Add (ElemDef);
  1578. END;
  1579. Phase := phName;
  1580. END;
  1581. phName : BEGIN
  1582. AttrDef := TAttrDef.Create;
  1583. ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F);
  1584. SetStringSF (AttrDef.Name, Final, F);
  1585. Final := F;
  1586. AttrDef2 := TAttrDef (ElemDef.Node (AttrDef.Name));
  1587. IF AttrDef2 <> NIL THEN
  1588. ElemDef.Delete (ElemDef.IndexOf (AttrDef2));
  1589. ElemDef.Add (AttrDef);
  1590. Phase := phType;
  1591. END;
  1592. phType : BEGIN
  1593. IF Final^ = '(' THEN BEGIN
  1594. F := StrScan (Final+1, ')');
  1595. IF F <> NIL
  1596. THEN SetStringSF (AttrDef.TypeDef, Final+1, F-1)
  1597. ELSE AttrDef.TypeDef := STRING (Final+1);
  1598. AttrDef.TypeDef := DelChars (AttrDef.TypeDef, CWhitespace);
  1599. AttrDef.AttrType := atEnumeration;
  1600. ReplaceParameterEntities (AttrDef.TypeDef);
  1601. ReplaceCharacterEntities (AttrDef.TypeDef);
  1602. Phase := phDefault;
  1603. END
  1604. ELSE IF StrLComp (Final, 'NOTATION', 8) = 0 THEN BEGIN
  1605. INC (Final, 8);
  1606. AttrDef.AttrType := atNotation;
  1607. Phase := phNotationContent;
  1608. END
  1609. ELSE BEGIN
  1610. ExtractName (Final, CWhitespace+CQuoteChar+['#'], F);
  1611. SetStringSF (AttrDef.TypeDef, Final, F);
  1612. IF AttrDef.TypeDef = 'CDATA' THEN AttrDef.AttrType := atCData
  1613. ELSE IF AttrDef.TypeDef = 'ID' THEN AttrDef.AttrType := atId
  1614. ELSE IF AttrDef.TypeDef = 'IDREF' THEN AttrDef.AttrType := atIdRef
  1615. ELSE IF AttrDef.TypeDef = 'IDREFS' THEN AttrDef.AttrType := atIdRefs
  1616. ELSE IF AttrDef.TypeDef = 'ENTITY' THEN AttrDef.AttrType := atEntity
  1617. ELSE IF AttrDef.TypeDef = 'ENTITIES' THEN AttrDef.AttrType := atEntities
  1618. ELSE IF AttrDef.TypeDef = 'NMTOKEN' THEN AttrDef.AttrType := atNmToken
  1619. ELSE IF AttrDef.TypeDef = 'NMTOKENS' THEN AttrDef.AttrType := atNmTokens;
  1620. Phase := phDefault;
  1621. END
  1622. END;
  1623. phNotationContent : BEGIN
  1624. F := StrScan (Final, ')');
  1625. IF F <> NIL THEN
  1626. SetStringSF (AttrDef.Notations, Final+1, F-1)
  1627. ELSE BEGIN
  1628. AttrDef.Notations := STRING (Final+1);
  1629. Final := StrEnd (Final);
  1630. END;
  1631. ReplaceParameterEntities (AttrDef.Notations);
  1632. AttrDef.Notations := DelChars (AttrDef.Notations, CWhitespace);
  1633. Phase := phDefault;
  1634. END;
  1635. phDefault : BEGIN
  1636. IF Final^ = '#' THEN BEGIN
  1637. ExtractName (Final, CWhiteSpace + CQuoteChar, F);
  1638. SetStringSF (Strg, Final, F);
  1639. Final := F;
  1640. ReplaceParameterEntities (Strg);
  1641. IF Strg = '#REQUIRED' THEN BEGIN AttrDef.DefaultType := adRequired; Phase := phName; END
  1642. ELSE IF Strg = '#IMPLIED' THEN BEGIN AttrDef.DefaultType := adImplied; Phase := phName; END
  1643. ELSE IF Strg = '#FIXED' THEN AttrDef.DefaultType := adFixed;
  1644. END
  1645. ELSE IF (Final^ IN CQuoteChar) THEN BEGIN
  1646. ExtractQuote (Final, AttrDef.Value, Final);
  1647. ReplaceParameterEntities (AttrDef.Value);
  1648. ReplaceCharacterEntities (AttrDef.Value);
  1649. Phase := phName;
  1650. END;
  1651. IF Phase = phName THEN BEGIN
  1652. AttrDef := NIL;
  1653. END;
  1654. END;
  1655. END;
  1656. END;
  1657. INC (Final);
  1658. UNTIL FALSE;
  1659. Final := StrScan (Final, '>');
  1660. DER.ElementType := deAttList;
  1661. DER.ElemDef := ElemDef;
  1662. DER.Final := Final;
  1663. DtdElementFound (DER);
  1664. END;
  1665. PROCEDURE TXmlParser.AnalyzeEntityDecl (Start : PChar; VAR Final : PChar);
  1666. (* Parse <!ENTITY declaration starting at "Start"
  1667. Final must point to the terminating '>' character
  1668. XmlSpec 4.2:
  1669. EntityDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
  1670. '<!ENTITY' S '%' S Name S PEDef S? '>'
  1671. EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  1672. PEDef ::= EntityValue | ExternalID
  1673. NDataDecl ::= S 'NDATA' S Name
  1674. EntityValue ::= '"' ([^%&"] | PEReference | EntityRef | CharRef)* '"' |
  1675. "'" ([^%&'] | PEReference | EntityRef | CharRef)* "'"
  1676. PEReference ::= '%' Name ';'
  1677. Examples
  1678. <!ENTITY test1 "Stefan Heymann"> <!-- Internal, general, parsed -->
  1679. <!ENTITY test2 SYSTEM "ent2.xml"> <!-- External, general, parsed -->
  1680. <!ENTITY test2 SYSTEM "ent3.gif" NDATA gif> <!-- External, general, unparsed -->
  1681. <!ENTITY % test3 "<!ELEMENT q ANY>"> <!-- Internal, parameter -->
  1682. <!ENTITY % test6 SYSTEM "ent6.xml"> <!-- External, parameter -->
  1683. <!ENTITY test4 "&test1; ist lieb"> <!-- IGP, Replacement text <> literal value -->
  1684. <!ENTITY test5 "<p>Dies ist ein Test-Absatz</p>"> <!-- IGP, See XmlSpec 2.4 -->
  1685. *)
  1686. TYPE
  1687. TPhase = (phName, phContent, phNData, phNotationName, phFinalGT);
  1688. VAR
  1689. Phase : TPhase;
  1690. IsParamEntity : BOOLEAN;
  1691. F : PChar;
  1692. ExternalID : TExternalID;
  1693. EntityDef : TEntityDef;
  1694. EntityDef2 : TEntityDef;
  1695. DER : TDtdElementRec;
  1696. BEGIN
  1697. Final := Start + 8; // First char after <!ENTITY
  1698. DER.Start := Start;
  1699. Phase := phName;
  1700. IsParamEntity := FALSE;
  1701. EntityDef := TEntityDef.Create;
  1702. REPEAT
  1703. IF NOT (Final^ IN CWhitespace) THEN
  1704. CASE Final^ OF
  1705. '%' : IsParamEntity := TRUE;
  1706. '>' : BREAK;
  1707. ELSE CASE Phase OF
  1708. phName : IF Final^ IN CNameStart THEN BEGIN
  1709. ExtractName (Final, CWhitespace + CQuoteChar, F);
  1710. SetStringSF (EntityDef.Name, Final, F);
  1711. Final := F;
  1712. Phase := phContent;
  1713. END;
  1714. phContent : IF Final^ IN CQuoteChar THEN BEGIN
  1715. ExtractQuote (Final, EntityDef.Value, Final);
  1716. Phase := phFinalGT;
  1717. END
  1718. ELSE IF (StrLComp (Final, 'SYSTEM', 6) = 0) OR
  1719. (StrLComp (Final, 'PUBLIC', 6) = 0) THEN BEGIN
  1720. ExternalID := TExternalID.Create (Final);
  1721. EntityDef.SystemId := ExternalID.SystemId;
  1722. EntityDef.PublicId := ExternalID.PublicId;
  1723. Final := ExternalID.Final;
  1724. Phase := phNData;
  1725. ExternalID.Free;
  1726. END;
  1727. phNData : IF StrLComp (Final, 'NDATA', 5) = 0 THEN BEGIN
  1728. INC (Final, 4);
  1729. Phase := phNotationName;
  1730. END;
  1731. phNotationName : IF Final^ IN CNameStart THEN BEGIN
  1732. ExtractName (Final, CWhitespace + ['>'], F);
  1733. SetStringSF (EntityDef.NotationName, Final, F);
  1734. Final := F;
  1735. Phase := phFinalGT;
  1736. END;
  1737. phFinalGT : ; // -!- There is an error in the document if this branch is called
  1738. END;
  1739. END;
  1740. INC (Final);
  1741. UNTIL FALSE;
  1742. IF IsParamEntity THEN BEGIN
  1743. EntityDef2 := TEntityDef (ParEntities.Node (EntityDef.Name));
  1744. IF EntityDef2 <> NIL THEN
  1745. ParEntities.Delete (ParEntities.IndexOf (EntityDef2));
  1746. ParEntities.Add (EntityDef);
  1747. ReplaceCharacterEntities (EntityDef.Value);
  1748. END
  1749. ELSE BEGIN
  1750. EntityDef2 := TEntityDef (Entities.Node (EntityDef.Name));
  1751. IF EntityDef2 <> NIL THEN
  1752. Entities.Delete (Entities.IndexOf (EntityDef2));
  1753. Entities.Add (EntityDef);
  1754. ReplaceParameterEntities (EntityDef.Value); // Create replacement texts (see XmlSpec 4.5)
  1755. ReplaceCharacterEntities (EntityDef.Value);
  1756. END;
  1757. Final := StrScanE (Final, '>');
  1758. DER.ElementType := deEntity;
  1759. DER.EntityDef := EntityDef;
  1760. DER.Final := Final;
  1761. DtdElementFound (DER);
  1762. END;
  1763. PROCEDURE TXmlParser.AnalyzeNotationDecl (Start : PChar; VAR Final : PChar);
  1764. // Parse <!NOTATION declaration starting at "Start"
  1765. // Final must point to the terminating '>' character
  1766. // XmlSpec 4.7: NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  1767. TYPE
  1768. TPhase = (phName, phExtId, phEnd);
  1769. VAR
  1770. ExternalID : TExternalID;
  1771. Phase : TPhase;
  1772. F : PChar;
  1773. NotationDef : TNotationDef;
  1774. DER : TDtdElementRec;
  1775. BEGIN
  1776. Final := Start + 10; // Character after <!NOTATION
  1777. DER.Start := Start;
  1778. Phase := phName;
  1779. NotationDef := TNotationDef.Create;
  1780. REPEAT
  1781. IF NOT (Final^ IN CWhitespace) THEN
  1782. CASE Final^ OF
  1783. '>',
  1784. #0 : BREAK;
  1785. ELSE CASE Phase OF
  1786. phName : BEGIN
  1787. ExtractName (Final, CWhitespace + ['>'], F);
  1788. SetStringSF (NotationDef.Name, Final, F);
  1789. Final := F;
  1790. Phase := phExtId;
  1791. END;
  1792. phExtId : BEGIN
  1793. ExternalID := TExternalID.Create (Final);
  1794. NotationDef.Value := ExternalID.SystemId;
  1795. NotationDef.PublicId := ExternalID.PublicId;
  1796. Final := ExternalId.Final;
  1797. ExternalId.Free;
  1798. Phase := phEnd;
  1799. END;
  1800. phEnd : ; // -!- There is an error in the document if this branch is called
  1801. END;
  1802. END;
  1803. INC (Final);
  1804. UNTIL FALSE;
  1805. Notations.Add (NotationDef);
  1806. Final := StrScanE (Final, '>');
  1807. DER.ElementType := deNotation;
  1808. DER.NotationDef := NotationDef;
  1809. DER.Final := Final;
  1810. DtdElementFound (DER);
  1811. END;
  1812. PROCEDURE TXmlParser.PushPE (VAR Start : PChar);
  1813. (* If there is a parameter entity reference found in the data stream,
  1814. the current position will be pushed to the entity stack.
  1815. Start: IN Pointer to the '%' character starting the PE reference
  1816. OUT Pointer to first character of PE replacement text *)
  1817. VAR
  1818. P : PChar;
  1819. EntityDef : TEntityDef;
  1820. BEGIN
  1821. P := StrScan (Start, ';');
  1822. IF P <> NIL THEN BEGIN
  1823. EntityDef := TEntityDef (ParEntities.Node (StrSFPas (Start+1, P-1)));
  1824. IF EntityDef <> NIL THEN BEGIN
  1825. EntityStack.Push (P+1);
  1826. Start := PChar (EntityDef.Value);
  1827. END
  1828. ELSE
  1829. Start := P+1;
  1830. END;
  1831. END;
  1832. PROCEDURE TXmlParser.ReplaceCharacterEntities (VAR Str : STRING);
  1833. // Replaces all Character Entity References in the String
  1834. VAR
  1835. Start : INTEGER;
  1836. PAmp : PChar;
  1837. PSemi : PChar;
  1838. PosAmp : INTEGER;
  1839. Len : INTEGER; // Length of Entity Reference
  1840. BEGIN
  1841. IF Str = '' THEN EXIT;
  1842. Start := 1;
  1843. REPEAT
  1844. PAmp := StrPos (PChar (Str) + Start-1, '&#');
  1845. IF PAmp = NIL THEN BREAK;
  1846. PSemi := StrScan (PAmp+2, ';');
  1847. IF PSemi = NIL THEN BREAK;
  1848. PosAmp := PAmp - PChar (Str) + 1;
  1849. Len := PSemi-PAmp+1;
  1850. IF CompareText (Str [PosAmp+2], 'x') = 0 // !!! Can't use "CHR" for Unicode characters > 255
  1851. THEN Str [PosAmp] := CHR (StrToIntDef ('$'+Copy (Str, PosAmp+3, Len-4), 0))
  1852. ELSE Str [PosAmp] := CHR (StrToIntDef (Copy (Str, PosAmp+2, Len-3), 32));
  1853. Delete (Str, PosAmp+1, Len-1);
  1854. Start := PosAmp + 1;
  1855. UNTIL FALSE;
  1856. END;
  1857. PROCEDURE TXmlParser.ReplaceParameterEntities (VAR Str : STRING);
  1858. // Recursively replaces all Parameter Entity References in the String
  1859. PROCEDURE ReplaceEntities (VAR Str : STRING);
  1860. VAR
  1861. Start : INTEGER;
  1862. PAmp : PChar;
  1863. PSemi : PChar;
  1864. PosAmp : INTEGER;
  1865. Len : INTEGER;
  1866. Entity : TEntityDef;
  1867. Repl : STRING; // Replacement
  1868. BEGIN
  1869. IF Str = '' THEN EXIT;
  1870. Start := 1;
  1871. REPEAT
  1872. PAmp := StrPos (PChar (Str)+Start-1, '%');
  1873. IF PAmp = NIL THEN BREAK;
  1874. PSemi := StrScan (PAmp+2, ';');
  1875. IF PSemi = NIL THEN BREAK;
  1876. PosAmp := PAmp - PChar (Str) + 1;
  1877. Len := PSemi-PAmp+1;
  1878. Entity := TEntityDef (ParEntities.Node (Copy (Str, PosAmp+1, Len-2)));
  1879. IF Entity <> NIL THEN BEGIN
  1880. Repl := Entity.Value;
  1881. ReplaceEntities (Repl); // Recursion
  1882. END
  1883. ELSE
  1884. Repl := Copy (Str, PosAmp, Len);
  1885. Delete (Str, PosAmp, Len);
  1886. Insert (Repl, Str, PosAmp);
  1887. Start := PosAmp + Length (Repl);
  1888. UNTIL FALSE;
  1889. END;
  1890. BEGIN
  1891. ReplaceEntities (Str);
  1892. END;
  1893. PROCEDURE TXmlParser.ReplaceGeneralEntities (VAR Str : STRING);
  1894. // Recursively replaces General Entity References in the String
  1895. PROCEDURE ReplaceEntities (VAR Str : STRING);
  1896. VAR
  1897. Start : INTEGER;
  1898. PAmp : PChar;
  1899. PSemi : PChar;
  1900. PosAmp : INTEGER;
  1901. Len : INTEGER;
  1902. EntityDef : TEntityDef;
  1903. EntName : STRING;
  1904. Repl : STRING; // Replacement
  1905. ExternalEntity : TXmlParser;
  1906. BEGIN
  1907. IF Str = '' THEN EXIT;
  1908. Start := 1;
  1909. REPEAT
  1910. PAmp := StrPos (PChar (Str)+Start-1, '&');
  1911. IF PAmp = NIL THEN BREAK;
  1912. PSemi := StrScan (PAmp+2, ';');
  1913. IF PSemi = NIL THEN BREAK;
  1914. PosAmp := PAmp - PChar (Str) + 1;
  1915. Len := PSemi-PAmp+1;
  1916. EntName := Copy (Str, PosAmp+1, Len-2);
  1917. IF EntName = 'lt' THEN Repl := '<'
  1918. ELSE IF EntName = 'gt' THEN Repl := '>'
  1919. ELSE IF EntName = 'amp' THEN Repl := '&'
  1920. ELSE IF EntName = 'apos' THEN Repl := ''''
  1921. ELSE IF EntName = 'quot' THEN Repl := '"'
  1922. ELSE BEGIN
  1923. EntityDef := TEntityDef (Entities.Node (EntName));
  1924. IF EntityDef <> NIL THEN BEGIN
  1925. IF EntityDef.Value <> '' THEN // Internal Entity
  1926. Repl := EntityDef.Value
  1927. ELSE BEGIN // External Entity
  1928. ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName);
  1929. Repl := StrPas (ExternalEntity.DocBuffer); // !!! What if it contains a Text Declaration?
  1930. ExternalEntity.Free;
  1931. END;
  1932. ReplaceEntities (Repl); // Recursion
  1933. END
  1934. ELSE
  1935. Repl := Copy (Str, PosAmp, Len);
  1936. END;
  1937. Delete (Str, PosAmp, Len);
  1938. Insert (Repl, Str, PosAmp);
  1939. Start := PosAmp + Length (Repl);
  1940. UNTIL FALSE;
  1941. END;
  1942. BEGIN
  1943. ReplaceEntities (Str);
  1944. END;
  1945. FUNCTION TXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : STRING) : TXmlParser;
  1946. // This will be called whenever there is a Parsed External Entity or
  1947. // the DTD External Subset to be parsed.
  1948. // It has to create a TXmlParser instance and load the desired Entity.
  1949. // This instance of LoadExternalEntity assumes that "SystemId" is a valid
  1950. // file name (relative to the Document source) and loads this file using
  1951. // the LoadFromFile method.
  1952. VAR
  1953. Filename : STRING;
  1954. BEGIN
  1955. // --- Convert System ID to complete filename
  1956. Filename := StringReplace (SystemId, '/', '\', [rfReplaceAll]);
  1957. IF Copy (FSource, 1, 1) <> '<' THEN
  1958. IF (Copy (Filename, 1, 2) = '\\') OR (Copy (Filename, 2, 1) = ':') THEN
  1959. // Already has an absolute Path
  1960. ELSE BEGIN
  1961. Filename := ExtractFilePath (FSource) + Filename;
  1962. END;
  1963. // --- Load the File
  1964. Result := TXmlParser.Create;
  1965. Result.LoadFromFile (Filename);
  1966. END;
  1967. FUNCTION TXmlParser.TranslateEncoding (CONST Source : STRING) : STRING;
  1968. // The member variable "CurEncoding" always holds the name of the current
  1969. // encoding, e.g. 'UTF-8' or 'ISO-8859-1'.
  1970. // This virtual method "TranslateEncoding" is responsible for translating
  1971. // the content passed in the "Source" parameter to the Encoding which
  1972. // is expected by the application.
  1973. // This instance of "TranlateEncoding" assumes that the Application expects
  1974. // Windows ANSI (Win1252) strings. It is able to transform UTF-8 or ISO-8859-1
  1975. // encodings.
  1976. // If you want your application to understand or create other encodings, you
  1977. // override this function.
  1978. BEGIN
  1979. IF CurEncoding = 'UTF-8'
  1980. THEN Result := Utf8ToAnsi (Source)
  1981. ELSE Result := Source;
  1982. END;
  1983. PROCEDURE TXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec);
  1984. // This method is called for every element which is found in the DTD
  1985. // declaration. The variant record TDtdElementRec is passed which
  1986. // holds informations about the element.
  1987. // You can override this function to handle DTD declarations.
  1988. // Note that when you parse the same Document instance a second time,
  1989. // the DTD will not get parsed again.
  1990. BEGIN
  1991. END;
  1992. FUNCTION TXmlParser.GetDocBuffer: PChar;
  1993. // Returns FBuffer or a pointer to a NUL char if Buffer is empty
  1994. BEGIN
  1995. IF FBuffer = NIL
  1996. THEN Result := #0
  1997. ELSE Result := FBuffer;
  1998. END;
  1999. (*$IFNDEF HAS_CONTNRS_UNIT
  2000. ===============================================================================================
  2001. TObjectList
  2002. ===============================================================================================
  2003. *)
  2004. DESTRUCTOR TObjectList.Destroy;
  2005. BEGIN
  2006. Clear;
  2007. SetCapacity(0);
  2008. INHERITED Destroy;
  2009. END;
  2010. PROCEDURE TObjectList.Delete (Index : INTEGER);
  2011. BEGIN
  2012. IF (Index < 0) OR (Index >= Count) THEN EXIT;
  2013. TObject (Items [Index]).Free;
  2014. INHERITED Delete (Index);
  2015. END;
  2016. PROCEDURE TObjectList.Clear;
  2017. BEGIN
  2018. WHILE Count > 0 DO
  2019. Delete (Count-1);
  2020. END;
  2021. (*$ENDIF *)
  2022. (*
  2023. ===============================================================================================
  2024. TNvpNode
  2025. --------
  2026. Node base class for the TNvpList
  2027. ===============================================================================================
  2028. *)
  2029. CONSTRUCTOR TNvpNode.Create (TheName, TheValue : STRING);
  2030. BEGIN
  2031. INHERITED Create;
  2032. Name := TheName;
  2033. Value := TheValue;
  2034. END;
  2035. (*
  2036. ===============================================================================================
  2037. TNvpList
  2038. --------
  2039. A generic List of Name-Value Pairs, based on the TObjectList introduced in Delphi 5
  2040. ===============================================================================================
  2041. *)
  2042. PROCEDURE TNvpList.Add (Node : TNvpNode);
  2043. VAR
  2044. I : INTEGER;
  2045. BEGIN
  2046. FOR I := Count-1 DOWNTO 0 DO
  2047. IF Node.Name > TNvpNode (Items [I]).Name THEN BEGIN
  2048. Insert (I+1, Node);
  2049. EXIT;
  2050. END;
  2051. Insert (0, Node);
  2052. END;
  2053. FUNCTION TNvpList.Node (Name : STRING) : TNvpNode;
  2054. // Binary search for Node
  2055. VAR
  2056. L, H : INTEGER; // Low, High Limit
  2057. T, C : INTEGER; // Test Index, Comparison result
  2058. Last : INTEGER; // Last Test Index
  2059. BEGIN
  2060. IF Count=0 THEN BEGIN
  2061. Result := NIL;
  2062. EXIT;
  2063. END;
  2064. L := 0;
  2065. H := Count;
  2066. Last := -1;
  2067. REPEAT
  2068. T := (L+H) DIV 2;
  2069. IF T=Last THEN BREAK;
  2070. Result := TNvpNode (Items [T]);
  2071. C := CompareStr (Result.Name, Name);
  2072. IF C = 0 THEN EXIT
  2073. ELSE IF C < 0 THEN L := T
  2074. ELSE H := T;
  2075. Last := T;
  2076. UNTIL FALSE;
  2077. Result := NIL;
  2078. END;
  2079. FUNCTION TNvpList.Node (Index : INTEGER) : TNvpNode;
  2080. BEGIN
  2081. IF (Index < 0) OR (Index >= Count)
  2082. THEN Result := NIL
  2083. ELSE Result := TNvpNode (Items [Index]);
  2084. END;
  2085. FUNCTION TNvpList.Value (Name : STRING) : STRING;
  2086. VAR
  2087. Nvp : TNvpNode;
  2088. BEGIN
  2089. Nvp := TNvpNode (Node (Name));
  2090. IF Nvp <> NIL
  2091. THEN Result := Nvp.Value
  2092. ELSE Result := '';
  2093. END;
  2094. FUNCTION TNvpList.Value (Index : INTEGER) : STRING;
  2095. BEGIN
  2096. IF (Index < 0) OR (Index >= Count)
  2097. THEN Result := ''
  2098. ELSE Result := TNvpNode (Items [Index]).Value;
  2099. END;
  2100. FUNCTION TNvpList.Name (Index : INTEGER) : STRING;
  2101. BEGIN
  2102. IF (Index < 0) OR (Index >= Count)
  2103. THEN Result := ''
  2104. ELSE Result := TNvpNode (Items [Index]).Name;
  2105. END;
  2106. (*
  2107. ===============================================================================================
  2108. TAttrList
  2109. List of Attributes. The "Analyze" method extracts the Attributes from the given Buffer.
  2110. Is used for extraction of Attributes in Start-Tags, Empty-Element Tags and the "pseudo"
  2111. attributes in XML Prologs, Text Declarations and PIs.
  2112. ===============================================================================================
  2113. *)
  2114. PROCEDURE TAttrList.Analyze (Start : PChar; VAR Final : PChar);
  2115. // Analyze the Buffer for Attribute=Name pairs.
  2116. // Terminates when there is a character which is not IN CNameStart
  2117. // (e.g. '?>' or '>' or '/>')
  2118. TYPE
  2119. TPhase = (phName, phEq, phValue);
  2120. VAR
  2121. Phase : TPhase;
  2122. F : PChar;
  2123. Name : STRING;
  2124. Value : STRING;
  2125. Attr : TAttr;
  2126. BEGIN
  2127. Clear;
  2128. Phase := phName;
  2129. Final := Start;
  2130. REPEAT
  2131. IF (Final^ = #0) OR (Final^ = '>') THEN BREAK;
  2132. IF NOT (Final^ IN CWhitespace) THEN
  2133. CASE Phase OF
  2134. phName : BEGIN
  2135. IF NOT (Final^ IN CNameStart) THEN EXIT;
  2136. ExtractName (Final, CWhitespace + ['=', '/'], F);
  2137. SetStringSF (Name, Final, F);
  2138. Final := F;
  2139. Phase := phEq;
  2140. END;
  2141. phEq : BEGIN
  2142. IF Final^ = '=' THEN
  2143. Phase := phValue
  2144. END;
  2145. phValue : BEGIN
  2146. IF Final^ IN CQuoteChar THEN BEGIN
  2147. ExtractQuote (Final, Value, F);
  2148. Attr := TAttr.Create;
  2149. Attr.Name := Name;
  2150. Attr.Value := Value;
  2151. Attr.ValueType := vtNormal;
  2152. Add (Attr);
  2153. Final := F;
  2154. Phase := phName;
  2155. END;
  2156. END;
  2157. END;
  2158. INC (Final);
  2159. UNTIL FALSE;
  2160. END;
  2161. (*
  2162. ===============================================================================================
  2163. TElemList
  2164. List of TElemDef nodes.
  2165. ===============================================================================================
  2166. *)
  2167. FUNCTION TElemList.Node (Name : STRING) : TElemDef;
  2168. // Binary search for the Node with the given Name
  2169. VAR
  2170. L, H : INTEGER; // Low, High Limit
  2171. T, C : INTEGER; // Test Index, Comparison result
  2172. Last : INTEGER; // Last Test Index
  2173. BEGIN
  2174. IF Count=0 THEN BEGIN
  2175. Result := NIL;
  2176. EXIT;
  2177. END;
  2178. L := 0;
  2179. H := Count;
  2180. Last := -1;
  2181. REPEAT
  2182. T := (L+H) DIV 2;
  2183. IF T=Last THEN BREAK;
  2184. Result := TElemDef (Items [T]);
  2185. C := CompareStr (Result.Name, Name);
  2186. IF C = 0 THEN EXIT
  2187. ELSE IF C < 0 THEN L := T
  2188. ELSE H := T;
  2189. Last := T;
  2190. UNTIL FALSE;
  2191. Result := NIL;
  2192. END;
  2193. PROCEDURE TElemList.Add (Node : TElemDef);
  2194. VAR
  2195. I : INTEGER;
  2196. BEGIN
  2197. FOR I := Count-1 DOWNTO 0 DO
  2198. IF Node.Name > TElemDef (Items [I]).Name THEN BEGIN
  2199. Insert (I+1, Node);
  2200. EXIT;
  2201. END;
  2202. Insert (0, Node);
  2203. END;
  2204. (*
  2205. ===============================================================================================
  2206. TScannerXmlParser
  2207. A TXmlParser descendant for the TCustomXmlScanner component
  2208. ===============================================================================================
  2209. *)
  2210. TYPE
  2211. TScannerXmlParser = CLASS (TXmlParser)
  2212. Scanner : TCustomXmlScanner;
  2213. CONSTRUCTOR Create (TheScanner : TCustomXmlScanner);
  2214. FUNCTION LoadExternalEntity (SystemId, PublicId,
  2215. Notation : STRING) : TXmlParser; OVERRIDE;
  2216. FUNCTION TranslateEncoding (CONST Source : STRING) : STRING; OVERRIDE;
  2217. PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); OVERRIDE;
  2218. END;
  2219. CONSTRUCTOR TScannerXmlParser.Create (TheScanner : TCustomXmlScanner);
  2220. BEGIN
  2221. INHERITED Create;
  2222. Scanner := TheScanner;
  2223. END;
  2224. FUNCTION TScannerXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : STRING) : TXmlParser;
  2225. BEGIN
  2226. IF Assigned (Scanner.FOnLoadExternal)
  2227. THEN Scanner.FOnLoadExternal (Scanner, SystemId, PublicId, Notation, Result)
  2228. ELSE Result := INHERITED LoadExternalEntity (SystemId, PublicId, Notation);
  2229. END;
  2230. FUNCTION TScannerXmlParser.TranslateEncoding (CONST Source : STRING) : STRING;
  2231. BEGIN
  2232. IF Assigned (Scanner.FOnTranslateEncoding)
  2233. THEN Result := Scanner.FOnTranslateEncoding (Scanner, CurEncoding, Source)
  2234. ELSE Result := INHERITED TranslateEncoding (Source);
  2235. END;
  2236. PROCEDURE TScannerXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec);
  2237. BEGIN
  2238. WITH DtdElementRec DO
  2239. CASE ElementType OF
  2240. deElement : Scanner.WhenElement (ElemDef);
  2241. deAttList : Scanner.WhenAttList (ElemDef);
  2242. deEntity : Scanner.WhenEntity (EntityDef);
  2243. deNotation : Scanner.WhenNotation (NotationDef);
  2244. dePI : Scanner.WhenPI (STRING (Target), STRING (Content), AttrList);
  2245. deComment : Scanner.WhenComment (StrSFPas (Start, Final));
  2246. deError : Scanner.WhenDtdError (Pos);
  2247. END;
  2248. END;
  2249. (*
  2250. ===============================================================================================
  2251. TCustomXmlScanner
  2252. ===============================================================================================
  2253. *)
  2254. CONSTRUCTOR TCustomXmlScanner.Create (AOwner: TComponent);
  2255. BEGIN
  2256. INHERITED;
  2257. FXmlParser := TScannerXmlParser.Create (Self);
  2258. END;
  2259. DESTRUCTOR TCustomXmlScanner.Destroy;
  2260. BEGIN
  2261. FXmlParser.Free;
  2262. INHERITED;
  2263. END;
  2264. PROCEDURE TCustomXmlScanner.LoadFromFile (Filename : TFilename);
  2265. // Load XML Document from file
  2266. BEGIN
  2267. FXmlParser.LoadFromFile (Filename);
  2268. END;
  2269. PROCEDURE TCustomXmlScanner.LoadFromBuffer (Buffer : PChar);
  2270. // Load XML Document from buffer
  2271. BEGIN
  2272. FXmlParser.LoadFromBuffer (Buffer);
  2273. END;
  2274. PROCEDURE TCustomXmlScanner.SetBuffer (Buffer : PChar);
  2275. // Refer to Buffer
  2276. BEGIN
  2277. FXmlParser.SetBuffer (Buffer);
  2278. END;
  2279. FUNCTION TCustomXmlScanner.GetFilename : TFilename;
  2280. BEGIN
  2281. Result := FXmlParser.Source;
  2282. END;
  2283. FUNCTION TCustomXmlScanner.GetNormalize : BOOLEAN;
  2284. BEGIN
  2285. Result := FXmlParser.Normalize;
  2286. END;
  2287. PROCEDURE TCustomXmlScanner.SetNormalize (Value : BOOLEAN);
  2288. BEGIN
  2289. FXmlParser.Normalize := Value;
  2290. END;
  2291. PROCEDURE TCustomXmlScanner.WhenXmlProlog(XmlVersion, Encoding: STRING; Standalone : BOOLEAN);
  2292. // Is called when the parser has parsed the <? xml ?> declaration of the prolog
  2293. BEGIN
  2294. IF Assigned (FOnXmlProlog) THEN FOnXmlProlog (Self, XmlVersion, Encoding, Standalone);
  2295. END;
  2296. PROCEDURE TCustomXmlScanner.WhenComment (Comment : STRING);
  2297. // Is called when the parser has parsed a <!-- comment -->
  2298. BEGIN
  2299. IF Assigned (FOnComment) THEN FOnComment (Self, Comment);
  2300. END;
  2301. PROCEDURE TCustomXmlScanner.WhenPI (Target, Content: STRING; Attributes : TAttrList);
  2302. // Is called when the parser has parsed a <?processing instruction ?>
  2303. BEGIN
  2304. IF Assigned (FOnPI) THEN FOnPI (Self, Target, Content, Attributes);
  2305. END;
  2306. PROCEDURE TCustomXmlScanner.WhenDtdRead (RootElementName : STRING);
  2307. // Is called when the parser has completely parsed the DTD
  2308. BEGIN
  2309. IF Assigned (FOnDtdRead) THEN FOnDtdRead (Self, RootElementName);
  2310. END;
  2311. PROCEDURE TCustomXmlScanner.WhenStartTag (TagName : STRING; Attributes : TAttrList);
  2312. // Is called when the parser has parsed a start tag like <p>
  2313. BEGIN
  2314. IF Assigned (FOnStartTag) THEN FOnStartTag (Self, TagName, Attributes);
  2315. END;
  2316. PROCEDURE TCustomXmlScanner.WhenEmptyTag (TagName : STRING; Attributes : TAttrList);
  2317. // Is called when the parser has parsed an Empty Element Tag like <br/>
  2318. BEGIN
  2319. IF Assigned (FOnEmptyTag) THEN FOnEmptyTag (Self, TagName, Attributes);
  2320. END;
  2321. PROCEDURE TCustomXmlScanner.WhenEndTag (TagName : STRING);
  2322. // Is called when the parser has parsed an End Tag like </p>
  2323. BEGIN
  2324. IF Assigned (FOnEndTag) THEN FOnEndTag (Self, TagName);
  2325. END;
  2326. PROCEDURE TCustomXmlScanner.WhenContent (Content : STRING);
  2327. // Is called when the parser has parsed an element's text content
  2328. BEGIN
  2329. IF Assigned (FOnContent) THEN FOnContent (Self, Content);
  2330. END;
  2331. PROCEDURE TCustomXmlScanner.WhenCData (Content : STRING);
  2332. // Is called when the parser has parsed a CDATA section
  2333. BEGIN
  2334. IF Assigned (FOnCData) THEN FOnCData (Self, Content);
  2335. END;
  2336. PROCEDURE TCustomXmlScanner.WhenElement (ElemDef : TElemDef);
  2337. // Is called when the parser has parsed an <!ELEMENT> definition
  2338. // inside the DTD
  2339. BEGIN
  2340. IF Assigned (FOnElement) THEN FOnElement (Self, ElemDef);
  2341. END;
  2342. PROCEDURE TCustomXmlScanner.WhenAttList (ElemDef : TElemDef);
  2343. // Is called when the parser has parsed an <!ATTLIST> definition
  2344. // inside the DTD
  2345. BEGIN
  2346. IF Assigned (FOnAttList) THEN FOnAttList (Self, ElemDef);
  2347. END;
  2348. PROCEDURE TCustomXmlScanner.WhenEntity (EntityDef : TEntityDef);
  2349. // Is called when the parser has parsed an <!ENTITY> definition
  2350. // inside the DTD
  2351. BEGIN
  2352. IF Assigned (FOnEntity) THEN FOnEntity (Self, EntityDef);
  2353. END;
  2354. PROCEDURE TCustomXmlScanner.WhenNotation (NotationDef : TNotationDef);
  2355. // Is called when the parser has parsed a <!NOTATION> definition
  2356. // inside the DTD
  2357. BEGIN
  2358. IF Assigned (FOnNotation) THEN FOnNotation (Self, NotationDef);
  2359. END;
  2360. PROCEDURE TCustomXmlScanner.WhenDtdError (ErrorPos : PChar);
  2361. // Is called when the parser has found an Error in the DTD
  2362. BEGIN
  2363. IF Assigned (FOnDtdError) THEN FOnDtdError (Self, ErrorPos);
  2364. END;
  2365. PROCEDURE TCustomXmlScanner.Execute;
  2366. // Perform scanning
  2367. // Scanning is done synchronously, i.e. you can expect events to be triggered
  2368. // in the order of the XML data stream. Execute will finish when the whole XML
  2369. // document has been scanned or when the StopParser property has been set to TRUE.
  2370. BEGIN
  2371. FStopParser := FALSE;
  2372. FXmlParser.StartScan;
  2373. WHILE FXmlParser.Scan AND (NOT FStopParser) DO
  2374. CASE FXmlParser.CurPartType OF
  2375. ptNone : ;
  2376. ptXmlProlog : WhenXmlProlog (FXmlParser.XmlVersion, FXmlParser.Encoding, FXmlParser.Standalone);
  2377. ptComment : WhenComment (StrSFPas (FXmlParser.CurStart, FXmlParser.CurFinal));
  2378. ptPI : WhenPI (FXmlParser.CurName, FXmlParser.CurContent, FXmlParser.CurAttr);
  2379. ptDtdc : WhenDtdRead (FXmlParser.RootName);
  2380. ptStartTag : WhenStartTag (FXmlParser.CurName, FXmlParser.CurAttr);
  2381. ptEmptyTag : WhenEmptyTag (FXmlParser.CurName, FXmlParser.CurAttr);
  2382. ptEndTag : WhenEndTag (FXmlParser.CurName);
  2383. ptContent : WhenContent (FXmlParser.CurContent);
  2384. ptCData : WhenCData (FXmlParser.CurContent);
  2385. END;
  2386. END;
  2387. END.