libxmlparser.pas 107 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651
  1. (**
  2. ===============================================================================================
  3. Name : LibXmlParser
  4. ===============================================================================================
  5. Project : All Projects
  6. ===============================================================================================
  7. Subject : Progressive XML Parser for all types of XML Files
  8. ===============================================================================================
  9. Author : Stefan Heymann
  10. Eschenweg 3
  11. 72076 Tübingen
  12. GERMANY
  13. E-Mail: [email protected]
  14. URL: www.destructor.de
  15. ===============================================================================================
  16. Source, Legals ("Licence")
  17. --------------------------
  18. The official site to get this parser is http://www.destructor.de/
  19. Usage and Distribution of this Source Code is ruled by the
  20. "Destructor.de Source code Licence" (DSL) which comes with this file or
  21. can be downloaded at http://www.destructor.de/
  22. IN SHORT: Usage and distribution of this source code is free.
  23. You use it completely on your own risk.
  24. Postcardware
  25. ------------
  26. If you like this code, please send a postcard of your city to my above address.
  27. ===============================================================================================
  28. !!! All parts of this code which are not finished or not conforming exactly to
  29. the XmlSpec are marked with three exclamation marks
  30. -!- Parts where the parser may be able to detect errors in the document's syntax are
  31. marked with the dash-exlamation mark-dash sequence.
  32. ===============================================================================================
  33. Terminology:
  34. ------------
  35. - Start: Start of a buffer part
  36. - Final: End (last character) of a buffer part
  37. - DTD: Document Type Definition
  38. - DTDc: Document Type Declaration
  39. - XMLSpec: The current W3C XML Recommendation (version 1.0 as of 1998-02-10), Chapter No.
  40. - Cur*: Fields concerning the "Current" part passed back by the "Scan" method
  41. ===============================================================================================
  42. Scanning the XML document
  43. -------------------------
  44. - Create TXmlParser Instance MyXml := TXmlParser.Create;
  45. - Load XML Document MyXml.LoadFromFile (Filename);
  46. - Start Scanning MyXml.StartScan;
  47. - Scan Loop WHILE MyXml.Scan DO
  48. - Test for Part Type CASE MyXml.CurPartType OF
  49. - Handle Parts ... : ;;;
  50. - Handle Parts ... : ;;;
  51. - Handle Parts ... : ;;;
  52. END;
  53. - Destroy MyXml.Free;
  54. ===============================================================================================
  55. Loading the XML document
  56. ------------------------
  57. You can load the XML document from a file with the "LoadFromFile" method.
  58. It is beyond the scope of this parser to perform HTTP or FTP accesses. If you want your
  59. application to handle such requests (URLs), you can load the XML via HTTP or FTP or whatever
  60. protocol and hand over the data buffer using the "LoadFromBuffer" or "SetBuffer" method.
  61. "LoadFromBuffer" loads the internal buffer of TXmlParser with the given null-terminated
  62. string, thereby creating a copy of that buffer.
  63. "SetBuffer" just takes the pointer to another buffer, which means that the given
  64. buffer pointer must be valid while the document is accessed via TXmlParser.
  65. ===============================================================================================
  66. Encodings:
  67. ----------
  68. This XML parser kind of "understands" the following encodings:
  69. - UTF-8
  70. - ISO-8859-1
  71. - Windows-1252
  72. Any flavor of multi-byte characters (and this includes UTF-16) is not supported. Sorry.
  73. Every string which has to be passed to the application passes the virtual method
  74. "TranslateEncoding" which translates the string from the current encoding (stored in
  75. "CurEncoding") into the encoding the application wishes to receive.
  76. The "TranslateEncoding" method that is built into TXmlParser assumes that the application
  77. wants to receive Windows ANSI (Windows-1252, about the same as ISO-8859-1) and is able
  78. to convert UTF-8 and ISO-8859-1 encodings.
  79. For other source and target encodings, you will have to override "TranslateEncoding".
  80. ===============================================================================================
  81. Buffer Handling
  82. ---------------
  83. - The document must be loaded completely into a piece of RAM
  84. - All character positions are referenced by PAnsiChar pointers
  85. - The TXmlParser instance can either "own" the buffer itself (then, FBufferSize is > 0)
  86. or reference the buffer of another instance or object (then, FBuffersize is 0 and
  87. FBuffer is not NIL)
  88. - The Property DocBuffer passes back a pointer to the first byte of the document. If there
  89. is no document stored (FBuffer is NIL), the DocBuffer returns a pointer to a NULL character.
  90. ===============================================================================================
  91. Whitespace Handling
  92. -------------------
  93. The TXmlParser property "PackSpaces" determines how Whitespace is returned in Text Content:
  94. While PackSpaces is true, all leading and trailing whitespace characters are trimmed of, all
  95. Whitespace is converted to Space #x20 characters and contiguous Whitespace characters are
  96. compressed to one.
  97. If the "Scan" method reports a ptContent part, the application can get the original text
  98. with all whitespace characters by extracting the characters from "CurStart" to "CurFinal".
  99. If the application detects an xml:space attribute, it can set "PackSpaces" accordingly or
  100. use CurStart/CurFinal.
  101. Please note that TXmlParser does _not_ normalize Line Breaks to single LineFeed characters
  102. as the XmlSpec requires (XmlSpec 2.11).
  103. The xml:space attribute is not handled by TXmlParser. This is on behalf of the application.
  104. ===============================================================================================
  105. Non-XML-Conforming
  106. ------------------
  107. TXmlParser does not conform 100 % exactly to the XmlSpec:
  108. - UTF-16 is not supported (XmlSpec 2.2)
  109. (Workaround: Convert UTF-16 to UTF-8 and hand the buffer over to TXmlParser)
  110. - As the parser only works with single byte strings, all Unicode characters > 255
  111. can currently not be handled correctly.
  112. - Line breaks are not normalized to single Linefeed #x0A characters (XmlSpec 2.11)
  113. (Workaround: The Application can access the text contents on its own [CurStart, CurFinal],
  114. thereby applying every normalization it wishes to)
  115. - The attribute value normalization does not work exactly as defined in the
  116. Second Edition of the XML 1.0 specification.
  117. - See also the code parts marked with three consecutive exclamation marks. These are
  118. parts which are not finished in the current code release.
  119. This list may be incomplete, so it may grow if I get to know any other points.
  120. As work on the parser proceeds, this list may also shrink.
  121. ===============================================================================================
  122. Things Todo
  123. -----------
  124. - Introduce a new event/callback which is called when there is an unresolvable
  125. entity or character reference
  126. - Support Unicode
  127. - Use Streams instead of reading the whole XML into memory
  128. ===============================================================================================
  129. Change History, Version numbers
  130. -------------------------------
  131. The Date is given in ISO Year-Month-Day (YYYY-MM-DD) order.
  132. Versions are counted from 1.0.0 beginning with the version from 2000-03-16.
  133. Unreleased versions don't get a version number.
  134. Date Author Version Changes
  135. -----------------------------------------------------------------------------------------------
  136. 2000-03-16 HeySt 1.0.0 Start
  137. 2000-03-28 HeySt 1.0.1 Initial Publishing of TXmlParser on the destructor.de Web Site
  138. 2000-03-30 HeySt 1.0.2 TXmlParser.AnalyzeCData: Call "TranslateEncoding" for CurContent
  139. 2000-03-31 HeySt 1.0.3 Deleted the StrPosE function (was not needed anyway)
  140. 2000-04-04 HeySt 1.0.4 TDtdElementRec modified: Start/Final for all Elements;
  141. Should be backwards compatible.
  142. AnalyzeDtdc: Set CurPartType to ptDtdc
  143. 2000-04-23 HeySt 1.0.5 New class TObjectList. Eliminated reference to the Delphi 5
  144. "Contnrs" unit so LibXmlParser is Delphi 4 compatible.
  145. 2000-07-03 HeySt 1.0.6 TNvpNode: Added Constructor
  146. 2000-07-11 HeySt 1.0.7 Removed "Windows" from USES clause
  147. Added three-exclamation-mark comments for Utf8ToAnsi/AnsiToUtf8
  148. Added three-exclamation-mark comments for CHR function calls
  149. 2000-07-23 HeySt 1.0.8 TXmlParser.Clear: CurAttr.Clear; EntityStack.Clear;
  150. (This was not a bug; just defensive programming)
  151. 2000-07-29 HeySt 1.0.9 TNvpList: Added methods: Node(Index), Value(Index), Name(Index);
  152. 2000-10-07 HeySt Introduced Conditional Defines
  153. Uses Contnrs unit and its TObjectList class again for
  154. Delphi 5 and newer versions
  155. 2001-01-30 HeySt Introduced Version Numbering
  156. Made LoadFromFile and LoadFromBuffer BOOLEAN functions
  157. Introduced FileMode parameter for LoadFromFile
  158. BugFix: TAttrList.Analyze: Must add CWhitespace to ExtractName call
  159. Comments worked over
  160. 2001-02-28 HeySt 1.0.10 Completely worked over and tested the UTF-8 functions
  161. Fixed a bug in TXmlParser.Scan which caused it to start over when it
  162. was called after the end of scanning, resulting in an endless loop
  163. TEntityStack is now a TObjectList instead of TList
  164. 2001-07-03 HeySt 1.0.11 Updated Compiler Version IFDEFs for Kylix
  165. 2001-07-11 HeySt 1.0.12 New TCustomXmlScanner component (taken over from LibXmlComps.pas)
  166. 2001-07-14 HeySt 1.0.13 Bugfix TCustomXmlScanner.FOnTranslateEncoding
  167. 2001-10-22 HeySt Don't clear CurName anymore when the parser finds a CDATA section.
  168. 2001-12-03 HeySt 1.0.14 TObjectList.Clear: Make call to INHERITED method (fixes a memory leak)
  169. 2001-12-05 HeySt 1.0.15 TObjectList.Clear: removed call to INHERITED method
  170. TObjectList.Destroy: Inserted SetCapacity call.
  171. Reduces need for frequent re-allocation of pointer buffer
  172. Dedicated to my father, Theodor Heymann
  173. 2002-06-26 HeySt 1.0.16 TXmlParser.Scan: Fixed a bug with PIs whose name is beginning
  174. with 'xml'. Thanks to Uwe Kamm for submitting this bug.
  175. The CurEncoding property is now always in uppercase letters (the XML
  176. spec wants it to be treated case independently so when it's uppercase
  177. comparisons are faster)
  178. 2002-03-04 HeySt 1.0.17 Included an IFDEF for Delphi 7 (VER150) and Kylix
  179. There is a new symbol HAS_CONTNRS_UNIT which is used now to
  180. distinguish between IDEs which come with the Contnrs unit and
  181. those that don't.
  182. *)
  183. {$IFNDEF FPC_DOTTEDUNITS}
  184. UNIT libxmlparser;
  185. {$ENDIF FPC_DOTTEDUNITS}
  186. {$I jedi-sdl.inc}
  187. INTERFACE
  188. {$IFDEF FPC_DOTTEDUNITS}
  189. USES
  190. System.SysUtils, System.Classes,
  191. (*$IFDEF HAS_CONTNRS_UNIT *) // The System.Contnrs Unit was introduced in Delphi 5
  192. System.Contnrs,
  193. (*$ENDIF*)
  194. System.Math;
  195. {$ELSE FPC_DOTTEDUNITS}
  196. USES
  197. SysUtils, Classes,
  198. (*$IFDEF HAS_CONTNRS_UNIT *) // The Contnrs Unit was introduced in Delphi 5
  199. Contnrs,
  200. (*$ENDIF*)
  201. Math;
  202. {$ENDIF FPC_DOTTEDUNITS}
  203. CONST
  204. CVersion = '1.0.17'; // This variable will be updated for every release
  205. // (I hope, I won't forget to do it everytime ...)
  206. TYPE
  207. TPartType = // --- Document Part Types
  208. (ptNone, // Nothing
  209. ptXmlProlog, // XML Prolog XmlSpec 2.8 / 4.3.1
  210. ptComment, // Comment XmlSpec 2.5
  211. ptPI, // Processing Instruction XmlSpec 2.6
  212. ptDtdc, // Document Type Declaration XmlSpec 2.8
  213. ptStartTag, // Start Tag XmlSpec 3.1
  214. ptEmptyTag, // Empty-Element Tag XmlSpec 3.1
  215. ptEndTag, // End Tag XmlSpec 3.1
  216. ptContent, // Text Content between Tags
  217. ptCData); // CDATA Section XmlSpec 2.7
  218. TDtdElemType = // --- DTD Elements
  219. (deElement, // !ELEMENT declaration
  220. deAttList, // !ATTLIST declaration
  221. deEntity, // !ENTITY declaration
  222. deNotation, // !NOTATION declaration
  223. dePI, // PI in DTD
  224. deComment, // Comment in DTD
  225. deError); // Error found in the DTD
  226. TYPE
  227. TAttrList = CLASS;
  228. TEntityStack = CLASS;
  229. TNvpList = CLASS;
  230. TElemDef = CLASS;
  231. TElemList = CLASS;
  232. TEntityDef = CLASS;
  233. TNotationDef = CLASS;
  234. TDtdElementRec = RECORD // --- This Record is returned by the DTD parser callback function
  235. Start, Final : PAnsiChar; // Start/End of the Element's Declaration
  236. CASE ElementType : TDtdElemType OF // Type of the Element
  237. deElement, // <!ELEMENT>
  238. deAttList : (ElemDef : TElemDef); // <!ATTLIST>
  239. deEntity : (EntityDef : TEntityDef); // <!ENTITY>
  240. deNotation : (NotationDef : TNotationDef); // <!NOTATION>
  241. dePI : (Target : PAnsiChar; // <?PI ?>
  242. Content : PAnsiChar;
  243. AttrList : TAttrList);
  244. deError : (Pos : PAnsiChar); // Error
  245. // deComment : ((No additional fields here)); // <!-- Comment -->
  246. END;
  247. TXmlParser = CLASS // --- Internal Properties and Methods
  248. PROTECTED
  249. FBuffer : PAnsiChar; // NIL if there is no buffer available
  250. FBufferSize : INTEGER; // 0 if the buffer is not owned by the Document instance
  251. FSource : AnsiString; // Name of Source of document. Filename for Documents loaded with LoadFromFile
  252. FXmlVersion : AnsiString; // XML version from Document header. Default is '1.0'
  253. FEncoding : AnsiString; // Encoding from Document header. Default is 'UTF-8'
  254. FStandalone : BOOLEAN; // Standalone declaration from Document header. Default is 'yes'
  255. FRootName : AnsiString; // Name of the Root Element (= DTD name)
  256. FDtdcFinal : PAnsiChar; // Pointer to the '>' character terminating the DTD declaration
  257. FNormalize : BOOLEAN; // If true: Pack Whitespace and don't return empty contents
  258. EntityStack : TEntityStack; // Entity Stack for Parameter and General Entities
  259. FCurEncoding : AnsiString; // Current Encoding during parsing (always uppercase)
  260. PROCEDURE AnalyzeProlog; // Analyze XML Prolog or Text Declaration
  261. PROCEDURE AnalyzeComment (Start : PAnsiChar; VAR Final : PAnsiChar); // Analyze Comments
  262. PROCEDURE AnalyzePI (Start : PAnsiChar; VAR Final : PAnsiChar); // Analyze Processing Instructions (PI)
  263. PROCEDURE AnalyzeDtdc; // Analyze Document Type Declaration
  264. PROCEDURE AnalyzeDtdElements (Start : PAnsiChar; VAR Final : PAnsiChar); // Analyze DTD declarations
  265. PROCEDURE AnalyzeTag; // Analyze Start/End/Empty-Element Tags
  266. PROCEDURE AnalyzeCData; // Analyze CDATA Sections
  267. PROCEDURE AnalyzeText (VAR IsDone : BOOLEAN); // Analyze Text Content between Tags
  268. PROCEDURE AnalyzeElementDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  269. PROCEDURE AnalyzeAttListDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  270. PROCEDURE AnalyzeEntityDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  271. PROCEDURE AnalyzeNotationDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  272. PROCEDURE PushPE (VAR Start : PAnsiChar);
  273. PROCEDURE ReplaceCharacterEntities (VAR Str : AnsiString);
  274. PROCEDURE ReplaceParameterEntities (VAR Str : AnsiString);
  275. PROCEDURE ReplaceGeneralEntities (VAR Str : AnsiString);
  276. FUNCTION GetDocBuffer : PAnsiChar; // Returns FBuffer or a pointer to a NUL AnsiChar if Buffer is empty
  277. PUBLIC // --- Document Properties
  278. PROPERTY XmlVersion : AnsiString READ FXmlVersion; // XML version from the Document Prolog
  279. PROPERTY Encoding : AnsiString READ FEncoding; // Document Encoding from Prolog
  280. PROPERTY Standalone : BOOLEAN READ FStandalone; // Standalone Declaration from Prolog
  281. PROPERTY RootName : AnsiString READ FRootName; // Name of the Root Element
  282. PROPERTY Normalize : BOOLEAN READ FNormalize WRITE FNormalize; // True if Content is to be normalized
  283. PROPERTY Source : AnsiString READ FSource; // Name of Document Source (Filename)
  284. PROPERTY DocBuffer : PAnsiChar READ GetDocBuffer; // Returns document buffer
  285. PUBLIC // --- DTD Objects
  286. Elements : TElemList; // Elements: List of TElemDef (contains Attribute Definitions)
  287. Entities : TNvpList; // General Entities: List of TEntityDef
  288. ParEntities : TNvpList; // Parameter Entities: List of TEntityDef
  289. Notations : TNvpList; // Notations: List of TNotationDef
  290. PUBLIC
  291. CONSTRUCTOR Create;
  292. DESTRUCTOR Destroy; OVERRIDE;
  293. // --- Document Handling
  294. FUNCTION LoadFromFile (Filename : AnsiString;
  295. FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN;
  296. // Loads Document from given file
  297. FUNCTION LoadFromBuffer (Buffer : PAnsiChar) : BOOLEAN; // Loads Document from another buffer
  298. PROCEDURE SetBuffer (Buffer : PAnsiChar); // References another buffer
  299. PROCEDURE Clear; // Clear Document
  300. PUBLIC
  301. // --- Scanning through the document
  302. CurPartType : TPartType; // Current Type
  303. CurName : AnsiString; // Current Name
  304. CurContent : AnsiString; // Current Normalized Content
  305. CurStart : PAnsiChar; // Current First character
  306. CurFinal : PAnsiChar; // Current Last character
  307. CurAttr : TAttrList; // Current Attribute List
  308. PROPERTY CurEncoding : AnsiString READ FCurEncoding; // Current Encoding
  309. PROCEDURE StartScan;
  310. FUNCTION Scan : BOOLEAN;
  311. // --- Events / Callbacks
  312. FUNCTION LoadExternalEntity (SystemId, PublicId,
  313. Notation : AnsiString) : TXmlParser; VIRTUAL;
  314. FUNCTION TranslateEncoding (CONST Source : AnsiString) : AnsiString; VIRTUAL;
  315. PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); VIRTUAL;
  316. END;
  317. TValueType = // --- Attribute Value Type
  318. (vtNormal, // Normal specified Attribute
  319. vtImplied, // #IMPLIED attribute value
  320. vtFixed, // #FIXED attribute value
  321. vtDefault); // Attribute value from default value in !ATTLIST declaration
  322. TAttrDefault = // --- Attribute Default Type
  323. (adDefault, // Normal default value
  324. adRequired, // #REQUIRED attribute
  325. adImplied, // #IMPLIED attribute
  326. adFixed); // #FIXED attribute
  327. TAttrType = // --- Type of attribute
  328. (atUnknown, // Unknown type
  329. atCData, // Character data only
  330. atID, // ID
  331. atIdRef, // ID Reference
  332. atIdRefs, // Several ID References, separated by Whitespace
  333. atEntity, // Name of an unparsed Entity
  334. atEntities, // Several unparsed Entity names, separated by Whitespace
  335. atNmToken, // Name Token
  336. atNmTokens, // Several Name Tokens, separated by Whitespace
  337. atNotation, // A selection of Notation names (Unparsed Entity)
  338. atEnumeration); // Enumeration
  339. TElemType = // --- Element content type
  340. (etEmpty, // Element is always empty
  341. etAny, // Element can have any mixture of PCDATA and any elements
  342. etChildren, // Element must contain only elements
  343. etMixed); // Mixed PCDATA and elements
  344. (*$IFDEF HAS_CONTNRS_UNIT *)
  345. TObjectList = Contnrs.TObjectList; // Re-Export this identifier
  346. (*$ELSE *)
  347. TObjectList = CLASS (TList)
  348. DESTRUCTOR Destroy; OVERRIDE;
  349. PROCEDURE Delete (Index : INTEGER);
  350. PROCEDURE Clear; OVERRIDE;
  351. END;
  352. (*$ENDIF *)
  353. TNvpNode = CLASS // Name-Value Pair Node
  354. Name : AnsiString;
  355. Value : AnsiString;
  356. CONSTRUCTOR Create (TheName : AnsiString = ''; TheValue : AnsiString = '');
  357. END;
  358. TNvpList = CLASS (TObjectList) // Name-Value Pair List
  359. PROCEDURE Add (Node : TNvpNode);
  360. FUNCTION Node (Name : AnsiString) : TNvpNode; OVERLOAD;
  361. FUNCTION Node (Index : INTEGER) : TNvpNode; OVERLOAD;
  362. FUNCTION Value (Name : AnsiString) : AnsiString; OVERLOAD;
  363. FUNCTION Value (Index : INTEGER) : AnsiString; OVERLOAD;
  364. FUNCTION Name (Index : INTEGER) : AnsiString;
  365. END;
  366. TAttr = CLASS (TNvpNode) // Attribute of a Start-Tag or Empty-Element-Tag
  367. ValueType : TValueType;
  368. AttrType : TAttrType;
  369. END;
  370. TAttrList = CLASS (TNvpList) // List of Attributes
  371. PROCEDURE Analyze (Start : PAnsiChar; VAR Final : PAnsiChar);
  372. END;
  373. TEntityStack = CLASS (TObjectList) // Stack where current position is stored before parsing entities
  374. PROTECTED
  375. Owner : TXmlParser;
  376. PUBLIC
  377. CONSTRUCTOR Create (TheOwner : TXmlParser);
  378. PROCEDURE Push (LastPos : PAnsiChar); OVERLOAD;
  379. PROCEDURE Push (Instance : TObject; LastPos : PAnsiChar); OVERLOAD;
  380. FUNCTION Pop : PAnsiChar; // Returns next AnsiChar or NIL if EOF is reached. Frees Instance.
  381. END;
  382. TAttrDef = CLASS (TNvpNode) // Represents a <!ATTLIST Definition. "Value" is the default value
  383. TypeDef : AnsiString; // Type definition from the DTD
  384. Notations : AnsiString; // Notation List, separated by pipe symbols '|'
  385. AttrType : TAttrType; // Attribute Type
  386. DefaultType : TAttrDefault; // Default Type
  387. END;
  388. TElemDef = CLASS (TNvpList) // Represents a <!ELEMENT Definition. Is a list of TAttrDef-Nodes
  389. Name : AnsiString; // Element name
  390. ElemType : TElemType; // Element type
  391. Definition : AnsiString; // Element definition from DTD
  392. END;
  393. TElemList = CLASS (TObjectList) // List of TElemDef nodes
  394. FUNCTION Node (Name : AnsiString) : TElemDef;
  395. PROCEDURE Add (Node : TElemDef);
  396. END;
  397. TEntityDef = CLASS (TNvpNode) // Represents a <!ENTITY Definition.
  398. SystemId : AnsiString;
  399. PublicId : AnsiString;
  400. NotationName : AnsiString;
  401. END;
  402. TNotationDef = CLASS (TNvpNode) // Represents a <!NOTATION Definition. Value is the System ID
  403. PublicId : AnsiString;
  404. END;
  405. TCharset = SET OF AnsiChar;
  406. CONST
  407. CWhitespace = [#32, #9, #13, #10]; // Whitespace characters (XmlSpec 2.3)
  408. CLetter = [#$41..#$5A, #$61..#$7A, #$C0..#$D6, #$D8..#$F6, #$F8..#$FF];
  409. CDigit = [#$30..#$39];
  410. CNameChar = CLetter + CDigit + ['.', '-', '_', ':', #$B7];
  411. CNameStart = CLetter + ['_', ':'];
  412. CQuoteChar = ['"', ''''];
  413. CPubidChar = [#32, ^M, ^J, #9, 'a'..'z', 'A'..'Z', '0'..'9',
  414. '-', '''', '(', ')', '+', ',', '.', '/', ':',
  415. '=', '?', ';', '!', '*', '#', '@', '$', '_', '%'];
  416. CDStart = '<![CDATA[';
  417. CDEnd = ']]>';
  418. // --- Name Constants for the above enumeration types
  419. CPartType_Name : ARRAY [TPartType] OF AnsiString =
  420. ('', 'XML Prolog', 'Comment', 'PI',
  421. 'DTD Declaration', 'Start Tag', 'Empty Tag', 'End Tag',
  422. 'Text', 'CDATA');
  423. CValueType_Name : ARRAY [TValueType] OF AnsiString = ('Normal', 'Implied', 'Fixed', 'Default');
  424. CAttrDefault_Name : ARRAY [TAttrDefault] OF AnsiString = ('Default', 'Required', 'Implied', 'Fixed');
  425. CElemType_Name : ARRAY [TElemType] OF AnsiString = ('Empty', 'Any', 'Childs only', 'Mixed');
  426. CAttrType_Name : ARRAY [TAttrType] OF AnsiString = ('Unknown', 'CDATA',
  427. 'ID', 'IDREF', 'IDREFS',
  428. 'ENTITY', 'ENTITIES',
  429. 'NMTOKEN', 'NMTOKENS',
  430. 'Notation', 'Enumeration');
  431. FUNCTION ConvertWs (Source: AnsiString; PackWs: BOOLEAN) : AnsiString; // Convert WS to spaces #x20
  432. PROCEDURE SetStringSF (VAR S : AnsiString; BufferStart, BufferFinal : PAnsiChar); // SetString by Start/Final of buffer
  433. FUNCTION StrSFPas (Start, Finish : PAnsiChar) : AnsiString; // Convert buffer part to Pascal string
  434. FUNCTION TrimWs (Source : AnsiString) : AnsiString; // Trim Whitespace
  435. FUNCTION AnsiToUtf8 (Source : ANSISTRING) : AnsiString; // Convert Win-1252 to UTF-8
  436. FUNCTION Utf8ToAnsi (Source : AnsiString; UnknownChar : AnsiChar = '¿') : ANSISTRING; // Convert UTF-8 to Win-1252
  437. (*
  438. ===============================================================================================
  439. TCustomXmlScanner event based component wrapper for TXmlParser
  440. ===============================================================================================
  441. *)
  442. TYPE
  443. TCustomXmlScanner = CLASS;
  444. TXmlPrologEvent = PROCEDURE (Sender : TObject; XmlVersion, Encoding: AnsiString; Standalone : BOOLEAN) OF OBJECT;
  445. TCommentEvent = PROCEDURE (Sender : TObject; Comment : AnsiString) OF OBJECT;
  446. TPIEvent = PROCEDURE (Sender : TObject; Target, Content: AnsiString; Attributes : TAttrList) OF OBJECT;
  447. TDtdEvent = PROCEDURE (Sender : TObject; RootElementName : AnsiString) OF OBJECT;
  448. TStartTagEvent = PROCEDURE (Sender : TObject; TagName : AnsiString; Attributes : TAttrList) OF OBJECT;
  449. TEndTagEvent = PROCEDURE (Sender : TObject; TagName : AnsiString) OF OBJECT;
  450. TContentEvent = PROCEDURE (Sender : TObject; Content : AnsiString) OF OBJECT;
  451. TElementEvent = PROCEDURE (Sender : TObject; ElemDef : TElemDef) OF OBJECT;
  452. TEntityEvent = PROCEDURE (Sender : TObject; EntityDef : TEntityDef) OF OBJECT;
  453. TNotationEvent = PROCEDURE (Sender : TObject; NotationDef : TNotationDef) OF OBJECT;
  454. TErrorEvent = PROCEDURE (Sender : TObject; ErrorPos : PAnsiChar) OF OBJECT;
  455. TExternalEvent = PROCEDURE (Sender : TObject; SystemId, PublicId, NotationId : AnsiString;
  456. VAR Result : TXmlParser) OF OBJECT;
  457. TEncodingEvent = FUNCTION (Sender : TObject; CurrentEncoding, Source : AnsiString) : AnsiString OF OBJECT;
  458. TCustomXmlScanner = CLASS (TComponent)
  459. PROTECTED
  460. FXmlParser : TXmlParser;
  461. FOnXmlProlog : TXmlPrologEvent;
  462. FOnComment : TCommentEvent;
  463. FOnPI : TPIEvent;
  464. FOnDtdRead : TDtdEvent;
  465. FOnStartTag : TStartTagEvent;
  466. FOnEmptyTag : TStartTagEvent;
  467. FOnEndTag : TEndTagEvent;
  468. FOnContent : TContentEvent;
  469. FOnCData : TContentEvent;
  470. FOnElement : TElementEvent;
  471. FOnAttList : TElementEvent;
  472. FOnEntity : TEntityEvent;
  473. FOnNotation : TNotationEvent;
  474. FOnDtdError : TErrorEvent;
  475. FOnLoadExternal : TExternalEvent;
  476. FOnTranslateEncoding : TEncodingEvent;
  477. FStopParser : BOOLEAN;
  478. FUNCTION GetNormalize : BOOLEAN;
  479. PROCEDURE SetNormalize (Value : BOOLEAN);
  480. PROCEDURE WhenXmlProlog(XmlVersion, Encoding: AnsiString; Standalone : BOOLEAN); VIRTUAL;
  481. PROCEDURE WhenComment (Comment : AnsiString); VIRTUAL;
  482. PROCEDURE WhenPI (Target, Content: AnsiString; Attributes : TAttrList); VIRTUAL;
  483. PROCEDURE WhenDtdRead (RootElementName : AnsiString); VIRTUAL;
  484. PROCEDURE WhenStartTag (TagName : AnsiString; Attributes : TAttrList); VIRTUAL;
  485. PROCEDURE WhenEmptyTag (TagName : AnsiString; Attributes : TAttrList); VIRTUAL;
  486. PROCEDURE WhenEndTag (TagName : AnsiString); VIRTUAL;
  487. PROCEDURE WhenContent (Content : AnsiString); VIRTUAL;
  488. PROCEDURE WhenCData (Content : AnsiString); VIRTUAL;
  489. PROCEDURE WhenElement (ElemDef : TElemDef); VIRTUAL;
  490. PROCEDURE WhenAttList (ElemDef : TElemDef); VIRTUAL;
  491. PROCEDURE WhenEntity (EntityDef : TEntityDef); VIRTUAL;
  492. PROCEDURE WhenNotation (NotationDef : TNotationDef); VIRTUAL;
  493. PROCEDURE WhenDtdError (ErrorPos : PAnsiChar); VIRTUAL;
  494. PUBLIC
  495. CONSTRUCTOR Create (AOwner: TComponent); OVERRIDE;
  496. DESTRUCTOR Destroy; OVERRIDE;
  497. PROCEDURE LoadFromFile (Filename : TFilename); // Load XML Document from file
  498. PROCEDURE LoadFromBuffer (Buffer : PAnsiChar); // Load XML Document from buffer
  499. PROCEDURE SetBuffer (Buffer : PAnsiChar); // Refer to Buffer
  500. FUNCTION GetFilename : TFilename;
  501. PROCEDURE Execute; // Perform scanning
  502. PROTECTED
  503. PROPERTY XmlParser : TXmlParser READ FXmlParser;
  504. PROPERTY StopParser : BOOLEAN READ FStopParser WRITE FStopParser;
  505. PROPERTY Filename : TFilename READ GetFilename WRITE LoadFromFile;
  506. PROPERTY Normalize : BOOLEAN READ GetNormalize WRITE SetNormalize;
  507. PROPERTY OnXmlProlog : TXmlPrologEvent READ FOnXmlProlog WRITE FOnXmlProlog;
  508. PROPERTY OnComment : TCommentEvent READ FOnComment WRITE FOnComment;
  509. PROPERTY OnPI : TPIEvent READ FOnPI WRITE FOnPI;
  510. PROPERTY OnDtdRead : TDtdEvent READ FOnDtdRead WRITE FOnDtdRead;
  511. PROPERTY OnStartTag : TStartTagEvent READ FOnStartTag WRITE FOnStartTag;
  512. PROPERTY OnEmptyTag : TStartTagEvent READ FOnEmptyTag WRITE FOnEmptyTag;
  513. PROPERTY OnEndTag : TEndTagEvent READ FOnEndTag WRITE FOnEndTag;
  514. PROPERTY OnContent : TContentEvent READ FOnContent WRITE FOnContent;
  515. PROPERTY OnCData : TContentEvent READ FOnCData WRITE FOnCData;
  516. PROPERTY OnElement : TElementEvent READ FOnElement WRITE FOnElement;
  517. PROPERTY OnAttList : TElementEvent READ FOnAttList WRITE FOnAttList;
  518. PROPERTY OnEntity : TEntityEvent READ FOnEntity WRITE FOnEntity;
  519. PROPERTY OnNotation : TNotationEvent READ FOnNotation WRITE FOnNotation;
  520. PROPERTY OnDtdError : TErrorEvent READ FOnDtdError WRITE FOnDtdError;
  521. PROPERTY OnLoadExternal : TExternalEvent READ FOnLoadExternal WRITE FOnLoadExternal;
  522. PROPERTY OnTranslateEncoding : TEncodingEvent READ FOnTranslateEncoding WRITE FOnTranslateEncoding;
  523. END;
  524. (*
  525. ===============================================================================================
  526. IMPLEMENTATION
  527. ===============================================================================================
  528. *)
  529. IMPLEMENTATION
  530. (*
  531. ===============================================================================================
  532. Unicode and UTF-8 stuff
  533. ===============================================================================================
  534. *)
  535. CONST
  536. // --- Character Translation Table for Unicode <-> Win-1252
  537. WIN1252_UNICODE : ARRAY [$00..$FF] OF WORD = (
  538. $0000, $0001, $0002, $0003, $0004, $0005, $0006, $0007, $0008, $0009,
  539. $000A, $000B, $000C, $000D, $000E, $000F, $0010, $0011, $0012, $0013,
  540. $0014, $0015, $0016, $0017, $0018, $0019, $001A, $001B, $001C, $001D,
  541. $001E, $001F, $0020, $0021, $0022, $0023, $0024, $0025, $0026, $0027,
  542. $0028, $0029, $002A, $002B, $002C, $002D, $002E, $002F, $0030, $0031,
  543. $0032, $0033, $0034, $0035, $0036, $0037, $0038, $0039, $003A, $003B,
  544. $003C, $003D, $003E, $003F, $0040, $0041, $0042, $0043, $0044, $0045,
  545. $0046, $0047, $0048, $0049, $004A, $004B, $004C, $004D, $004E, $004F,
  546. $0050, $0051, $0052, $0053, $0054, $0055, $0056, $0057, $0058, $0059,
  547. $005A, $005B, $005C, $005D, $005E, $005F, $0060, $0061, $0062, $0063,
  548. $0064, $0065, $0066, $0067, $0068, $0069, $006A, $006B, $006C, $006D,
  549. $006E, $006F, $0070, $0071, $0072, $0073, $0074, $0075, $0076, $0077,
  550. $0078, $0079, $007A, $007B, $007C, $007D, $007E, $007F,
  551. $20AC, $0081, $201A, $0192, $201E, $2026, $2020, $2021, $02C6, $2030,
  552. $0160, $2039, $0152, $008D, $017D, $008F, $0090, $2018, $2019, $201C,
  553. $201D, $2022, $2013, $2014, $02DC, $2122, $0161, $203A, $0153, $009D,
  554. $017E, $0178, $00A0, $00A1, $00A2, $00A3, $00A4, $00A5, $00A6, $00A7,
  555. $00A8, $00A9, $00AA, $00AB, $00AC, $00AD, $00AE, $00AF, $00B0, $00B1,
  556. $00B2, $00B3, $00B4, $00B5, $00B6, $00B7, $00B8, $00B9, $00BA, $00BB,
  557. $00BC, $00BD, $00BE, $00BF, $00C0, $00C1, $00C2, $00C3, $00C4, $00C5,
  558. $00C6, $00C7, $00C8, $00C9, $00CA, $00CB, $00CC, $00CD, $00CE, $00CF,
  559. $00D0, $00D1, $00D2, $00D3, $00D4, $00D5, $00D6, $00D7, $00D8, $00D9,
  560. $00DA, $00DB, $00DC, $00DD, $00DE, $00DF, $00E0, $00E1, $00E2, $00E3,
  561. $00E4, $00E5, $00E6, $00E7, $00E8, $00E9, $00EA, $00EB, $00EC, $00ED,
  562. $00EE, $00EF, $00F0, $00F1, $00F2, $00F3, $00F4, $00F5, $00F6, $00F7,
  563. $00F8, $00F9, $00FA, $00FB, $00FC, $00FD, $00FE, $00FF);
  564. (* UTF-8 (somewhat simplified)
  565. -----
  566. Character Range Byte sequence
  567. --------------- -------------------------- (x=Bits from original character)
  568. $0000..$007F 0xxxxxxx
  569. $0080..$07FF 110xxxxx 10xxxxxx
  570. $8000..$FFFF 1110xxxx 10xxxxxx 10xxxxxx
  571. Example
  572. --------
  573. Transforming the Unicode character U+00E4 LATIN SMALL LETTER A WITH DIAERESIS ("ä"):
  574. ISO-8859-1, Decimal 228
  575. Win1252, Hex $E4
  576. ANSI Bin 1110 0100
  577. abcd efgh
  578. UTF-8 Binary 1100xxab 10cdefgh
  579. Binary 11000011 10100100
  580. Hex $C3 $A4
  581. Decimal 195 164
  582. ANSI Ã ¤ *)
  583. FUNCTION AnsiToUtf8 (Source : ANSISTRING) : AnsiString;
  584. (* Converts the given Windows ANSI (Win1252) string to UTF-8. *)
  585. VAR
  586. I : INTEGER; // Loop counter
  587. U : WORD; // Current Unicode value
  588. Len : INTEGER; // Current real length of "Result" string
  589. BEGIN
  590. SetLength (Result, Length (Source) * 3); // Worst case
  591. Len := 0;
  592. FOR I := 1 TO Length (Source) DO BEGIN
  593. U := WIN1252_UNICODE [ORD (Source [I])];
  594. CASE U OF
  595. $0000..$007F : BEGIN
  596. INC (Len);
  597. Result [Len] := CHR (U);
  598. END;
  599. $0080..$07FF : BEGIN
  600. INC (Len);
  601. Result [Len] := CHR ($C0 OR (U SHR 6));
  602. INC (Len);
  603. Result [Len] := CHR ($80 OR (U AND $3F));
  604. END;
  605. $0800..$FFFF : BEGIN
  606. INC (Len);
  607. Result [Len] := CHR ($E0 OR (U SHR 12));
  608. INC (Len);
  609. Result [Len] := CHR ($80 OR ((U SHR 6) AND $3F));
  610. INC (Len);
  611. Result [Len] := CHR ($80 OR (U AND $3F));
  612. END;
  613. END;
  614. END;
  615. SetLength (Result, Len);
  616. END;
  617. FUNCTION Utf8ToAnsi (Source : AnsiString; UnknownChar : AnsiChar = '¿') : ANSISTRING;
  618. (* Converts the given UTF-8 string to Windows ANSI (Win-1252).
  619. If a character can not be converted, the "UnknownChar" is inserted. *)
  620. VAR
  621. SourceLen : INTEGER; // Length of Source string
  622. I, K : INTEGER;
  623. A : BYTE; // Current ANSI character value
  624. U : WORD;
  625. Ch : AnsiChar; // Dest AnsiChar
  626. Len : INTEGER; // Current real length of "Result" Sring
  627. BEGIN
  628. SourceLen := Length (Source);
  629. SetLength (Result, SourceLen); // Enough room to live
  630. Len := 0;
  631. I := 1;
  632. WHILE I <= SourceLen DO BEGIN
  633. A := ORD (Source [I]);
  634. IF A < $80 THEN BEGIN // Range $0000..$007F
  635. INC (Len);
  636. Result [Len] := Source [I];
  637. INC (I);
  638. END
  639. ELSE BEGIN // Determine U, Inc I
  640. IF (A AND $E0 = $C0) AND (I < SourceLen) THEN BEGIN // Range $0080..$07FF
  641. U := (WORD (A AND $1F) SHL 6) OR (ORD (Source [I+1]) AND $3F);
  642. INC (I, 2);
  643. END
  644. ELSE IF (A AND $F0 = $E0) AND (I < SourceLen-1) THEN BEGIN // Range $0800..$FFFF
  645. U := (WORD (A AND $0F) SHL 12) OR
  646. (WORD (ORD (Source [I+1]) AND $3F) SHL 6) OR
  647. ( ORD (Source [I+2]) AND $3F);
  648. INC (I, 3);
  649. END
  650. ELSE BEGIN // Unknown/unsupported
  651. INC (I);
  652. FOR K := 7 DOWNTO 0 DO
  653. IF A AND (1 SHL K) = 0 THEN BEGIN
  654. INC (I, (A SHR (K+1))-1);
  655. BREAK;
  656. END;
  657. U := WIN1252_UNICODE [ORD (UnknownChar)];
  658. END;
  659. Ch := UnknownChar; // Retrieve ANSI AnsiChar
  660. FOR A := $00 TO $FF DO
  661. IF WIN1252_UNICODE [A] = U THEN BEGIN
  662. Ch := CHR (A);
  663. BREAK;
  664. END;
  665. INC (Len);
  666. Result [Len] := Ch;
  667. END;
  668. END;
  669. SetLength (Result, Len);
  670. END;
  671. (*
  672. ===============================================================================================
  673. Helper Functions
  674. ===============================================================================================
  675. *)
  676. FUNCTION DelChars (Source : AnsiString; CharsToDelete : TCharset) : AnsiString;
  677. // Delete all "CharsToDelete" from the string
  678. VAR
  679. I : INTEGER;
  680. BEGIN
  681. Result := Source;
  682. FOR I := Length (Result) DOWNTO 1 DO
  683. IF Result [I] IN CharsToDelete THEN
  684. Delete (Result, I, 1);
  685. END;
  686. FUNCTION TrimWs (Source : AnsiString) : AnsiString;
  687. // Trimms off Whitespace characters from both ends of the string
  688. VAR
  689. I : INTEGER;
  690. BEGIN
  691. // --- Trim Left
  692. I := 1;
  693. WHILE (I <= Length (Source)) AND (Source [I] IN CWhitespace) DO
  694. INC (I);
  695. Result := Copy (Source, I, MaxInt);
  696. // --- Trim Right
  697. I := Length (Result);
  698. WHILE (I > 1) AND (Result [I] IN CWhitespace) DO
  699. DEC (I);
  700. Delete (Result, I+1, Length (Result)-I);
  701. END;
  702. FUNCTION ConvertWs (Source: AnsiString; PackWs: BOOLEAN) : AnsiString;
  703. // Converts all Whitespace characters to the Space #x20 character
  704. // If "PackWs" is true, contiguous Whitespace characters are packed to one
  705. VAR
  706. I : INTEGER;
  707. BEGIN
  708. Result := Source;
  709. FOR I := Length (Result) DOWNTO 1 DO
  710. IF (Result [I] IN CWhitespace) THEN
  711. IF PackWs AND (I > 1) AND (Result [I-1] IN CWhitespace)
  712. THEN Delete (Result, I, 1)
  713. ELSE Result [I] := #32;
  714. END;
  715. PROCEDURE SetStringSF (VAR S : AnsiString; BufferStart, BufferFinal : PAnsiChar);
  716. BEGIN
  717. SetString (S, BufferStart, BufferFinal-BufferStart+1);
  718. END;
  719. FUNCTION StrLPas (Start : PAnsiChar; Len : INTEGER) : AnsiString;
  720. BEGIN
  721. SetString (Result, Start, Len);
  722. END;
  723. FUNCTION StrSFPas (Start, Finish : PAnsiChar) : AnsiString;
  724. BEGIN
  725. SetString (Result, Start, Finish-Start+1);
  726. END;
  727. FUNCTION StrScanE (CONST Source : PAnsiChar; CONST CharToScanFor : AnsiChar) : PAnsiChar;
  728. // If "CharToScanFor" is not found, StrScanE returns the last AnsiChar of the
  729. // buffer instead of NIL
  730. BEGIN
  731. Result := StrScan (Source, CharToScanFor);
  732. IF Result = NIL THEN
  733. Result := StrEnd (Source)-1;
  734. END;
  735. PROCEDURE ExtractName (Start : PAnsiChar; Terminators : TCharset; VAR Final : PAnsiChar);
  736. (* Extracts the complete Name beginning at "Start".
  737. It is assumed that the name is contained in Markup, so the '>' character is
  738. always a Termination.
  739. Start: IN Pointer to first AnsiChar of name. Is always considered to be valid
  740. Terminators: IN Characters which terminate the name
  741. Final: OUT Pointer to last AnsiChar of name *)
  742. BEGIN
  743. Final := Start+1;
  744. Include (Terminators, #0);
  745. Include (Terminators, '>');
  746. WHILE NOT (Final^ IN Terminators) DO
  747. INC (Final);
  748. DEC (Final);
  749. END;
  750. PROCEDURE ExtractQuote (Start : PAnsiChar; VAR Content : AnsiString; VAR Final : PAnsiChar);
  751. (* Extract a string which is contained in single or double Quotes.
  752. Start: IN Pointer to opening quote
  753. Content: OUT The quoted string
  754. Final: OUT Pointer to closing quote *)
  755. BEGIN
  756. Final := StrScan (Start+1, Start^);
  757. IF Final = NIL THEN BEGIN
  758. Final := StrEnd (Start+1)-1;
  759. SetString (Content, Start+1, Final-Start);
  760. END
  761. ELSE
  762. SetString (Content, Start+1, Final-1-Start);
  763. END;
  764. (*
  765. ===============================================================================================
  766. TEntityStackNode
  767. This Node is pushed to the "Entity Stack" whenever the parser parses entity replacement text.
  768. The "Instance" field holds the Instance pointer of an External Entity buffer. When it is
  769. popped, the Instance is freed.
  770. The "Encoding" field holds the name of the Encoding. External Parsed Entities may have
  771. another encoding as the document entity (XmlSpec 4.3.3). So when there is an "<?xml" PI
  772. found in the stream (= Text Declaration at the beginning of external parsed entities), the
  773. Encoding found there is used for the External Entity (is assigned to TXmlParser.CurEncoding)
  774. Default Encoding is for the Document Entity is UTF-8. It is assumed that External Entities
  775. have the same Encoding as the Document Entity, unless they carry a Text Declaration.
  776. ===============================================================================================
  777. *)
  778. TYPE
  779. TEntityStackNode = CLASS
  780. Instance : TObject;
  781. Encoding : AnsiString;
  782. LastPos : PAnsiChar;
  783. END;
  784. (*
  785. ===============================================================================================
  786. TEntityStack
  787. For nesting of Entities.
  788. When there is an entity reference found in the data stream, the corresponding entity
  789. definition is searched and the current position is pushed to this stack.
  790. From then on, the program scans the entitiy replacement text as if it were normal content.
  791. When the parser reaches the end of an entity, the current position is popped off the
  792. stack again.
  793. ===============================================================================================
  794. *)
  795. CONSTRUCTOR TEntityStack.Create (TheOwner : TXmlParser);
  796. BEGIN
  797. INHERITED Create;
  798. Owner := TheOwner;
  799. END;
  800. PROCEDURE TEntityStack.Push (LastPos : PAnsiChar);
  801. BEGIN
  802. Push (NIL, LastPos);
  803. END;
  804. PROCEDURE TEntityStack.Push (Instance : TObject; LastPos : PAnsiChar);
  805. VAR
  806. ESN : TEntityStackNode;
  807. BEGIN
  808. ESN := TEntityStackNode.Create;
  809. ESN.Instance := Instance;
  810. ESN.Encoding := Owner.FCurEncoding; // Save current Encoding
  811. ESN.LastPos := LastPos;
  812. Add (ESN);
  813. END;
  814. FUNCTION TEntityStack.Pop : PAnsiChar;
  815. VAR
  816. ESN : TEntityStackNode;
  817. BEGIN
  818. IF Count > 0 THEN BEGIN
  819. ESN := TEntityStackNode (Items [Count-1]);
  820. Result := ESN.LastPos;
  821. IF ESN.Instance <> NIL THEN
  822. ESN.Instance.Free;
  823. IF ESN.Encoding <> '' THEN
  824. Owner.FCurEncoding := ESN.Encoding; // Restore current Encoding
  825. Delete (Count-1);
  826. END
  827. ELSE
  828. Result := NIL;
  829. END;
  830. (*
  831. ===============================================================================================
  832. TExternalID
  833. -----------
  834. XmlSpec 4.2.2: ExternalID ::= 'SYSTEM' S SystemLiteral |
  835. 'PUBLIC' S PubidLiteral S SystemLiteral
  836. XmlSpec 4.7: PublicID ::= 'PUBLIC' S PubidLiteral
  837. SystemLiteral and PubidLiteral are quoted
  838. ===============================================================================================
  839. *)
  840. TYPE
  841. TExternalID = CLASS
  842. PublicId : AnsiString;
  843. SystemId : AnsiString;
  844. Final : PAnsiChar;
  845. CONSTRUCTOR Create (Start : PAnsiChar);
  846. END;
  847. CONSTRUCTOR TExternalID.Create (Start : PAnsiChar);
  848. BEGIN
  849. INHERITED Create;
  850. Final := Start;
  851. IF StrLComp (Start, 'SYSTEM', 6) = 0 THEN BEGIN
  852. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  853. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  854. ExtractQuote (Final, SystemID, Final);
  855. END
  856. ELSE IF StrLComp (Start, 'PUBLIC', 6) = 0 THEN BEGIN
  857. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  858. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  859. ExtractQuote (Final, PublicID, Final);
  860. INC (Final);
  861. WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final);
  862. IF NOT (Final^ IN CQuoteChar) THEN EXIT;
  863. ExtractQuote (Final, SystemID, Final);
  864. END;
  865. END;
  866. (*
  867. ===============================================================================================
  868. TXmlParser
  869. ===============================================================================================
  870. *)
  871. CONSTRUCTOR TXmlParser.Create;
  872. BEGIN
  873. INHERITED Create;
  874. FBuffer := NIL;
  875. FBufferSize := 0;
  876. Elements := TElemList.Create;
  877. Entities := TNvpList.Create;
  878. ParEntities := TNvpList.Create;
  879. Notations := TNvpList.Create;
  880. CurAttr := TAttrList.Create;
  881. EntityStack := TEntityStack.Create (Self);
  882. Clear;
  883. END;
  884. DESTRUCTOR TXmlParser.Destroy;
  885. BEGIN
  886. Clear;
  887. Elements.Free;
  888. Entities.Free;
  889. ParEntities.Free;
  890. Notations.Free;
  891. CurAttr.Free;
  892. EntityStack.Free;
  893. INHERITED Destroy;
  894. END;
  895. PROCEDURE TXmlParser.Clear;
  896. // Free Buffer and clear all object attributes
  897. BEGIN
  898. IF (FBufferSize > 0) AND (FBuffer <> NIL) THEN
  899. FreeMem (FBuffer);
  900. FBuffer := NIL;
  901. FBufferSize := 0;
  902. FSource := '';
  903. FXmlVersion := '';
  904. FEncoding := '';
  905. FStandalone := FALSE;
  906. FRootName := '';
  907. FDtdcFinal := NIL;
  908. FNormalize := TRUE;
  909. Elements.Clear;
  910. Entities.Clear;
  911. ParEntities.Clear;
  912. Notations.Clear;
  913. CurAttr.Clear;
  914. EntityStack.Clear;
  915. END;
  916. FUNCTION TXmlParser.LoadFromFile (Filename : AnsiString; FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN;
  917. // Loads Document from given file
  918. // Returns TRUE if successful
  919. VAR
  920. f : FILE;
  921. ReadIn : INTEGER;
  922. OldFileMode : INTEGER;
  923. BEGIN
  924. Result := FALSE;
  925. Clear;
  926. // --- Open File
  927. OldFileMode := SYSTEM.FileMode;
  928. TRY
  929. SYSTEM.FileMode := FileMode;
  930. TRY
  931. AssignFile (f, Filename);
  932. Reset (f, 1);
  933. EXCEPT
  934. EXIT;
  935. END;
  936. TRY
  937. // --- Allocate Memory
  938. TRY
  939. FBufferSize := Filesize (f) + 1;
  940. GetMem (FBuffer, FBufferSize);
  941. EXCEPT
  942. Clear;
  943. EXIT;
  944. END;
  945. // --- Read File
  946. TRY
  947. BlockRead (f, FBuffer^, FBufferSize, ReadIn);
  948. (FBuffer+ReadIn)^ := #0; // NULL termination
  949. EXCEPT
  950. Clear;
  951. EXIT;
  952. END;
  953. FINALLY
  954. CloseFile (f);
  955. END;
  956. FSource := Filename;
  957. Result := TRUE;
  958. FINALLY
  959. SYSTEM.FileMode := OldFileMode;
  960. END;
  961. END;
  962. FUNCTION TXmlParser.LoadFromBuffer (Buffer : PAnsiChar) : BOOLEAN;
  963. // Loads Document from another buffer
  964. // Returns TRUE if successful
  965. // The "Source" property becomes '<MEM>' if successful
  966. BEGIN
  967. Result := FALSE;
  968. Clear;
  969. FBufferSize := StrLen (Buffer) + 1;
  970. TRY
  971. GetMem (FBuffer, FBufferSize);
  972. EXCEPT
  973. Clear;
  974. EXIT;
  975. END;
  976. StrCopy (FBuffer, Buffer);
  977. FSource := '<MEM>';
  978. Result := TRUE;
  979. END;
  980. PROCEDURE TXmlParser.SetBuffer (Buffer : PAnsiChar); // References another buffer
  981. BEGIN
  982. Clear;
  983. FBuffer := Buffer;
  984. FBufferSize := 0;
  985. FSource := '<REFERENCE>';
  986. END;
  987. //-----------------------------------------------------------------------------------------------
  988. // Scanning through the document
  989. //-----------------------------------------------------------------------------------------------
  990. PROCEDURE TXmlParser.StartScan;
  991. BEGIN
  992. CurPartType := ptNone;
  993. CurName := '';
  994. CurContent := '';
  995. CurStart := NIL;
  996. CurFinal := NIL;
  997. CurAttr.Clear;
  998. EntityStack.Clear;
  999. END;
  1000. FUNCTION TXmlParser.Scan : BOOLEAN;
  1001. // Scans the next Part
  1002. // Returns TRUE if a part could be found, FALSE if there is no part any more
  1003. //
  1004. // "IsDone" can be set to FALSE by AnalyzeText in order to go to the next part
  1005. // if there is no Content due to normalization
  1006. VAR
  1007. IsDone : BOOLEAN;
  1008. BEGIN
  1009. REPEAT
  1010. IsDone := TRUE;
  1011. // --- Start of next Part
  1012. IF CurStart = NIL
  1013. THEN CurStart := DocBuffer
  1014. ELSE CurStart := CurFinal+1;
  1015. CurFinal := CurStart;
  1016. // --- End of Document of Pop off a new part from the Entity stack?
  1017. IF CurStart^ = #0 THEN
  1018. CurStart := EntityStack.Pop;
  1019. // --- No Document or End Of Document: Terminate Scan
  1020. IF (CurStart = NIL) OR (CurStart^ = #0) THEN BEGIN
  1021. CurStart := StrEnd (DocBuffer);
  1022. CurFinal := CurStart-1;
  1023. EntityStack.Clear;
  1024. Result := FALSE;
  1025. EXIT;
  1026. END;
  1027. IF (StrLComp (CurStart, '<?xml', 5) = 0) AND
  1028. ((CurStart+5)^ IN CWhitespace) THEN AnalyzeProlog // XML Declaration, Text Declaration
  1029. ELSE IF StrLComp (CurStart, '<?', 2) = 0 THEN AnalyzePI (CurStart, CurFinal) // PI
  1030. ELSE IF StrLComp (CurStart, '<!--', 4) = 0 THEN AnalyzeComment (CurStart, CurFinal) // Comment
  1031. ELSE IF StrLComp (CurStart, '<!DOCTYPE', 9) = 0 THEN AnalyzeDtdc // DTDc
  1032. ELSE IF StrLComp (CurStart, CDStart, Length (CDStart)) = 0 THEN AnalyzeCdata // CDATA Section
  1033. ELSE IF StrLComp (CurStart, '<', 1) = 0 THEN AnalyzeTag // Start-Tag, End-Tag, Empty-Element-Tag
  1034. ELSE AnalyzeText (IsDone); // Text Content
  1035. UNTIL IsDone;
  1036. Result := TRUE;
  1037. END;
  1038. PROCEDURE TXmlParser.AnalyzeProlog;
  1039. // Analyze XML Prolog or Text Declaration
  1040. VAR
  1041. F : PAnsiChar;
  1042. BEGIN
  1043. CurAttr.Analyze (CurStart+5, F);
  1044. IF EntityStack.Count = 0 THEN BEGIN
  1045. FXmlVersion := CurAttr.Value ('version');
  1046. FEncoding := CurAttr.Value ('encoding');
  1047. FStandalone := CurAttr.Value ('standalone') = 'yes';
  1048. END;
  1049. CurFinal := StrPos (F, '?>');
  1050. IF CurFinal <> NIL
  1051. THEN INC (CurFinal)
  1052. ELSE CurFinal := StrEnd (CurStart)-1;
  1053. FCurEncoding := AnsiUpperCase (CurAttr.Value ('encoding'));
  1054. IF FCurEncoding = '' THEN
  1055. FCurEncoding := 'UTF-8'; // Default XML Encoding is UTF-8
  1056. CurPartType := ptXmlProlog;
  1057. CurName := '';
  1058. CurContent := '';
  1059. END;
  1060. PROCEDURE TXmlParser.AnalyzeComment (Start : PAnsiChar; VAR Final : PAnsiChar);
  1061. // Analyze Comments
  1062. BEGIN
  1063. Final := StrPos (Start+4, '-->');
  1064. IF Final = NIL
  1065. THEN Final := StrEnd (Start)-1
  1066. ELSE INC (Final, 2);
  1067. CurPartType := ptComment;
  1068. END;
  1069. PROCEDURE TXmlParser.AnalyzePI (Start : PAnsiChar; VAR Final : PAnsiChar);
  1070. // Analyze Processing Instructions (PI)
  1071. // This is also called for Character
  1072. VAR
  1073. F : PAnsiChar;
  1074. BEGIN
  1075. CurPartType := ptPI;
  1076. Final := StrPos (Start+2, '?>');
  1077. IF Final = NIL
  1078. THEN Final := StrEnd (Start)-1
  1079. ELSE INC (Final);
  1080. ExtractName (Start+2, CWhitespace + ['?', '>'], F);
  1081. SetStringSF (CurName, Start+2, F);
  1082. SetStringSF (CurContent, F+1, Final-2);
  1083. CurAttr.Analyze (F+1, F);
  1084. END;
  1085. PROCEDURE TXmlParser.AnalyzeDtdc;
  1086. (* Analyze Document Type Declaration
  1087. doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
  1088. markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
  1089. PEReference ::= '%' Name ';'
  1090. elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1091. AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1092. EntityDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
  1093. '<!ENTITY' S '%' S Name S PEDef S? '>'
  1094. NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  1095. PI ::= '<?' PITarget (S (AnsiChar* - (AnsiChar* '?>' AnsiChar* )))? '?>'
  1096. Comment ::= '<!--' ((AnsiChar - '-') | ('-' (AnsiChar - '-')))* '-->' *)
  1097. TYPE
  1098. TPhase = (phName, phDtd, phInternal, phFinishing);
  1099. VAR
  1100. Phase : TPhase;
  1101. F : PAnsiChar;
  1102. ExternalID : TExternalID;
  1103. ExternalDTD : TXmlParser;
  1104. DER : TDtdElementRec;
  1105. BEGIN
  1106. DER.Start := CurStart;
  1107. EntityStack.Clear; // Clear stack for Parameter Entities
  1108. CurPartType := ptDtdc;
  1109. // --- Don't read DTDc twice
  1110. IF FDtdcFinal <> NIL THEN BEGIN
  1111. CurFinal := FDtdcFinal;
  1112. EXIT;
  1113. END;
  1114. // --- Scan DTDc
  1115. CurFinal := CurStart + 9; // First AnsiChar after '<!DOCTYPE'
  1116. Phase := phName;
  1117. REPEAT
  1118. CASE CurFinal^ OF
  1119. '%' : BEGIN
  1120. PushPE (CurFinal);
  1121. CONTINUE;
  1122. END;
  1123. #0 : IF EntityStack.Count = 0 THEN
  1124. BREAK
  1125. ELSE BEGIN
  1126. CurFinal := EntityStack.Pop;
  1127. CONTINUE;
  1128. END;
  1129. '[' : BEGIN
  1130. Phase := phInternal;
  1131. AnalyzeDtdElements (CurFinal+1, CurFinal);
  1132. CONTINUE;
  1133. END;
  1134. ']' : Phase := phFinishing;
  1135. '>' : BREAK;
  1136. ELSE IF NOT (CurFinal^ IN CWhitespace) THEN BEGIN
  1137. CASE Phase OF
  1138. phName : IF (CurFinal^ IN CNameStart) THEN BEGIN
  1139. ExtractName (CurFinal, CWhitespace + ['[', '>'], F);
  1140. SetStringSF (FRootName, CurFinal, F);
  1141. CurFinal := F;
  1142. Phase := phDtd;
  1143. END;
  1144. phDtd : IF (StrLComp (CurFinal, 'SYSTEM', 6) = 0) OR
  1145. (StrLComp (CurFinal, 'PUBLIC', 6) = 0) THEN BEGIN
  1146. ExternalID := TExternalID.Create (CurFinal);
  1147. ExternalDTD := LoadExternalEntity (ExternalId.SystemId, ExternalID.PublicId, '');
  1148. F := StrPos (ExternalDtd.DocBuffer, '<!');
  1149. IF F <> NIL THEN
  1150. AnalyzeDtdElements (F, F);
  1151. ExternalDTD.Free;
  1152. CurFinal := ExternalID.Final;
  1153. ExternalID.Free;
  1154. END;
  1155. ELSE BEGIN
  1156. DER.ElementType := deError;
  1157. DER.Pos := CurFinal;
  1158. DER.Final := CurFinal;
  1159. DtdElementFound (DER);
  1160. END;
  1161. END;
  1162. END;
  1163. END;
  1164. INC (CurFinal);
  1165. UNTIL FALSE;
  1166. CurPartType := ptDtdc;
  1167. CurName := '';
  1168. CurContent := '';
  1169. // It is an error in the document if "EntityStack" is not empty now
  1170. IF EntityStack.Count > 0 THEN BEGIN
  1171. DER.ElementType := deError;
  1172. DER.Final := CurFinal;
  1173. DER.Pos := CurFinal;
  1174. DtdElementFound (DER);
  1175. END;
  1176. EntityStack.Clear; // Clear stack for General Entities
  1177. FDtdcFinal := CurFinal;
  1178. END;
  1179. PROCEDURE TXmlParser.AnalyzeDtdElements (Start : PAnsiChar; VAR Final : PAnsiChar);
  1180. // Analyze the "Elements" of a DTD contained in the external or
  1181. // internal DTD subset.
  1182. VAR
  1183. DER : TDtdElementRec;
  1184. BEGIN
  1185. Final := Start;
  1186. REPEAT
  1187. CASE Final^ OF
  1188. '%' : BEGIN
  1189. PushPE (Final);
  1190. CONTINUE;
  1191. END;
  1192. #0 : IF EntityStack.Count = 0 THEN
  1193. BREAK
  1194. ELSE BEGIN
  1195. CurFinal := EntityStack.Pop;
  1196. CONTINUE;
  1197. END;
  1198. ']',
  1199. '>' : BREAK;
  1200. '<' : IF StrLComp (Final, '<!ELEMENT', 9) = 0 THEN AnalyzeElementDecl (Final, Final)
  1201. ELSE IF StrLComp (Final, '<!ATTLIST', 9) = 0 THEN AnalyzeAttListDecl (Final, Final)
  1202. ELSE IF StrLComp (Final, '<!ENTITY', 8) = 0 THEN AnalyzeEntityDecl (Final, Final)
  1203. ELSE IF StrLComp (Final, '<!NOTATION', 10) = 0 THEN AnalyzeNotationDecl (Final, Final)
  1204. ELSE IF StrLComp (Final, '<?', 2) = 0 THEN BEGIN // PI in DTD
  1205. DER.ElementType := dePI;
  1206. DER.Start := Final;
  1207. AnalyzePI (Final, Final);
  1208. DER.Target := PAnsiChar (CurName);
  1209. DER.Content := PAnsiChar (CurContent);
  1210. DER.AttrList := CurAttr;
  1211. DER.Final := Final;
  1212. DtdElementFound (DER);
  1213. END
  1214. ELSE IF StrLComp (Final, '<!--', 4) = 0 THEN BEGIN // Comment in DTD
  1215. DER.ElementType := deComment;
  1216. DER.Start := Final;
  1217. AnalyzeComment (Final, Final);
  1218. DER.Final := Final;
  1219. DtdElementFound (DER);
  1220. END
  1221. ELSE BEGIN
  1222. DER.ElementType := deError;
  1223. DER.Start := Final;
  1224. DER.Pos := Final;
  1225. DER.Final := Final;
  1226. DtdElementFound (DER);
  1227. END;
  1228. END;
  1229. INC (Final);
  1230. UNTIL FALSE;
  1231. END;
  1232. PROCEDURE TXmlParser.AnalyzeTag;
  1233. // Analyze Tags
  1234. VAR
  1235. S, F : PAnsiChar;
  1236. Attr : TAttr;
  1237. ElemDef : TElemDef;
  1238. AttrDef : TAttrDef;
  1239. I : INTEGER;
  1240. BEGIN
  1241. CurPartType := ptStartTag;
  1242. S := CurStart+1;
  1243. IF S^ = '/' THEN BEGIN
  1244. CurPartType := ptEndTag;
  1245. INC (S);
  1246. END;
  1247. ExtractName (S, CWhitespace + ['/'], F);
  1248. SetStringSF (CurName, S, F);
  1249. CurAttr.Analyze (F+1, CurFinal);
  1250. IF CurFinal^ = '/' THEN BEGIN
  1251. CurPartType := ptEmptyTag;
  1252. END;
  1253. CurFinal := StrScanE (CurFinal, '>');
  1254. // --- Set Default Attribute values for nonexistent attributes
  1255. IF (CurPartType = ptStartTag) OR (CurPartType = ptEmptyTag) THEN BEGIN
  1256. ElemDef := Elements.Node (CurName);
  1257. IF ElemDef <> NIL THEN BEGIN
  1258. FOR I := 0 TO ElemDef.Count-1 DO BEGIN
  1259. AttrDef := TAttrDef (ElemDef [I]);
  1260. Attr := TAttr (CurAttr.Node (AttrDef.Name));
  1261. IF (Attr = NIL) AND (AttrDef.Value <> '') THEN BEGIN
  1262. Attr := TAttr.Create (AttrDef.Name, AttrDef.Value);
  1263. Attr.ValueType := vtDefault;
  1264. CurAttr.Add (Attr);
  1265. END;
  1266. IF Attr <> NIL THEN BEGIN
  1267. CASE AttrDef.DefaultType OF
  1268. adDefault : ;
  1269. adRequired : ; // -!- It is an error in the document if "Attr.Value" is an empty string
  1270. adImplied : Attr.ValueType := vtImplied;
  1271. adFixed : BEGIN
  1272. Attr.ValueType := vtFixed;
  1273. Attr.Value := AttrDef.Value;
  1274. END;
  1275. END;
  1276. Attr.AttrType := AttrDef.AttrType;
  1277. END;
  1278. END;
  1279. END;
  1280. // --- Normalize Attribute Values. XmlSpec:
  1281. // - a character reference is processed by appending the referenced character to the attribute value
  1282. // - an entity reference is processed by recursively processing the replacement text of the entity
  1283. // - a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20 to the normalized value,
  1284. // except that only a single #x20 is appended for a "#xD#xA" sequence that is part of an external
  1285. // parsed entity or the literal entity value of an internal parsed entity
  1286. // - other characters are processed by appending them to the normalized value
  1287. // If the declared value is not CDATA, then the XML processor must further process the
  1288. // normalized attribute value by discarding any leading and trailing space (#x20) characters,
  1289. // and by replacing sequences of space (#x20) characters by a single space (#x20) character.
  1290. // All attributes for which no declaration has been read should be treated by a
  1291. // non-validating parser as if declared CDATA.
  1292. // !!! The XML 1.0 SE specification is somewhat different here
  1293. // This code does not conform exactly to this specification
  1294. FOR I := 0 TO CurAttr.Count-1 DO
  1295. WITH TAttr (CurAttr [I]) DO BEGIN
  1296. ReplaceGeneralEntities (Value);
  1297. ReplaceCharacterEntities (Value);
  1298. IF (AttrType <> atCData) AND (AttrType <> atUnknown)
  1299. THEN Value := TranslateEncoding (TrimWs (ConvertWs (Value, TRUE)))
  1300. ELSE Value := TranslateEncoding (ConvertWs (Value, FALSE));
  1301. END;
  1302. END;
  1303. END;
  1304. PROCEDURE TXmlParser.AnalyzeCData;
  1305. // Analyze CDATA Sections
  1306. BEGIN
  1307. CurPartType := ptCData;
  1308. CurFinal := StrPos (CurStart, CDEnd);
  1309. IF CurFinal = NIL THEN BEGIN
  1310. CurFinal := StrEnd (CurStart)-1;
  1311. CurContent := TranslateEncoding (StrPas (CurStart+Length (CDStart)));
  1312. END
  1313. ELSE BEGIN
  1314. SetStringSF (CurContent, CurStart+Length (CDStart), CurFinal-1);
  1315. INC (CurFinal, Length (CDEnd)-1);
  1316. CurContent := TranslateEncoding (CurContent);
  1317. END;
  1318. END;
  1319. PROCEDURE TXmlParser.AnalyzeText (VAR IsDone : BOOLEAN);
  1320. (* Analyzes Text Content between Tags. CurFinal will point to the last content character.
  1321. Content ends at a '<' character or at the end of the document.
  1322. Entity References and Character Entity references are resolved.
  1323. If PackSpaces is TRUE, contiguous Whitespace Characters will be compressed to
  1324. one Space #x20 character, Whitespace at the beginning and end of content will
  1325. be trimmed off and content which is or becomes empty is not returned to
  1326. the application (in this case, "IsDone" is set to FALSE which causes the
  1327. Scan method to proceed directly to the next part. *)
  1328. PROCEDURE ProcessEntity;
  1329. (* Is called if there is an ampsersand '&' character found in the document.
  1330. IN "CurFinal" points to the ampersand
  1331. OUT "CurFinal" points to the first character after the semi-colon ';' *)
  1332. VAR
  1333. P : PAnsiChar;
  1334. Name : AnsiString;
  1335. EntityDef : TEntityDef;
  1336. ExternalEntity : TXmlParser;
  1337. BEGIN
  1338. P := StrScan (CurFinal , ';');
  1339. IF P <> NIL THEN BEGIN
  1340. SetStringSF (Name, CurFinal+1, P-1);
  1341. // Is it a Character Entity?
  1342. IF (CurFinal+1)^ = '#' THEN BEGIN
  1343. IF UpCase ((CurFinal+2)^) = 'X' // !!! Can't use "CHR" for Unicode characters > 255:
  1344. THEN CurContent := CurContent + CHR (StrToIntDef ('$'+Copy (Name, 3, MaxInt), 32))
  1345. ELSE CurContent := CurContent + CHR (StrToIntDef (Copy (Name, 2, MaxInt), 32));
  1346. CurFinal := P+1;
  1347. EXIT;
  1348. END
  1349. // Is it a Predefined Entity?
  1350. ELSE IF Name = 'lt' THEN BEGIN CurContent := CurContent + '<'; CurFinal := P+1; EXIT; END
  1351. ELSE IF Name = 'gt' THEN BEGIN CurContent := CurContent + '>'; CurFinal := P+1; EXIT; END
  1352. ELSE IF Name = 'amp' THEN BEGIN CurContent := CurContent + '&'; CurFinal := P+1; EXIT; END
  1353. ELSE IF Name = 'apos' THEN BEGIN CurContent := CurContent + ''''; CurFinal := P+1; EXIT; END
  1354. ELSE IF Name = 'quot' THEN BEGIN CurContent := CurContent + '"'; CurFinal := P+1; EXIT; END;
  1355. // Replace with Entity from DTD
  1356. EntityDef := TEntityDef (Entities.Node (Name));
  1357. IF EntityDef <> NIL THEN BEGIN
  1358. IF EntityDef.Value <> '' THEN BEGIN
  1359. EntityStack.Push (P+1);
  1360. CurFinal := PAnsiChar (EntityDef.Value);
  1361. END
  1362. ELSE BEGIN
  1363. ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName);
  1364. EntityStack.Push (ExternalEntity, P+1);
  1365. CurFinal := ExternalEntity.DocBuffer;
  1366. END;
  1367. END
  1368. ELSE BEGIN
  1369. CurContent := CurContent + Name;
  1370. CurFinal := P+1;
  1371. END;
  1372. END
  1373. ELSE BEGIN
  1374. INC (CurFinal);
  1375. END;
  1376. END;
  1377. VAR
  1378. C : INTEGER;
  1379. BEGIN
  1380. CurFinal := CurStart;
  1381. CurPartType := ptContent;
  1382. CurContent := '';
  1383. C := 0;
  1384. REPEAT
  1385. CASE CurFinal^ OF
  1386. '&' : BEGIN
  1387. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1388. C := 0;
  1389. ProcessEntity;
  1390. CONTINUE;
  1391. END;
  1392. #0 : BEGIN
  1393. IF EntityStack.Count = 0 THEN
  1394. BREAK
  1395. ELSE BEGIN
  1396. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1397. C := 0;
  1398. CurFinal := EntityStack.Pop;
  1399. CONTINUE;
  1400. END;
  1401. END;
  1402. '<' : BREAK;
  1403. ELSE INC (C);
  1404. END;
  1405. INC (CurFinal);
  1406. UNTIL FALSE;
  1407. CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C));
  1408. DEC (CurFinal);
  1409. IF FNormalize THEN BEGIN
  1410. CurContent := ConvertWs (TrimWs (CurContent), TRUE);
  1411. IsDone := CurContent <> ''; // IsDone will only get FALSE if PackSpaces is TRUE
  1412. END;
  1413. END;
  1414. PROCEDURE TXmlParser.AnalyzeElementDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  1415. (* Parse <!ELEMENT declaration starting at "Start"
  1416. Final must point to the terminating '>' character
  1417. XmlSpec 3.2:
  1418. elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  1419. contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  1420. Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
  1421. '(' S? '#PCDATA' S? ')'
  1422. children ::= (choice | seq) ('?' | '*' | '+')?
  1423. choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
  1424. cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  1425. seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  1426. More simply:
  1427. contentspec ::= EMPTY
  1428. ANY
  1429. '(#PCDATA)'
  1430. '(#PCDATA | A | B)*'
  1431. '(A, B, C)'
  1432. '(A | B | C)'
  1433. '(A?, B*, C+),
  1434. '(A, (B | C | D)* )' *)
  1435. VAR
  1436. Element : TElemDef;
  1437. Elem2 : TElemDef;
  1438. F : PAnsiChar;
  1439. DER : TDtdElementRec;
  1440. BEGIN
  1441. Element := TElemDef.Create;
  1442. Final := Start + 9;
  1443. DER.Start := Start;
  1444. REPEAT
  1445. IF Final^ = '>' THEN BREAK;
  1446. IF (Final^ IN CNameStart) AND (Element.Name = '') THEN BEGIN
  1447. ExtractName (Final, CWhitespace, F);
  1448. SetStringSF (Element.Name, Final, F);
  1449. Final := F;
  1450. F := StrScan (Final+1, '>');
  1451. IF F = NIL THEN BEGIN
  1452. Element.Definition := AnsiString (Final);
  1453. Final := StrEnd (Final);
  1454. BREAK;
  1455. END
  1456. ELSE BEGIN
  1457. SetStringSF (Element.Definition, Final+1, F-1);
  1458. Final := F;
  1459. BREAK;
  1460. END;
  1461. END;
  1462. INC (Final);
  1463. UNTIL FALSE;
  1464. Element.Definition := DelChars (Element.Definition, CWhitespace);
  1465. ReplaceParameterEntities (Element.Definition);
  1466. IF Element.Definition = 'EMPTY' THEN Element.ElemType := etEmpty
  1467. ELSE IF Element.Definition = 'ANY' THEN Element.ElemType := etAny
  1468. ELSE IF Copy (Element.Definition, 1, 8) = '(#PCDATA' THEN Element.ElemType := etMixed
  1469. ELSE IF Copy (Element.Definition, 1, 1) = '(' THEN Element.ElemType := etChildren
  1470. ELSE Element.ElemType := etAny;
  1471. Elem2 := Elements.Node (Element.Name);
  1472. IF Elem2 <> NIL THEN
  1473. Elements.Delete (Elements.IndexOf (Elem2));
  1474. Elements.Add (Element);
  1475. Final := StrScanE (Final, '>');
  1476. DER.ElementType := deElement;
  1477. DER.ElemDef := Element;
  1478. DER.Final := Final;
  1479. DtdElementFound (DER);
  1480. END;
  1481. PROCEDURE TXmlParser.AnalyzeAttListDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  1482. (* Parse <!ATTLIST declaration starting at "Start"
  1483. Final must point to the terminating '>' character
  1484. XmlSpec 3.3:
  1485. AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  1486. AttDef ::= S Name S AttType S DefaultDecl
  1487. AttType ::= StringType | TokenizedType | EnumeratedType
  1488. StringType ::= 'CDATA'
  1489. TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
  1490. EnumeratedType ::= NotationType | Enumeration
  1491. NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  1492. Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  1493. DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  1494. AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
  1495. Examples:
  1496. <!ATTLIST address
  1497. A1 CDATA "Default"
  1498. A2 ID #REQUIRED
  1499. A3 IDREF #IMPLIED
  1500. A4 IDREFS #IMPLIED
  1501. A5 ENTITY #FIXED "&at;&#252;"
  1502. A6 ENTITIES #REQUIRED
  1503. A7 NOTATION (WMF | DXF) "WMF"
  1504. A8 (A | B | C) #REQUIRED> *)
  1505. TYPE
  1506. TPhase = (phElementName, phName, phType, phNotationContent, phDefault);
  1507. VAR
  1508. Phase : TPhase;
  1509. F : PAnsiChar;
  1510. ElementName : AnsiString;
  1511. ElemDef : TElemDef;
  1512. AttrDef : TAttrDef;
  1513. AttrDef2 : TAttrDef;
  1514. Strg : AnsiString;
  1515. DER : TDtdElementRec;
  1516. BEGIN
  1517. Final := Start + 9; // The character after <!ATTLIST
  1518. Phase := phElementName;
  1519. DER.Start := Start;
  1520. AttrDef := NIL;
  1521. ElemDef := NIL;
  1522. REPEAT
  1523. IF NOT (Final^ IN CWhitespace) THEN
  1524. CASE Final^ OF
  1525. '%' : BEGIN
  1526. PushPE (Final);
  1527. CONTINUE;
  1528. END;
  1529. #0 : IF EntityStack.Count = 0 THEN
  1530. BREAK
  1531. ELSE BEGIN
  1532. Final := EntityStack.Pop;
  1533. CONTINUE;
  1534. END;
  1535. '>' : BREAK;
  1536. ELSE CASE Phase OF
  1537. phElementName : BEGIN
  1538. ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F);
  1539. SetStringSF (ElementName, Final, F);
  1540. Final := F;
  1541. ElemDef := Elements.Node (ElementName);
  1542. IF ElemDef = NIL THEN BEGIN
  1543. ElemDef := TElemDef.Create;
  1544. ElemDef.Name := ElementName;
  1545. ElemDef.Definition := 'ANY';
  1546. ElemDef.ElemType := etAny;
  1547. Elements.Add (ElemDef);
  1548. END;
  1549. Phase := phName;
  1550. END;
  1551. phName : BEGIN
  1552. AttrDef := TAttrDef.Create;
  1553. ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F);
  1554. SetStringSF (AttrDef.Name, Final, F);
  1555. Final := F;
  1556. AttrDef2 := TAttrDef (ElemDef.Node (AttrDef.Name));
  1557. IF AttrDef2 <> NIL THEN
  1558. ElemDef.Delete (ElemDef.IndexOf (AttrDef2));
  1559. ElemDef.Add (AttrDef);
  1560. Phase := phType;
  1561. END;
  1562. phType : BEGIN
  1563. IF Final^ = '(' THEN BEGIN
  1564. F := StrScan (Final+1, ')');
  1565. IF F <> NIL
  1566. THEN SetStringSF (AttrDef.TypeDef, Final+1, F-1)
  1567. ELSE AttrDef.TypeDef := AnsiString (Final+1);
  1568. AttrDef.TypeDef := DelChars (AttrDef.TypeDef, CWhitespace);
  1569. AttrDef.AttrType := atEnumeration;
  1570. ReplaceParameterEntities (AttrDef.TypeDef);
  1571. ReplaceCharacterEntities (AttrDef.TypeDef);
  1572. Phase := phDefault;
  1573. END
  1574. ELSE IF StrLComp (Final, 'NOTATION', 8) = 0 THEN BEGIN
  1575. INC (Final, 8);
  1576. AttrDef.AttrType := atNotation;
  1577. Phase := phNotationContent;
  1578. END
  1579. ELSE BEGIN
  1580. ExtractName (Final, CWhitespace+CQuoteChar+['#'], F);
  1581. SetStringSF (AttrDef.TypeDef, Final, F);
  1582. IF AttrDef.TypeDef = 'CDATA' THEN AttrDef.AttrType := atCData
  1583. ELSE IF AttrDef.TypeDef = 'ID' THEN AttrDef.AttrType := atId
  1584. ELSE IF AttrDef.TypeDef = 'IDREF' THEN AttrDef.AttrType := atIdRef
  1585. ELSE IF AttrDef.TypeDef = 'IDREFS' THEN AttrDef.AttrType := atIdRefs
  1586. ELSE IF AttrDef.TypeDef = 'ENTITY' THEN AttrDef.AttrType := atEntity
  1587. ELSE IF AttrDef.TypeDef = 'ENTITIES' THEN AttrDef.AttrType := atEntities
  1588. ELSE IF AttrDef.TypeDef = 'NMTOKEN' THEN AttrDef.AttrType := atNmToken
  1589. ELSE IF AttrDef.TypeDef = 'NMTOKENS' THEN AttrDef.AttrType := atNmTokens;
  1590. Phase := phDefault;
  1591. END
  1592. END;
  1593. phNotationContent : BEGIN
  1594. F := StrScan (Final, ')');
  1595. IF F <> NIL THEN
  1596. SetStringSF (AttrDef.Notations, Final+1, F-1)
  1597. ELSE BEGIN
  1598. AttrDef.Notations := AnsiString (Final+1);
  1599. Final := StrEnd (Final);
  1600. END;
  1601. ReplaceParameterEntities (AttrDef.Notations);
  1602. AttrDef.Notations := DelChars (AttrDef.Notations, CWhitespace);
  1603. Phase := phDefault;
  1604. END;
  1605. phDefault : BEGIN
  1606. IF Final^ = '#' THEN BEGIN
  1607. ExtractName (Final, CWhiteSpace + CQuoteChar, F);
  1608. SetStringSF (Strg, Final, F);
  1609. Final := F;
  1610. ReplaceParameterEntities (Strg);
  1611. IF Strg = '#REQUIRED' THEN BEGIN AttrDef.DefaultType := adRequired; Phase := phName; END
  1612. ELSE IF Strg = '#IMPLIED' THEN BEGIN AttrDef.DefaultType := adImplied; Phase := phName; END
  1613. ELSE IF Strg = '#FIXED' THEN AttrDef.DefaultType := adFixed;
  1614. END
  1615. ELSE IF (Final^ IN CQuoteChar) THEN BEGIN
  1616. ExtractQuote (Final, AttrDef.Value, Final);
  1617. ReplaceParameterEntities (AttrDef.Value);
  1618. ReplaceCharacterEntities (AttrDef.Value);
  1619. Phase := phName;
  1620. END;
  1621. IF Phase = phName THEN BEGIN
  1622. AttrDef := NIL;
  1623. END;
  1624. END;
  1625. END;
  1626. END;
  1627. INC (Final);
  1628. UNTIL FALSE;
  1629. Final := StrScan (Final, '>');
  1630. DER.ElementType := deAttList;
  1631. DER.ElemDef := ElemDef;
  1632. DER.Final := Final;
  1633. DtdElementFound (DER);
  1634. END;
  1635. PROCEDURE TXmlParser.AnalyzeEntityDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  1636. (* Parse <!ENTITY declaration starting at "Start"
  1637. Final must point to the terminating '>' character
  1638. XmlSpec 4.2:
  1639. EntityDecl ::= '<!ENTITY' S Name S EntityDef S? '>' |
  1640. '<!ENTITY' S '%' S Name S PEDef S? '>'
  1641. EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  1642. PEDef ::= EntityValue | ExternalID
  1643. NDataDecl ::= S 'NDATA' S Name
  1644. EntityValue ::= '"' ([^%&"] | PEReference | EntityRef | CharRef)* '"' |
  1645. "'" ([^%&'] | PEReference | EntityRef | CharRef)* "'"
  1646. PEReference ::= '%' Name ';'
  1647. Examples
  1648. <!ENTITY test1 "Stefan Heymann"> <!-- Internal, general, parsed -->
  1649. <!ENTITY test2 SYSTEM "ent2.xml"> <!-- External, general, parsed -->
  1650. <!ENTITY test2 SYSTEM "ent3.gif" NDATA gif> <!-- External, general, unparsed -->
  1651. <!ENTITY % test3 "<!ELEMENT q ANY>"> <!-- Internal, parameter -->
  1652. <!ENTITY % test6 SYSTEM "ent6.xml"> <!-- External, parameter -->
  1653. <!ENTITY test4 "&test1; ist lieb"> <!-- IGP, Replacement text <> literal value -->
  1654. <!ENTITY test5 "<p>Dies ist ein Test-Absatz</p>"> <!-- IGP, See XmlSpec 2.4 -->
  1655. *)
  1656. TYPE
  1657. TPhase = (phName, phContent, phNData, phNotationName, phFinalGT);
  1658. VAR
  1659. Phase : TPhase;
  1660. IsParamEntity : BOOLEAN;
  1661. F : PAnsiChar;
  1662. ExternalID : TExternalID;
  1663. EntityDef : TEntityDef;
  1664. EntityDef2 : TEntityDef;
  1665. DER : TDtdElementRec;
  1666. BEGIN
  1667. Final := Start + 8; // First AnsiChar after <!ENTITY
  1668. DER.Start := Start;
  1669. Phase := phName;
  1670. IsParamEntity := FALSE;
  1671. EntityDef := TEntityDef.Create;
  1672. REPEAT
  1673. IF NOT (Final^ IN CWhitespace) THEN
  1674. CASE Final^ OF
  1675. '%' : IsParamEntity := TRUE;
  1676. '>' : BREAK;
  1677. ELSE CASE Phase OF
  1678. phName : IF Final^ IN CNameStart THEN BEGIN
  1679. ExtractName (Final, CWhitespace + CQuoteChar, F);
  1680. SetStringSF (EntityDef.Name, Final, F);
  1681. Final := F;
  1682. Phase := phContent;
  1683. END;
  1684. phContent : IF Final^ IN CQuoteChar THEN BEGIN
  1685. ExtractQuote (Final, EntityDef.Value, Final);
  1686. Phase := phFinalGT;
  1687. END
  1688. ELSE IF (StrLComp (Final, 'SYSTEM', 6) = 0) OR
  1689. (StrLComp (Final, 'PUBLIC', 6) = 0) THEN BEGIN
  1690. ExternalID := TExternalID.Create (Final);
  1691. EntityDef.SystemId := ExternalID.SystemId;
  1692. EntityDef.PublicId := ExternalID.PublicId;
  1693. Final := ExternalID.Final;
  1694. Phase := phNData;
  1695. ExternalID.Free;
  1696. END;
  1697. phNData : IF StrLComp (Final, 'NDATA', 5) = 0 THEN BEGIN
  1698. INC (Final, 4);
  1699. Phase := phNotationName;
  1700. END;
  1701. phNotationName : IF Final^ IN CNameStart THEN BEGIN
  1702. ExtractName (Final, CWhitespace + ['>'], F);
  1703. SetStringSF (EntityDef.NotationName, Final, F);
  1704. Final := F;
  1705. Phase := phFinalGT;
  1706. END;
  1707. phFinalGT : ; // -!- There is an error in the document if this branch is called
  1708. END;
  1709. END;
  1710. INC (Final);
  1711. UNTIL FALSE;
  1712. IF IsParamEntity THEN BEGIN
  1713. EntityDef2 := TEntityDef (ParEntities.Node (EntityDef.Name));
  1714. IF EntityDef2 <> NIL THEN
  1715. ParEntities.Delete (ParEntities.IndexOf (EntityDef2));
  1716. ParEntities.Add (EntityDef);
  1717. ReplaceCharacterEntities (EntityDef.Value);
  1718. END
  1719. ELSE BEGIN
  1720. EntityDef2 := TEntityDef (Entities.Node (EntityDef.Name));
  1721. IF EntityDef2 <> NIL THEN
  1722. Entities.Delete (Entities.IndexOf (EntityDef2));
  1723. Entities.Add (EntityDef);
  1724. ReplaceParameterEntities (EntityDef.Value); // Create replacement texts (see XmlSpec 4.5)
  1725. ReplaceCharacterEntities (EntityDef.Value);
  1726. END;
  1727. Final := StrScanE (Final, '>');
  1728. DER.ElementType := deEntity;
  1729. DER.EntityDef := EntityDef;
  1730. DER.Final := Final;
  1731. DtdElementFound (DER);
  1732. END;
  1733. PROCEDURE TXmlParser.AnalyzeNotationDecl (Start : PAnsiChar; VAR Final : PAnsiChar);
  1734. // Parse <!NOTATION declaration starting at "Start"
  1735. // Final must point to the terminating '>' character
  1736. // XmlSpec 4.7: NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  1737. TYPE
  1738. TPhase = (phName, phExtId, phEnd);
  1739. VAR
  1740. ExternalID : TExternalID;
  1741. Phase : TPhase;
  1742. F : PAnsiChar;
  1743. NotationDef : TNotationDef;
  1744. DER : TDtdElementRec;
  1745. BEGIN
  1746. Final := Start + 10; // Character after <!NOTATION
  1747. DER.Start := Start;
  1748. Phase := phName;
  1749. NotationDef := TNotationDef.Create;
  1750. REPEAT
  1751. IF NOT (Final^ IN CWhitespace) THEN
  1752. CASE Final^ OF
  1753. '>',
  1754. #0 : BREAK;
  1755. ELSE CASE Phase OF
  1756. phName : BEGIN
  1757. ExtractName (Final, CWhitespace + ['>'], F);
  1758. SetStringSF (NotationDef.Name, Final, F);
  1759. Final := F;
  1760. Phase := phExtId;
  1761. END;
  1762. phExtId : BEGIN
  1763. ExternalID := TExternalID.Create (Final);
  1764. NotationDef.Value := ExternalID.SystemId;
  1765. NotationDef.PublicId := ExternalID.PublicId;
  1766. Final := ExternalId.Final;
  1767. ExternalId.Free;
  1768. Phase := phEnd;
  1769. END;
  1770. phEnd : ; // -!- There is an error in the document if this branch is called
  1771. END;
  1772. END;
  1773. INC (Final);
  1774. UNTIL FALSE;
  1775. Notations.Add (NotationDef);
  1776. Final := StrScanE (Final, '>');
  1777. DER.ElementType := deNotation;
  1778. DER.NotationDef := NotationDef;
  1779. DER.Final := Final;
  1780. DtdElementFound (DER);
  1781. END;
  1782. PROCEDURE TXmlParser.PushPE (VAR Start : PAnsiChar);
  1783. (* If there is a parameter entity reference found in the data stream,
  1784. the current position will be pushed to the entity stack.
  1785. Start: IN Pointer to the '%' character starting the PE reference
  1786. OUT Pointer to first character of PE replacement text *)
  1787. VAR
  1788. P : PAnsiChar;
  1789. EntityDef : TEntityDef;
  1790. BEGIN
  1791. P := StrScan (Start, ';');
  1792. IF P <> NIL THEN BEGIN
  1793. EntityDef := TEntityDef (ParEntities.Node (StrSFPas (Start+1, P-1)));
  1794. IF EntityDef <> NIL THEN BEGIN
  1795. EntityStack.Push (P+1);
  1796. Start := PAnsiChar (EntityDef.Value);
  1797. END
  1798. ELSE
  1799. Start := P+1;
  1800. END;
  1801. END;
  1802. PROCEDURE TXmlParser.ReplaceCharacterEntities (VAR Str : AnsiString);
  1803. // Replaces all Character Entity References in the string
  1804. VAR
  1805. Start : INTEGER;
  1806. PAmp : PAnsiChar;
  1807. PSemi : PAnsiChar;
  1808. PosAmp : INTEGER;
  1809. Len : INTEGER; // Length of Entity Reference
  1810. BEGIN
  1811. IF Str = '' THEN EXIT;
  1812. Start := 1;
  1813. REPEAT
  1814. PAmp := StrPos (PAnsiChar (Str) + Start-1, '&#');
  1815. IF PAmp = NIL THEN BREAK;
  1816. PSemi := StrScan (PAmp+2, ';');
  1817. IF PSemi = NIL THEN BREAK;
  1818. PosAmp := PAmp - PAnsiChar (Str) + 1;
  1819. Len := PSemi-PAmp+1;
  1820. IF CompareText (Str [PosAmp+2], 'x') = 0 // !!! Can't use "CHR" for Unicode characters > 255
  1821. THEN Str [PosAmp] := CHR (StrToIntDef ('$'+Copy (Str, PosAmp+3, Len-4), 0))
  1822. ELSE Str [PosAmp] := CHR (StrToIntDef (Copy (Str, PosAmp+2, Len-3), 32));
  1823. Delete (Str, PosAmp+1, Len-1);
  1824. Start := PosAmp + 1;
  1825. UNTIL FALSE;
  1826. END;
  1827. PROCEDURE TXmlParser.ReplaceParameterEntities (VAR Str : AnsiString);
  1828. // Recursively replaces all Parameter Entity References in the string
  1829. PROCEDURE ReplaceEntities (VAR Str : AnsiString);
  1830. VAR
  1831. Start : INTEGER;
  1832. PAmp : PAnsiChar;
  1833. PSemi : PAnsiChar;
  1834. PosAmp : INTEGER;
  1835. Len : INTEGER;
  1836. Entity : TEntityDef;
  1837. Repl : AnsiString; // Replacement
  1838. BEGIN
  1839. IF Str = '' THEN EXIT;
  1840. Start := 1;
  1841. REPEAT
  1842. PAmp := StrPos (PAnsiChar (Str)+Start-1, '%');
  1843. IF PAmp = NIL THEN BREAK;
  1844. PSemi := StrScan (PAmp+2, ';');
  1845. IF PSemi = NIL THEN BREAK;
  1846. PosAmp := PAmp - PAnsiChar (Str) + 1;
  1847. Len := PSemi-PAmp+1;
  1848. Entity := TEntityDef (ParEntities.Node (Copy (Str, PosAmp+1, Len-2)));
  1849. IF Entity <> NIL THEN BEGIN
  1850. Repl := Entity.Value;
  1851. ReplaceEntities (Repl); // Recursion
  1852. END
  1853. ELSE
  1854. Repl := Copy (Str, PosAmp, Len);
  1855. Delete (Str, PosAmp, Len);
  1856. Insert (Repl, Str, PosAmp);
  1857. Start := PosAmp + Length (Repl);
  1858. UNTIL FALSE;
  1859. END;
  1860. BEGIN
  1861. ReplaceEntities (Str);
  1862. END;
  1863. PROCEDURE TXmlParser.ReplaceGeneralEntities (VAR Str : AnsiString);
  1864. // Recursively replaces General Entity References in the string
  1865. PROCEDURE ReplaceEntities (VAR Str : AnsiString);
  1866. VAR
  1867. Start : INTEGER;
  1868. PAmp : PAnsiChar;
  1869. PSemi : PAnsiChar;
  1870. PosAmp : INTEGER;
  1871. Len : INTEGER;
  1872. EntityDef : TEntityDef;
  1873. EntName : AnsiString;
  1874. Repl : AnsiString; // Replacement
  1875. ExternalEntity : TXmlParser;
  1876. BEGIN
  1877. IF Str = '' THEN EXIT;
  1878. Start := 1;
  1879. REPEAT
  1880. PAmp := StrPos (PAnsiChar (Str)+Start-1, '&');
  1881. IF PAmp = NIL THEN BREAK;
  1882. PSemi := StrScan (PAmp+2, ';');
  1883. IF PSemi = NIL THEN BREAK;
  1884. PosAmp := PAmp - PAnsiChar (Str) + 1;
  1885. Len := PSemi-PAmp+1;
  1886. EntName := Copy (Str, PosAmp+1, Len-2);
  1887. IF EntName = 'lt' THEN Repl := '<'
  1888. ELSE IF EntName = 'gt' THEN Repl := '>'
  1889. ELSE IF EntName = 'amp' THEN Repl := '&'
  1890. ELSE IF EntName = 'apos' THEN Repl := ''''
  1891. ELSE IF EntName = 'quot' THEN Repl := '"'
  1892. ELSE BEGIN
  1893. EntityDef := TEntityDef (Entities.Node (EntName));
  1894. IF EntityDef <> NIL THEN BEGIN
  1895. IF EntityDef.Value <> '' THEN // Internal Entity
  1896. Repl := EntityDef.Value
  1897. ELSE BEGIN // External Entity
  1898. ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName);
  1899. Repl := StrPas (ExternalEntity.DocBuffer); // !!! What if it contains a Text Declaration?
  1900. ExternalEntity.Free;
  1901. END;
  1902. ReplaceEntities (Repl); // Recursion
  1903. END
  1904. ELSE
  1905. Repl := Copy (Str, PosAmp, Len);
  1906. END;
  1907. Delete (Str, PosAmp, Len);
  1908. Insert (Repl, Str, PosAmp);
  1909. Start := PosAmp + Length (Repl);
  1910. UNTIL FALSE;
  1911. END;
  1912. BEGIN
  1913. ReplaceEntities (Str);
  1914. END;
  1915. FUNCTION TXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : AnsiString) : TXmlParser;
  1916. // This will be called whenever there is a Parsed External Entity or
  1917. // the DTD External Subset to be parsed.
  1918. // It has to create a TXmlParser instance and load the desired Entity.
  1919. // This instance of LoadExternalEntity assumes that "SystemId" is a valid
  1920. // file name (relative to the Document source) and loads this file using
  1921. // the LoadFromFile method.
  1922. VAR
  1923. Filename : AnsiString;
  1924. BEGIN
  1925. // --- Convert System ID to complete filename
  1926. Filename := StringReplace (SystemId, '/', '\', [rfReplaceAll]);
  1927. IF Copy (FSource, 1, 1) <> '<' THEN
  1928. IF (Copy (Filename, 1, 2) = '\\') OR (Copy (Filename, 2, 1) = ':') THEN
  1929. // Already has an absolute Path
  1930. ELSE BEGIN
  1931. Filename := ExtractFilePath (FSource) + Filename;
  1932. END;
  1933. // --- Load the File
  1934. Result := TXmlParser.Create;
  1935. Result.LoadFromFile (Filename);
  1936. END;
  1937. FUNCTION TXmlParser.TranslateEncoding (CONST Source : AnsiString) : AnsiString;
  1938. // The member variable "CurEncoding" always holds the name of the current
  1939. // encoding, e.g. 'UTF-8' or 'ISO-8859-1'.
  1940. // This virtual method "TranslateEncoding" is responsible for translating
  1941. // the content passed in the "Source" parameter to the Encoding which
  1942. // is expected by the application.
  1943. // This instance of "TranlateEncoding" assumes that the Application expects
  1944. // Windows ANSI (Win1252) strings. It is able to transform UTF-8 or ISO-8859-1
  1945. // encodings.
  1946. // If you want your application to understand or create other encodings, you
  1947. // override this function.
  1948. BEGIN
  1949. IF CurEncoding = 'UTF-8'
  1950. THEN Result := Utf8ToAnsi (Source)
  1951. ELSE Result := Source;
  1952. END;
  1953. PROCEDURE TXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec);
  1954. // This method is called for every element which is found in the DTD
  1955. // declaration. The variant record TDtdElementRec is passed which
  1956. // holds informations about the element.
  1957. // You can override this function to handle DTD declarations.
  1958. // Note that when you parse the same Document instance a second time,
  1959. // the DTD will not get parsed again.
  1960. BEGIN
  1961. END;
  1962. FUNCTION TXmlParser.GetDocBuffer: PAnsiChar;
  1963. // Returns FBuffer or a pointer to a NUL AnsiChar if Buffer is empty
  1964. BEGIN
  1965. IF FBuffer = NIL
  1966. THEN Result := #0
  1967. ELSE Result := FBuffer;
  1968. END;
  1969. (*$IFNDEF HAS_CONTNRS_UNIT
  1970. ===============================================================================================
  1971. TObjectList
  1972. ===============================================================================================
  1973. *)
  1974. DESTRUCTOR TObjectList.Destroy;
  1975. BEGIN
  1976. Clear;
  1977. SetCapacity(0);
  1978. INHERITED Destroy;
  1979. END;
  1980. PROCEDURE TObjectList.Delete (Index : INTEGER);
  1981. BEGIN
  1982. IF (Index < 0) OR (Index >= Count) THEN EXIT;
  1983. TObject (Items [Index]).Free;
  1984. INHERITED Delete (Index);
  1985. END;
  1986. PROCEDURE TObjectList.Clear;
  1987. BEGIN
  1988. WHILE Count > 0 DO
  1989. Delete (Count-1);
  1990. END;
  1991. (*$ENDIF *)
  1992. (*
  1993. ===============================================================================================
  1994. TNvpNode
  1995. --------
  1996. Node base class for the TNvpList
  1997. ===============================================================================================
  1998. *)
  1999. CONSTRUCTOR TNvpNode.Create (TheName, TheValue : AnsiString);
  2000. BEGIN
  2001. INHERITED Create;
  2002. Name := TheName;
  2003. Value := TheValue;
  2004. END;
  2005. (*
  2006. ===============================================================================================
  2007. TNvpList
  2008. --------
  2009. A generic List of Name-Value Pairs, based on the TObjectList introduced in Delphi 5
  2010. ===============================================================================================
  2011. *)
  2012. PROCEDURE TNvpList.Add (Node : TNvpNode);
  2013. VAR
  2014. I : INTEGER;
  2015. BEGIN
  2016. FOR I := Count-1 DOWNTO 0 DO
  2017. IF Node.Name > TNvpNode (Items [I]).Name THEN BEGIN
  2018. Insert (I+1, Node);
  2019. EXIT;
  2020. END;
  2021. Insert (0, Node);
  2022. END;
  2023. FUNCTION TNvpList.Node (Name : AnsiString) : TNvpNode;
  2024. // Binary search for Node
  2025. VAR
  2026. L, H : INTEGER; // Low, High Limit
  2027. T, C : INTEGER; // Test Index, Comparison result
  2028. Last : INTEGER; // Last Test Index
  2029. BEGIN
  2030. IF Count=0 THEN BEGIN
  2031. Result := NIL;
  2032. EXIT;
  2033. END;
  2034. L := 0;
  2035. H := Count;
  2036. Last := -1;
  2037. REPEAT
  2038. T := (L+H) DIV 2;
  2039. IF T=Last THEN BREAK;
  2040. Result := TNvpNode (Items [T]);
  2041. C := CompareStr (Result.Name, Name);
  2042. IF C = 0 THEN EXIT
  2043. ELSE IF C < 0 THEN L := T
  2044. ELSE H := T;
  2045. Last := T;
  2046. UNTIL FALSE;
  2047. Result := NIL;
  2048. END;
  2049. FUNCTION TNvpList.Node (Index : INTEGER) : TNvpNode;
  2050. BEGIN
  2051. IF (Index < 0) OR (Index >= Count)
  2052. THEN Result := NIL
  2053. ELSE Result := TNvpNode (Items [Index]);
  2054. END;
  2055. FUNCTION TNvpList.Value (Name : AnsiString) : AnsiString;
  2056. VAR
  2057. Nvp : TNvpNode;
  2058. BEGIN
  2059. Nvp := TNvpNode (Node (Name));
  2060. IF Nvp <> NIL
  2061. THEN Result := Nvp.Value
  2062. ELSE Result := '';
  2063. END;
  2064. FUNCTION TNvpList.Value (Index : INTEGER) : AnsiString;
  2065. BEGIN
  2066. IF (Index < 0) OR (Index >= Count)
  2067. THEN Result := ''
  2068. ELSE Result := TNvpNode (Items [Index]).Value;
  2069. END;
  2070. FUNCTION TNvpList.Name (Index : INTEGER) : AnsiString;
  2071. BEGIN
  2072. IF (Index < 0) OR (Index >= Count)
  2073. THEN Result := ''
  2074. ELSE Result := TNvpNode (Items [Index]).Name;
  2075. END;
  2076. (*
  2077. ===============================================================================================
  2078. TAttrList
  2079. List of Attributes. The "Analyze" method extracts the Attributes from the given Buffer.
  2080. Is used for extraction of Attributes in Start-Tags, Empty-Element Tags and the "pseudo"
  2081. attributes in XML Prologs, Text Declarations and PIs.
  2082. ===============================================================================================
  2083. *)
  2084. PROCEDURE TAttrList.Analyze (Start : PAnsiChar; VAR Final : PAnsiChar);
  2085. // Analyze the Buffer for Attribute=Name pairs.
  2086. // Terminates when there is a character which is not IN CNameStart
  2087. // (e.g. '?>' or '>' or '/>')
  2088. TYPE
  2089. TPhase = (phName, phEq, phValue);
  2090. VAR
  2091. Phase : TPhase;
  2092. F : PAnsiChar;
  2093. Name : AnsiString;
  2094. Value : AnsiString;
  2095. Attr : TAttr;
  2096. BEGIN
  2097. Clear;
  2098. Phase := phName;
  2099. Final := Start;
  2100. REPEAT
  2101. IF (Final^ = #0) OR (Final^ = '>') THEN BREAK;
  2102. IF NOT (Final^ IN CWhitespace) THEN
  2103. CASE Phase OF
  2104. phName : BEGIN
  2105. IF NOT (Final^ IN CNameStart) THEN EXIT;
  2106. ExtractName (Final, CWhitespace + ['=', '/'], F);
  2107. SetStringSF (Name, Final, F);
  2108. Final := F;
  2109. Phase := phEq;
  2110. END;
  2111. phEq : BEGIN
  2112. IF Final^ = '=' THEN
  2113. Phase := phValue
  2114. END;
  2115. phValue : BEGIN
  2116. IF Final^ IN CQuoteChar THEN BEGIN
  2117. ExtractQuote (Final, Value, F);
  2118. Attr := TAttr.Create;
  2119. Attr.Name := Name;
  2120. Attr.Value := Value;
  2121. Attr.ValueType := vtNormal;
  2122. Add (Attr);
  2123. Final := F;
  2124. Phase := phName;
  2125. END;
  2126. END;
  2127. END;
  2128. INC (Final);
  2129. UNTIL FALSE;
  2130. END;
  2131. (*
  2132. ===============================================================================================
  2133. TElemList
  2134. List of TElemDef nodes.
  2135. ===============================================================================================
  2136. *)
  2137. FUNCTION TElemList.Node (Name : AnsiString) : TElemDef;
  2138. // Binary search for the Node with the given Name
  2139. VAR
  2140. L, H : INTEGER; // Low, High Limit
  2141. T, C : INTEGER; // Test Index, Comparison result
  2142. Last : INTEGER; // Last Test Index
  2143. BEGIN
  2144. IF Count=0 THEN BEGIN
  2145. Result := NIL;
  2146. EXIT;
  2147. END;
  2148. L := 0;
  2149. H := Count;
  2150. Last := -1;
  2151. REPEAT
  2152. T := (L+H) DIV 2;
  2153. IF T=Last THEN BREAK;
  2154. Result := TElemDef (Items [T]);
  2155. C := CompareStr (Result.Name, Name);
  2156. IF C = 0 THEN EXIT
  2157. ELSE IF C < 0 THEN L := T
  2158. ELSE H := T;
  2159. Last := T;
  2160. UNTIL FALSE;
  2161. Result := NIL;
  2162. END;
  2163. PROCEDURE TElemList.Add (Node : TElemDef);
  2164. VAR
  2165. I : INTEGER;
  2166. BEGIN
  2167. FOR I := Count-1 DOWNTO 0 DO
  2168. IF Node.Name > TElemDef (Items [I]).Name THEN BEGIN
  2169. Insert (I+1, Node);
  2170. EXIT;
  2171. END;
  2172. Insert (0, Node);
  2173. END;
  2174. (*
  2175. ===============================================================================================
  2176. TScannerXmlParser
  2177. A TXmlParser descendant for the TCustomXmlScanner component
  2178. ===============================================================================================
  2179. *)
  2180. TYPE
  2181. TScannerXmlParser = CLASS (TXmlParser)
  2182. Scanner : TCustomXmlScanner;
  2183. CONSTRUCTOR Create (TheScanner : TCustomXmlScanner);
  2184. FUNCTION LoadExternalEntity (SystemId, PublicId,
  2185. Notation : AnsiString) : TXmlParser; OVERRIDE;
  2186. FUNCTION TranslateEncoding (CONST Source : AnsiString) : AnsiString; OVERRIDE;
  2187. PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); OVERRIDE;
  2188. END;
  2189. CONSTRUCTOR TScannerXmlParser.Create (TheScanner : TCustomXmlScanner);
  2190. BEGIN
  2191. INHERITED Create;
  2192. Scanner := TheScanner;
  2193. END;
  2194. FUNCTION TScannerXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : AnsiString) : TXmlParser;
  2195. BEGIN
  2196. IF Assigned (Scanner.FOnLoadExternal)
  2197. THEN Scanner.FOnLoadExternal (Scanner, SystemId, PublicId, Notation, Result)
  2198. ELSE Result := INHERITED LoadExternalEntity (SystemId, PublicId, Notation);
  2199. END;
  2200. FUNCTION TScannerXmlParser.TranslateEncoding (CONST Source : AnsiString) : AnsiString;
  2201. BEGIN
  2202. IF Assigned (Scanner.FOnTranslateEncoding)
  2203. THEN Result := Scanner.FOnTranslateEncoding (Scanner, CurEncoding, Source)
  2204. ELSE Result := INHERITED TranslateEncoding (Source);
  2205. END;
  2206. PROCEDURE TScannerXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec);
  2207. BEGIN
  2208. WITH DtdElementRec DO
  2209. CASE ElementType OF
  2210. deElement : Scanner.WhenElement (ElemDef);
  2211. deAttList : Scanner.WhenAttList (ElemDef);
  2212. deEntity : Scanner.WhenEntity (EntityDef);
  2213. deNotation : Scanner.WhenNotation (NotationDef);
  2214. dePI : Scanner.WhenPI (AnsiString (Target), AnsiString (Content), AttrList);
  2215. deComment : Scanner.WhenComment (StrSFPas (Start, Final));
  2216. deError : Scanner.WhenDtdError (Pos);
  2217. END;
  2218. END;
  2219. (*
  2220. ===============================================================================================
  2221. TCustomXmlScanner
  2222. ===============================================================================================
  2223. *)
  2224. CONSTRUCTOR TCustomXmlScanner.Create (AOwner: TComponent);
  2225. BEGIN
  2226. INHERITED;
  2227. FXmlParser := TScannerXmlParser.Create (Self);
  2228. END;
  2229. DESTRUCTOR TCustomXmlScanner.Destroy;
  2230. BEGIN
  2231. FXmlParser.Free;
  2232. INHERITED;
  2233. END;
  2234. PROCEDURE TCustomXmlScanner.LoadFromFile (Filename : TFilename);
  2235. // Load XML Document from file
  2236. BEGIN
  2237. FXmlParser.LoadFromFile (Filename);
  2238. END;
  2239. PROCEDURE TCustomXmlScanner.LoadFromBuffer (Buffer : PAnsiChar);
  2240. // Load XML Document from buffer
  2241. BEGIN
  2242. FXmlParser.LoadFromBuffer (Buffer);
  2243. END;
  2244. PROCEDURE TCustomXmlScanner.SetBuffer (Buffer : PAnsiChar);
  2245. // Refer to Buffer
  2246. BEGIN
  2247. FXmlParser.SetBuffer (Buffer);
  2248. END;
  2249. FUNCTION TCustomXmlScanner.GetFilename : TFilename;
  2250. BEGIN
  2251. Result := FXmlParser.Source;
  2252. END;
  2253. FUNCTION TCustomXmlScanner.GetNormalize : BOOLEAN;
  2254. BEGIN
  2255. Result := FXmlParser.Normalize;
  2256. END;
  2257. PROCEDURE TCustomXmlScanner.SetNormalize (Value : BOOLEAN);
  2258. BEGIN
  2259. FXmlParser.Normalize := Value;
  2260. END;
  2261. PROCEDURE TCustomXmlScanner.WhenXmlProlog(XmlVersion, Encoding: AnsiString; Standalone : BOOLEAN);
  2262. // Is called when the parser has parsed the <? xml ?> declaration of the prolog
  2263. BEGIN
  2264. IF Assigned (FOnXmlProlog) THEN FOnXmlProlog (Self, XmlVersion, Encoding, Standalone);
  2265. END;
  2266. PROCEDURE TCustomXmlScanner.WhenComment (Comment : AnsiString);
  2267. // Is called when the parser has parsed a <!-- comment -->
  2268. BEGIN
  2269. IF Assigned (FOnComment) THEN FOnComment (Self, Comment);
  2270. END;
  2271. PROCEDURE TCustomXmlScanner.WhenPI (Target, Content: AnsiString; Attributes : TAttrList);
  2272. // Is called when the parser has parsed a <?processing instruction ?>
  2273. BEGIN
  2274. IF Assigned (FOnPI) THEN FOnPI (Self, Target, Content, Attributes);
  2275. END;
  2276. PROCEDURE TCustomXmlScanner.WhenDtdRead (RootElementName : AnsiString);
  2277. // Is called when the parser has completely parsed the DTD
  2278. BEGIN
  2279. IF Assigned (FOnDtdRead) THEN FOnDtdRead (Self, RootElementName);
  2280. END;
  2281. PROCEDURE TCustomXmlScanner.WhenStartTag (TagName : AnsiString; Attributes : TAttrList);
  2282. // Is called when the parser has parsed a start tag like <p>
  2283. BEGIN
  2284. IF Assigned (FOnStartTag) THEN FOnStartTag (Self, TagName, Attributes);
  2285. END;
  2286. PROCEDURE TCustomXmlScanner.WhenEmptyTag (TagName : AnsiString; Attributes : TAttrList);
  2287. // Is called when the parser has parsed an Empty Element Tag like <br/>
  2288. BEGIN
  2289. IF Assigned (FOnEmptyTag) THEN FOnEmptyTag (Self, TagName, Attributes);
  2290. END;
  2291. PROCEDURE TCustomXmlScanner.WhenEndTag (TagName : AnsiString);
  2292. // Is called when the parser has parsed an End Tag like </p>
  2293. BEGIN
  2294. IF Assigned (FOnEndTag) THEN FOnEndTag (Self, TagName);
  2295. END;
  2296. PROCEDURE TCustomXmlScanner.WhenContent (Content : AnsiString);
  2297. // Is called when the parser has parsed an element's text content
  2298. BEGIN
  2299. IF Assigned (FOnContent) THEN FOnContent (Self, Content);
  2300. END;
  2301. PROCEDURE TCustomXmlScanner.WhenCData (Content : AnsiString);
  2302. // Is called when the parser has parsed a CDATA section
  2303. BEGIN
  2304. IF Assigned (FOnCData) THEN FOnCData (Self, Content);
  2305. END;
  2306. PROCEDURE TCustomXmlScanner.WhenElement (ElemDef : TElemDef);
  2307. // Is called when the parser has parsed an <!ELEMENT> definition
  2308. // inside the DTD
  2309. BEGIN
  2310. IF Assigned (FOnElement) THEN FOnElement (Self, ElemDef);
  2311. END;
  2312. PROCEDURE TCustomXmlScanner.WhenAttList (ElemDef : TElemDef);
  2313. // Is called when the parser has parsed an <!ATTLIST> definition
  2314. // inside the DTD
  2315. BEGIN
  2316. IF Assigned (FOnAttList) THEN FOnAttList (Self, ElemDef);
  2317. END;
  2318. PROCEDURE TCustomXmlScanner.WhenEntity (EntityDef : TEntityDef);
  2319. // Is called when the parser has parsed an <!ENTITY> definition
  2320. // inside the DTD
  2321. BEGIN
  2322. IF Assigned (FOnEntity) THEN FOnEntity (Self, EntityDef);
  2323. END;
  2324. PROCEDURE TCustomXmlScanner.WhenNotation (NotationDef : TNotationDef);
  2325. // Is called when the parser has parsed a <!NOTATION> definition
  2326. // inside the DTD
  2327. BEGIN
  2328. IF Assigned (FOnNotation) THEN FOnNotation (Self, NotationDef);
  2329. END;
  2330. PROCEDURE TCustomXmlScanner.WhenDtdError (ErrorPos : PAnsiChar);
  2331. // Is called when the parser has found an Error in the DTD
  2332. BEGIN
  2333. IF Assigned (FOnDtdError) THEN FOnDtdError (Self, ErrorPos);
  2334. END;
  2335. PROCEDURE TCustomXmlScanner.Execute;
  2336. // Perform scanning
  2337. // Scanning is done synchronously, i.e. you can expect events to be triggered
  2338. // in the order of the XML data stream. Execute will finish when the whole XML
  2339. // document has been scanned or when the StopParser property has been set to TRUE.
  2340. BEGIN
  2341. FStopParser := FALSE;
  2342. FXmlParser.StartScan;
  2343. WHILE FXmlParser.Scan AND (NOT FStopParser) DO
  2344. CASE FXmlParser.CurPartType OF
  2345. ptNone : ;
  2346. ptXmlProlog : WhenXmlProlog (FXmlParser.XmlVersion, FXmlParser.Encoding, FXmlParser.Standalone);
  2347. ptComment : WhenComment (StrSFPas (FXmlParser.CurStart, FXmlParser.CurFinal));
  2348. ptPI : WhenPI (FXmlParser.CurName, FXmlParser.CurContent, FXmlParser.CurAttr);
  2349. ptDtdc : WhenDtdRead (FXmlParser.RootName);
  2350. ptStartTag : WhenStartTag (FXmlParser.CurName, FXmlParser.CurAttr);
  2351. ptEmptyTag : WhenEmptyTag (FXmlParser.CurName, FXmlParser.CurAttr);
  2352. ptEndTag : WhenEndTag (FXmlParser.CurName);
  2353. ptContent : WhenContent (FXmlParser.CurContent);
  2354. ptCData : WhenCData (FXmlParser.CurContent);
  2355. END;
  2356. END;
  2357. END.