XmlParser.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
  2. //
  3. // internal System.Xml.XmlParser
  4. //
  5. // Author:
  6. // Daniel Weber ([email protected])
  7. //
  8. // (C) 2001 Daniel Weber
  9. //
  10. using System;
  11. using System.Collections;
  12. namespace System.Xml
  13. {
  /// <summary>
  /// Character-driven XML tokenizer. <see cref="parse"/> pulls characters from
  /// an XmlInputSource, splits them into markup pieces (character data,
  /// references, tags, processing instructions, comments, CDATA sections and
  /// DOCTYPE declarations) and hands each finished piece to one of the
  /// write* methods. The write* methods are still stubs that return null, so
  /// no nodes are added to the document yet.
  /// </summary>
  internal class XmlParser
  {
      // Private data members
      private XmlDocument Fdoc;
      private XmlInputSource Fsrc;
      // Elements push themselves on at start, pop on complete
      private Stack RefNodes;

      // private classes

      /// <summary>Kind of markup piece the tokenizer is currently collecting.</summary>
      private enum DomPieceType
      {
          xmlProcessingInstruction,
          xmlXmlDeclaration,
          xmlTextDeclaration,
          xmlComment,
          xmlCDATA,
          xmlPCDATA,
          xmlDoctype,
          xmlStartTag,
          xmlEndTag,
          xmlEmptyElementTag,
          xmlCharRef,
          xmlEntityRef,
          xmlParameterEntityRef,
          xmlEntityDecl,
          xmlElementDecl,
          xmlAttributeDecl,
          xmlNotationDecl,
          xmlCondSection,
          xmlUnknown,
      }

      // constants
      private const char CR = (char) 0x0D;
      private const char LF = (char) 0x0A;
      private const char QM = '?';
      private const char AMP = '&';
      // The two names below used to be swapped (GTCODE was '<'); they now
      // mean what they say.
      private const char LTCODE = '<';
      private const char GTCODE = '>';
      // These four used to be decimal code points written with a 0x prefix
      // (0x39 is '9', 0x34 is '4', 0x35 is '5', 0x47 is 'G'); character
      // literals make the intent unmistakable.
      private const char SINGLEQUOTE = '\'';
      private const char DOUBLEQUOTE = '"';
      private const char NUMBERSIGN = '#';
      private const char SLASH = '/';
      private const string PISTART = "<?";
      private const string PIEND = "?>";
      private const string XMLDECLSTART = "<?xml";
      private const string COMMENTSTART = "<!--";
      private const string COMMENTEND = "-->";
      private const string CDATASTART = "<![CDATA[";
      private const string CDATAEND = "]]>";
      private const string DOCTYPESTART = "<!DOCTYPE";

      // private properties

      /// <summary>
      /// Node on top of the element stack, or null when the stack is empty
      /// or its top entry is not an XmlNode.
      /// </summary>
      private XmlNode refNode
      {
          get
          {
              // Stack.Peek() throws on an empty stack, so test Count first.
              if (RefNodes.Count == 0)
                  return null;
              return RefNodes.Peek() as XmlNode;
          }
      }

      // public properties

      // public methods

      /// <summary>
      /// Tokenizes the whole input source and dispatches every recognised
      /// piece to the matching write* method.
      /// </summary>
      /// <returns>
      /// true when the input was consumed without an error; false when an
      /// invalid XML character was read, a write* method reported an
      /// XmlException, or an exception escaped while reading or dispatching.
      /// </returns>
      public bool parse()
      {
          bool retval = true;
          XmlException parseError = null;
          bool singleQuoteOpen = false;
          bool doubleQuoteOpen = false;
          bool bracketOpen = false;
          string content = "";
          DomPieceType pieceType = DomPieceType.xmlUnknown;
          string subEndMarker = "";
          string subStartMarker = "";
          try
          {
              while ( !Fsrc.atEOF() )
              {
                  if (parseError != null)
                      break;
                  char c = Fsrc.getNextChar();
                  if ( !XmlNames_1_0.IsXmlChar(c) )
                  {
                      // Record the error instead of throwing it into our own
                      // catch block, where it used to be silently discarded.
                      parseError = new XmlException("'Invalid character error.'", Fsrc);
                      break;
                  }
                  switch (pieceType)
                  {
                  case DomPieceType.xmlUnknown:
                      // First character of a new piece decides its kind.
                      if (c == LTCODE)
                          pieceType = DomPieceType.xmlStartTag;
                      else if (c == AMP)
                          pieceType = DomPieceType.xmlEntityRef;
                      else
                          pieceType = DomPieceType.xmlPCDATA;
                      content += c;
                      Fsrc.pieceStart();
                      break;

                  case DomPieceType.xmlPCDATA:
                      // '<' or '&' ends the text run and opens the next piece.
                      if (c == LTCODE)
                      {
                          parseError = writePCDATA(content);
                          content = "";
                          pieceType = DomPieceType.xmlStartTag;
                          Fsrc.pieceStart();
                      }
                      else if (c == AMP)
                      {
                          parseError = writePCDATA(content);
                          content = "";
                          pieceType = DomPieceType.xmlEntityRef;
                          Fsrc.pieceStart();
                      }
                      content += c;
                      break;

                  case DomPieceType.xmlEntityRef:
                      content += c;
                      if (c == ';')
                      {
                          // content is "&...;" and therefore at least two
                          // characters long; index 1 follows the '&'.
                          if (content[1] == NUMBERSIGN)
                              parseError = writeCharRef(content);
                          else
                              parseError = writeEntityRef(content);
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;

                  case DomPieceType.xmlStartTag:
                      content += c;
                      // The first 2, 4 or 9 characters reveal whether this is
                      // really a PI, a comment, a CDATA section or a DOCTYPE.
                      // The length is fixed in each case, so plain (ordinal)
                      // string equality is sufficient.
                      switch (content.Length)
                      {
                      case 2:
                          if (content == PISTART)
                              pieceType = DomPieceType.xmlProcessingInstruction;
                          break;
                      case 4:
                          if (content == COMMENTSTART)
                              pieceType = DomPieceType.xmlComment;
                          break;
                      case 9:
                          if (content == CDATASTART)
                              pieceType = DomPieceType.xmlCDATA;
                          else if (content == DOCTYPESTART)
                          {
                              pieceType = DomPieceType.xmlDoctype;
                              subEndMarker = "";
                              subStartMarker = "";
                              bracketOpen = false;
                          }
                          break;
                      }
                      // Count quotation marks; a '>' inside a quoted
                      // attribute value does not close the tag.
                      if ((c == SINGLEQUOTE) && !doubleQuoteOpen)
                          singleQuoteOpen = !singleQuoteOpen;
                      else if ((c == DOUBLEQUOTE) && !singleQuoteOpen)
                          doubleQuoteOpen = !doubleQuoteOpen;
                      else if ((c == GTCODE) && !doubleQuoteOpen && !singleQuoteOpen)
                      {
                          parseError = writeTag(content);
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;

                  //<?PINAME ?>
                  case DomPieceType.xmlProcessingInstruction:
                      content += c;
                      // The piece ends at "?>". Requiring four characters
                      // keeps the '?' of the opening "<?" from being counted
                      // as the closing one.
                      if ((c == GTCODE) && (content.Length >= 4) &&
                          (content[content.Length - 2] == QM))
                      {
                          parseError = writeInstruction(content);
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;

                  case DomPieceType.xmlComment:
                      content += c;
                      // Length > 6 ensures the closing "-->" does not share
                      // dashes with the opening "<!--".
                      if ((c == GTCODE) && (content.Length > 6) &&
                          endsWithOrdinal(content, COMMENTEND))
                      {
                          // strip "<!--" (4 chars) and "-->" (3 chars)
                          parseError = writeComment(content.Substring(4, content.Length - 7));
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;

                  case DomPieceType.xmlCDATA:
                      content += c;
                      if ((c == GTCODE) && (content.Length >= 12) &&
                          endsWithOrdinal(content, CDATAEND))
                      {
                          // strip "<![CDATA[" (9 chars) and "]]>" (3 chars)
                          parseError = writeCDATA(content.Substring(9, content.Length - 12));
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;

                  case DomPieceType.xmlDoctype:
                      content += c;
                      if (subEndMarker == "")
                      {
                          if ((c == SINGLEQUOTE) && !doubleQuoteOpen)
                              singleQuoteOpen = !singleQuoteOpen;
                          else if ((c == DOUBLEQUOTE) && !singleQuoteOpen)
                              doubleQuoteOpen = !doubleQuoteOpen;

                          if (bracketOpen)
                          {
                              // Inside the internal subset "[ ... ]": watch
                              // for "<?" and "<!--" so that their content is
                              // skipped until the matching end marker.
                              if (!singleQuoteOpen && !doubleQuoteOpen)
                              {
                                  if (c == LTCODE)
                                  {
                                      subStartMarker = "<";
                                  }
                                  else if ((c == '!') && (subStartMarker == "<"))
                                  {
                                      // was "<", which made "<!--" undetectable
                                      subStartMarker = "<!";
                                  }
                                  else if ((c == QM) && (subStartMarker == "<"))
                                  {
                                      subStartMarker = "";
                                      subEndMarker = PIEND;
                                  }
                                  else if ((c == '-') && (subStartMarker == "<!"))
                                  {
                                      subStartMarker = "<!-";
                                  }
                                  else if ((c == '-') && (subStartMarker == "<!-"))
                                  {
                                      subStartMarker = "";
                                      subEndMarker = COMMENTEND;
                                  }
                                  else if (subStartMarker != "")
                                  {
                                      subStartMarker = "";
                                  }
                                  if (c == ']')
                                      bracketOpen = false;
                              }
                          }
                          else
                          {
                              if ((c == '[') && !singleQuoteOpen && !doubleQuoteOpen)
                                  bracketOpen = true;
                          }
                      }
                      else
                      {
                          // Skipping a PI or comment inside the internal
                          // subset; resume normal scanning at its end marker.
                          if (endsWithOrdinal(content, subEndMarker))
                              subEndMarker = "";
                      }
                      if (!doubleQuoteOpen && !singleQuoteOpen && !bracketOpen &&
                          (subEndMarker == "") && (c == GTCODE))
                      {
                          parseError = writeDoctype(content);
                          content = "";
                          pieceType = DomPieceType.xmlUnknown;
                      }
                      break;
                  } // switch
              } // while more characters

              // Whatever is left when the input ends is reported as text.
              if ((parseError == null) && (content.Length > 0))
                  parseError = writePCDATA(content);
          } // try
          catch (XmlException e)
          {
              parseError = e;
          }
          catch (Exception)
          {
              // The Delphi code this was ported from converted read and
              // conversion errors into an "Invalid character error." parser
              // error. Callers of parse() have never seen exceptions, so the
              // failure is reported through the return value rather than
              // rethrown -- but it is no longer reported as success.
              retval = false;
          }
          if (parseError != null)
          {
              // Deal with the error, somehow
              retval = false;
          }
          return retval;
      }

      // private methods

      /// <summary>Ordinal test for <paramref name="text"/> beginning with <paramref name="prefix"/>.</summary>
      private static bool startsWithOrdinal(string text, string prefix)
      {
          return (text.Length >= prefix.Length) &&
                 (string.CompareOrdinal(text, 0, prefix, 0, prefix.Length) == 0);
      }

      /// <summary>Ordinal test for <paramref name="text"/> ending with <paramref name="suffix"/>.</summary>
      private static bool endsWithOrdinal(string text, string suffix)
      {
          return (text.Length >= suffix.Length) &&
                 (string.CompareOrdinal(text, text.Length - suffix.Length,
                                        suffix, 0, suffix.Length) == 0);
      }

      /// <summary>
      /// Classifies a finished tag as end tag, empty-element tag or start tag
      /// and forwards its inner text to the matching write* method.
      /// </summary>
      /// <param name="content">Complete tag text, from '&lt;' through '&gt;'.</param>
      /// <returns>Whatever the selected write* method returns.</returns>
      private XmlException writeTag(string content)
      {
          if (content[1] == SLASH)
          {
              // "</name  >": drop "</", the final '>' and any white space
              // between the name and the '>'.
              int end = content.Length - 1;           // index of '>'
              while ((end > 2) && XmlNames_1_0.IsXmlWhiteSpace(content[end - 1]))
                  end--;
              return writeEndElement(content.Substring(2, end - 2));
          }
          if (content[content.Length - 2] == SLASH)
          {
              // "<name .../>": drop '<' and "/>"
              return writeEmptyElement(content.Substring(1, content.Length - 3));
          }
          // "<name ...>": drop '<' and '>'
          return writeStartElement(content.Substring(1, content.Length - 2));
      }

      /// <summary>
      /// Forwards a finished "&lt;? ... ?&gt;" piece either as the XML
      /// declaration (it starts with "&lt;?xml" followed by white space) or
      /// as an ordinary processing instruction.
      /// </summary>
      /// <param name="content">Complete piece, at least four characters long.</param>
      /// <returns>Whatever the selected write* method returns.</returns>
      private XmlException writeInstruction(string content)
      {
          // text between "<?" and "?>"
          string body = content.Substring(2, content.Length - 4);
          if ((content.Length > 5) &&
              startsWithOrdinal(content, XMLDECLSTART) &&
              XmlNames_1_0.IsXmlWhiteSpace(content[5]))
              return writeXmlDeclaration(body);
          return writeProcessingInstruction(body);
      }

      /// <summary>Receives a run of character data. Not implemented yet.</summary>
      /// <returns>Always null.</returns>
      private XmlException writePCDATA(string content)
      {
          return null;
      }

      /// <summary>Receives a general entity reference. Not implemented yet.</summary>
      /// <param name="content">Complete reference, including '&amp;' and ';'.</param>
      /// <returns>Always null.</returns>
      private XmlException writeEntityRef(string content)
      {
          // Name without the leading '&' and the trailing ';'.
          // TODO: use it once entity references are actually written.
          string entityName = content.Substring(1, content.Length - 2);
          return null;
      }

      /// <summary>Receives a character reference. Not implemented yet.</summary>
      /// <param name="content">Complete reference, including "&amp;#" and ';'.</param>
      /// <returns>Always null.</returns>
      private XmlException writeCharRef( string content)
      {
          return null;
      }

      /// <summary>Receives an end tag. Not implemented yet.</summary>
      /// <param name="content">Text after "&lt;/" with trailing white space removed.</param>
      /// <returns>Always null.</returns>
      private XmlException writeEndElement( string content)
      {
          return null;
      }

      /// <summary>Receives an empty-element tag. Not implemented yet.</summary>
      /// <param name="content">Text between '&lt;' and "/&gt;".</param>
      /// <returns>Always null.</returns>
      private XmlException writeEmptyElement( string content)
      {
          return null;
      }

      /// <summary>Receives a start tag. Not implemented yet.</summary>
      /// <param name="content">Text between '&lt;' and '&gt;'.</param>
      /// <returns>Always null.</returns>
      private XmlException writeStartElement( string content)
      {
          return null;
      }

      /// <summary>Receives a comment. Not implemented yet.</summary>
      /// <param name="content">Text between "&lt;!--" and "--&gt;".</param>
      /// <returns>Always null.</returns>
      private XmlException writeComment( string content)
      {
          // TODO: port the Delphi handlers that were kept here as a
          // commented-out reference listing. In summary they did this:
          //  * TXmlDocBuilder.comment: when a reference node is assigned,
          //    create a comment through its owner document and append it to
          //    the reference node; if that fails, detach and free the new
          //    node and report 'Invalid comment error.'.
          //  * TdomStandardIntSubsetBuilder.comment: the same for the
          //    content-model tree (CreateCMComment), converting any failure
          //    into 'Invalid character error.'.
          //  * TXmlWFTestContentHandler.comment: well-formedness checks --
          //    'Invalid comment error.' when the data contains "--" or ends
          //    with '-', 'Invalid character error.' when the data holds
          //    non-XML characters.
          return null;
      }

      /// <summary>Receives the XML declaration. Not implemented yet.</summary>
      /// <param name="content">Text between "&lt;?" and "?&gt;".</param>
      /// <returns>Always null.</returns>
      private XmlException writeXmlDeclaration ( string content)
      {
          return null;
      }

      /// <summary>Receives a processing instruction. Not implemented yet.</summary>
      /// <param name="content">Text between "&lt;?" and "?&gt;".</param>
      /// <returns>Always null.</returns>
      private XmlException writeProcessingInstruction( string content)
      {
          return null;
      }

      /// <summary>Receives a CDATA section. Not implemented yet.</summary>
      /// <param name="content">Text between "&lt;![CDATA[" and "]]&gt;".</param>
      /// <returns>Always null.</returns>
      private XmlException writeCDATA( string content)
      {
          return null;
      }

      /// <summary>Receives a document type declaration. Not implemented yet.</summary>
      /// <param name="content">Complete declaration, from "&lt;!DOCTYPE" through the closing '&gt;'.</param>
      /// <returns>Always null.</returns>
      private XmlException writeDoctype( string content)
      {
          return null;
      }

      // Placeholder; currently empty and not called from within this class.
      private void mainLoop()
      {
      }

      // Constructors

      /// <summary>Creates a parser that reads from <paramref name="src"/>.</summary>
      /// <param name="src">Character source consumed by <see cref="parse"/>.</param>
      /// <param name="doc">Document stored for later use; nothing in this class reads it yet.</param>
      public XmlParser ( XmlInputSource src, XmlDocument doc )
      {
          Fsrc = src;
          Fdoc = doc;
          RefNodes = new Stack();
      }
  }
  463. }