// -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
//
// System.Xml.XmlTextReader.cs
//
// Author:
//   Jason Diamond ([email protected])
//
// (C) 2001 Jason Diamond  http://injektilo.org/
//
// FIXME:
//   This can only parse basic XML: elements, attributes, processing
//   instructions, and comments are OK, but there's no support for
//   entity/character references or namespaces yet.
//
//   It barfs on DOCTYPE declarations and CDATA sections.
//
//   There's also no checking being done for either well-formedness
//   or validity.
//
//   ParserContext and NameTables aren't being used yet.
//
//   The XmlTextReader-specific properties and methods have yet to
//   be added or implemented.
//
//   Some thought needs to be given to performance. There are too many
//   strings and string builders being allocated.
//
//   None of the MoveTo methods have been implemented yet.
//
//   LineNumber and LinePosition aren't being tracked.
//
//   xml:space, xml:lang, and xml:base aren't being tracked.
//
//   Depth isn't being tracked.
  35. using System;
  36. using System.Collections;
  37. using System.IO;
  38. using System.Net;
  39. using System.Text;
  40. namespace System.Xml
  41. {
  42. public class XmlTextReader : XmlReader
  43. {
  44. // constructors
  // Resets the parser state only. No input source is attached, so
  // the 'reader' field stays unset after this constructor runs.
  protected XmlTextReader()
  {
      this.Init();
  }
  // Reads XML from a byte stream. The bytes are decoded as UTF-8
  // unless a byte order mark at the start says otherwise.
  public XmlTextReader(Stream input)
  {
      Init();

      Encoding fallbackEncoding = Encoding.UTF8;
      bool detectFromByteOrderMark = true;
      reader = new StreamReader(input, fallbackEncoding, detectFromByteOrderMark);
  }
  // Reads XML from the resource at 'url', fetched through a
  // WebClient. The response bytes are decoded as UTF-8 unless a
  // byte order mark at the start says otherwise.
  public XmlTextReader(string url)
  {
      Init();

      WebClient client = new WebClient();
      Stream response = client.OpenRead(url);
      reader = new StreamReader(response, Encoding.UTF8, true);
  }
  // Reads XML from characters supplied by the caller's TextReader,
  // which is used as-is.
  public XmlTextReader(TextReader input)
  {
      Init();
      this.reader = input;
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(Stream input, XmlNameTable nameTable)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string baseURI, Stream input)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string baseURI, TextReader input)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string url, XmlNameTable nameTable)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(TextReader input, XmlNameTable nameTable)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(Stream inputFragment, XmlNodeType fragmentType, XmlParserContext context)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string baseURI, Stream input, XmlNameTable nameTable)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string baseURI, TextReader input, XmlNameTable nameTable)
  {
      throw new NotImplementedException();
  }
  // Not implemented yet: this overload always throws
  // NotImplementedException.
  public XmlTextReader(string fragment, XmlNodeType fragmentType, XmlParserContext context)
  {
      throw new NotImplementedException();
  }
  130. // properties
  // Number of entries currently held in the attribute table.
  public override int AttributeCount
  {
      get { return attributes.Count; }
  }
  // Not implemented yet: always returns null.
  public override string BaseURI
  {
      get { return null; }
  }
  // Not implemented yet: always returns false.
  public override bool CanResolveEntity
  {
      get { return false; }
  }
  // Not implemented yet: nesting depth is not tracked, so this
  // always returns 0.
  public override int Depth
  {
      get { return 0; }
  }
  // True once the reader has reached the end of its input or has
  // been closed.
  public override bool EOF
  {
      get
      {
          if (readState == ReadState.EndOfFile)
          {
              return true;
          }
          return readState == ReadState.Closed;
      }
  }
  // True when the current node's value is anything other than the
  // empty string.
  public override bool HasValue
  {
      get { return !String.Equals(this.value, String.Empty); }
  }
  // Not implemented yet: always returns false.
  public override bool IsDefault
  {
      get { return false; }
  }
  // The empty-element flag recorded for the current node.
  public override bool IsEmptyElement
  {
      get { return this.isEmptyElement; }
  }
  // Indexer shorthand for GetAttribute(int).
  public override string this[int i]
  {
      get { return this.GetAttribute(i); }
  }
  // Indexer shorthand for GetAttribute(string).
  public override string this[string name]
  {
      get { return this.GetAttribute(name); }
  }
  // Indexer shorthand for GetAttribute(string, string).
  public override string this[string localName, string namespaceName]
  {
      get { return this.GetAttribute(localName, namespaceName); }
  }
  // Not implemented yet: always returns null.
  public override string LocalName
  {
      get { return null; }
  }
  // The name recorded for the current node.
  public override string Name
  {
      get { return this.name; }
  }
  // Not implemented yet: always returns null.
  public override string NamespaceURI
  {
      get { return null; }
  }
  // Not implemented yet: always returns null.
  public override XmlNameTable NameTable
  {
      get { return null; }
  }
  // The node type recorded for the current node.
  public override XmlNodeType NodeType
  {
      get { return this.nodeType; }
  }
  // Not implemented yet: always returns null.
  public override string Prefix
  {
      get { return null; }
  }
  // Not implemented yet: always reports the double quote character,
  // whatever quote the document actually used.
  public override char QuoteChar
  {
      get { return '"'; }
  }
  // The reader's current state.
  public override ReadState ReadState
  {
      get { return this.readState; }
  }
  // The text value recorded for the current node.
  public override string Value
  {
      get { return this.value; }
  }
  // Not implemented yet: always returns null.
  public override string XmlLang
  {
      get { return null; }
  }
  // Not implemented yet: always returns XmlSpace.Default.
  public override XmlSpace XmlSpace
  {
      get { return XmlSpace.Default; }
  }
  300. // methods
  // Marks the reader as closed and releases the underlying text
  // source. Previously only the state flag changed, so the stream or
  // web response wrapped by 'reader' was never closed.
  public override void Close()
  {
      readState = ReadState.Closed;

      // 'reader' is unset when the protected parameterless
      // constructor was used, so guard before closing it.
      if (reader != null)
      {
          reader.Close();
      }
  }
  // Not implemented yet: always returns null, whatever the index.
  public override string GetAttribute(int i)
  {
      return null;
  }
  // Looks the attribute up by name in the attribute table. The
  // result is null when the table holds no entry under that name.
  public override string GetAttribute(string name)
  {
      object found = attributes[name];
      return (string)found;
  }
  // Not implemented yet: always returns null.
  public override string GetAttribute(string localName, string namespaceName)
  {
      return null;
  }
  // Not implemented yet: always returns null.
  public override string LookupNamespace(string prefix)
  {
      return null;
  }
  // Not implemented yet: does nothing.
  public override void MoveToAttribute(int i)
  {
      return;
  }
  // Not implemented yet: always returns false.
  public override bool MoveToAttribute(string name)
  {
      return false;
  }
  // Not implemented yet: always returns false.
  public override bool MoveToAttribute(string localName, string namespaceName)
  {
      return false;
  }
  // Not implemented yet: always returns false.
  public override bool MoveToElement()
  {
      return false;
  }
  // Not implemented yet: always returns false.
  public override bool MoveToFirstAttribute()
  {
      return false;
  }
  // Not implemented yet: always returns false.
  public override bool MoveToNextAttribute()
  {
      return false;
  }
  // Advances to the next node in the input.
  //
  // Returns true when a node was read, and false when the end of the
  // input was reached or the reader has already been closed.
  public override bool Read()
  {
      // A closed reader must stay closed. Without this guard the state
      // was flipped back to Interactive and parsing resumed.
      if (readState == ReadState.Closed)
      {
          return false;
      }

      readState = ReadState.Interactive;
      return ReadContent();
  }
  // Not implemented yet: always returns false.
  public override bool ReadAttributeValue()
  {
      return false;
  }
  // Not implemented yet: always returns null.
  public override string ReadInnerXml()
  {
      return null;
  }
  // Not implemented yet: always returns null.
  public override string ReadOuterXml()
  {
      return null;
  }
  // Not implemented yet: always returns null.
  public override string ReadString()
  {
      return null;
  }
  // Not implemented yet: does nothing.
  public override void ResolveEntity()
  {
      return;
  }
  388. // privates
  // Source of the characters being parsed.
  private TextReader reader;

  // State reported through the ReadState and EOF properties.
  private ReadState readState;

  // Description of the current node. These are updated together
  // through SetProperties.
  private XmlNodeType nodeType;
  private string name;
  private string value;
  private bool isEmptyElement;

  // Attribute values of the current element, keyed by attribute name.
  private Hashtable attributes;
  // Puts every state field into its starting value: no current node,
  // empty name and value, and a fresh, empty attribute table.
  private void Init()
  {
      this.attributes = new Hashtable();

      this.readState = ReadState.Initial;
      this.nodeType = XmlNodeType.None;
      this.isEmptyElement = false;
      this.name = String.Empty;
      this.value = String.Empty;
  }
  // Updates the description of the current node in one step. Use
  // this instead of assigning the fields one by one so that they
  // always change together. Moving the fields into a separate class
  // might help enforce that.
  //
  // When clearAttributes is true the attribute table is emptied as
  // well; pass false to keep the attributes that were just parsed.
  private void SetProperties(
      XmlNodeType nodeType,
      string name,
      bool isEmptyElement,
      string value,
      bool clearAttributes)
  {
      if (clearAttributes)
      {
          ClearAttributes();
      }

      this.value = value;
      this.isEmptyElement = isEmptyElement;
      this.name = name;
      this.nodeType = nodeType;
  }
  // Records one attribute of the current element. Hashtable.Add
  // rejects a key that is already present, so a repeated attribute
  // name makes this throw.
  private void AddAttribute(string name, string value)
  {
      this.attributes.Add(name, value);
  }
  // Empties the attribute table.
  private void ClearAttributes()
  {
      this.attributes.Clear();
  }
  // Reads the next node: markup when the next character is '<',
  // otherwise a run of text. Returns false at the end of the input,
  // after switching to the EndOfFile state and clearing the current
  // node.
  //
  // NOTE: no document-level state is kept here, so more than one
  // document element, or text outside the document element, is not
  // rejected.
  private bool ReadContent()
  {
      int next = reader.Peek();

      if (next == -1)
      {
          readState = ReadState.EndOfFile;
          SetProperties(XmlNodeType.None, String.Empty, false, String.Empty, true);
          return false;
      }

      if (next == '<')
      {
          reader.Read();
          ReadTag();
      }
      else
      {
          ReadText();
      }
      return true;
  }
  // Dispatches on the character that follows an already consumed
  // '<': '/' starts an end tag, '?' a processing instruction, '!' a
  // comment, and anything else a start tag. The marker character is
  // consumed before the specific parser runs; a start tag's first
  // name character is left in place.
  private void ReadTag()
  {
      int marker = reader.Peek();

      if (marker == '/')
      {
          reader.Read();
          ReadEndTag();
      }
      else if (marker == '?')
      {
          reader.Read();
          ReadProcessingInstruction();
      }
      else if (marker == '!')
      {
          reader.Read();
          ReadComment();
      }
      else
      {
          ReadStartTag();
      }
  }
  // Parses a start tag or an empty-element tag and makes it the
  // current node. The leading '<' has already been consumed, so the
  // reader is on the first character of the element name.
  private void ReadStartTag()
  {
      string elementName = ReadName();
      SkipWhitespace();

      // Drop the previous node's attributes before collecting the
      // ones that belong to this element.
      ClearAttributes();
      if (XmlChar.IsFirstNameChar(reader.Peek()))
      {
          ReadAttributes();
      }

      // A '/' right before the closing '>' marks an empty element.
      bool selfClosing = (reader.Peek() == '/');
      if (selfClosing)
      {
          reader.Read();
      }
      Expect('>');

      // The attributes were just filled in, so they must be kept.
      SetProperties(XmlNodeType.Element, elementName, selfClosing, String.Empty, false);
  }
  // Parses the rest of an end tag and makes it the current node.
  // The reader is positioned on the first character of the element
  // name; whitespace is allowed between the name and the closing '>'.
  private void ReadEndTag()
  {
      string elementName = ReadName();
      SkipWhitespace();
      Expect('>');

      SetProperties(XmlNodeType.EndElement, elementName, false, String.Empty, true);
  }
  // Collects a run of character data and makes it the current Text
  // node. The reader is positioned on the first character of the
  // text; reading stops in front of the next '<' or at the end of
  // the input.
  private void ReadText()
  {
      StringBuilder buffer = new StringBuilder();
      int next;

      // The first character is always taken; the caller has already
      // established that it belongs to the text.
      do
      {
          buffer.Append((char)reader.Read());
          next = reader.Peek();
      }
      while (!(next == '<' || next == -1));

      SetProperties(XmlNodeType.Text, String.Empty, false, buffer.ToString(), true);
  }
  // Parses one or more name="value" pairs and stores each in the
  // attribute table. The reader is positioned on the first character
  // of the first attribute name. Parsing stops in front of '/', '>',
  // or the end of the input.
  private void ReadAttributes()
  {
      bool moreAttributes = true;
      while (moreAttributes)
      {
          string attributeName = ReadName();
          SkipWhitespace();
          Expect('=');
          SkipWhitespace();
          string attributeValue = ReadAttribute();
          SkipWhitespace();
          AddAttribute(attributeName, attributeValue);

          int next = reader.Peek();
          moreAttributes = !(next == '/' || next == '>' || next == -1);
      }
  }
  // Reads a quoted attribute value and returns the text between the
  // quotes. The reader is positioned on the opening quote, which may
  // be a single or a double quote; the matching closing quote is
  // consumed.
  //
  // Throws when the value is not quoted, contains '<', or runs into
  // the end of the input.
  private string ReadAttribute()
  {
      int quoteChar = reader.Read();
      if (!(quoteChar == '\'' || quoteChar == '"'))
      {
          throw new Exception("an attribute value was not quoted");
      }

      StringBuilder valueBuilder = new StringBuilder();
      for (;;)
      {
          if (reader.Peek() == quoteChar)
          {
              break;
          }

          int ch = reader.Read();
          if (ch == '<')
          {
              throw new Exception("attribute values cannot contain '<'");
          }
          if (ch == -1)
          {
              throw new Exception("unexpected end of file in an attribute value");
          }
          valueBuilder.Append((char)ch);
      }

      // Step over the closing quote.
      reader.Read();
      return valueBuilder.ToString();
  }
  // Parses a processing instruction and makes it the current node:
  // the target becomes the node name, and everything after the
  // whitespace that follows the target, up to the closing "?>",
  // becomes the value. The reader is positioned on the first
  // character of the target.
  //
  // Throws when the input ends before the closing "?>" is seen.
  // Previously a truncated instruction was silently accepted.
  private void ReadProcessingInstruction()
  {
      string target = ReadName();
      SkipWhitespace();

      StringBuilder valueBuilder = new StringBuilder();
      bool terminated = false;

      while (reader.Peek() != -1)
      {
          int ch = reader.Read();

          if (ch == '?' && reader.Peek() == '>')
          {
              // Consume the '>' of the closing "?>".
              reader.Read();
              terminated = true;
              break;
          }

          valueBuilder.Append((char)ch);
      }

      if (!terminated)
      {
          throw new Exception("unexpected end of file in a processing instruction");
      }

      SetProperties(
          XmlNodeType.ProcessingInstruction, // nodeType
          target, // name
          false, // isEmptyElement
          valueBuilder.ToString(), // value
          true // clearAttributes
      );
  }
  // Parses a comment and makes it the current node, with the text
  // between "<!--" and "-->" as its value. The reader is positioned
  // on the first character after the leading "<!".
  //
  // Anything other than "--" at that position (for example a DOCTYPE
  // declaration or a CDATA section) is rejected by Expect.
  //
  // Throws when the comment body contains "--" that is not followed
  // by '>', or when the input ends before the closing "-->" is seen.
  // Previously a truncated comment was silently accepted.
  private void ReadComment()
  {
      Expect('-');
      Expect('-');

      StringBuilder valueBuilder = new StringBuilder();
      bool terminated = false;

      while (reader.Peek() != -1)
      {
          int ch = reader.Read();

          if (ch == '-' && reader.Peek() == '-')
          {
              // Consume the second '-'; only '>' may follow it.
              reader.Read();

              if (reader.Peek() != '>')
              {
                  throw new Exception("comments cannot contain '--'");
              }

              reader.Read();
              terminated = true;
              break;
          }

          valueBuilder.Append((char)ch);
      }

      if (!terminated)
      {
          throw new Exception("unexpected end of file in a comment");
      }

      SetProperties(
          XmlNodeType.Comment, // nodeType
          String.Empty, // name
          false, // isEmptyElement
          valueBuilder.ToString(), // value
          true // clearAttributes
      );
  }
  // Reads a name and returns it. The reader is positioned on the
  // first character of the name; reading stops in front of the first
  // character that XmlChar does not accept as a name character.
  //
  // Throws when the first character cannot start a name.
  private string ReadName()
  {
      int first = reader.Peek();
      if (!XmlChar.IsFirstNameChar(first))
      {
          throw new Exception("a name did not start with a legal character");
      }

      StringBuilder nameBuilder = new StringBuilder();
      do
      {
          nameBuilder.Append((char)reader.Read());
      }
      while (XmlChar.IsNameChar(reader.Peek()));

      return nameBuilder.ToString();
  }
  // Consumes the next character and checks that it is the expected
  // one.
  //
  // Throws when a different character is found. The end of the input
  // is reported as such; previously the -1 sentinel was cast to a
  // char and printed as a meaningless character.
  private void Expect(int expected)
  {
      int ch = reader.Read();
      if (ch == expected)
      {
          return;
      }

      if (ch == -1)
      {
          throw new Exception(String.Format(
              "expected '{0}' ({1:X}) but found end of file",
              (char)expected,
              expected));
      }

      throw new Exception(String.Format(
          "expected '{0}' ({1:X}) but found '{2}' ({3:X})",
          (char)expected,
          expected,
          (char)ch,
          ch));
  }
  // Consumes characters for as long as XmlChar classifies the next
  // one as whitespace. The first non-whitespace character is left
  // unread.
  private void SkipWhitespace()
  {
      for (;;)
      {
          if (!XmlChar.IsWhitespace(reader.Peek()))
          {
              return;
          }
          reader.Read();
      }
  }
  679. }
  680. }