// DTDValidatingReader.cs
  1. using System;
  2. using System.Collections.Specialized;
  3. using System.Collections;
  4. using System.Text;
  5. using System.Xml;
  6. using System.Xml.Schema;
  7. namespace Mono.Xml
  8. {
  9. public class DTDValidatingReader : /*XmlValidatingReader*/XmlReader, IXmlLineInfo
  10. {
  // Wraps the given reader without an owning XmlValidatingReader.
  // Delegates to the two-argument constructor, passing null as the owner.
  public DTDValidatingReader (XmlReader reader)
      : this (reader, null)
  {
  }
  // Wraps the given reader and remembers the XmlValidatingReader (may be
  // null) that validation events are forwarded to. All per-document
  // bookkeeping collections start out empty.
  public DTDValidatingReader (XmlReader reader,
      XmlValidatingReader validatingReader)
  // : base (reader)
  {
      this.reader = reader;
      // Null when the wrapped reader is not an XmlTextReader.
      this.sourceTextReader = reader as XmlTextReader;
      this.validatingReader = validatingReader;

      // Validation state stacks.
      elementStack = new Stack ();
      automataStack = new Stack ();

      // Attribute bookkeeping for the current element.
      attributes = new StringCollection ();
      attributeValues = new NameValueCollection ();
      valueBuilder = new StringBuilder ();

      // ID / IDREF bookkeeping.
      idList = new ArrayList ();
      missingIDReferences = new ArrayList ();
  }
  // The wrapped reader, and the same object viewed as XmlTextReader
  // (null when it is some other XmlReader implementation).
  XmlReader reader;
  XmlTextReader sourceTextReader;

  // DTD obtained when Read() meets the DocumentType node; null until then.
  DTDObjectModel dtd;

  // Names of the currently open elements, and the automata saved for each.
  Stack elementStack;
  Stack automataStack;

  // Name of the element Read() last stopped on (null for other nodes) and
  // of the attribute the caller has moved to (null when on the node itself).
  string currentElement;
  string currentAttribute;
  // True after ReadAttributeValue() has handed out the attribute's value.
  bool consumedAttribute;
  // Set once the first element node has been read.
  bool insideContent;
  // bool insideAttributeValue;

  // Content-model state for the current element, and the state saved
  // before the most recent transition (used by the error paths).
  DTDAutomata currentAutomata;
  DTDAutomata previousAutomata;
  // Set when the XML declaration carries standalone="yes".
  bool isStandalone;

  // Attribute names of the current element and their values.
  StringCollection attributes;
  NameValueCollection attributeValues;
  // Scratch buffer shared by attribute value handling.
  StringBuilder valueBuilder;

  // ID values seen so far, and IDREF values seen without a matching ID.
  ArrayList idList;
  ArrayList missingIDReferences;

  // Receiver of validation events; may be null.
  XmlValidatingReader validatingReader;
  // ValidationEventHandler handler;
  // The DTD object model in use; null until a DocumentType node is read.
  public DTDObjectModel DTD {
      get {
          return dtd;
      }
  }
  // Closes the wrapped reader.
  public override void Close ()
  {
      reader.Close ();
  }
  // Returns the value of the attribute at index i.
  // Without a DTD the call is forwarded to the wrapped reader. With a DTD
  // the value comes from the list built while the element was validated
  // (attribute validation is already done, so the name can be ignored).
  // Throws IndexOutOfRangeException when i is at or beyond the attribute count.
  public override string GetAttribute (int i)
  {
      if (dtd == null)
          return reader.GetAttribute (i);

      if (i >= attributes.Count)
          throw new IndexOutOfRangeException ("Specified index is out of range: " + i);

      string stored = attributeValues [i];
      return FilterNormalization (stored);
  }
  // Returns the value of the named attribute: from the wrapped reader when
  // no DTD is present, otherwise from the recorded attribute values.
  public override string GetAttribute (string name)
  {
      return dtd == null
          ? reader.GetAttribute (name)
          : FilterNormalization (attributeValues [name]);
  }
  // Returns the value of the attribute with the given name and namespace.
  // With a DTD, an empty namespace is answered from the recorded attribute
  // values; any other namespace is looked up on the wrapped reader.
  public override string GetAttribute (string name, string ns)
  {
      if (dtd == null)
          return reader.GetAttribute (name, ns);

      // FIXME: check whether this way is correct.
      if (ns != String.Empty)
          return FilterNormalization (reader.GetAttribute (name, ns));

      return GetAttribute (name);
  }
  // Line information is available only when the wrapped reader implements
  // IXmlLineInfo and itself reports that it has line information.
  bool IXmlLineInfo.HasLineInfo ()
  {
      IXmlLineInfo lineInfo = reader as IXmlLineInfo;
      return lineInfo != null && lineInfo.HasLineInfo ();
  }
  // Forwards the prefix lookup to the wrapped reader.
  // (Does it mean anything with DTD?)
  public override string LookupNamespace (string prefix)
  {
      string uri = reader.LookupNamespace (prefix);
      return uri;
  }
  // Moves to the attribute at index i.
  //
  // Without a DTD the wrapped reader does the move. With a DTD the index
  // refers to the attribute list built during validation: specified
  // attributes first, in document order, followed by DTD-defaulted ones.
  // Nothing happens when the current node is not an element.
  // Throws IndexOutOfRangeException when i is at or beyond the attribute count.
  public override void MoveToAttribute (int i)
  {
      if (dtd == null) {
          reader.MoveToAttribute (i);
          currentAttribute = reader.Name;
          consumedAttribute = false;
          return;
      }

      if (currentElement == null)
          return;

      if (i >= attributes.Count)
          throw new IndexOutOfRangeException ("The index is out of range.");

      string target = attributes [i];

      // Keep the wrapped reader in step for attributes it actually has, so
      // that NodeType, Prefix, QuoteChar etc. describe the attribute rather
      // than the element. Defaulted attributes sit past its AttributeCount
      // and exist only in this wrapper.
      if (i < reader.AttributeCount)
          reader.MoveToAttribute (i);

      currentAttribute = target;
      consumedAttribute = false;
  }
  // Moves to the attribute with the given name. Returns false, leaving the
  // position unchanged, when there is no such attribute.
  //
  // Without a DTD the wrapped reader does the move. With a DTD the name is
  // searched in the attribute list built during validation, which also
  // contains DTD-defaulted attributes.
  public override bool MoveToAttribute (string name)
  {
      if (dtd == null) {
          bool moved = reader.MoveToAttribute (name);
          if (moved) {
              currentAttribute = reader.Name;
              consumedAttribute = false;
          }
          return moved;
      }

      if (currentElement == null)
          return false;

      int idx = attributes.IndexOf (name);
      if (idx < 0)
          return false;

      // Specified attributes occupy the leading slots of the list in the
      // wrapped reader's own order; move it too so that node properties
      // read from it describe the attribute and not the element.
      if (idx < reader.AttributeCount)
          reader.MoveToAttribute (idx);

      currentAttribute = name;
      consumedAttribute = false;
      return true;
  }
  // Moves to the attribute with the given name and namespace.
  // The wrapped reader is tried first. If it cannot find the attribute:
  // without a DTD the result is false; with a DTD a non-empty namespace is
  // rejected and an empty one falls back to the name-only lookup.
  public override bool MoveToAttribute (string name, string ns)
  {
      if (reader.MoveToAttribute (name, ns)) {
          currentAttribute = reader.Name;
          consumedAttribute = false;
          return true;
      }

      if (dtd == null)
          return false;

      if (ns != String.Empty)
          throw new InvalidOperationException ("DTD validating reader does not support namespace.");

      return MoveToAttribute (name);
  }
  // Moves back from an attribute to the element that owns it.
  // Returns true when the position changed, false when the reader was not
  // on an attribute.
  public override bool MoveToElement ()
  {
      bool moved = reader.MoveToElement ();

      // The wrapped reader reports "not moved" when it is already on the
      // element. That is also the case while this wrapper is positioned on
      // an attribute the wrapped reader does not have (a DTD default), so
      // the wrapper's own attribute position has to be consulted as well.
      if (!moved && currentAttribute == null)
          return false;

      currentAttribute = null;
      consumedAttribute = false;
      return true;
  }
  // Moves to the first attribute of the current element.
  // Without a DTD the wrapped reader decides. With a DTD the first entry of
  // the recorded attribute list is selected, provided the current node is
  // an element and the list is not empty.
  public override bool MoveToFirstAttribute ()
  {
      if (dtd == null) {
          if (!reader.MoveToFirstAttribute ())
              return false;
          currentAttribute = reader.Name;
          consumedAttribute = false;
          return true;
      }

      // It should access attributes by *defined* order.
      if (NodeType != XmlNodeType.Element || attributes.Count == 0)
          return false;

      // The result is ignored: the first recorded attribute may be one the
      // wrapped reader does not have.
      reader.MoveToFirstAttribute ();
      currentAttribute = attributes [0];
      consumedAttribute = false;
      return true;
  }
  // Moves to the attribute after the current one.
  // Without a DTD the wrapped reader decides. With a DTD the recorded
  // attribute list is walked; when not yet on an attribute this behaves
  // like MoveToFirstAttribute().
  public override bool MoveToNextAttribute ()
  {
      if (dtd == null) {
          if (!reader.MoveToNextAttribute ())
              return false;
          currentAttribute = reader.Name;
          consumedAttribute = false;
          return true;
      }

      if (currentAttribute == null)
          return MoveToFirstAttribute ();

      int next = attributes.IndexOf (currentAttribute) + 1;
      if (next >= attributes.Count)
          return false;

      // The result is ignored: the next recorded attribute may be one the
      // wrapped reader does not have.
      reader.MoveToNextAttribute ();
      currentAttribute = attributes [next];
      consumedAttribute = false;
      return true;
  }
  // Advances the wrapped reader by one node and, once a DTD is known,
  // validates that node against it.
  //
  // Returns false at the end of input. Throws InvalidOperationException if
  // the input ends while elements are still open. Validation problems are
  // reported through HandleError().
  //
  // NOTE(review): missingIDReferences is filled in by attribute validation
  // but nothing in this method reports unresolved IDREFs at end of
  // document - confirm whether that check lives elsewhere.
  [MonoTODO]
  public override bool Read ()
  {
      MoveToElement ();
      bool b = reader.Read ();

      // Per-node state is rebuilt from scratch for every node.
      currentElement = null;
      currentAttribute = null;
      consumedAttribute = false;
      attributes.Clear ();
      attributeValues.Clear ();

      // The first element node starts content validation; without a DTD
      // the automata stays null, which disables validation below.
      if (!insideContent && reader.NodeType == XmlNodeType.Element) {
          insideContent = true;
          if (dtd == null)
              currentAutomata = null;
          else
              currentAutomata = dtd.RootAutomata;
      }

      if (!b) {
          if (elementStack.Count != 0)
              throw new InvalidOperationException ("Unexpected end of XmlReader.");
          return false;
      }

      switch (reader.NodeType) {
      case XmlNodeType.XmlDeclaration:
          if (GetAttribute ("standalone") == "yes")
              isStandalone = true;
          break;

      case XmlNodeType.DocumentType:
          // An XmlTextReader has already built the DTD; for any other
          // reader, build it from the DocumentType node's content.
          XmlTextReader xmlTextReader = reader as XmlTextReader;
          if (xmlTextReader == null) {
              xmlTextReader = new XmlTextReader ("", XmlNodeType.Document, null);
              xmlTextReader.GenerateDTDObjectModel (reader.Name,
                  reader ["PUBLIC"], reader ["SYSTEM"], reader.Value);
          }
          this.dtd = xmlTextReader.DTD;
          break;

      case XmlNodeType.Element: // startElementDeriv
          // If no schema specification, then skip validation.
          if (currentAutomata == null) {
              SetupValidityIgnorantAttributes ();
              break;
          }

          previousAutomata = currentAutomata;
          currentAutomata = currentAutomata.TryStartElement (reader.Name);
          if (currentAutomata == DTD.Invalid) {
              HandleError (String.Format ("Invalid start element found: {0}", reader.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
              currentAutomata = previousAutomata;
          }

          DTDElementDeclaration decl = DTD.ElementDecls [reader.Name];
          if (decl == null) {
              HandleError (String.Format ("Element {0} is not declared.", reader.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
              currentAutomata = previousAutomata;
          }

          currentElement = Name;
          elementStack.Push (reader.Name);
          // Saved state of the parent; restored when this element ends.
          automataStack.Push (currentAutomata);

          if (decl != null) { // i.e. not invalid
              currentAutomata = decl.ContentModel.GetAutomata ();

              // check attributes
              if (decl.Attributes == null) {
                  if (reader.HasAttributes) {
                      HandleError (String.Format ("Attributes are found on element {0} while it has no attribute definitions.",decl.Name),
                          XmlSeverityType.Error);
                      // FIXME: validation recovery code here.
                  }
                  // With nothing to validate against, still record the
                  // attributes that are present; otherwise they would be
                  // unreachable through this reader once a DTD is in use.
                  SetupValidityIgnorantAttributes ();
              }
              else
                  ValidateAttributes (decl);
          } else
              SetupValidityIgnorantAttributes ();

          // If it is empty element then directly check end element.
          if (reader.IsEmptyElement)
              goto case XmlNodeType.EndElement;
          break;

      case XmlNodeType.EndElement: // endElementDeriv
          // If no schema specification, then skip validation.
          if (currentAutomata == null)
              break;

          decl = DTD.ElementDecls [reader.Name];
          if (decl == null) {
              HandleError (String.Format ("Element {0} is not declared.", reader.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
          }

          previousAutomata = currentAutomata;
          // The result is only inspected, never stored: currentAutomata is
          // restored from automataStack right below.
          DTDAutomata endState = currentAutomata.TryEndElement ();
          if (endState == DTD.Invalid) {
              HandleError (String.Format ("Invalid end element found: {0}", reader.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
          }

          elementStack.Pop ();
          currentAutomata = automataStack.Pop () as DTDAutomata;
          break;

      case XmlNodeType.CDATA:
      case XmlNodeType.SignificantWhitespace:
      case XmlNodeType.Text:
          // If no schema specification, then skip validation.
          if (currentAutomata == null)
              break;
          // Character data outside any open element has no declaration to
          // be checked against.
          if (elementStack.Count == 0)
              break;

          string parentName = elementStack.Peek () as string;
          DTDElementDeclaration elem = dtd.ElementDecls [parentName];
          // An undeclared parent was already reported at its start tag;
          // there is no content model to test the text against.
          if (elem == null)
              break;

          if (!elem.IsMixedContent) {
              HandleError (String.Format ("Current element {0} does not allow character data content.", parentName),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
              // NOTE(review): this restores whatever state the last
              // start/end tag saved - confirm that is the intended recovery.
              currentAutomata = previousAutomata;
          }
          break;
      }
      return true;
  }
  // Records the attributes of the current element exactly as the wrapped
  // reader presents them, without validation, then moves the wrapped
  // reader back to the element.
  private void SetupValidityIgnorantAttributes ()
  {
      if (!reader.MoveToFirstAttribute ())
          return;

      // If it was invalid, simply add specified attributes.
      bool more = true;
      while (more) {
          attributes.Add (reader.Name);
          attributeValues.Add (reader.Name, reader.Value);
          more = reader.MoveToNextAttribute ();
      }
      reader.MoveToElement ();
  }
  // Reports a validation problem.
  // Nothing happens when the owning XmlValidatingReader has validation
  // switched off. Otherwise an XmlSchemaException carrying the message,
  // the current line information (zeros when unavailable) and BaseURI is
  // built; it is raised as a validation event on the owner, or thrown
  // when there is no owner.
  private void HandleError (string message, XmlSeverityType severity)
  {
      bool hasOwner = validatingReader != null;
      if (hasOwner && validatingReader.ValidationType == ValidationType.None)
          return;

      IXmlLineInfo lineInfo = this as IXmlLineInfo;
      int line = 0;
      int column = 0;
      if (lineInfo.HasLineInfo ()) {
          line = lineInfo.LineNumber;
          column = lineInfo.LinePosition;
      }

      XmlSchemaException ex = new XmlSchemaException (
          message,
          line,
          column,
          null,
          BaseURI,
          null);

      if (!hasOwner)
          throw ex;

      this.validatingReader.OnValidationEvent (this,
          new ValidationEventArgs (ex, message, severity));
  }
  // Validates the attributes of the current element against the attribute
  // definitions of decl and fills the attribute name/value lists.
  //
  // Specified attributes are recorded first, in document order, with
  // declared entity references in their values replaced by the entity
  // value. Attributes that are absent but have a default value in the DTD
  // are appended afterwards. The wrapped reader is left on the element.
  // Problems are reported through HandleError().
  private void ValidateAttributes (DTDElementDeclaration decl)
  {
      while (reader.MoveToNextAttribute ()) {
          string attrName = reader.Name;
          attributes.Add (attrName);

          // Assemble the attribute value from its text and entity parts.
          while (reader.ReadAttributeValue ()) {
              if (reader.NodeType == XmlNodeType.EntityReference) {
                  DTDEntityDeclaration edecl = DTD.EntityDecls [reader.Name];
                  if (edecl == null)
                      HandleError (String.Format ("Referenced entity {0} is not declared.", reader.Name),
                          XmlSeverityType.Error);
                  else
                      valueBuilder.Append (edecl.EntityValue);
              }
              else
                  valueBuilder.Append (reader.Value);
          }

          // Step out of the value nodes and back onto the attribute itself.
          reader.MoveToElement ();
          reader.MoveToAttribute (attrName);

          string attrValue = valueBuilder.ToString ();
          valueBuilder.Length = 0;
          attributeValues.Add (attrName, attrValue);

          DTDAttributeDefinition def = decl.Attributes [reader.Name];
          if (def == null) {
              HandleError (String.Format ("Attribute {0} is not declared.", reader.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
              continue;
          }

          // check identity constraint
          switch (def.Datatype.TokenizedType) {
          case XmlTokenizedType.ID:
              if (this.idList.Contains (attrValue)) {
                  HandleError (String.Format ("Node with ID {0} was already appeared.", attrValue),
                      XmlSeverityType.Error);
                  // FIXME: validation recovery code here.
              } else {
                  if (missingIDReferences.Contains (attrValue))
                      missingIDReferences.Remove (attrValue);
                  idList.Add (attrValue);
              }
              break;
          case XmlTokenizedType.IDREF:
              if (!idList.Contains (attrValue))
                  missingIDReferences.Add (attrValue);
              break;
          case XmlTokenizedType.IDREFS:
              // Each token of the list is a separate reference and has to
              // be looked up on its own, not the attribute value as a whole.
              string [] idrefs = def.Datatype.ParseValue (attrValue, NameTable, null) as string [];
              if (idrefs != null) {
                  foreach (string idref in idrefs)
                      if (!idList.Contains (idref))
                          missingIDReferences.Add (idref);
              }
              break;
          }

          switch (def.OccurenceType) {
          case DTDAttributeOccurenceType.Required:
              // NOTE(review): this treats an explicitly empty value like a
              // missing attribute - confirm that this is intended.
              if (attrValue == String.Empty) {
                  HandleError (String.Format ("Required attribute {0} in element {1} not found .",
                      def.Name, decl.Name),
                      XmlSeverityType.Error);
                  // FIXME: validation recovery code here.
              }
              break;
          case DTDAttributeOccurenceType.Fixed:
              if (attrValue != def.DefaultValue) {
                  HandleError (String.Format ("Fixed attribute {0} in element {1} has invalid value {2}.",
                      def.Name, decl.Name, attrValue),
                      XmlSeverityType.Error);
                  // FIXME: validation recovery code here.
              }
              break;
          }
      }

      // Check if all required attributes exist, and/or
      // if there is default values, then add them.
      foreach (DTDAttributeDefinition def in decl.Attributes.Definitions) {
          if (attributes.Contains (def.Name))
              continue;

          if (def.OccurenceType == DTDAttributeOccurenceType.Required) {
              // Report the missing attribute's name (not the element's).
              HandleError (String.Format ("Required attribute {0} was not found.", def.Name),
                  XmlSeverityType.Error);
              // FIXME: validation recovery code here.
          }
          else if (def.DefaultValue != null) {
              attributes.Add (def.Name);
              attributeValues.Add (def.Name, def.DefaultValue);
          }
      }

      reader.MoveToElement ();
  }
  // Moves from an attribute to its value.
  //
  // With entity expansion in effect the whole value is exposed as a single
  // node handled by this wrapper: the first call returns true, later calls
  // return false. Otherwise the wrapped reader parses the value.
  public override bool ReadAttributeValue ()
  {
      if (consumedAttribute)
          return false;

      // This reader can be created without an owning XmlValidatingReader;
      // in that case use ExpandEntities, which is XmlValidatingReader's
      // documented default, instead of dereferencing null.
      bool expandEntities = validatingReader == null ||
          validatingReader.EntityHandling == EntityHandling.ExpandEntities;

      if (NodeType == XmlNodeType.Attribute && expandEntities) {
          consumedAttribute = true;
          return true;
      }

      return reader.ReadAttributeValue ();
  }
  // Forwarded directly to the wrapped reader, so the nodes consumed by
  // this call do not pass through this class's Read().
  // MS.NET 1.0 has a serious bug here. It skips validation.
  public override string ReadInnerXml ()
  {
      string xml = reader.ReadInnerXml ();
      return xml;
  }
  // Forwarded directly to the wrapped reader, so the nodes consumed by
  // this call do not pass through this class's Read().
  // MS.NET 1.0 has a serious bug here. It skips validation.
  public override string ReadOuterXml ()
  {
      string xml = reader.ReadOuterXml ();
      return xml;
  }
  // Forwarded directly to the wrapped reader.
  // It seems to be the same as ReadInnerXml().
  public override string ReadString ()
  {
      string text = reader.ReadString ();
      return text;
  }
  // Not implemented yet: always throws NotImplementedException.
  [MonoTODO]
  public override void ResolveEntity ()
  {
      throw new NotImplementedException ();
  }
  // Number of attributes on the current node. Once a DTD is known and the
  // first element has been read, this is the size of the recorded
  // attribute list; before that the wrapped reader's count is used.
  public override int AttributeCount {
      get {
          bool useRecorded = dtd != null && insideContent;
          return useRecorded ? attributes.Count : reader.AttributeCount;
      }
  }
  // Base URI as reported by the wrapped reader.
  [MonoTODO ("Should consider general entities.")]
  public override string BaseURI {
      get { return reader.BaseURI; }
  }
  // Always reports true.
  public override bool CanResolveEntity {
      get {
          return true;
      }
  }
  // Depth of the current node: the wrapped reader's depth, plus one while
  // positioned on a defaulted attribute.
  [MonoTODO ("Should consider general entities' depth")]
  public override int Depth {
      get {
          if (IsDefault)
              return reader.Depth + 1;
          return reader.Depth;
      }
  }
  // Forwards the EOF state of the wrapped reader.
  [MonoTODO]
  public override bool EOF {
      get {
          return reader.EOF;
      }
  }
  // True on a defaulted attribute; otherwise whatever the wrapped reader
  // reports for its current node.
  public override bool HasValue {
      get {
          return IsDefault || reader.HasValue;
      }
  }
  // True when positioned on an attribute for which the wrapped reader has
  // no value.
  public override bool IsDefault {
      get {
          return currentAttribute != null
              && reader.GetAttribute (currentAttribute) == null;
      }
  }
  // Forwards the empty-element flag of the wrapped reader.
  public override bool IsEmptyElement {
      get {
          return reader.IsEmptyElement;
      }
  }
  // Indexer shorthand for GetAttribute (i).
  public override string this [int i] {
      get {
          return GetAttribute (i);
      }
  }
  // Indexer shorthand for GetAttribute (name).
  public override string this [string name] {
      get {
          return GetAttribute (name);
      }
  }
  // Indexer shorthand for GetAttribute (name, ns).
  public override string this [string name, string ns] {
      get {
          return GetAttribute (name, ns);
      }
  }
  // Line number from the wrapped reader, or 0 when it does not implement
  // IXmlLineInfo.
  public int LineNumber {
      get {
          IXmlLineInfo lineInfo = reader as IXmlLineInfo;
          if (lineInfo == null)
              return 0;
          return lineInfo.LineNumber;
      }
  }
  // Line position from the wrapped reader, or 0 when it does not implement
  // IXmlLineInfo.
  public int LinePosition {
      get {
          IXmlLineInfo lineInfo = reader as IXmlLineInfo;
          if (lineInfo == null)
              return 0;
          return lineInfo.LinePosition;
      }
  }
  // Local name of the current node. On a defaulted attribute this is the
  // attribute name (empty once its value has been consumed); otherwise the
  // wrapped reader answers.
  public override string LocalName {
      get {
          if (!IsDefault)
              return reader.LocalName;
          if (consumedAttribute)
              return String.Empty;
          return currentAttribute;
      }
  }
  // Qualified name of the current node. On a defaulted attribute this is
  // the attribute name (empty once its value has been consumed); otherwise
  // the wrapped reader answers.
  public override string Name {
      get {
          if (!IsDefault)
              return reader.Name;
          if (consumedAttribute)
              return String.Empty;
          return currentAttribute;
      }
  }
  // Namespace URI of the current node: always empty on a defaulted
  // attribute, otherwise the wrapped reader's value.
  public override string NamespaceURI {
      get {
          if (IsDefault)
              return String.Empty;
          return reader.NamespaceURI;
      }
  }
  // The name table of the wrapped reader.
  public override XmlNameTable NameTable {
      get {
          return reader.NameTable;
      }
  }
  // Type of the current node: Text once an attribute value has been
  // consumed, Attribute on a defaulted attribute, otherwise the wrapped
  // reader's node type.
  public override XmlNodeType NodeType {
      get {
          // If consumedAttribute is true, then entities must be resolved.
          if (consumedAttribute)
              return XmlNodeType.Text;
          if (IsDefault)
              return XmlNodeType.Attribute;
          return reader.NodeType;
      }
  }
  // Namespace prefix of the current node. Empty while an attribute is
  // selected but the node type is not Attribute, and empty on a defaulted
  // attribute; otherwise the wrapped reader's prefix.
  public override string Prefix {
      get {
          bool attributeSelected = currentAttribute != null;
          if (attributeSelected && NodeType != XmlNodeType.Attribute)
              return String.Empty;
          if (IsDefault)
              return String.Empty;
          return reader.Prefix;
      }
  }
  // Quote character as reported by the wrapped reader.
  public override char QuoteChar {
      get {
          // If it is not actually on an attribute, then it returns
          // undefined value or '"'.
          char quote = reader.QuoteChar;
          return quote;
      }
  }
  // Forwards the read state of the wrapped reader.
  public override ReadState ReadState {
      get { return reader.ReadState; }
  }
  // Characters trimmed from both ends of a normalized non-CDATA value.
  char [] whitespaceChars = new char [] {' '};

  // Applies attribute-value normalization to rawValue.
  //
  // The value is returned untouched unless a DTD is present, the current
  // node is an attribute, and the wrapped reader is an XmlTextReader with
  // Normalization switched on. When it applies, CR, LF and TAB become
  // spaces; for attributes declared with a type other than CDATA, runs of
  // spaces are additionally collapsed to one and leading/trailing spaces
  // are removed. Attributes without a declaration are treated as CDATA.
  // A null rawValue yields null.
  private string FilterNormalization (string rawValue)
  {
      // "No such attribute" must stay distinguishable from an empty value.
      if (rawValue == null)
          return null;

      if (DTD == null ||
          NodeType != XmlNodeType.Attribute ||
          sourceTextReader == null ||
          !sourceTextReader.Normalization)
          return rawValue;

      // Look the definition up defensively: the element or the attribute
      // may be undeclared (already reported as a validation error), and a
      // declared element may have no attribute list at all.
      DTDAttributeDefinition def = null;
      if (currentElement != null && currentAttribute != null) {
          DTDElementDeclaration decl = dtd.ElementDecls [currentElement];
          if (decl != null && decl.Attributes != null)
              def = decl.Attributes [currentAttribute] as DTDAttributeDefinition;
      }
      bool isCData = def == null ||
          def.Datatype.TokenizedType == XmlTokenizedType.CDATA;

      valueBuilder.Append (rawValue);
      try {
          valueBuilder.Replace ('\r', ' ');
          valueBuilder.Replace ('\n', ' ');
          valueBuilder.Replace ('\t', ' ');

          if (isCData)
              return valueBuilder.ToString ();

          // Collapse every run of spaces to a single space. The index only
          // advances when nothing was removed, so the character that slides
          // into position i after a removal is examined as well.
          int i = 1;
          while (i < valueBuilder.Length) {
              if (valueBuilder [i] == ' ' && valueBuilder [i - 1] == ' ')
                  valueBuilder.Remove (i, 1);
              else
                  i++;
          }
          return valueBuilder.ToString ().Trim (whitespaceChars);
      } finally {
          // The builder is shared scratch space; always hand it back empty.
          valueBuilder.Length = 0;
      }
  }
  // Text value of the current node.
  //
  // On a defaulted attribute this is the default from the DTD (its
  // normalized form when the wrapped XmlTextReader has Normalization on).
  // On any other attribute, or on its consumed value node, it is the value
  // recorded while the element was read. Everything else comes from the
  // wrapped reader. Recorded and wrapped-reader values both pass through
  // FilterNormalization().
  public override string Value {
      get {
          // This check also covers value node of default attributes.
          if (IsDefault) {
              DTDAttributeDefinition def =
                  dtd.ElementDecls [currentElement]
                  .Attributes [currentAttribute]
                  as DTDAttributeDefinition;
              return sourceTextReader != null && sourceTextReader.Normalization ?
                  def.NormalizedDefaultValue : def.DefaultValue;
          }

          // As to this property, MS.NET seems ignorant of EntityHandling...
          if (NodeType == XmlNodeType.Attribute || consumedAttribute) {
              string recorded = attributeValues [currentAttribute];
              // Attribute values are recorded for element nodes only.
              // Attributes of other nodes (e.g. the XML declaration) have
              // no entry, so take the value from the wrapped reader rather
              // than returning null.
              if (recorded == null)
                  recorded = reader.Value;
              return FilterNormalization (recorded);
          }

          return FilterNormalization (reader.Value);
      }
  }
  // xml:lang scope as reported by the wrapped reader.
  [MonoTODO ("Should consider default xml:lang values.")]
  public override string XmlLang {
      get {
          return reader.XmlLang;
      }
  }
  // xml:space scope as reported by the wrapped reader.
  [MonoTODO ("Should consider default xml:space values.")]
  public override XmlSpace XmlSpace {
      get {
          return reader.XmlSpace;
      }
  }
  644. }
  645. }