XmlSchemaInference.cs 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135
  1. //
  2. // XmlSchemaInference.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C)2004 Novell Inc.
  8. //
  9. //
  10. // Permission is hereby granted, free of charge, to any person obtaining
  11. // a copy of this software and associated documentation files (the
  12. // "Software"), to deal in the Software without restriction, including
  13. // without limitation the rights to use, copy, modify, merge, publish,
  14. // distribute, sublicense, and/or sell copies of the Software, and to
  15. // permit persons to whom the Software is furnished to do so, subject to
  16. // the following conditions:
  17. //
  18. // The above copyright notice and this permission notice shall be
  19. // included in all copies or substantial portions of the Software.
  20. //
  21. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28. //
  29. #if NET_2_0
  30. using System;
  31. using System.Collections;
  32. using System.Xml;
  33. using System.Xml.Schema;
  34. using QName = System.Xml.XmlQualifiedName;
  35. using Form = System.Xml.Schema.XmlSchemaForm;
  36. using Use = System.Xml.Schema.XmlSchemaUse;
  37. using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
  38. using SOMObject = System.Xml.Schema.XmlSchemaObject;
  39. using Element = System.Xml.Schema.XmlSchemaElement;
  40. using Attr = System.Xml.Schema.XmlSchemaAttribute;
  41. using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
  42. using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
  43. using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
  44. using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
  45. using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
  46. using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
  47. using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
  48. using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
  49. using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
  50. using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
  51. using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
  52. using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
  53. using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
  54. using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
  55. using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
  56. using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
  57. using Particle = System.Xml.Schema.XmlSchemaParticle;
  58. using Sequence = System.Xml.Schema.XmlSchemaSequence;
  59. using Choice = System.Xml.Schema.XmlSchemaChoice;
  60. namespace System.Xml.Schema
  61. {
  // Public entry point for XML Schema inference. It holds the two
  // inference options and forwards the actual work to
  // XsdInference.Process ().
  [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
  public class XmlSchemaInference
  {
      // Restricted is the initial value of both options below.
      public enum InferenceOption {
          Restricted,
          Relaxed,
      }

      InferenceOption occurrence = InferenceOption.Restricted;
      InferenceOption typeInference = InferenceOption.Restricted;

      public XmlSchemaInference ()
      {
      }

      // Passed to the inference engine as "laxOccurence" when Relaxed.
      public InferenceOption Occurrence {
          get { return occurrence; }
          set { occurrence = value; }
      }

      // Passed to the inference engine as "laxTypeInference" when Relaxed.
      public InferenceOption TypeInference {
          // Must read the backing field: returning the property itself
          // would call this getter again and never terminate.
          get { return typeInference; }
          set { typeInference = value; }
      }

      // Infers a schema for the reader content into a fresh XmlSchemaSet.
      public XmlSchemaSet InferSchema (XmlReader xmlReader)
      {
          return InferSchema (xmlReader, new XmlSchemaSet ());
      }

      // Infers a schema for the reader content, refining the definitions
      // already contained in "schemas". Returns the schema set that the
      // inference engine ends up holding.
      public XmlSchemaSet InferSchema (XmlReader xmlReader,
          XmlSchemaSet schemas)
      {
          // Fail with a descriptive exception instead of a
          // NullReferenceException deep inside the engine.
          if (xmlReader == null)
              throw new ArgumentNullException ("xmlReader");

          return XsdInference.Process (xmlReader, schemas,
              occurrence == InferenceOption.Relaxed,
              typeInference == InferenceOption.Relaxed);
      }
  }
  94. class XsdInference
  95. {
  // Builds a throw-away inference engine around the given reader and
  // schema set, runs it once and hands back the engine's schema set.
  public static XmlSchemaSet Process (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurence,
      bool laxTypeInference)
  {
      XsdInference engine = new XsdInference (
          xmlReader, schemas, laxOccurence, laxTypeInference);
      engine.Run ();
      return engine.schemas;
  }
  // Namespace URIs that get special handling during inference.
  public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
  public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
  public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";

  // Qualified names in the XML Schema namespace used as inferred types.
  static readonly QName QNameString = new QName ("string", XmlSchema.Namespace);
  static readonly QName QNameBoolean = new QName ("boolean", XmlSchema.Namespace);
  static readonly QName QNameAnyType = new QName ("anyType", XmlSchema.Namespace);
  static readonly QName QNameByte = new QName ("byte", XmlSchema.Namespace);
  static readonly QName QNameUByte = new QName ("unsignedByte", XmlSchema.Namespace);
  static readonly QName QNameShort = new QName ("short", XmlSchema.Namespace);
  static readonly QName QNameUShort = new QName ("unsignedShort", XmlSchema.Namespace);
  static readonly QName QNameInt = new QName ("int", XmlSchema.Namespace);
  static readonly QName QNameUInt = new QName ("unsignedInt", XmlSchema.Namespace);
  static readonly QName QNameLong = new QName ("long", XmlSchema.Namespace);
  static readonly QName QNameULong = new QName ("unsignedLong", XmlSchema.Namespace);
  static readonly QName QNameDecimal = new QName ("decimal", XmlSchema.Namespace);
  static readonly QName QNameUDecimal = new QName ("unsignedDecimal", XmlSchema.Namespace);
  static readonly QName QNameDouble = new QName ("double", XmlSchema.Namespace);
  static readonly QName QNameFloat = new QName ("float", XmlSchema.Namespace);
  static readonly QName QNameDateTime = new QName ("dateTime", XmlSchema.Namespace);
  static readonly QName QNameDuration = new QName ("duration", XmlSchema.Namespace);

  // Per-run state: the instance document, the schema set being refined
  // and the two "lax" switches handed to the constructor.
  XmlReader source;
  XmlSchemaSet schemas;
  bool laxOccurence;
  bool laxTypeInference;

  // Lookup tables; their use is outside this part of the file.
  Hashtable newElements = new Hashtable ();
  Hashtable newAttributes = new Hashtable ();
  // Stores the reader, the schema set and both lax switches; no work
  // is done until Run () is called.
  private XsdInference (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurence,
      bool laxTypeInference)
  {
      source = xmlReader;
      this.schemas = schemas;
      this.laxTypeInference = laxTypeInference;
      this.laxOccurence = laxOccurence;
  }
  // Drives one inference pass: compiles the schema set, positions the
  // reader on the top-level element, infers (or merges into) the global
  // element declaration for it, then compiles the schema set again.
  // Throws ArgumentException when the reader content is not an element.
  private void Run ()
  {
      // XmlSchemaSet need to be compiled.
      schemas.Compile ();

      // move to top-level element
      source.MoveToContent ();
      if (source.NodeType != XmlNodeType.Element)
          throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

      QName qname = new QName (source.LocalName,
          source.NamespaceURI);

      // A missing global declaration is created here and inferred as
      // new; an existing one is merged with the instance.
      Element el = GetGlobalElement (qname);
      bool isNew = el == null;
      if (isNew)
          el = CreateGlobalElement (qname);
      InferElement (el, qname.Namespace, isNew);

      // finally compile again.
      schemas.Compile ();
  }
  // Adds the schema for the xml: namespace to the schema set unless a
  // schema for that namespace is already present.
  private void IncludeXmlAttributes ()
  {
      if (schemas.Schemas (NamespaceXml).Count != 0)
          return;

      // FIXME: do it from resources.
      schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
  }
  // Infers the definition of "el" from the element the reader is
  // currently positioned on. "isNew" is passed through to the
  // attribute and content inference.
  private void InferElement (Element el, string ns, bool isNew)
  {
      // An <element ref='...'> is redirected to the referenced
      // declaration, which is created when it does not exist yet.
      if (el.RefName != QName.Empty) {
          Element target = GetGlobalElement (el.RefName);
          if (target != null)
              InferElement (target, ns, isNew);
          else {
              target = CreateElement (el.RefName);
              InferElement (target, ns, true);
          }
          return;
      }

      // Attributes
      if (source.MoveToFirstAttribute ()) {
          InferAttributes (el, ns, isNew);
          source.MoveToElement ();
      }

      // Content
      if (!source.IsEmptyElement) {
          InferContent (el, ns, isNew);
          source.ReadEndElement ();
      }
      else {
          InferAsEmptyElement (el, ns, isNew);
          source.Read ();
          source.MoveToContent ();
      }

      // Fall back to xs:string when nothing above assigned a type.
      bool untyped = el.SchemaType == null &&
          el.SchemaTypeName == QName.Empty;
      if (untyped)
          el.SchemaTypeName = QNameString;
  }
  223. #region Attribute Inference
  // Indexes the attribute definitions of "attList" by qualified name:
  // references by their RefName, local definitions by their Name with
  // an empty namespace. Anything that is not an attribute is rejected.
  private Hashtable CollectAttrTable (SOMList attList)
  {
      Hashtable byName = new Hashtable ();
      foreach (XmlSchemaObject item in attList) {
          Attr definition = item as Attr;
          if (definition == null)
              throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));

          QName key = definition.RefName != QName.Empty ?
              definition.RefName :
              new QName (definition.Name, "");
          byName.Add (key, definition);
      }
      return byName;
  }
  // Walks the attributes of the current element (the reader is already
  // on the first one) and adds or merges a definition for each.
  // Definitions that exist on the type but did not occur on this
  // element are marked optional afterwards.
  private void InferAttributes (Element el, string ns, bool isNew)
  {
      // The complex type, its attribute list and the lookup table are
      // set up lazily, on the first attribute that needs a definition.
      ComplexType ct = null;
      SOMList attList = null;
      Hashtable pending = null;

      do {
          string attNs = source.NamespaceURI;

          // Namespace declarations are skipped.
          if (attNs == NamespaceXmlns)
              continue;

          // xsi:nil makes the element nillable;
          // all other xsi:* atts are ignored
          if (attNs == XmlSchema.InstanceNamespace) {
              if (source.LocalName == "nil")
                  el.IsNillable = true;
              continue;
          }

          // xml:* attributes need the xml namespace schema.
          if (attNs == NamespaceXml &&
              schemas.Schemas (NamespaceXml).Count == 0)
              IncludeXmlAttributes ();

          if (ct == null) {
              ct = ToComplexType (el);
              attList = GetAttributes (ct);
              pending = CollectAttrTable (attList);
          }

          QName attrName = new QName (source.LocalName, attNs);
          Attr known = pending [attrName] as Attr;
          if (known == null) {
              attList.Add (InferNewAttribute (attrName, isNew));
              continue;
          }

          // Seen on this element, so it is no longer "missing".
          pending.Remove (attrName);
          if (known.RefName != null && known.RefName != QName.Empty)
              continue; // just a reference
          InferMergedAttribute (known);
      } while (source.MoveToNextAttribute ());

      // mark all attr definitions that did not appear
      // as optional.
      if (pending != null)
          foreach (Attr missing in pending.Values)
              missing.Use = Use.Optional;
  }
  // Produces the attribute particle to add to a type for the attribute
  // the reader is positioned on. A namespaced attribute yields a
  // reference to a global definition (created or merged here); an
  // unqualified one yields a local definition with an inferred type.
  private XmlSchemaAttribute InferNewAttribute (
      QName attrName, bool isNewTypeDefinition)
  {
      Attr result = new Attr ();
      bool mergedRequired = false;

      if (attrName.Namespace.Length == 0) {
          // local attribute
          result.Name = attrName.Name;
          result.SchemaTypeName = InferSimpleType (source.Value);
      } else {
          // global attribute; might be already defined.
          Attr global = GetGlobalAttribute (attrName) as Attr;
          if (global != null) {
              InferMergedAttribute (global);
              mergedRequired = global.Use == Use.Required;
          } else {
              global = CreateGlobalAttribute (attrName);
              global.SchemaTypeName = InferSimpleType (source.Value);
          }
          result.RefName = attrName;
      }

      // Required only under strict occurrence inference, and only for a
      // new type definition or a global attribute that is itself required.
      bool required = !laxOccurence &&
          (isNewTypeDefinition || mergedRequired);
      result.Use = required ? Use.Required : Use.Optional;
      return result;
  }
  // Checks the current attribute value against the type of "attr" and
  // replaces that type with a relaxed one when the value does not fit.
  // Any inline type on the attribute is dropped.
  private void InferMergedAttribute (Attr attr)
  {
      QName merged = InferMergedType (source.Value, attr.SchemaTypeName);
      attr.SchemaTypeName = merged;
      attr.SchemaType = null;
  }
  // Returns "typeName" when "value" parses as that built-in type.
  // Otherwise climbs the chain of base simple types and returns the
  // first one that accepts the value. xs:string is returned when the
  // name is not a built-in simple type or the chain is exhausted.
  private QName InferMergedType (string value, QName typeName)
  {
      SimpleType candidate = XmlSchemaType.GetBuiltInSimpleType (
          typeName);

      while (candidate != null) {
          try {
              candidate.Datatype.ParseValue (value,
                  source.NameTable,
                  source as IXmlNamespaceResolver);
              return typeName;
          } catch {
              // Value rejected: try the base type next.
              candidate = candidate.BaseXmlSchemaType as XmlSchemaSimpleType;
          }

          if (candidate == null)
              break;
          typeName = candidate.QualifiedName;
          if (typeName == QName.Empty)
              break;
      }
      return QNameString;
  }
  // Returns the collection that holds the attribute definitions of
  // "ct": the type's own list when it has no content model, otherwise
  // the list of the extension/restriction inside its simple or complex
  // content model. Throws (via Error) for any other shape.
  private SOMList GetAttributes (ComplexType ct)
  {
      if (ct.ContentModel == null)
          return ct.Attributes;

      SimpleModel sc = ct.ContentModel as SimpleModel;
      if (sc != null) {
          SimpleExt sce = sc.Content as SimpleExt;
          if (sce != null)
              return sce.Attributes;
          SimpleRst scr = sc.Content as SimpleRst;
          if (scr != null)
              return scr.Attributes;
          throw Error (sc, "Invalid simple content model.");
      }

      ComplexModel cc = ct.ContentModel as ComplexModel;
      if (cc != null) {
          ComplexExt cce = cc.Content as ComplexExt;
          if (cce != null)
              return cce.Attributes;
          ComplexRst ccr = cc.Content as ComplexRst;
          if (ccr != null)
              return ccr.Attributes;
          // This branch is about complex content, so say so.
          throw Error (cc, "Invalid complex content model.");
      }

      // "cc" is always null here, so report against the complex type
      // itself to give the error a schema component to point at.
      throw Error (ct, "Invalid complexType. Should not happen.");
  }
  // Returns the complex type of "el", converting the element to an
  // inline complex type when it currently has a simple type or none.
  private ComplexType ToComplexType (Element el)
  {
      QName typeName = el.SchemaTypeName;
      XmlSchemaType inlineType = el.SchemaType;

      // 1. element type is complex.
      ComplexType existing = inlineType as ComplexType;
      if (existing != null)
          return existing;

      // 2. reference to global complexType.
      XmlSchemaType globalType =
          schemas.GlobalTypes [typeName] as XmlSchemaType;
      existing = globalType as ComplexType;
      if (existing != null)
          return existing;

      // From here on the element gets a fresh inline complex type.
      ComplexType created = new ComplexType ();
      el.SchemaType = created;
      el.SchemaTypeName = QName.Empty;

      // 3. base type name is xs:anyType or no specification.
      // <xs:complexType />
      bool noBase = typeName == QNameAnyType ||
          (inlineType == null && typeName == QName.Empty);
      if (noBase)
          return created;

      SimpleModel model = new SimpleModel ();
      created.ContentModel = model;

      // 4. type is an inline simpleType
      // -> simple content restriction based on it.
      SimpleType inlineSimple = inlineType as SimpleType;
      if (inlineSimple != null) {
          SimpleRst restriction = new SimpleRst ();
          restriction.BaseType = inlineSimple;
          model.Content = restriction;
          return created;
      }

      SimpleExt extension = new SimpleExt ();
      model.Content = extension;

      // 5. type name points to primitive type, or
      // 6. type name points to global simpleType
      // -> simple extension of that named type.
      bool resolvable =
          XmlSchemaType.GetBuiltInSimpleType (typeName) != null ||
          globalType is SimpleType;
      if (resolvable) {
          extension.BaseTypeName = typeName;
          return created;
      }

      throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
  }
  428. #endregion
  429. #region Element Type
  // Adjusts the type of "el" so that an empty element is acceptable.
  // Only inline types are touched; an element without an inline type
  // is left unchanged.
  private void InferAsEmptyElement (Element el, string ns,
      bool isNew)
  {
      ComplexType complex = el.SchemaType as ComplexType;
      if (complex != null) {
          SimpleModel simpleContent =
              complex.ContentModel as SimpleModel;
          if (simpleContent != null) {
              ToEmptiableSimpleContent (simpleContent, isNew);
              return;
          }

          ComplexModel complexContent =
              complex.ContentModel as ComplexModel;
          if (complexContent != null) {
              ToEmptiableComplexContent (complexContent, isNew);
              return;
          }

          // No content model: make the particle itself optional.
          if (complex.Particle != null)
              complex.Particle.MinOccurs = 0;
          return;
      }

      SimpleType simple = el.SchemaType as SimpleType;
      if (simple == null)
          return;

      simple = MakeBaseTypeAsEmptiable (simple);
      string typeNs = simple.QualifiedName.Namespace;
      if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace)
          // Types in these namespaces are referenced by name.
          el.SchemaTypeName = simple.QualifiedName;
      else
          el.SchemaType = simple;
  }
  // Relaxes "st" so that it can accept an empty string and returns the
  // type to use. A type in the XML Schema or XDT namespace is replaced
  // by the built-in string type. A restriction loses its length and
  // minLength facets and has its base relaxed in turn. List and union
  // types are returned unchanged.
  private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
  {
      switch (st.QualifiedName.Namespace) {
      case XmlSchema.Namespace:
      case XdtNamespace:
          // If a primitive type
          return XmlSchemaType.GetBuiltInSimpleType (
              XmlTypeCode.String);
      }

      SimpleTypeRst str = st.Content as SimpleTypeRst;
      if (str == null)
          return st; // union/list can have empty string value.

      // Collect first, remove afterwards: the facet collection must not
      // be modified while it is being enumerated. The list is always
      // allocated so a restriction without such facets is handled too.
      ArrayList lengthFacets = new ArrayList ();
      foreach (SchemaFacet f in str.Facets) {
          if (f is LengthFacet || f is MinLengthFacet)
              lengthFacets.Add (f);
      }
      foreach (SchemaFacet f in lengthFacets)
          str.Facets.Remove (f);

      if (str.BaseType != null)
          // Recurse into the inline base type, not into "st" itself.
          str.BaseType =
              MakeBaseTypeAsEmptiable (str.BaseType);
      else
          // It might have a reference to an
          // external simple type, but there is
          // no assurance that any of those
          // external types allow an empty
          // string. So just set base type as
          // xs:string.
          str.BaseTypeName = QNameString;
      return st;
  }
  // Rebases a simple content model on xs:string so that empty content
  // is accepted. For a restriction the inline base type is dropped too.
  private void ToEmptiableSimpleContent (
      SimpleModel sm, bool isNew)
  {
      SimpleExt extension = sm.Content as SimpleExt;
      if (extension != null) {
          extension.BaseTypeName = QNameString;
          return;
      }

      SimpleRst restriction = sm.Content as SimpleRst;
      if (restriction == null)
          throw Error (sm, "Invalid simple content model was passed.");
      restriction.BaseTypeName = QNameString;
      restriction.BaseType = null;
  }
  // Makes the particle of a complex content extension/restriction
  // optional. Without a particle, a base type other than xs:anyType is
  // rejected because the base cannot be adjusted here.
  private void ToEmptiableComplexContent (
      ComplexModel cm, bool isNew)
  {
      ComplexExt extension = cm.Content as ComplexExt;
      if (extension != null) {
          if (extension.Particle != null) {
              extension.Particle.MinOccurs = 0;
              return;
          }
          bool externalBase = extension.BaseTypeName != null &&
              extension.BaseTypeName != QName.Empty &&
              extension.BaseTypeName != QNameAnyType;
          if (externalBase)
              throw Error (extension, "Complex type content extension has a reference to an external component that is not supported.");
          return;
      }

      ComplexRst restriction = cm.Content as ComplexRst;
      if (restriction == null)
          throw Error (cm, "Invalid complex content model was passed.");
      if (restriction.Particle != null) {
          restriction.Particle.MinOccurs = 0;
          return;
      }
      bool externalRstBase = restriction.BaseTypeName != null &&
          restriction.BaseTypeName != QName.Empty &&
          restriction.BaseTypeName != QNameAnyType;
      if (externalRstBase)
          throw Error (restriction, "Complex type content extension has a reference to an external component that is not supported.");
  }
  // Steps into the current (non-empty) element and dispatches on the
  // first content node: nothing, child elements, or text that may be
  // followed by child elements.
  private void InferContent (Element el, string ns, bool isNew)
  {
      source.Read ();
      source.MoveToContent ();
      XmlNodeType first = source.NodeType;

      if (first == XmlNodeType.Whitespace) {
          InferContent (el, ns, isNew); // skip and retry
          return;
      }
      if (first == XmlNodeType.EndElement) {
          InferAsEmptyElement (el, ns, isNew);
          return;
      }

      bool startsWithText = first == XmlNodeType.Text ||
          first == XmlNodeType.CDATA ||
          first == XmlNodeType.SignificantWhitespace;
      if (startsWithText) {
          InferTextContent (el, isNew);
          source.MoveToContent ();
      }

      // Reached either directly or after leading text.
      if (source.NodeType == XmlNodeType.Element)
          InferComplexContent (el, ns, isNew);
  }
  // Infers element content: consumes child nodes until the end tag of
  // the parent is reached. Child elements go to the sequence or lax
  // (choice) processor; text marks the type as mixed.
  private void InferComplexContent (Element el, string ns,
      bool isNew)
  {
      ComplexType ct = ToComplexType (el);
      ToComplexContentType (ct);

      // Cursor into the sequence, shared with ProcessSequence ().
      int position = 0;
      bool consumed = false;

      while (true) {
          XmlNodeType kind = source.NodeType;

          if (kind == XmlNodeType.EndElement)
              return; // finished
          if (kind == XmlNodeType.None)
              throw new NotImplementedException ("Internal Error: Should not happen.");

          if (kind == XmlNodeType.Element) {
              Sequence seq = PopulateSequence (ct);
              // A sequence whose first item is a choice is in lax mode.
              Choice lax = seq.Items.Count > 0 ?
                  seq.Items [0] as Choice :
                  null;
              if (lax == null)
                  ProcessSequence (ct, seq, ns,
                      ref position,
                      ref consumed,
                      isNew);
              else
                  ProcessLax (lax, ns);
              source.MoveToContent ();
          }
          else if (kind == XmlNodeType.Text ||
              kind == XmlNodeType.CDATA ||
              kind == XmlNodeType.SignificantWhitespace) {
              MarkAsMixed (ct);
              source.ReadString ();
              source.MoveToContent ();
          }
      }
  }
  // Reads the text of the current element and infers, or relaxes, the
  // type of "el" so that the text is valid for it.
  private void InferTextContent (Element el, bool isNew)
  {
      string text = source.ReadString ();

      if (el.SchemaType == null) {
          QName current = el.SchemaTypeName;

          // no type information -> infer type
          if (current == QName.Empty) {
              el.SchemaTypeName = isNew ?
                  InferSimpleType (text) :
                  QNameString;
              return;
          }

          // existing primitive type
          string typeNs = current.Namespace;
          if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace) {
              el.SchemaTypeName = InferMergedType (text, current);
              return;
          }

          // If it is complex, then just set
          // mixed='true' (type cannot be set.)
          // If it is simple, then we cannot
          // make sure that string value is
          // valid. So just set as xs:string.
          ComplexType global =
              schemas.GlobalTypes [current] as ComplexType;
          if (global == null)
              el.SchemaTypeName = QNameString;
          else
              MarkAsMixed (global);
          return;
      }

      // Inline simpleType: the value cannot be checked against it, so
      // the element falls back to xs:string.
      if (el.SchemaType is SimpleType) {
          el.SchemaType = null;
          el.SchemaTypeName = QNameString;
          return;
      }

      // Inline complexType
      ComplexType ect = el.SchemaType as ComplexType;
      SimpleModel sm = ect.ContentModel as SimpleModel;
      if (sm == null) {
          // - ComplexContent
          MarkAsMixed (ect);
          return;
      }

      // - SimpleContent: merge the value into the base type name.
      SimpleExt se = sm.Content as SimpleExt;
      if (se != null)
          se.BaseTypeName = InferMergedType (text, se.BaseTypeName);

      SimpleRst sr = sm.Content as SimpleRst;
      if (sr != null) {
          sr.BaseTypeName = InferMergedType (text, sr.BaseTypeName);
          sr.BaseType = null;
      }
  }
  // Sets the mixed flag on the complex content model when the type has
  // one, otherwise on the complex type itself.
  private void MarkAsMixed (ComplexType ct)
  {
      ComplexModel content = ct.ContentModel as ComplexModel;
      if (content == null) {
          ct.IsMixed = true;
          return;
      }
      content.IsMixed = true;
  }
  678. #endregion
  679. #region Particles
  // Handles the current element against a lax content model (a choice
  // of elements): merges it into the matching element particle, or
  // appends a new element particle when none matches. Throws (via
  // Error) when the choice holds anything other than elements.
  private void ProcessLax (Choice c, string ns)
  {
      foreach (Particle p in c.Items) {
          Element el = p as Element;
          if (el == null)
              // The offending particle fills the {0} placeholder;
              // without an argument String.Format itself would throw
              // a FormatException and hide the real problem.
              throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here.", p));
          if (ElementMatches (el, ns)) {
              InferElement (el, ns, false);
              return;
          }
      }

      // append a new element particle to lax term.
      // Same namespace as the parent -> local element by name,
      // otherwise a reference by qualified name.
      Element nel = new Element ();
      if (source.NamespaceURI == ns)
          nel.Name = source.LocalName;
      else
          nel.RefName = new QName (source.LocalName,
              source.NamespaceURI);
      InferElement (nel, source.NamespaceURI, true);
      c.Items.Add (nel);
  }
  // Tells whether the element particle "el" describes the element the
  // reader is positioned on. A reference is compared by its qualified
  // name; a local definition by its name plus the namespace "ns".
  private bool ElementMatches (Element el, string ns)
  {
      if (el.RefName != QName.Empty)
          return el.RefName.Name == source.LocalName &&
              el.RefName.Namespace == source.NamespaceURI;

      return el.Name == source.LocalName &&
          ns == source.NamespaceURI;
  }
  // Handles the current element against a strict sequence. "position"
  // is the index of the particle currently being matched and
  // "consumed" is a flag carried between calls by the caller. The
  // sequence is switched to a lax choice (see ToSequenceOfChoice ())
  // when the element cannot be placed in order.
  private void ProcessSequence (ComplexType ct, Sequence s,
      string ns, ref int position, ref bool consumed,
      bool isNew)
  {
      // Sequence element type violation
      // might happen (might not, but we
      // cannot backtrack here). So switch
      // to sequence of choice* here.
      for (int idx = 0; idx < position; idx++) {
          Element earlier = s.Items [idx] as Element;
          if (!ElementMatches (earlier, ns))
              continue;
          ProcessLax (ToSequenceOfChoice (s), ns);
          return;
      }

      // Past the end of the sequence: append a particle for this element.
      if (position >= s.Items.Count) {
          QName name = new QName (source.LocalName,
              source.NamespaceURI);
          Element created = CreateElement (name);
          InferElement (created, ns, true);
          if (ns != name.Namespace) {
              // Other namespace: the sequence only holds a reference.
              Element reference = new Element ();
              reference.RefName = name;
              s.Items.Add (reference);
          }
          else
              s.Items.Add (created);
          consumed = true;
          return;
      }

      Element el = s.Items [position] as Element;
      if (el == null)
          throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));

      if (!ElementMatches (el, ns)) {
          if (!consumed) {
              ProcessLax (ToSequenceOfChoice (s), ns);
              return;
          }
          // Move on to the next particle and retry.
          position++;
          consumed = false;
          ProcessSequence (ct, s, ns,
              ref position, ref consumed,
              isNew);
          return;
      }

      if (consumed)
          el.MaxOccursString = "unbounded";
      InferElement (el, source.NamespaceURI, false);
      source.MoveToContent ();

      XmlNodeType kind = source.NodeType;
      if (kind == XmlNodeType.Text ||
          kind == XmlNodeType.CDATA ||
          kind == XmlNodeType.SignificantWhitespace) {
          MarkAsMixed (ct);
          source.ReadString ();
      }
      else if (kind == XmlNodeType.Whitespace)
          source.ReadString ();
      else if (kind != XmlNodeType.Element &&
          kind != XmlNodeType.EndElement &&
          kind != XmlNodeType.None) {
          // Any other node is simply skipped.
          source.Read ();
          return;
      }

      // Continue with the following sibling element, if there is one;
      // an end tag (or anything else) ends this call.
      if (source.NodeType == XmlNodeType.Element)
          ProcessSequence (ct, s, ns, ref position,
              ref consumed, isNew);
  }
  // Rewrites the sequence in place so that its only item is an
  // unbounded choice holding all former items, and returns that
  // choice (not the sequence).
  private Choice ToSequenceOfChoice (Sequence s)
  {
      Choice wrapper = new Choice ();
      if (laxOccurence)
          wrapper.MinOccurs = 0;
      wrapper.MaxOccursString = "unbounded";

      for (int i = 0; i < s.Items.Count; i++)
          wrapper.Items.Add ((Particle) s.Items [i]);

      s.Items.Clear ();
      s.Items.Add (wrapper);
      return wrapper;
  }
  // Strips a simple content model from the type: its attributes are
  // moved onto the type itself, the content model is removed and the
  // type is flagged as mixed. Types without simple content are left
  // untouched.
  private void ToComplexContentType (ComplexType type)
  {
      if (!(type.ContentModel is SimpleModel))
          return;

      SOMList inherited = GetAttributes (type);
      for (int i = 0; i < inherited.Count; i++)
          type.Attributes.Add (inherited [i]);

      // FIXME: need to copy AnyAttribute.
      // (though not considered right now)
      type.ContentModel = null;
      type.IsMixed = true;
  }
  // Returns the content particle of "ct" as a sequence (creating it
  // through PopulateParticle () when missing); any other kind of
  // particle is rejected.
  private Sequence PopulateSequence (ComplexType ct)
  {
      Particle content = PopulateParticle (ct);
      Sequence seq = content as Sequence;
      if (seq == null)
          throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", content));
      return seq;
  }
  // Creates an empty sequence; under lax occurrence inference the
  // sequence is optional (minOccurs = 0).
  private Sequence CreateSequence ()
  {
      Sequence created = new Sequence ();
      if (!laxOccurence)
          return created;
      created.MinOccurs = 0;
      return created;
  }
  // Returns the content particle of "ct", creating an empty sequence
  // where none exists yet. The particle lives on the type itself when
  // there is no content model, otherwise on the complex content
  // extension/restriction. Any other shape is an internal error.
  private Particle PopulateParticle (ComplexType ct)
  {
      if (ct.ContentModel == null) {
          if (ct.Particle != null)
              return ct.Particle;
          ct.Particle = CreateSequence ();
          return ct.Particle;
      }

      ComplexModel model = ct.ContentModel as ComplexModel;
      if (model != null) {
          ComplexExt extension = model.Content as ComplexExt;
          if (extension != null) {
              if (extension.Particle != null)
                  return extension.Particle;
              extension.Particle = CreateSequence ();
              return extension.Particle;
          }

          ComplexRst restriction = model.Content as ComplexRst;
          if (restriction != null) {
              if (restriction.Particle != null)
                  return restriction.Particle;
              restriction.Particle = CreateSequence ();
              return restriction.Particle;
          }
      }

      throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
  }
  862. #endregion
  863. #region String Value
  // Primitive type inference for a literal string value.
  // With lax type inference enabled the answer is always xs:string.
  // Otherwise candidates are tried in this order and the first one
  // that accepts the value wins: boolean, the integer types that fit
  // in a long, unsignedLong, decimal, float/double, dateTime,
  // duration, and finally xs:string.
  private QName InferSimpleType (string value)
  {
      if (laxTypeInference) {
          return QNameString;
      }

      // Only the literals "true" and "false" are taken as boolean here.
      // "0" and "1" are not special-cased (the original note says this
      // differs from MS.XSDInfer); they go through the integer checks.
      if (value == "true" || value == "false") {
          return QNameBoolean;
      }

      try {
          long integer = XmlConvert.ToInt64 (value);
          if (integer >= 0) {
              // Non-negative values get the narrowest unsigned type.
              if (integer <= byte.MaxValue) {
                  return QNameUByte;
              }
              if (integer <= ushort.MaxValue) {
                  return QNameUShort;
              }
              if (integer <= uint.MaxValue) {
                  return QNameUInt;
              }
          } else {
              // Negative values get the narrowest signed type.
              if (integer >= sbyte.MinValue) {
                  return QNameByte;
              }
              if (integer >= short.MinValue) {
                  return QNameShort;
              }
              if (integer >= int.MinValue) {
                  return QNameInt;
              }
          }
          return QNameLong;
      } catch (Exception) {
          // Not parsable as a long; try the next candidate.
      }

      try {
          XmlConvert.ToUInt64 (value);
          return QNameULong;
      } catch (Exception) {
          // Not parsable as an unsigned long; try the next candidate.
      }

      try {
          XmlConvert.ToDecimal (value);
          return QNameDecimal;
      } catch (Exception) {
          // Not parsable as a decimal; try the next candidate.
      }

      try {
          double number = XmlConvert.ToDouble (value);
          bool withinSingleRange =
              number >= float.MinValue && number <= float.MaxValue;
          return withinSingleRange ? QNameFloat : QNameDouble;
      } catch (Exception) {
          // Not parsable as a double; try the next candidate.
      }

      try {
          // FIXME: also try DateTimeSerializationMode
          // and gYearMonth
          XmlConvert.ToDateTime (value);
          return QNameDateTime;
      } catch (Exception) {
          // Not parsable as a date/time; try the next candidate.
      }

      try {
          XmlConvert.ToTimeSpan (value);
          return QNameDuration;
      } catch (Exception) {
          // Not parsable as a duration; fall back to xs:string.
      }

      // xs:string
      return QNameString;
  }
  929. #endregion
  930. #region Utilities
  // Looks up a global element declaration by qualified name.
  // The newElements table is consulted first; when it has no element
  // under that name the schema set's GlobalElements table is used.
  // Returns null when neither table has a matching element.
  private Element GetGlobalElement (QName name)
  {
      Element pending = newElements [name] as Element;
      if (pending != null) {
          return pending;
      }
      return schemas.GlobalElements [name] as Element;
  }
  // Looks up a global attribute declaration by qualified name.
  // The newAttributes table (the one CreateGlobalAttribute registers
  // new attributes in) is consulted first; when it has no attribute
  // under that name the schema set's GlobalAttributes table is used.
  // Returns null when neither table has a matching attribute.
  private Attr GetGlobalAttribute (QName name)
  {
      // This used to read newElements, where attributes are never
      // stored, so an attribute created earlier in the same run was
      // not found.
      Attr a = newAttributes [name] as Attr;
      if (a == null)
          a = schemas.GlobalAttributes [name] as Attr;
      return a;
  }
  // Creates an element declaration named after the local part of the
  // given qualified name. The element is not added to any schema.
  private Element CreateElement (QName name)
  {
      Element created = new Element ();
      created.Name = name.Name;
      return created;
  }
  // Creates an element declaration for the given name, adds it to the
  // items of the schema obtained from PopulateSchema for the name's
  // namespace, and records it in newElements under the qualified name.
  private Element CreateGlobalElement (QName name)
  {
      Element created = CreateElement (name);
      PopulateSchema (name.Namespace).Items.Add (created);
      newElements.Add (name, created);
      return created;
  }
  // Creates an attribute declaration for the given name, adds it to
  // the items of the schema obtained from PopulateSchema for the
  // name's namespace, and records it in newAttributes under the
  // qualified name.
  private Attr CreateGlobalAttribute (QName name)
  {
      Attr created = new Attr ();
      created.Name = name.Name;

      XmlSchema owner = PopulateSchema (name.Namespace);
      owner.Items.Add (created);

      newAttributes.Add (name, created);
      return created;
  }
  // Returns a schema for the given target namespace: the first one the
  // schema set already holds for it, or else a newly created schema
  // (qualified element form, unqualified attribute form) that is added
  // to the set.
  // Note that the returned schema is not guaranteed to contain every
  // component that belongs to the namespace.
  private XmlSchema PopulateSchema (string ns)
  {
      // Hand back the first existing schema for this namespace, if any.
      foreach (object existing in schemas.Schemas (ns)) {
          return (XmlSchema) existing;
      }

      XmlSchema created = new XmlSchema ();
      if (!String.IsNullOrEmpty (ns)) {
          created.TargetNamespace = ns;
      }
      created.ElementFormDefault = Form.Qualified;
      created.AttributeFormDefault = Form.Unqualified;
      schemas.Add (created);
      return created;
  }
  // Builds an inference exception for a schema component error.
  // Reader information is not included; this simply forwards to the
  // three-argument overload with useReader set to false.
  private XmlSchemaInferenceException Error (XmlSchemaObject sourceObj, string message)
  {
      const bool includeReaderInfo = false;
      return Error (sourceObj, includeReaderInfo, message);
  }
  // Builds (does not throw) an inference exception.
  //   sourceObj - related schema component, or null. When given, its
  //               source URI, line number and line position are
  //               appended to the message.
  //   useReader - when true, the reader's BaseURI is appended to the
  //               message and, if the reader implements IXmlLineInfo,
  //               its line number and position are passed to the
  //               exception.
  //   message   - leading text of the exception message.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      bool useReader,
      string message)
  {
      // The previous format string only had {0}, so the line number
      // and position passed alongside the URI never reached the message.
      string componentInfo = String.Empty;
      if (sourceObj != null) {
          componentInfo = String.Format (
              ". Related schema component is {0} (line {1}, position {2})",
              sourceObj.SourceUri,
              sourceObj.LineNumber,
              sourceObj.LinePosition);
      }

      string readerInfo = useReader ?
          String.Format (". {0}", source.BaseURI) :
          String.Empty;

      string msg = String.Concat (message, componentInfo, readerInfo);

      IXmlLineInfo li = source as IXmlLineInfo;
      if (useReader && li != null)
          return new XmlSchemaInferenceException (
              msg, null, li.LineNumber,
              li.LinePosition);
      else
          return new XmlSchemaInferenceException (msg);
  }
  1017. #endregion
  1018. }
  1019. }
  1020. #endif