XmlSchemaInference.cs 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. //
  2. // XmlSchemaInference.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C)2004 Novell Inc.
  8. //
  9. //
  10. // Permission is hereby granted, free of charge, to any person obtaining
  11. // a copy of this software and associated documentation files (the
  12. // "Software"), to deal in the Software without restriction, including
  13. // without limitation the rights to use, copy, modify, merge, publish,
  14. // distribute, sublicense, and/or sell copies of the Software, and to
  15. // permit persons to whom the Software is furnished to do so, subject to
  16. // the following conditions:
  17. //
  18. // The above copyright notice and this permission notice shall be
  19. // included in all copies or substantial portions of the Software.
  20. //
  21. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28. //
  29. #if NET_2_0
  30. using System;
  31. using System.Collections;
  32. using System.Xml;
  33. using System.Xml.Schema;
  34. using QName = System.Xml.XmlQualifiedName;
  35. using Form = System.Xml.Schema.XmlSchemaForm;
  36. using Use = System.Xml.Schema.XmlSchemaUse;
  37. using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
  38. using SOMObject = System.Xml.Schema.XmlSchemaObject;
  39. using Element = System.Xml.Schema.XmlSchemaElement;
  40. using Attr = System.Xml.Schema.XmlSchemaAttribute;
  41. using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
  42. using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
  43. using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
  44. using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
  45. using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
  46. using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
  47. using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
  48. using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
  49. using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
  50. using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
  51. using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
  52. using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
  53. using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
  54. using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
  55. using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
  56. using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
  57. using Particle = System.Xml.Schema.XmlSchemaParticle;
  58. using Sequence = System.Xml.Schema.XmlSchemaSequence;
  59. using Choice = System.Xml.Schema.XmlSchemaChoice;
  60. namespace System.Xml.Schema
  61. {
  // Public entry point for XML Schema inference. It holds the two
  // inference options and forwards the actual work to
  // XsdInference.Process ().
  [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
  public class XmlSchemaInference
  {
      // Selects between strict and relaxed inference.
      public enum InferenceOption {
          Restricted,
          Relaxed,
      }

      InferenceOption occurrence = InferenceOption.Restricted;
      InferenceOption typeInference = InferenceOption.Restricted;

      public XmlSchemaInference ()
      {
      }

      // Occurrence option. Relaxed is forwarded to the inference
      // engine as its "lax occurrence" flag.
      public InferenceOption Occurrence {
          get { return occurrence; }
          set { occurrence = value; }
      }

      // Type inference option. Relaxed is forwarded to the inference
      // engine as its "lax type inference" flag.
      public InferenceOption TypeInference {
          // Read the backing field. Returning the property itself
          // here would recurse without end.
          get { return typeInference; }
          set { typeInference = value; }
      }

      // Infers schemas from the reader's content into a fresh
      // XmlSchemaSet.
      public XmlSchemaSet InferSchema (XmlReader xmlReader)
      {
          return InferSchema (xmlReader, new XmlSchemaSet ());
      }

      // Infers schemas from the reader's content, using the supplied
      // schema set as the starting point, and returns the schema set
      // produced by the inference engine.
      public XmlSchemaSet InferSchema (XmlReader xmlReader,
          XmlSchemaSet schemas)
      {
          return XsdInference.Process (xmlReader, schemas,
              occurrence == InferenceOption.Relaxed,
              typeInference == InferenceOption.Relaxed);
      }
  }
  94. class XsdInference
  95. {
  // Static entry point: builds a one-shot inference engine around the
  // given reader and schema set, runs it, and hands back the engine's
  // schema set.
  public static XmlSchemaSet Process (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurence,
      bool laxTypeInference)
  {
      XsdInference engine = new XsdInference (
          xmlReader, schemas, laxOccurence, laxTypeInference);
      engine.Run ();
      return engine.schemas;
  }
  // Namespace URIs that the inference code compares against.
  public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
  public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
  public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";

  // Qualified names in the XML Schema namespace used as inferred
  // type names.
  static readonly QName QNameString = new QName ("string", XmlSchema.Namespace);
  static readonly QName QNameBoolean = new QName ("boolean", XmlSchema.Namespace);
  static readonly QName QNameAnyType = new QName ("anyType", XmlSchema.Namespace);
  static readonly QName QNameByte = new QName ("byte", XmlSchema.Namespace);
  static readonly QName QNameUByte = new QName ("unsignedByte", XmlSchema.Namespace);
  static readonly QName QNameShort = new QName ("short", XmlSchema.Namespace);
  static readonly QName QNameUShort = new QName ("unsignedShort", XmlSchema.Namespace);
  static readonly QName QNameInt = new QName ("int", XmlSchema.Namespace);
  static readonly QName QNameUInt = new QName ("unsignedInt", XmlSchema.Namespace);
  static readonly QName QNameLong = new QName ("long", XmlSchema.Namespace);
  static readonly QName QNameULong = new QName ("unsignedLong", XmlSchema.Namespace);
  static readonly QName QNameDecimal = new QName ("decimal", XmlSchema.Namespace);
  // NOTE(review): "unsignedDecimal" does not look like an XML Schema
  // built-in type name -- confirm whether this constant is used.
  static readonly QName QNameUDecimal = new QName ("unsignedDecimal", XmlSchema.Namespace);
  static readonly QName QNameDouble = new QName ("double", XmlSchema.Namespace);
  static readonly QName QNameFloat = new QName ("float", XmlSchema.Namespace);
  static readonly QName QNameDateTime = new QName ("dateTime", XmlSchema.Namespace);
  static readonly QName QNameDuration = new QName ("duration", XmlSchema.Namespace);

  // Per-run state: the instance document reader, the schema set being
  // refined, and the two option flags passed to the constructor.
  XmlReader source;
  XmlSchemaSet schemas;
  bool laxOccurence;
  bool laxTypeInference;

  // Tables for components created during this run.
  Hashtable newElements = new Hashtable ();
  Hashtable newAttributes = new Hashtable ();
  // Captures the reader, the target schema set and the two option
  // flags. No work is done until Run () is called.
  private XsdInference (XmlReader xmlReader, XmlSchemaSet schemas,
      bool laxOccurence, bool laxTypeInference)
  {
      source = xmlReader;
      this.schemas = schemas;
      this.laxOccurence = laxOccurence;
      this.laxTypeInference = laxTypeInference;
  }
  // Drives one inference pass: compiles the schema set, positions the
  // reader on the top-level element and infers (or merges into) the
  // matching global element definition.
  // Throws ArgumentException when the reader's content is not an
  // element.
  private void Run ()
  {
      // XmlSchemaSet need to be compiled.
      schemas.Compile ();

      // move to top-level element
      source.MoveToContent ();
      if (source.NodeType != XmlNodeType.Element)
          throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

      QName qname = new QName (source.LocalName,
          source.NamespaceURI);
      Element el = GetGlobalElement (qname);
      if (el == null) {
          // No definition yet: create one and infer it as new.
          el = CreateGlobalElement (qname);
          InferElement (el, qname.Namespace, true);
      }
      else
          // Existing definition: merge this instance into it.
          InferElement (el, qname.Namespace, false);
  }
  // Adds the schema for the XML namespace to the schema set unless
  // one is already registered for that namespace.
  private void IncludeXmlAttributes ()
  {
      bool alreadyPresent = schemas.Schemas (NamespaceXml).Count != 0;
      if (alreadyPresent)
          return;
      // FIXME: do it from resources.
      schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
  }
  // Infers or merges the definition of the element the reader is
  // currently positioned on, then advances the reader past it.
  //   el    - element definition to fill in or refine
  //   ns    - namespace passed through to the attribute and content
  //           inference helpers
  //   isNew - true when the definition was just created
  private void InferElement (Element el, string ns, bool isNew)
  {
      // An element reference is redirected to the definition it
      // refers to; a missing target is created and treated as new.
      if (el.RefName != QName.Empty) {
          Element target = GetGlobalElement (el.RefName);
          bool targetIsNew = isNew;
          if (target == null) {
              target = CreateElement (el.RefName);
              targetIsNew = true;
          }
          InferElement (target, ns, targetIsNew);
          return;
      }

      // Attributes
      bool hasAttributes = source.MoveToFirstAttribute ();
      if (hasAttributes) {
          InferAttributes (el, ns, isNew);
          source.MoveToElement ();
      }

      // Content
      if (!source.IsEmptyElement) {
          InferContent (el, ns, isNew);
          source.ReadEndElement ();
      }
      else {
          InferAsEmptyElement (el, ns, isNew);
          source.Read ();
          source.MoveToContent ();
      }

      // Nothing assigned a type: fall back to xs:string.
      bool untyped = el.SchemaType == null &&
          el.SchemaTypeName == QName.Empty;
      if (untyped)
          el.SchemaTypeName = QNameString;
  }
  221. #region Attribute Inference
  // Builds a lookup table of the attribute definitions in attList.
  // References are keyed by their RefName, local definitions by their
  // name with an empty namespace. Any entry that is not a plain
  // attribute definition is reported as an error.
  private Hashtable CollectAttrTable (SOMList attList)
  {
      Hashtable byName = new Hashtable ();
      foreach (XmlSchemaObject item in attList) {
          Attr definition = item as Attr;
          if (definition == null)
              throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));
          QName key = definition.RefName != QName.Empty ?
              definition.RefName :
              new QName (definition.Name, "");
          byName.Add (key, definition);
      }
      return byName;
  }
  // Walks the attributes of the current element (the reader is already
  // on the first attribute) and merges each one into the element's
  // complex type. The complex type is obtained lazily, on the first
  // attribute that actually needs a definition.
  private void InferAttributes (Element el, string ns, bool isNew)
  {
      ComplexType owner = null;
      SOMList declared = null;
      Hashtable pending = null;

      do {
          string attrNs = source.NamespaceURI;

          if (attrNs == XmlSchema.InstanceNamespace) {
              // xsi:nil makes the element nillable;
              // all other xsi:* atts are ignored
              if (source.LocalName == "nil")
                  el.IsNillable = true;
              continue;
          }
          if (attrNs == NamespaceXmlns)
              continue; // namespace declarations are not attributes
          if (attrNs == NamespaceXml &&
              schemas.Schemas (NamespaceXml).Count == 0)
              IncludeXmlAttributes ();

          if (owner == null) {
              owner = ToComplexType (el);
              declared = GetAttributes (owner);
              pending = CollectAttrTable (declared);
          }

          QName attrName = new QName (
              source.LocalName, source.NamespaceURI);
          Attr known = pending [attrName] as Attr;
          if (known == null) {
              declared.Add (InferNewAttribute (attrName, isNew));
              continue;
          }

          // Seen in this instance, so it must not be downgraded to
          // optional below.
          pending.Remove (attrName);
          bool isReference = known.RefName != null &&
              known.RefName != QName.Empty;
          if (!isReference)
              InferMergedAttribute (known);
      } while (source.MoveToNextAttribute ());

      // Definitions that were collected but did not appear on this
      // element become optional.
      if (pending != null) {
          foreach (Attr missing in pending.Values)
              missing.Use = Use.Optional;
      }
  }
  // Creates the attribute particle to add to a complex type for the
  // attribute the reader is positioned on. A namespace-qualified
  // attribute yields a reference to a global definition (created, or
  // merged with the current value, as needed); an unqualified one
  // yields a local definition typed from the current value.
  private XmlSchemaAttribute InferNewAttribute (
      QName attrName, bool isNewTypeDefinition)
  {
      Attr result;
      bool globalWasRequired = false;

      if (attrName.Namespace.Length == 0) {
          // local attribute
          result = new Attr ();
          result.Name = attrName.Name;
          result.SchemaTypeName = InferSimpleType (source.Value);
      }
      else {
          // global attribute; might be already defined.
          Attr shared = GetGlobalAttribute (attrName) as Attr;
          if (shared != null) {
              InferMergedAttribute (shared);
              globalWasRequired = shared.Use == Use.Required;
          }
          else {
              shared = CreateGlobalAttribute (attrName);
              shared.SchemaTypeName =
                  InferSimpleType (source.Value);
          }
          result = new Attr ();
          result.RefName = attrName;
      }

      // Required only under strict occurrence, and only when the
      // owning type is new or the global definition was required.
      bool required = !laxOccurence &&
          (isNewTypeDefinition || globalWasRequired);
      result.Use = required ? Use.Required : Use.Optional;
      return result;
  }
  // Checks the current attribute value against the attribute's type
  // name and, if it does not fit, relaxes the type name. Any inline
  // type on the attribute is dropped.
  private void InferMergedAttribute (Attr attr)
  {
      QName merged = InferMergedType (source.Value,
          attr.SchemaTypeName);
      attr.SchemaTypeName = merged;
      attr.SchemaType = null;
  }
  // Returns the type name to use once "value" has to be accepted by a
  // type currently named typeName. Starting from the built-in type of
  // that name, each type is tried in turn; on a parse failure the
  // search moves to its base simple type. xs:string is returned when
  // typeName is not a built-in simple type or the chain runs out.
  private QName InferMergedType (string value, QName typeName)
  {
      SimpleType candidate = XmlSchemaType.GetBuiltInSimpleType (
          typeName);
      if (candidate == null) // non-primitive type
          return QNameString;

      IXmlNamespaceResolver resolver =
          source as IXmlNamespaceResolver;
      while (true) {
          try {
              candidate.Datatype.ParseValue (value,
                  source.NameTable, resolver);
              return typeName;
          } catch {
              // Value rejected: step up to the base type.
              candidate = candidate.BaseXmlSchemaType
                  as XmlSchemaSimpleType;
              typeName = candidate != null ?
                  candidate.QualifiedName : QName.Empty;
          }
          if (typeName == QName.Empty)
              return QNameString;
      }
  }
  // Returns the attribute collection that actually holds the
  // attributes of ct: the type's own collection when it has no content
  // model, otherwise the collection of the extension or restriction
  // inside its simple or complex content model. An unrecognised shape
  // is reported through Error ().
  private SOMList GetAttributes (ComplexType ct)
  {
      if (ct.ContentModel == null)
          return ct.Attributes;

      SimpleModel sc = ct.ContentModel as SimpleModel;
      if (sc != null) {
          SimpleExt sce = sc.Content as SimpleExt;
          if (sce != null)
              return sce.Attributes;
          SimpleRst scr = sc.Content as SimpleRst;
          if (scr != null)
              return scr.Attributes;
          else
              throw Error (sc, "Invalid simple content model.");
      }

      ComplexModel cc = ct.ContentModel as ComplexModel;
      if (cc != null) {
          ComplexExt cce = cc.Content as ComplexExt;
          if (cce != null)
              return cce.Attributes;
          ComplexRst ccr = cc.Content as ComplexRst;
          if (ccr != null)
              return ccr.Attributes;
          else
              // This branch concerns complex content, so the
              // message names it as such.
              throw Error (cc, "Invalid complex content model.");
      }

      // cc is null on this path, so report against the complex type
      // itself.
      throw Error (ct, "Invalid complexType. Should not happen.");
  }
  // Returns a complex type for el, converting the element's current
  // typing if necessary:
  //   1. an inline complex type is returned as is;
  //   2. a type name resolving to a global complex type returns that
  //      global type;
  //   3. otherwise a new inline complex type replaces the element's
  //      type and type name. It is left empty for xs:anyType or no
  //      type at all, wraps an inline simple type in a simple content
  //      restriction, or extends a built-in or global simple type by
  //      name.
  // An unresolvable type name is reported through Error ().
  private ComplexType ToComplexType (Element el)
  {
      QName typeName = el.SchemaTypeName;
      XmlSchemaType inlineType = el.SchemaType;

      // 1. element type is complex.
      ComplexType inlineComplex = inlineType as ComplexType;
      if (inlineComplex != null)
          return inlineComplex;

      // 2. reference to global complexType.
      XmlSchemaType globalType = schemas.GlobalTypes [typeName]
          as XmlSchemaType;
      ComplexType globalComplex = globalType as ComplexType;
      if (globalComplex != null)
          return globalComplex;

      ComplexType created = new ComplexType ();
      el.SchemaType = created;
      el.SchemaTypeName = QName.Empty;

      // 3. base type name is xs:anyType or no specification.
      // <xs:complexType />
      if (typeName == QNameAnyType ||
          (inlineType == null && typeName == QName.Empty))
          return created;

      SimpleModel content = new SimpleModel ();
      created.ContentModel = content;

      // 4. type is an inline simpleType
      // -> simple content restriction based on it.
      SimpleType inlineSimple = inlineType as SimpleType;
      if (inlineSimple != null) {
          SimpleRst restriction = new SimpleRst ();
          restriction.BaseType = inlineSimple;
          content.Content = restriction;
          return created;
      }

      SimpleExt extension = new SimpleExt ();
      content.Content = extension;

      // 5. type name points to primitive type, or
      // 6. type name points to global simpleType
      // -> simple extension of that type.
      bool resolvable =
          XmlSchemaType.GetBuiltInSimpleType (typeName) != null ||
          globalType is SimpleType;
      if (resolvable) {
          extension.BaseTypeName = typeName;
          return created;
      }

      throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
  }
  426. #endregion
  427. #region Element Type
  // Adjusts el's type so that an empty instance of the element is
  // acceptable. Inline complex types are relaxed through their content
  // model or particle; inline simple types are relaxed through
  // MakeBaseTypeAsEmptiable (). Elements with neither are left alone.
  private void InferAsEmptyElement (Element el, string ns,
      bool isNew)
  {
      ComplexType ct = el.SchemaType as ComplexType;
      if (ct == null) {
          SimpleType st = el.SchemaType as SimpleType;
          if (st == null)
              return;

          SimpleType relaxed = MakeBaseTypeAsEmptiable (st);
          string typeNs = relaxed.QualifiedName.Namespace;
          if (typeNs == XmlSchema.Namespace ||
              typeNs == XdtNamespace)
              // NOTE(review): el.SchemaType is not cleared on this
              // path -- confirm that is intended.
              el.SchemaTypeName = relaxed.QualifiedName;
          else
              el.SchemaType = relaxed;
          return;
      }

      SimpleModel sm = ct.ContentModel as SimpleModel;
      if (sm != null) {
          ToEmptiableSimpleContent (sm, isNew);
          return;
      }

      ComplexModel cm = ct.ContentModel as ComplexModel;
      if (cm != null) {
          ToEmptiableComplexContent (cm, isNew);
          return;
      }

      // No content model: an existing particle becomes optional.
      if (ct.Particle != null)
          ct.Particle.MinOccurs = 0;
  }
  // Returns a simple type that accepts an empty string in place of st.
  //   - A type whose qualified name is in the XML Schema or XDT
  //     namespace is replaced by the built-in xs:string type.
  //   - A restriction has its length and minLength facets removed, and
  //     its base is relaxed in turn: an inline base type recursively,
  //     a named base by switching it to xs:string.
  //   - Anything else (union/list) is returned unchanged.
  private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
  {
      switch (st.QualifiedName.Namespace) {
      case XmlSchema.Namespace:
      case XdtNamespace:
          // If a primitive type
          return XmlSchemaType.GetBuiltInSimpleType (
              XmlTypeCode.String);
      }

      SimpleTypeRst str = st.Content as SimpleTypeRst;
      if (str == null)
          return st; // union/list can have empty string value.

      // Collect first, remove afterwards: the facet collection must
      // not be modified while it is being enumerated. The list is
      // always allocated so the removal loop is safe when no facet
      // matched.
      ArrayList blocking = new ArrayList ();
      foreach (SchemaFacet f in str.Facets) {
          if (f is LengthFacet || f is MinLengthFacet)
              blocking.Add (f);
      }
      foreach (SchemaFacet f in blocking)
          str.Facets.Remove (f);

      if (str.BaseType != null)
          // Recurse into the inline base type. Recursing on st
          // itself would never terminate.
          str.BaseType = MakeBaseTypeAsEmptiable (str.BaseType);
      else
          // It might have a reference to an external simple type,
          // but there is no assurance that any of those external
          // types allow an empty string. So just set base type as
          // xs:string.
          str.BaseTypeName = QNameString;
      return st;
  }
  // Makes a simple content model accept empty text by switching the
  // base of its extension or restriction to xs:string. A restriction
  // also loses its inline base type. Any other content is reported
  // through Error ().
  private void ToEmptiableSimpleContent (
      SimpleModel sm, bool isNew)
  {
      SimpleExt extension = sm.Content as SimpleExt;
      if (extension != null) {
          extension.BaseTypeName = QNameString;
          return;
      }

      SimpleRst restriction = sm.Content as SimpleRst;
      if (restriction == null)
          throw Error (sm, "Invalid simple content model was passed.");
      restriction.BaseTypeName = QNameString;
      restriction.BaseType = null;
  }
  // Makes a complex content model accept an empty element: the
  // particle of its extension or restriction, when present, gets
  // minOccurs=0. Without a particle, a base type name other than
  // xs:anyType cannot be handled and is reported through Error ().
  private void ToEmptiableComplexContent (
      ComplexModel cm, bool isNew)
  {
      ComplexExt ce = cm.Content as ComplexExt;
      if (ce != null) {
          if (ce.Particle != null)
              ce.Particle.MinOccurs = 0;
          else if (ce.BaseTypeName != null &&
              ce.BaseTypeName != QName.Empty &&
              ce.BaseTypeName != QNameAnyType)
              throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
          return;
      }

      ComplexRst cr = cm.Content as ComplexRst;
      if (cr == null)
          throw Error (cm, "Invalid complex content model was passed.");
      if (cr.Particle != null)
          cr.Particle.MinOccurs = 0;
      else if (cr.BaseTypeName != null &&
          cr.BaseTypeName != QName.Empty &&
          cr.BaseTypeName != QNameAnyType)
          // The offending component here is a restriction, so the
          // message says so.
          throw Error (cr, "Complex type content restriction has a reference to an external component that is not supported.");
  }
  // Steps into the current (non-empty) element and dispatches on the
  // first content node: an immediate end tag is handled as empty, a
  // child element starts complex content inference, and text is
  // inferred as text and followed by complex content inference if an
  // element comes next.
  private void InferContent (Element el, string ns, bool isNew)
  {
      source.Read ();
      source.MoveToContent ();

      XmlNodeType kind = source.NodeType;
      if (kind == XmlNodeType.EndElement) {
          InferAsEmptyElement (el, ns, isNew);
      }
      else if (kind == XmlNodeType.Element) {
          InferComplexContent (el, ns, isNew);
      }
      else if (kind == XmlNodeType.Text ||
          kind == XmlNodeType.CDATA ||
          kind == XmlNodeType.SignificantWhitespace) {
          InferTextContent (el, isNew);
          source.MoveToContent ();
          if (source.NodeType == XmlNodeType.Element)
              InferComplexContent (el, ns, isNew);
      }
      else if (kind == XmlNodeType.Whitespace) {
          InferContent (el, ns, isNew); // skip and retry
      }
  }
  // Infers the child content of el. The element's type is first
  // converted to a complex type without simple content, then every
  // child node up to the closing tag is processed: elements are merged
  // into the type's sequence (or into its choice, once the sequence
  // has been relaxed to one) and text marks the type as mixed.
  // Throws NotImplementedException if the reader runs out of nodes
  // before the closing tag.
  private void InferComplexContent (Element el, string ns,
      bool isNew)
  {
      ComplexType ct = ToComplexType (el);
      ToComplexContentType (ct);

      int position = 0;
      bool consumed = false;

      do {
          switch (source.NodeType) {
          case XmlNodeType.Element:
              Sequence s = PopulateSequence (ct);
              // A sequence whose first item is a choice has
              // already been relaxed.
              Choice c = s.Items.Count > 0 ?
                  s.Items [0] as Choice :
                  null;
              if (c != null)
                  ProcessLax (c, ns);
              else
                  ProcessSequence (ct, s, ns,
                      ref position,
                      ref consumed,
                      isNew);
              source.MoveToContent ();
              break;
          case XmlNodeType.Text:
          case XmlNodeType.CDATA:
          case XmlNodeType.SignificantWhitespace:
              MarkAsMixed (ct);
              source.ReadString ();
              source.MoveToContent ();
              break;
          case XmlNodeType.EndElement:
              return; // finished
          case XmlNodeType.None:
              throw new NotImplementedException ("Internal Error: Should not happen.");
          default:
              // Any other node type (for example an entity
              // reference) is skipped. Without this the reader
              // would never advance and the loop would not end.
              source.Read ();
              source.MoveToContent ();
              break;
          }
      } while (true);
  }
  // Reads the text content at the reader position and reconciles el's
  // type with it.
  //   - No inline type, no type name: infer a type from the value for
  //     a new definition, otherwise use xs:string.
  //   - No inline type, built-in type name: relax the name as needed.
  //   - No inline type, other type name: a global complex type is
  //     marked mixed; anything else becomes xs:string.
  //   - Inline simple type: replaced by xs:string.
  //   - Inline complex type: marked mixed unless it has simple
  //     content, whose base type name is relaxed as needed.
  private void InferTextContent (Element el, bool isNew)
  {
      string value = source.ReadString ();

      if (el.SchemaType == null) {
          QName current = el.SchemaTypeName;
          if (current == QName.Empty) {
              // no type information -> infer type
              el.SchemaTypeName = isNew ?
                  InferSimpleType (value) : QNameString;
              return;
          }

          string typeNs = current.Namespace;
          if (typeNs == XmlSchema.Namespace ||
              typeNs == XdtNamespace) {
              // existing primitive type
              el.SchemaTypeName = InferMergedType (
                  value, current);
              return;
          }

          // If it is complex, then just set mixed='true' (type
          // cannot be set.) If it is simple, then we cannot make
          // sure that string value is valid. So just set as
          // xs:string.
          ComplexType globalComplex =
              schemas.GlobalTypes [current] as ComplexType;
          if (globalComplex != null)
              MarkAsMixed (globalComplex);
          else
              el.SchemaTypeName = QNameString;
          return;
      }

      // simpleType: validity cannot be assured, so use xs:string.
      if (el.SchemaType is SimpleType) {
          el.SchemaType = null;
          el.SchemaTypeName = QNameString;
          return;
      }

      // complexType
      ComplexType ect = el.SchemaType as ComplexType;
      SimpleModel sm = ect.ContentModel as SimpleModel;
      if (sm == null) {
          // - no simple content
          MarkAsMixed (ect);
          return;
      }

      // - SimpleContent
      SimpleExt se = sm.Content as SimpleExt;
      if (se != null)
          se.BaseTypeName = InferMergedType (value,
              se.BaseTypeName);
      SimpleRst sr = sm.Content as SimpleRst;
      if (sr != null) {
          sr.BaseTypeName = InferMergedType (value,
              sr.BaseTypeName);
          sr.BaseType = null;
      }
  }
  // Flags ct as mixed content. When the type has a complex content
  // model the flag is set on that model; otherwise it is set on the
  // type itself.
  private void MarkAsMixed (ComplexType ct)
  {
      ComplexModel model = ct.ContentModel as ComplexModel;
      if (model == null) {
          ct.IsMixed = true;
          return;
      }
      model.IsMixed = true;
  }
  676. #endregion
  677. #region Particles
  // Merges the element at the reader position into a choice: the
  // first element particle of the choice that matches is refined in
  // place; if none matches, a new element particle is inferred and
  // appended. A non-element particle in the choice is reported through
  // Error ().
  private void ProcessLax (Choice c, string ns)
  {
      foreach (Particle p in c.Items) {
          Element el = p as Element;
          if (el == null)
              // Pass the offending particle so that the {0}
              // placeholder is filled; without an argument
              // String.Format itself would throw.
              throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here.", p));
          if (ElementMatches (el, ns)) {
              InferElement (el, ns, false);
              return;
          }
      }

      // append a new element particle to lax term.
      Element nel = new Element ();
      if (source.NamespaceURI == ns)
          nel.Name = source.LocalName;
      else
          nel.RefName = new QName (source.LocalName,
              source.NamespaceURI);
      InferElement (nel, source.NamespaceURI, true);
      c.Items.Add (nel);
  }
  // Tells whether the element particle el describes the element the
  // reader is positioned on. A reference is compared by the local name
  // and namespace of its RefName; a local definition is compared by
  // its name, with ns standing in for its namespace.
  private bool ElementMatches (Element el, string ns)
  {
      if (el.RefName != QName.Empty)
          return el.RefName.Name == source.LocalName &&
              el.RefName.Namespace == source.NamespaceURI;

      return el.Name == source.LocalName &&
          ns == source.NamespaceURI;
  }
  // Merges the element at the reader position into sequence s.
  //   position - index of the sequence item currently being matched
  //   consumed - whether the item at "position" counts as already
  //              used (see the assignments below)
  // If the element matches an item before "position", or fails to
  // match an unconsumed item, the sequence is rewritten as a repeating
  // choice and the element is merged into that instead.
  private void ProcessSequence (ComplexType ct, Sequence s,
      string ns, ref int position, ref bool consumed,
      bool isNew)
  {
      // An element that matches an earlier item may violate the
      // sequence order (it may not, but we cannot backtrack here),
      // so switch to sequence of choice*.
      for (int index = 0; index < position; index++) {
          Element earlier = s.Items [index] as Element;
          if (ElementMatches (earlier, ns)) {
              ProcessLax (ToSequenceOfChoice (s), ns);
              return;
          }
      }

      // Past the end of the sequence: append a new item.
      if (s.Items.Count <= position) {
          QName name = new QName (source.LocalName,
              source.NamespaceURI);
          Element created = CreateElement (name);
          InferElement (created, ns, true);
          if (ns == name.Namespace)
              s.Items.Add (created);
          else {
              Element reference = new Element ();
              reference.RefName = name;
              s.Items.Add (reference);
          }
          consumed = true;
          return;
      }

      Element current = s.Items [position] as Element;
      if (current == null)
          throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));

      if (!ElementMatches (current, ns)) {
          if (!consumed) {
              ProcessLax (ToSequenceOfChoice (s), ns);
              return;
          }
          // The current item was used; try the next one.
          position++;
          consumed = false;
          ProcessSequence (ct, s, ns,
              ref position, ref consumed, isNew);
          return;
      }

      // NOTE(review): "consumed" is not set after a match here --
      // confirm whether that is intended.
      if (consumed)
          current.MaxOccursString = "unbounded";
      InferElement (current, source.NamespaceURI, false);
      source.MoveToContent ();

      switch (source.NodeType) {
      case XmlNodeType.Text:
      case XmlNodeType.CDATA:
      case XmlNodeType.SignificantWhitespace:
          MarkAsMixed (ct);
          source.ReadString ();
          break; // re-examine the node type below
      case XmlNodeType.Whitespace:
          source.ReadString ();
          break; // re-examine the node type below
      case XmlNodeType.Element:
          ProcessSequence (ct, s, ns, ref position,
              ref consumed, isNew);
          return;
      case XmlNodeType.EndElement:
      case XmlNodeType.None:
          return;
      default:
          source.Read ();
          return;
      }

      // Only reached after text was read: continue with the next
      // sibling when it is an element.
      if (source.NodeType == XmlNodeType.Element)
          ProcessSequence (ct, s, ns, ref position,
              ref consumed, isNew);
  }
  // Rewrites s in place so that its only item is an unbounded choice
  // holding the former items, and returns that choice (not the
  // sequence). Under lax occurrence the choice also gets minOccurs=0.
  private Choice ToSequenceOfChoice (Sequence s)
  {
      Choice wrapper = new Choice ();
      if (laxOccurence)
          wrapper.MinOccurs = 0;
      wrapper.MaxOccursString = "unbounded";

      for (int i = 0; i < s.Items.Count; i++)
          wrapper.Items.Add (s.Items [i]);

      s.Items.Clear ();
      s.Items.Add (wrapper);
      return wrapper;
  }
  // Removes a simple content model from the complex type: the
  // attributes held by the model are copied to the type itself, the
  // model is dropped and the type is marked mixed. Types without a
  // simple content model are left untouched.
  private void ToComplexContentType (ComplexType type)
  {
      if (!(type.ContentModel is SimpleModel))
          return;

      SOMList carried = GetAttributes (type);
      for (int i = 0; i < carried.Count; i++)
          type.Attributes.Add (carried [i]);

      // FIXME: need to copy AnyAttribute.
      // (though not considered right now)
      type.ContentModel = null;
      type.IsMixed = true;
  }
  // Returns the particle of ct as a sequence, creating it when
  // missing (see PopulateParticle). A particle of any other kind is
  // reported through Error ().
  private Sequence PopulateSequence (ComplexType ct)
  {
      Particle particle = PopulateParticle (ct);
      Sequence sequence = particle as Sequence;
      if (sequence == null)
          throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", particle));
      return sequence;
  }
  // Creates an empty sequence; under lax occurrence it is given
  // minOccurs=0.
  private Sequence CreateSequence ()
  {
      Sequence created = new Sequence ();
      if (!laxOccurence)
          return created;
      created.MinOccurs = 0;
      return created;
  }
  // Returns the particle that carries the child content of ct,
  // creating an empty sequence where none exists yet. The particle is
  // looked up on the type itself when it has no content model,
  // otherwise on the extension or restriction of its complex content
  // model. Any other shape is reported through Error ().
  private Particle PopulateParticle (ComplexType ct)
  {
      if (ct.ContentModel == null) {
          if (ct.Particle == null)
              ct.Particle = CreateSequence ();
          return ct.Particle;
      }

      ComplexModel model = ct.ContentModel as ComplexModel;
      if (model != null) {
          ComplexExt extension = model.Content as ComplexExt;
          if (extension != null) {
              if (extension.Particle == null)
                  extension.Particle = CreateSequence ();
              return extension.Particle;
          }

          ComplexRst restriction = model.Content as ComplexRst;
          if (restriction != null) {
              if (restriction.Particle == null)
                  restriction.Particle = CreateSequence ();
              return restriction.Particle;
          }
      }

      throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
  }
  860. #endregion
  861. #region String Value
  // Primitive type inference from a text value.
  // When running lax type inference, it just returns xs:string.
  // Otherwise the candidates are probed in this order, and the
  // first one that accepts the value wins: the boolean literals,
  // 64-bit signed integer (narrowed to the smallest matching
  // range), 64-bit unsigned integer, decimal, double (narrowed
  // to float when within float range), dateTime and duration.
  // xs:string is the fallback when nothing else parses.
  private QName InferSimpleType (string value)
  {
      if (laxTypeInference) {
          return QNameString;
      }

      // Only "true" and "false" are taken as boolean. "0" and "1"
      // are not handled here (unlike MS.XSDInfer, according to
      // the original note) and fall through to the numeric probes.
      if (value == "true" || value == "false") {
          return QNameBoolean;
      }

      try {
          long number = XmlConvert.ToInt64 (value);
          // The range checks run in this exact order, so the
          // first matching range decides the result.
          if (byte.MinValue <= number && number <= byte.MaxValue) {
              return QNameUByte;
          }
          if (sbyte.MinValue <= number && number <= sbyte.MaxValue) {
              return QNameByte;
          }
          if (ushort.MinValue <= number && number <= ushort.MaxValue) {
              return QNameUShort;
          }
          if (short.MinValue <= number && number <= short.MaxValue) {
              return QNameShort;
          }
          if (uint.MinValue <= number && number <= uint.MaxValue) {
              return QNameUInt;
          }
          if (int.MinValue <= number && number <= int.MaxValue) {
              return QNameInt;
          }
          return QNameLong;
      } catch (Exception) {
          // Not a 64-bit signed integer; try the next candidate.
      }

      try {
          XmlConvert.ToUInt64 (value);
          return QNameULong;
      } catch (Exception) {
          // Not a 64-bit unsigned integer; try the next candidate.
      }

      try {
          XmlConvert.ToDecimal (value);
          return QNameDecimal;
      } catch (Exception) {
          // Not a decimal; try the next candidate.
      }

      try {
          double real = XmlConvert.ToDouble (value);
          // Keep the test in this positive form: a value for which
          // both comparisons are false (e.g. NaN) must end up as
          // double.
          bool withinFloatRange =
              float.MinValue <= real && real <= float.MaxValue;
          if (withinFloatRange) {
              return QNameFloat;
          }
          return QNameDouble;
      } catch (Exception) {
          // Not a floating point number; try the next candidate.
      }

      try {
          // FIXME: also try DateTimeSerializationMode
          // and gYearMonth
          XmlConvert.ToDateTime (value);
          return QNameDateTime;
      } catch (Exception) {
          // Not a dateTime; try the next candidate.
      }

      try {
          XmlConvert.ToTimeSpan (value);
          return QNameDuration;
      } catch (Exception) {
          // Not a duration either.
      }

      // xs:string
      return QNameString;
  }
  927. #endregion
  928. #region Utilities
  // Looks up a global element by qualified name: first in
  // newElements, then in the GlobalElements table of the schema
  // set. The result is null when neither lookup yields an
  // Element.
  private Element GetGlobalElement (QName name)
  {
      Element created = newElements [name] as Element;
      if (created != null) {
          return created;
      }
      return schemas.GlobalElements [name] as Element;
  }
  // Looks up a global attribute by qualified name: first among
  // the attributes registered in newAttributes (which is where
  // CreateGlobalAttribute stores them), then in the
  // GlobalAttributes table of the schema set. The result is null
  // when neither lookup yields an Attr.
  private Attr GetGlobalAttribute (QName name)
  {
      // This must consult newAttributes, not newElements:
      // attributes are never added to newElements, so looking
      // there could never find a newly created global attribute.
      Attr a = newAttributes [name] as Attr;
      if (a == null) {
          a = schemas.GlobalAttributes [name] as Attr;
      }
      return a;
  }
  // Creates a new element whose Name is the local name part of
  // the given qualified name. The namespace part is not used
  // here, and the element is not added to any schema.
  private Element CreateElement (QName name)
  {
      Element created = new Element ();
      created.Name = name.Name;
      return created;
  }
  // Creates an element for the given qualified name, adds it to
  // the items of the schema returned by PopulateSchema () for
  // the name's namespace, and registers it in newElements under
  // that name.
  private Element CreateGlobalElement (QName name)
  {
      Element created = CreateElement (name);
      PopulateSchema (name.Namespace).Items.Add (created);
      newElements.Add (name, created);
      return created;
  }
  // Creates an attribute named after the local name part of the
  // given qualified name, adds it to the items of the schema
  // returned by PopulateSchema () for the name's namespace, and
  // registers it in newAttributes under that name.
  private Attr CreateGlobalAttribute (QName name)
  {
      Attr created = new Attr ();
      XmlSchema owner = PopulateSchema (name.Namespace);
      created.Name = name.Name;

      owner.Items.Add (created);
      newAttributes.Add (name, created);
      return created;
  }
  // Returns a schema for the given target namespace: the first
  // one reported by the schema set for that namespace if there
  // is any, otherwise a newly created schema (qualified element
  // form, unqualified attribute form) that is added to the
  // schema set.
  // Note that the return value is not guaranteed to be the
  // schema that holds every component of the namespace ns.
  private XmlSchema PopulateSchema (string ns)
  {
      // Only the first schema registered for the namespace matters.
      foreach (XmlSchema existing in schemas.Schemas (ns)) {
          return existing;
      }

      XmlSchema created = new XmlSchema ();
      // A null or empty namespace leaves TargetNamespace unset.
      if (!String.IsNullOrEmpty (ns)) {
          created.TargetNamespace = ns;
      }
      created.ElementFormDefault = Form.Qualified;
      created.AttributeFormDefault = Form.Unqualified;
      schemas.Add (created);
      return created;
  }
  // Builds the exception for an error that relates to a schema
  // component. It delegates to the three-argument overload
  // without reader information.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      string message)
  {
      // This override is mainly for schema component error.
      const bool useReader = false;
      return Error (sourceObj, useReader, message);
  }
  // Builds (but does not throw) an XmlSchemaInferenceException
  // for the given message.
  //  - sourceObj: when non-null, its source URI, line number and
  //    line position are appended to the message.
  //  - useReader: when true, the BaseURI of the `source` reader
  //    is appended to the message and, if that reader implements
  //    IXmlLineInfo, its line number and position are passed to
  //    the exception as well.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      bool useReader,
      string message)
  {
      string componentInfo = String.Empty;
      if (sourceObj != null) {
          // The format string used to contain only {0}, so the
          // line number and position passed here were silently
          // dropped from the message.
          componentInfo = String.Format (
              ". Related schema component is {0} ({1},{2})",
              sourceObj.SourceUri,
              sourceObj.LineNumber,
              sourceObj.LinePosition);
      }

      string readerInfo = useReader ?
          String.Format (". {0}", source.BaseURI) :
          String.Empty;

      string msg = String.Concat (message, componentInfo, readerInfo);

      if (useReader) {
          IXmlLineInfo li = source as IXmlLineInfo;
          if (li != null) {
              return new XmlSchemaInferenceException (
                  msg, null, li.LineNumber,
                  li.LinePosition);
          }
      }
      return new XmlSchemaInferenceException (msg);
  }
  1015. #endregion
  1016. }
  1017. }
  1018. #endif