XmlSchemaInference.cs 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086
  1. //
  2. // XmlSchemaInference.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C)2004 Novell Inc.
  8. //
  9. //
  10. // Permission is hereby granted, free of charge, to any person obtaining
  11. // a copy of this software and associated documentation files (the
  12. // "Software"), to deal in the Software without restriction, including
  13. // without limitation the rights to use, copy, modify, merge, publish,
  14. // distribute, sublicense, and/or sell copies of the Software, and to
  15. // permit persons to whom the Software is furnished to do so, subject to
  16. // the following conditions:
  17. //
  18. // The above copyright notice and this permission notice shall be
  19. // included in all copies or substantial portions of the Software.
  20. //
  21. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28. //
  29. #if NET_2_0
  30. using System;
  31. using System.Collections;
  32. using System.Xml;
  33. using System.Xml.Schema;
  34. using QName = System.Xml.XmlQualifiedName;
  35. using Form = System.Xml.Schema.XmlSchemaForm;
  36. using Use = System.Xml.Schema.XmlSchemaUse;
  37. using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
  38. using SOMObject = System.Xml.Schema.XmlSchemaObject;
  39. using Element = System.Xml.Schema.XmlSchemaElement;
  40. using Attr = System.Xml.Schema.XmlSchemaAttribute;
  41. using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
  42. using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
  43. using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
  44. using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
  45. using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
  46. using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
  47. using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
  48. using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
  49. using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
  50. using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
  51. using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
  52. using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
  53. using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
  54. using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
  55. using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
  56. using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
  57. using Particle = System.Xml.Schema.XmlSchemaParticle;
  58. using Sequence = System.Xml.Schema.XmlSchemaSequence;
  59. using Choice = System.Xml.Schema.XmlSchemaChoice;
  60. namespace System.Xml.Schema
  61. {
  /// <summary>
  /// Public entry point for inferring XML Schema definitions from an XML
  /// instance document. The actual work is delegated to XsdInference.
  /// </summary>
  [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
  public class XmlSchemaInference
  {
      /// <summary>
      /// Strictness switch used for both occurrence and type inference.
      /// Relaxed is forwarded to the implementation as the "lax" flag.
      /// </summary>
      public enum InferenceOption {
          Restricted,
          Relaxed,
      }

      InferenceOption occurrence = InferenceOption.Restricted;
      InferenceOption typeInference = InferenceOption.Restricted;

      public XmlSchemaInference ()
      {
      }

      /// <summary>Gets or sets how occurrence constraints are inferred.</summary>
      public InferenceOption Occurrence {
          get { return occurrence; }
          set { occurrence = value; }
      }

      /// <summary>Gets or sets how simple types are inferred.</summary>
      public InferenceOption TypeInference {
          // Must read the backing field: returning the property itself
          // recursed endlessly and overflowed the stack.
          get { return typeInference; }
          set { typeInference = value; }
      }

      /// <summary>Infers a schema for the document into a fresh schema set.</summary>
      /// <param name="xmlReader">Reader over the instance document.</param>
      /// <returns>The schema set holding the inferred schemas.</returns>
      public XmlSchemaSet InferSchema (XmlReader xmlReader)
      {
          return InferSchema (xmlReader, new XmlSchemaSet ());
      }

      /// <summary>
      /// Infers a schema for the document, refining the given schema set.
      /// </summary>
      /// <param name="xmlReader">Reader over the instance document.</param>
      /// <param name="schemas">Schema set to look up and extend.</param>
      /// <returns>The schema set passed in, after inference.</returns>
      /// <exception cref="ArgumentNullException">xmlReader is null.</exception>
      public XmlSchemaSet InferSchema (XmlReader xmlReader,
          XmlSchemaSet schemas)
      {
          if (xmlReader == null)
              throw new ArgumentNullException ("xmlReader");
          return XsdInference.Process (xmlReader, schemas,
              occurrence == InferenceOption.Relaxed,
              typeInference == InferenceOption.Relaxed);
      }
  }
  94. class XsdInference
  95. {
  // Builds a one-shot inference worker over the reader and schema set,
  // runs it, and returns the worker's schema set.
  public static XmlSchemaSet Process (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurence,
      bool laxTypeInference)
  {
      XsdInference worker = new XsdInference (
          xmlReader, schemas, laxOccurence, laxTypeInference);
      worker.Run ();
      return worker.schemas;
  }
  // Namespace URI of the reserved "xml" prefix; attributes in it trigger
  // IncludeXmlAttributes ().
  106. public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
  // Namespace URI of namespace declaration attributes; these are skipped
  // during attribute inference.
  107. public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
  // Types in this namespace are treated like XML Schema built-in types.
  108. public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";
  // Qualified names of the built-in types this class refers to directly.
  109. static readonly QName QNameString = new QName ("string",
  110. XmlSchema.Namespace);
  111. static readonly QName QNameBoolean = new QName ("boolean",
  112. XmlSchema.Namespace);
  113. static readonly QName QNameAnyType = new QName ("anyType",
  114. XmlSchema.Namespace);
  // Instance document being read; inference methods advance it as they go.
  115. XmlReader source;
  // Schema set that is consulted for existing definitions and returned by Process ().
  116. XmlSchemaSet schemas;
  // True when occurrence inference is relaxed (InferenceOption.Relaxed).
  117. bool laxOccurence;
  // True when type inference is relaxed; InferSimpleType () then always
  // answers xs:string.
  118. bool laxTypeInference;
  // Global elements keyed by QName; GetGlobalElement () checks this table
  // before schemas.GlobalElements.
  119. Hashtable newElements = new Hashtable ();
  // NOTE(review): presumably the attribute counterpart of newElements;
  // its use is not visible in this part of the file.
  120. Hashtable newAttributes = new Hashtable ();
  // Captures the reader, the target schema set and both laxness flags.
  // Instances are only created through Process ().
  private XsdInference (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurence,
      bool laxTypeInference)
  {
      this.laxTypeInference = laxTypeInference;
      this.laxOccurence = laxOccurence;
      this.schemas = schemas;
      this.source = xmlReader;
  }
  // Drives inference for the document element: compiles the schema set,
  // positions the reader on the top-level element, and infers it against
  // an existing global element declaration or a newly created one.
  // Throws ArgumentException when the reader content is not an element.
  private void Run ()
  {
      // XmlSchemaSet needs to be compiled.
      schemas.Compile ();
      // Move to the top-level element.
      source.MoveToContent ();
      if (source.NodeType != XmlNodeType.Element)
          throw new ArgumentException ("Argument XmlReader content is expected to be an element.");
      QName qname = new QName (source.LocalName,
          source.NamespaceURI);
      Element el = GetGlobalElement (qname);
      // A declaration that had to be created is inferred as "new".
      bool isNew = el == null;
      if (isNew)
          el = CreateGlobalElement (qname);
      InferElement (el, qname.Namespace, isNew);
  }
  // Makes sure a schema for the "xml" namespace is present in the set,
  // adding it from the W3C location when none is registered yet.
  private void IncludeXmlAttributes ()
  {
      bool alreadyPresent = schemas.Schemas (NamespaceXml).Count != 0;
      if (alreadyPresent)
          return;
      // FIXME: do it from resources.
      schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
  }
  // Infers (or refines) the declaration `el` from the element the reader
  // is currently positioned on, and consumes that element.
  //   el    - element declaration to update (may be a ref='...').
  //   ns    - namespace of the enclosing context.
  //   isNew - true when the declaration was just created.
  private void InferElement (Element el, string ns, bool isNew)
  {
      // A reference is redirected to the declaration it points at; when
      // no such global element is known a fresh one is inferred instead.
      if (el.RefName != QName.Empty) {
          Element target = GetGlobalElement (el.RefName);
          bool targetIsNew = isNew;
          if (target == null) {
              target = CreateElement (el.RefName);
              targetIsNew = true;
          }
          InferElement (target, ns, targetIsNew);
          return;
      }

      // Attributes
      if (source.MoveToFirstAttribute ()) {
          InferAttributes (el, ns, isNew);
          source.MoveToElement ();
      }

      // Content
      if (!source.IsEmptyElement) {
          InferContent (el, ns, isNew);
          source.ReadEndElement ();
      }
      else {
          InferAsEmptyElement (el, ns, isNew);
          source.Read ();
          source.MoveToContent ();
      }

      // Still no type information at all: fall back to xs:string.
      bool untyped = el.SchemaType == null &&
          el.SchemaTypeName == QName.Empty;
      if (untyped)
          el.SchemaTypeName = QNameString;
  }
  190. #region Attribute Inference
  // Indexes the attribute definitions in attList by qualified name.
  // References are keyed by their RefName, local definitions by their
  // name with an empty namespace. Anything that is not a plain attribute
  // definition is rejected with an error.
  private Hashtable CollectAttrTable (SOMList attList)
  {
      Hashtable byName = new Hashtable ();
      foreach (XmlSchemaObject item in attList) {
          Attr definition = item as Attr;
          if (definition == null)
              throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));
          QName key = definition.RefName != QName.Empty ?
              definition.RefName :
              new QName (definition.Name, "");
          byName.Add (key, definition);
      }
      return byName;
  }
  // Infers attribute definitions for `el` from the attributes of the
  // current element. The caller has already moved the reader onto the
  // first attribute; this method walks the remaining ones with
  // MoveToNextAttribute ().
  //   el    - element declaration whose (complex) type receives the attributes.
  //   ns    - namespace of the enclosing context (not used in this method).
  //   isNew - passed on to InferNewAttribute () for newly seen attributes.
  207. private void InferAttributes (Element el, string ns, bool isNew)
  208. {
  209. // Now this element is going to have complexType.
  210. // If it currently does not, then we have to replace it.
  // ct/attList/table are created lazily, on the first attribute that
  // actually needs a definition.
  211. ComplexType ct = null;
  212. SOMList attList = null;
  213. Hashtable table = null;
  214. do {
  // Note: `continue` inside this switch continues the do/while loop,
  // i.e. it proceeds to MoveToNextAttribute () in the loop condition.
  215. switch (source.NamespaceURI) {
  216. case NamespaceXml:
  // xml:* attributes need the xml namespace schema to be present.
  217. if (schemas.Schemas (
  218. NamespaceXml) .Count == 0)
  219. IncludeXmlAttributes ();
  220. break;
  221. case XmlSchema.InstanceNamespace:
  222. if (source.LocalName == "nil")
  223. el.IsNillable = true;
  224. // all other xsi:* atts are ignored
  225. continue;
  226. case NamespaceXmlns:
  // namespace declarations are not attributes of the content model
  227. continue;
  228. }
  229. if (ct == null) {
  230. ct = ToComplexType (el);
  231. attList = GetAttributes (ct);
  232. table = CollectAttrTable (attList);
  233. }
  234. QName attrName = new QName (
  235. source.LocalName, source.NamespaceURI);
  236. Attr attr = table [attrName] as Attr;
  237. if (attr == null) {
  // not defined yet: append a new definition
  238. attList.Add (InferNewAttribute (
  239. attrName, isNew));
  240. } else {
  // already defined: remove it from the table so that only
  // definitions that did NOT appear remain there afterwards
  241. table.Remove (attrName);
  242. if (attr.RefName != null &&
  243. attr.RefName != QName.Empty)
  244. continue; // just a reference
  245. InferMergedAttribute (attr);
  246. }
  247. } while (source.MoveToNextAttribute ());
  248. // mark all attr definitions that did not appear
  249. // as optional.
  250. if (table != null)
  251. foreach (Attr attr in table.Values)
  252. attr.Use = Use.Optional;
  253. }
  // Creates the attribute particle to append to a type for an attribute
  // that the type does not define yet. The reader is positioned on the
  // attribute, so source.Value is its value.
  //   attrName            - qualified name of the attribute.
  //   isNewTypeDefinition - true when the owning type was just created.
  // Returns a local definition (no namespace) or a reference to a global
  // attribute (namespace-qualified).
  private XmlSchemaAttribute InferNewAttribute (
      QName attrName, bool isNewTypeDefinition)
  {
      bool mergedRequired = false;
      Attr result;
      if (attrName.Namespace.Length == 0) {
          // local attribute
          result = new Attr ();
          result.Name = attrName.Name;
          result.SchemaTypeName = InferSimpleType (source.Value);
      } else {
          // global attribute; might be already defined.
          Attr global = GetGlobalAttribute (attrName) as Attr;
          if (global != null) {
              InferMergedAttribute (global);
              mergedRequired = global.Use == Use.Required;
          } else {
              global = CreateGlobalAttribute (attrName);
              global.SchemaTypeName = InferSimpleType (source.Value);
          }
          // the type itself only holds a reference to the global one
          result = new Attr ();
          result.RefName = attrName;
      }

      bool required = !laxOccurence &&
          (isNewTypeDefinition || mergedRequired);
      result.Use = required ? Use.Required : Use.Optional;
      return result;
  }
  // Validates the current attribute value against attr's type and, if it
  // does not fit, relaxes the type. Any inline type is dropped in favour
  // of the resulting type name.
  private void InferMergedAttribute (Attr attr)
  {
      QName merged = InferMergedType (source.Value, attr.SchemaTypeName);
      attr.SchemaTypeName = merged;
      attr.SchemaType = null;
  }
  // Examines value against the built-in type named typeName.
  // Returns typeName when the value parses as that type; otherwise (or
  // when typeName is not a built-in simple type) returns xs:string.
  private QName InferMergedType (string value, QName typeName)
  {
      SimpleType builtIn = XmlSchemaType.GetBuiltInSimpleType (typeName);
      if (builtIn != null) {
          try {
              builtIn.Datatype.ParseValue (value,
                  source.NameTable,
                  source as IXmlNamespaceResolver);
              // the string value was valid for the type
              return typeName;
          } catch {
              // The types were incompatible.
              // FIXME: find the base common type
          }
      }
      // non-primitive type, or incompatible value => relax to xs:string
      return QNameString;
  }
  // Returns the collection that holds the attribute definitions of ct:
  // the type's own Attributes when it has no content model, otherwise
  // the Attributes of its simple/complex content extension or restriction.
  // Raises an inference error when the content model has an unexpected shape.
  private SOMList GetAttributes (ComplexType ct)
  {
      if (ct.ContentModel == null)
          return ct.Attributes;

      SimpleModel sc = ct.ContentModel as SimpleModel;
      if (sc != null) {
          SimpleExt sce = sc.Content as SimpleExt;
          if (sce != null)
              return sce.Attributes;
          SimpleRst scr = sc.Content as SimpleRst;
          if (scr != null)
              return scr.Attributes;
          throw Error (sc, "Invalid simple content model.");
      }

      ComplexModel cc = ct.ContentModel as ComplexModel;
      if (cc != null) {
          ComplexExt cce = cc.Content as ComplexExt;
          if (cce != null)
              return cce.Attributes;
          ComplexRst ccr = cc.Content as ComplexRst;
          if (ccr != null)
              return ccr.Attributes;
          // This branch handles complex content, so say so.
          throw Error (cc, "Invalid complex content model.");
      }

      // cc is always null here; report the complex type itself so the
      // error carries the offending component.
      throw Error (ct, "Invalid complexType. Should not happen.");
  }
  // Returns the complex type of el, converting the element to a complex
  // type first when it does not have one yet. When a new complex type is
  // created it replaces el.SchemaType and el.SchemaTypeName is cleared.
  private ComplexType ToComplexType (Element el)
  {
      QName typeName = el.SchemaTypeName;
      XmlSchemaType inlineType = el.SchemaType;

      // 1. the element already carries an inline complex type.
      ComplexType result = inlineType as ComplexType;
      if (result != null)
          return result;

      // 2. the type name refers to a global complexType.
      XmlSchemaType namedType = schemas.GlobalTypes [typeName]
          as XmlSchemaType;
      result = namedType as ComplexType;
      if (result != null)
          return result;

      // From here on a new complex type takes over.
      result = new ComplexType ();
      el.SchemaType = result;
      el.SchemaTypeName = QName.Empty;

      // 3. xs:anyType, or no type at all -> plain <xs:complexType />
      if (typeName == QNameAnyType ||
          (inlineType == null && typeName == QName.Empty))
          return result;

      SimpleModel content = new SimpleModel ();
      result.ContentModel = content;

      // 4. inline simpleType -> simple content restriction based on it.
      SimpleType inlineSimple = inlineType as SimpleType;
      if (inlineSimple != null) {
          SimpleRst restriction = new SimpleRst ();
          restriction.BaseType = inlineSimple;
          content.Content = restriction;
          return result;
      }

      SimpleExt extension = new SimpleExt ();
      content.Content = extension;

      // 5. type name is a primitive type, or
      // 6. type name is a global simpleType
      //    -> simple content extension of that named type.
      bool resolvable =
          XmlSchemaType.GetBuiltInSimpleType (typeName) != null ||
          namedType is SimpleType;
      if (!resolvable)
          throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");
      extension.BaseTypeName = typeName;
      return result;
  }
  394. #endregion
  395. #region Element Type
  // Adjusts the inline type of el so that an empty element is acceptable.
  // Complex types get their content made optional; inline simple types
  // are loosened via MakeBaseTypeAsEmptiable (). Elements without an
  // inline type are left untouched.
  private void InferAsEmptyElement (Element el, string ns,
      bool isNew)
  {
      ComplexType complex = el.SchemaType as ComplexType;
      if (complex == null) {
          SimpleType simple = el.SchemaType as SimpleType;
          if (simple == null)
              return;
          simple = MakeBaseTypeAsEmptiable (simple);
          string typeNs = simple.QualifiedName.Namespace;
          if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace)
              // built-in type: refer to it by name
              el.SchemaTypeName = simple.QualifiedName;
          else
              el.SchemaType = simple;
          return;
      }

      SimpleModel simpleContent = complex.ContentModel as SimpleModel;
      if (simpleContent != null) {
          ToEmptiableSimpleContent (simpleContent, isNew);
          return;
      }

      ComplexModel complexContent = complex.ContentModel as ComplexModel;
      if (complexContent != null) {
          ToEmptiableComplexContent (complexContent, isNew);
          return;
      }

      // no content model: the particle (if any) becomes optional
      if (complex.Particle != null)
          complex.Particle.MinOccurs = 0;
  }
  // Loosens a simple type so that the empty string becomes acceptable.
  // Built-in (xs:/xdt:) types are replaced by built-in xs:string.
  // For a restriction, length/minLength facets are removed and the base
  // type is loosened as well (recursively for an inline base type, or by
  // pointing the base type name at xs:string).
  // Returns the type to use in place of st.
  private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
  {
      switch (st.QualifiedName.Namespace) {
      case XmlSchema.Namespace:
      case XdtNamespace:
          // If a primitive type
          return XmlSchemaType.GetBuiltInSimpleType (
              XmlTypeCode.String);
      }
      SimpleTypeRst str = st.Content as SimpleTypeRst;
      if (str != null) {
          // Collect first, remove afterwards: the facet collection must
          // not be modified while it is being enumerated. The list is
          // always allocated so that the removal loop is safe when no
          // length facet is present.
          ArrayList lengthFacets = new ArrayList ();
          foreach (SchemaFacet f in str.Facets) {
              if (f is LengthFacet || f is MinLengthFacet)
                  lengthFacets.Add (f);
          }
          foreach (SchemaFacet f in lengthFacets)
              str.Facets.Remove (f);
          if (str.BaseType != null)
              // Recurse into the inline base type (not into st itself,
              // which would never terminate).
              str.BaseType =
                  MakeBaseTypeAsEmptiable (str.BaseType);
          else
              // It might have a reference to an
              // external simple type, but there is
              // no assurance that any of those
              // external types allow an empty
              // string. So just set base type as
              // xs:string.
              str.BaseTypeName = QNameString;
      } // union/list can have empty string value.
      return st;
  }
  // Rebases a simple content model on xs:string so that empty content
  // is allowed. Works for both extension and restriction; any other
  // content is reported as an error.
  private void ToEmptiableSimpleContent (
      SimpleModel sm, bool isNew)
  {
      SimpleExt extension = sm.Content as SimpleExt;
      if (extension != null) {
          extension.BaseTypeName = QNameString;
          return;
      }

      SimpleRst restriction = sm.Content as SimpleRst;
      if (restriction == null)
          throw Error (sm, "Invalid simple content model was passed.");
      restriction.BaseTypeName = QNameString;
      restriction.BaseType = null;
  }
  // Makes a complex content model accept empty content by making its
  // particle optional (minOccurs=0). When there is no particle, a base
  // type other than xs:anyType cannot be handled and is reported as an
  // error. Content that is neither extension nor restriction is an error
  // as well.
  private void ToEmptiableComplexContent (
      ComplexModel cm, bool isNew)
  {
      ComplexExt ce = cm.Content
          as ComplexExt;
      if (ce != null) {
          if (ce.Particle != null)
              ce.Particle.MinOccurs = 0;
          else if (ce.BaseTypeName != null &&
              ce.BaseTypeName != QName.Empty &&
              ce.BaseTypeName != QNameAnyType)
              throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
      }
      else {
          ComplexRst cr = cm.Content
              as ComplexRst;
          if (cr == null)
              throw Error (cm, "Invalid complex content model was passed.");
          if (cr.Particle != null)
              cr.Particle.MinOccurs = 0;
          else if (cr.BaseTypeName != null &&
              cr.BaseTypeName != QName.Empty &&
              cr.BaseTypeName != QNameAnyType)
              // This is the restriction branch; name it correctly.
              throw Error (cr, "Complex type content restriction has a reference to an external component that is not supported.");
      }
  }
  // Infers the content of a non-empty element. Steps into the element,
  // then dispatches on the first content node: an immediate end tag is
  // treated like an empty element, text is inferred as text content
  // (followed by complex content if an element comes next), and a child
  // element starts complex content inference. Other node types are
  // left alone.
  private void InferContent (Element el, string ns, bool isNew)
  {
      source.Read ();
      source.MoveToContent ();
      XmlNodeType first = source.NodeType;

      if (first == XmlNodeType.EndElement) {
          InferAsEmptyElement (el, ns, isNew);
          return;
      }
      if (first == XmlNodeType.Whitespace) {
          InferContent (el, ns, isNew); // skip and retry
          return;
      }

      bool startsWithText = first == XmlNodeType.Text ||
          first == XmlNodeType.CDATA ||
          first == XmlNodeType.SignificantWhitespace;
      if (startsWithText) {
          InferTextContent (el, isNew);
          source.MoveToContent ();
      }
      else if (first != XmlNodeType.Element)
          return;

      // Either we started on an element, or one follows the text.
      if (source.NodeType == XmlNodeType.Element)
          InferComplexContent (el, ns, isNew);
  }
  // Infers element (complex) content for `el`. The element's type is
  // converted to a complex type without simple content, then child nodes
  // are processed until the enclosing end tag is reached.
  //   el    - element declaration being inferred.
  //   ns    - namespace of the enclosing context, passed to particle processing.
  //   isNew - passed on to ProcessSequence ().
  532. private void InferComplexContent (Element el, string ns,
  533. bool isNew)
  534. {
  535. ComplexType ct = ToComplexType (el);
  536. ToComplexContentType (ct);
  // position/consumed carry the sequence-matching state across
  // successive ProcessSequence () calls (both are passed by ref).
  537. int position = 0;
  538. bool consumed = false;
  539. do {
  540. switch (source.NodeType) {
  541. case XmlNodeType.Element:
  542. Sequence s = PopulateSequence (ct);
  // A sequence whose first item is a choice has already been
  // switched to the lax "sequence of choice" form.
  543. Choice c = s.Items.Count > 0 ?
  544. s.Items [0] as Choice :
  545. null;
  546. if (c != null)
  547. ProcessLax (c, ns);
  548. else
  549. ProcessSequence (ct, s, ns,
  550. ref position,
  551. ref consumed,
  552. isNew);
  553. source.MoveToContent ();
  554. break;
  555. case XmlNodeType.Text:
  556. case XmlNodeType.CDATA:
  557. case XmlNodeType.SignificantWhitespace:
  // text between child elements => mixed content
  558. MarkAsMixed (ct);
  559. source.ReadString ();
  560. source.MoveToContent ();
  561. break;
  562. case XmlNodeType.EndElement:
  563. return; // finished
  564. case XmlNodeType.None:
  565. throw new NotImplementedException ("Internal Error: Should not happen.");
  // NOTE(review): there is no default case, so any other node type
  // would repeat the loop without advancing the reader; presumably
  // MoveToContent () keeps the reader off such nodes - confirm.
  566. }
  567. } while (true);
  568. }
  // Infers or refines the type of el from the text content at the
  // reader's current position (the text is consumed with ReadString).
  private void InferTextContent (Element el, bool isNew)
  {
      string text = source.ReadString ();

      if (el.SchemaType == null) {
          QName current = el.SchemaTypeName;
          if (current == QName.Empty) {
              // no type information -> infer type (only for new
              // definitions; otherwise be conservative)
              el.SchemaTypeName = isNew ?
                  InferSimpleType (text) :
                  QNameString;
              return;
          }

          string typeNs = current.Namespace;
          if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace) {
              // existing primitive type
              el.SchemaTypeName = InferMergedType (text, current);
              return;
          }

          // Named, non-primitive type.
          // If it is complex, then just set mixed='true' (type cannot
          // be set). If it is simple, then we cannot make sure that
          // the string value is valid, so just set as xs:string.
          ComplexType named = schemas.GlobalTypes [current]
              as ComplexType;
          if (named == null)
              el.SchemaTypeName = QNameString;
          else
              MarkAsMixed (named);
          return;
      }

      // inline simpleType: same reasoning as above -> xs:string
      if (el.SchemaType is SimpleType) {
          el.SchemaType = null;
          el.SchemaTypeName = QNameString;
          return;
      }

      // inline complexType
      ComplexType inlineComplex = el.SchemaType as ComplexType;
      SimpleModel simpleContent =
          inlineComplex.ContentModel as SimpleModel;
      if (simpleContent == null) {
          // - ComplexContent (or none)
          MarkAsMixed (inlineComplex);
          return;
      }

      // - SimpleContent: merge the value into the base type
      SimpleExt extension = simpleContent.Content as SimpleExt;
      if (extension != null)
          extension.BaseTypeName = InferMergedType (text,
              extension.BaseTypeName);
      SimpleRst restriction = simpleContent.Content as SimpleRst;
      if (restriction != null) {
          restriction.BaseTypeName = InferMergedType (text,
              restriction.BaseTypeName);
          restriction.BaseType = null;
      }
  }
  // Flags ct as mixed content. When the type has a complex content
  // model the flag is set on that model, otherwise on the type itself.
  private void MarkAsMixed (ComplexType ct)
  {
      ComplexModel model = ct.ContentModel as ComplexModel;
      if (model == null) {
          ct.IsMixed = true;
          return;
      }
      model.IsMixed = true;
  }
  644. #endregion
  645. #region Particles
  // Infers the current element against a lax (choice) term.
  // If one of the choice's element particles matches the current element
  // it is refined in place; otherwise a new element particle (a local
  // definition, or a reference when the namespace differs from ns) is
  // inferred and appended to the choice.
  // Raises an inference error when the choice holds a non-element particle.
  private void ProcessLax (Choice c, string ns)
  {
      foreach (Particle p in c.Items) {
          Element el = p as Element;
          if (el == null)
              // The format item needs its argument; without it
              // String.Format itself throws FormatException.
              throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here.", p));
          if (ElementMatches (el, ns)) {
              InferElement (el, ns, false);
              return;
          }
      }
      // append a new element particle to lax term.
      Element nel = new Element ();
      if (source.NamespaceURI == ns)
          nel.Name = source.LocalName;
      else
          nel.RefName = new QName (source.LocalName,
              source.NamespaceURI);
      InferElement (nel, source.NamespaceURI, true);
      c.Items.Add (nel);
  }
  // Tells whether the element particle el describes the element the
  // reader is positioned on. A reference is compared by its RefName;
  // a local definition by its name plus the context namespace ns.
  private bool ElementMatches (Element el, string ns)
  {
      if (el.RefName != QName.Empty)
          return el.RefName.Name == source.LocalName &&
              el.RefName.Namespace == source.NamespaceURI;

      return el.Name == source.LocalName &&
          ns == source.NamespaceURI;
  }
  // Infers the current element against the sequence `s` of complex type `ct`.
  //   ns       - namespace of the enclosing context.
  //   position - index in s.Items of the particle currently expected
  //              (shared with the caller and recursive calls via ref).
  //   consumed - true once a new particle has been appended at `position`;
  //              it drives the maxOccurs/advance decisions below.
  //   isNew    - only passed along to recursive calls here.
  // When the instance cannot be described as a plain sequence, the
  // sequence is converted to a sequence of choice and handled laxly.
  681. private void ProcessSequence (ComplexType ct, Sequence s,
  682. string ns, ref int position, ref bool consumed,
  683. bool isNew)
  684. {
  // An element that matches a particle BEFORE the current position
  // appears out of order.
  685. for (int i = 0; i < position; i++) {
  686. Element iel = s.Items [i] as Element;
  687. if (ElementMatches (iel, ns)) {
  688. // Sequence element type violation
  689. // might happen (might not, but we
  690. // cannot backtrack here). So switch
  691. // to sequence of choice* here.
  692. ProcessLax (ToSequenceOfChoice (s), ns);
  693. return;
  694. }
  695. }
  // Past the end of the known particles: infer a new element and append
  // it (as a local definition, or as a reference when its namespace
  // differs from ns).
  696. if (s.Items.Count <= position) {
  697. QName name = new QName (source.LocalName,
  698. source.NamespaceURI);
  699. Element nel = CreateElement (name);
  700. InferElement (nel, ns, true);
  701. if (ns == name.Namespace)
  702. s.Items.Add (nel);
  703. else {
  704. Element re = new Element ();
  705. re.RefName = name;
  706. s.Items.Add (re);
  707. }
  708. consumed = true;
  709. return;
  710. }
  711. Element el = s.Items [position] as Element;
  712. if (el == null)
  713. throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));
  714. bool matches = ElementMatches (el, ns);
  715. if (matches) {
  // Matching the particle at `position` again after it was consumed
  // means the element repeats.
  716. if (consumed)
  717. el.MaxOccursString = "unbounded";
  718. InferElement (el, source.NamespaceURI, false);
  719. source.MoveToContent ();
  720. switch (source.NodeType) {
  // `case None` doubles as a re-dispatch label: the text/whitespace
  // cases jump here after consuming their node, and the current node
  // type is examined again.
  721. case XmlNodeType.None:
  722. if (source.NodeType ==
  723. XmlNodeType.Element)
  724. goto case XmlNodeType.Element;
  725. else if (source.NodeType ==
  726. XmlNodeType.EndElement)
  727. goto case XmlNodeType.EndElement;
  728. break;
  729. case XmlNodeType.Element:
  // next sibling element: continue with the same state
  730. ProcessSequence (ct, s, ns, ref position,
  731. ref consumed, isNew);
  732. break;
  733. case XmlNodeType.Text:
  734. case XmlNodeType.CDATA:
  735. case XmlNodeType.SignificantWhitespace:
  // text between child elements => mixed content
  736. MarkAsMixed (ct);
  737. source.ReadString ();
  738. goto case XmlNodeType.None;
  739. case XmlNodeType.Whitespace:
  740. source.ReadString ();
  741. goto case XmlNodeType.None;
  742. case XmlNodeType.EndElement:
  743. return;
  744. default:
  745. source.Read ();
  746. break;
  747. }
  748. }
  749. else {
  750. if (consumed) {
  // The particle at `position` is done; try the next one.
  751. position++;
  752. consumed = false;
  753. ProcessSequence (ct, s, ns,
  754. ref position, ref consumed,
  755. isNew);
  756. }
  757. else
  // Mismatch against a particle that was not consumed: fall back
  // to the lax sequence-of-choice form.
  758. ProcessLax (ToSequenceOfChoice (s), ns);
  759. }
  760. }
  // Rewrites the sequence in place so that its only item is an unbounded
  // choice holding all former items, and returns that choice.
  // Note that it does not return the changed sequence.
  private Choice ToSequenceOfChoice (Sequence s)
  {
      Choice wrapper = new Choice ();
      if (laxOccurence)
          wrapper.MinOccurs = 0;
      wrapper.MaxOccursString = "unbounded";

      for (int i = 0; i < s.Items.Count; i++)
          wrapper.Items.Add ((Particle) s.Items [i]);

      s.Items.Clear ();
      s.Items.Add (wrapper);
      return wrapper;
  }
  // Makes sure the complex type does not use a simple content model.
  // If it does, the attributes held by that model are moved onto the
  // type itself, the model is dropped and the type is marked as mixed.
  private void ToComplexContentType (ComplexType type)
  {
      bool hasSimpleContent = type.ContentModel is SimpleModel;
      if (!hasSimpleContent)
          return;

      SOMList modelAttributes = GetAttributes (type);
      foreach (SOMObject attribute in modelAttributes) {
          type.Attributes.Add (attribute);
      }
      // FIXME: need to copy AnyAttribute.
      // (though not considered right now)
      type.ContentModel = null;
      type.IsMixed = true;
  }
  // Returns the sequence particle of ct (creating one when the type has
  // no particle yet). Any other kind of particle is an error.
  private Sequence PopulateSequence (ComplexType ct)
  {
      Particle particle = PopulateParticle (ct);
      Sequence sequence = particle as Sequence;
      if (sequence == null)
          throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", particle));
      return sequence;
  }
  // Creates an empty sequence; it is optional (minOccurs=0) when
  // occurrence inference is lax.
  private Sequence CreateSequence ()
  {
      Sequence created = new Sequence ();
      if (!laxOccurence)
          return created;
      created.MinOccurs = 0;
      return created;
  }
  // Returns the content particle of ct, creating an empty sequence in
  // the appropriate place (the type itself, or its complex content
  // extension/restriction) when there is none yet. A type with any other
  // content model is an internal error.
  private Particle PopulateParticle (ComplexType ct)
  {
      if (ct.ContentModel == null) {
          if (ct.Particle == null)
              ct.Particle = CreateSequence ();
          return ct.Particle;
      }

      ComplexModel model = ct.ContentModel as ComplexModel;

      ComplexExt extension = model != null ?
          model.Content as ComplexExt : null;
      if (extension != null) {
          if (extension.Particle == null)
              extension.Particle = CreateSequence ();
          return extension.Particle;
      }

      ComplexRst restriction = model != null ?
          model.Content as ComplexRst : null;
      if (restriction != null) {
          if (restriction.Particle == null)
              restriction.Particle = CreateSequence ();
          return restriction.Particle;
      }

      throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
  }
  828. #endregion
  829. #region String Value
  // Primitive type inference from a string value.
  // When running lax type inference, it just returns xs:string.
  // Otherwise candidates are tried in this order: boolean, the integer
  // types (narrowest range first, unsigned before signed), unsignedLong,
  // decimal, float/double, dateTime, duration, and finally xs:string.
  private QName InferSimpleType (string value)
  {
      if (laxTypeInference)
          return QNameString;

      // 0 and 1 are not infered as byte unlike MS.XSDInfer
      // (i.e. "0" and "1" are deliberately not treated as boolean here)
      if (value == "true" || value == "false")
          return QNameBoolean;

      try {
          long number = XmlConvert.ToInt64 (value);
          XmlTypeCode code;
          if (number >= byte.MinValue && number <= byte.MaxValue)
              code = XmlTypeCode.UnsignedByte;
          else if (number >= sbyte.MinValue && number <= sbyte.MaxValue)
              code = XmlTypeCode.Byte;
          else if (number >= ushort.MinValue && number <= ushort.MaxValue)
              code = XmlTypeCode.UnsignedShort;
          else if (number >= short.MinValue && number <= short.MaxValue)
              code = XmlTypeCode.Short;
          else if (number >= uint.MinValue && number <= uint.MaxValue)
              code = XmlTypeCode.UnsignedInt;
          else if (number >= int.MinValue && number <= int.MaxValue)
              code = XmlTypeCode.Int;
          else
              code = XmlTypeCode.Long;
          return XmlSchemaType.GetBuiltInSimpleType (code).QualifiedName;
      } catch (Exception) {
          // not a 64-bit signed integer; try the next candidate
      }

      try {
          XmlConvert.ToUInt64 (value);
          return XmlSchemaType.GetBuiltInSimpleType (XmlTypeCode.UnsignedLong).QualifiedName;
      } catch (Exception) {
      }

      try {
          XmlConvert.ToDecimal (value);
          return XmlSchemaType.GetBuiltInSimpleType (XmlTypeCode.Decimal).QualifiedName;
      } catch (Exception) {
      }

      try {
          double real = XmlConvert.ToDouble (value);
          bool fitsFloat = float.MinValue <= real &&
              real <= float.MaxValue;
          XmlTypeCode realCode = fitsFloat ?
              XmlTypeCode.Float : XmlTypeCode.Double;
          return XmlSchemaType.GetBuiltInSimpleType (realCode).QualifiedName;
      } catch (Exception) {
      }

      try {
          // FIXME: also try DateTimeSerializationMode
          // and gYearMonth
          XmlConvert.ToDateTime (value);
          return XmlSchemaType.GetBuiltInSimpleType (XmlTypeCode.DateTime).QualifiedName;
      } catch (Exception) {
      }

      try {
          XmlConvert.ToTimeSpan (value);
          return XmlSchemaType.GetBuiltInSimpleType (XmlTypeCode.Duration).QualifiedName;
      } catch (Exception) {
      }

      // xs:string
      return QNameString;
  }
  895. #endregion
  896. #region Utilities
  // Looks up a global element by qualified name: first in the
  // newElements table (which CreateGlobalElement adds to), then in
  // the schema set's GlobalElements. Returns null when neither has it.
  private Element GetGlobalElement (QName name)
  {
      Element created = newElements [name] as Element;
      if (created != null)
          return created;

      return schemas.GlobalElements [name] as Element;
  }
  // Looks up a global attribute by qualified name: first in the
  // newAttributes table (which CreateGlobalAttribute adds to), then
  // in the schema set's GlobalAttributes. Returns null when neither
  // has it.
  //
  // The local lookup must use newAttributes, not newElements:
  // attributes are never registered in newElements, so probing that
  // table could never find an attribute created by this class.
  private Attr GetGlobalAttribute (QName name)
  {
      Attr a = newAttributes [name] as Attr;
      if (a == null)
          a = schemas.GlobalAttributes [name] as Attr;
      return a;
  }
  // Creates a new element declaration whose Name is the local part
  // of the given qualified name. The element is not added to any
  // schema or lookup table here.
  private Element CreateElement (QName name)
  {
      Element element = new Element ();
      element.Name = name.Name;

      return element;
  }
  // Creates an element for the given qualified name, adds it to the
  // Items of the schema obtained from PopulateSchema for the name's
  // namespace, and records it in newElements under that name.
  private Element CreateGlobalElement (QName name)
  {
      Element element = CreateElement (name);

      // Attach to the schema first, then register for later lookup.
      PopulateSchema (name.Namespace).Items.Add (element);
      newElements.Add (name, element);

      return element;
  }
  // Creates an attribute declaration named after the local part of
  // the given qualified name, adds it to the Items of the schema
  // obtained from PopulateSchema for the name's namespace, and
  // records it in newAttributes under that name.
  private Attr CreateGlobalAttribute (QName name)
  {
      Attr attribute = new Attr ();
      attribute.Name = name.Name;

      XmlSchema owner = PopulateSchema (name.Namespace);
      owner.Items.Add (attribute);

      newAttributes.Add (name, attribute);
      return attribute;
  }
  // Returns a schema for the given target namespace. If the schema
  // set already holds one or more schemas for that namespace, the
  // first one enumerated is returned. Otherwise a new schema is
  // created (qualified element form, unqualified attribute form,
  // target namespace set only when ns is non-empty), added to the
  // schema set and returned.
  //
  // Note that the returned schema is not guaranteed to be the one
  // that holds every component of namespace ns: only the first
  // matching schema is handed back.
  private XmlSchema PopulateSchema (string ns)
  {
      // Reuse the first schema already registered for this namespace.
      foreach (XmlSchema existing in schemas.Schemas (ns))
          return existing;

      XmlSchema created = new XmlSchema ();
      if (!String.IsNullOrEmpty (ns))
          created.TargetNamespace = ns;
      created.ElementFormDefault = Form.Qualified;
      created.AttributeFormDefault = Form.Unqualified;

      schemas.Add (created);
      return created;
  }
  // Builds (does not throw) an inference exception for the given
  // message and related schema component. This overload is mainly
  // for schema component errors, so no reader information is used.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      string message)
  {
      const bool useReader = false;
      return Error (sourceObj, useReader, message);
  }
  // Builds (does not throw) an inference exception.
  //
  // sourceObj: the related schema component, or null. When present,
  //   its source URI, line number and line position are appended to
  //   the message.
  // useReader: when true, the reader's base URI is appended to the
  //   message and, if the reader implements IXmlLineInfo, its line
  //   number and position are attached to the exception.
  // message:   the leading error text.
  //
  // The component's line number and position were previously passed
  // to String.Format without matching placeholders and therefore
  // never appeared in the message; the format string now includes
  // them.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      bool useReader,
      string message)
  {
      string msg = String.Concat (
          message,
          sourceObj != null ?
              String.Format (". Related schema component is {0} (line {1}, position {2})",
                  sourceObj.SourceUri,
                  sourceObj.LineNumber,
                  sourceObj.LinePosition) :
              String.Empty,
          useReader ?
              String.Format (". {0}", source.BaseURI) :
              String.Empty);

      // Line info is only meaningful when the error relates to the
      // reader and the reader can report it.
      IXmlLineInfo li = source as IXmlLineInfo;
      if (useReader && li != null)
          return new XmlSchemaInferenceException (
              msg, null, li.LineNumber,
              li.LinePosition);
      else
          return new XmlSchemaInferenceException (msg);
  }
  983. #endregion
  984. }
  985. }
  986. #endif