XmlSchemaInference.cs 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163
  1. //
  2. // XmlSchemaInference.cs
  3. //
  4. // Author:
  5. // Atsushi Enomoto <[email protected]>
  6. //
  7. // Copyright (C)2004 Novell Inc.
  8. //
  9. //
  10. // Permission is hereby granted, free of charge, to any person obtaining
  11. // a copy of this software and associated documentation files (the
  12. // "Software"), to deal in the Software without restriction, including
  13. // without limitation the rights to use, copy, modify, merge, publish,
  14. // distribute, sublicense, and/or sell copies of the Software, and to
  15. // permit persons to whom the Software is furnished to do so, subject to
  16. // the following conditions:
  17. //
  18. // The above copyright notice and this permission notice shall be
  19. // included in all copies or substantial portions of the Software.
  20. //
  21. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  23. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28. //
  29. #if NET_2_0
  30. using System;
  31. using System.Collections;
  32. using System.Xml;
  33. using System.Xml.Schema;
  34. using QName = System.Xml.XmlQualifiedName;
  35. using Form = System.Xml.Schema.XmlSchemaForm;
  36. using Use = System.Xml.Schema.XmlSchemaUse;
  37. using SOMList = System.Xml.Schema.XmlSchemaObjectCollection;
  38. using SOMObject = System.Xml.Schema.XmlSchemaObject;
  39. using Import = System.Xml.Schema.XmlSchemaImport;
  40. using Element = System.Xml.Schema.XmlSchemaElement;
  41. using Attr = System.Xml.Schema.XmlSchemaAttribute;
  42. using AttrGroup = System.Xml.Schema.XmlSchemaAttributeGroup;
  43. using AttrGroupRef = System.Xml.Schema.XmlSchemaAttributeGroupRef;
  44. using SimpleType = System.Xml.Schema.XmlSchemaSimpleType;
  45. using ComplexType = System.Xml.Schema.XmlSchemaComplexType;
  46. using SimpleModel = System.Xml.Schema.XmlSchemaSimpleContent;
  47. using SimpleExt = System.Xml.Schema.XmlSchemaSimpleContentExtension;
  48. using SimpleRst = System.Xml.Schema.XmlSchemaSimpleContentRestriction;
  49. using ComplexModel = System.Xml.Schema.XmlSchemaComplexContent;
  50. using ComplexExt = System.Xml.Schema.XmlSchemaComplexContentExtension;
  51. using ComplexRst = System.Xml.Schema.XmlSchemaComplexContentRestriction;
  52. using SimpleTypeRst = System.Xml.Schema.XmlSchemaSimpleTypeRestriction;
  53. using SimpleList = System.Xml.Schema.XmlSchemaSimpleTypeList;
  54. using SimpleUnion = System.Xml.Schema.XmlSchemaSimpleTypeUnion;
  55. using SchemaFacet = System.Xml.Schema.XmlSchemaFacet;
  56. using LengthFacet = System.Xml.Schema.XmlSchemaLengthFacet;
  57. using MinLengthFacet = System.Xml.Schema.XmlSchemaMinLengthFacet;
  58. using Particle = System.Xml.Schema.XmlSchemaParticle;
  59. using Sequence = System.Xml.Schema.XmlSchemaSequence;
  60. using Choice = System.Xml.Schema.XmlSchemaChoice;
  61. namespace System.Xml.Schema
  62. {
  // Public entry point for schema inference. It holds the two inference
  // options and forwards the actual work to XsdInference.Process.
  [MonoTODO ("merge primitive types; infer gYearMonth too; in some cases sequence should contain element whose minOccurs=0 (no obvious rules right now); reject some non-supported schema components")]
  public sealed class XmlSchemaInference
  {
      // Restricted is the default for both options; Relaxed is forwarded
      // to XsdInference as a "lax" flag.
      public enum InferenceOption {
          Restricted,
          Relaxed,
      }

      InferenceOption occurrence = InferenceOption.Restricted;
      InferenceOption typeInference = InferenceOption.Restricted;

      public XmlSchemaInference ()
      {
      }

      // Option forwarded to XsdInference as laxOccurrence.
      public InferenceOption Occurrence {
          get { return occurrence; }
          set { occurrence = value; }
      }

      // Option forwarded to XsdInference as laxTypeInference.
      public InferenceOption TypeInference {
          // Return the backing field. Returning the property itself
          // (as the previous code did) recurses without end.
          get { return typeInference; }
          set { typeInference = value; }
      }

      // Infers into a freshly created, empty XmlSchemaSet.
      public XmlSchemaSet InferSchema (XmlReader xmlReader)
      {
          return InferSchema (xmlReader, new XmlSchemaSet ());
      }

      // Infers from xmlReader, using and updating the supplied schema
      // set, and returns the set that XsdInference.Process hands back.
      public XmlSchemaSet InferSchema (XmlReader xmlReader,
          XmlSchemaSet schemas)
      {
          return XsdInference.Process (xmlReader, schemas,
              occurrence == InferenceOption.Relaxed,
              typeInference == InferenceOption.Relaxed);
      }
  }
  95. class XsdInference
  96. {
  // Creates an XsdInference over the given reader and schema set, runs
  // it once and returns the schema set held by that instance.
  public static XmlSchemaSet Process (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurrence,
      bool laxTypeInference)
  {
      XsdInference inference = new XsdInference (
          xmlReader,
          schemas,
          laxOccurrence,
          laxTypeInference);
      inference.Run ();
      return inference.schemas;
  }
  // Well-known namespace URIs used while examining attributes and types.
  public const string NamespaceXml = "http://www.w3.org/XML/1998/namespace";
  public const string NamespaceXmlns = "http://www.w3.org/2000/xmlns/";
  public const string XdtNamespace = "http://www.w3.org/2003/11/xpath-datatypes";

  // Qualified names in the XML Schema namespace, created once and shared.
  private static readonly QName QNameString = new QName ("string", XmlSchema.Namespace);
  private static readonly QName QNameBoolean = new QName ("boolean", XmlSchema.Namespace);
  private static readonly QName QNameAnyType = new QName ("anyType", XmlSchema.Namespace);
  private static readonly QName QNameByte = new QName ("byte", XmlSchema.Namespace);
  private static readonly QName QNameUByte = new QName ("unsignedByte", XmlSchema.Namespace);
  private static readonly QName QNameShort = new QName ("short", XmlSchema.Namespace);
  private static readonly QName QNameUShort = new QName ("unsignedShort", XmlSchema.Namespace);
  private static readonly QName QNameInt = new QName ("int", XmlSchema.Namespace);
  private static readonly QName QNameUInt = new QName ("unsignedInt", XmlSchema.Namespace);
  private static readonly QName QNameLong = new QName ("long", XmlSchema.Namespace);
  private static readonly QName QNameULong = new QName ("unsignedLong", XmlSchema.Namespace);
  private static readonly QName QNameDecimal = new QName ("decimal", XmlSchema.Namespace);
  // NOTE(review): "unsignedDecimal" does not look like a built-in XML
  // Schema type name -- confirm where this name is used before relying on it.
  private static readonly QName QNameUDecimal = new QName ("unsignedDecimal", XmlSchema.Namespace);
  private static readonly QName QNameDouble = new QName ("double", XmlSchema.Namespace);
  private static readonly QName QNameFloat = new QName ("float", XmlSchema.Namespace);
  private static readonly QName QNameDateTime = new QName ("dateTime", XmlSchema.Namespace);
  private static readonly QName QNameDuration = new QName ("duration", XmlSchema.Namespace);

  // Per-run state: the instance document reader, the schema set being
  // inferred into, and the two "lax" switches passed to the constructor.
  private XmlReader source;
  private XmlSchemaSet schemas;
  private bool laxOccurrence;
  private bool laxTypeInference;

  // Tables created empty here; they are not filled anywhere in the
  // code visible in this part of the file.
  private Hashtable newElements = new Hashtable ();
  private Hashtable newAttributes = new Hashtable ();
  // Stores the reader, the target schema set and both lax flags.
  // No inference work happens until Run is called.
  private XsdInference (XmlReader xmlReader,
      XmlSchemaSet schemas,
      bool laxOccurrence,
      bool laxTypeInference)
  {
      source = xmlReader;
      this.schemas = schemas;
      this.laxTypeInference = laxTypeInference;
      this.laxOccurrence = laxOccurrence;
  }
  // Drives one inference pass: compiles the schema set, moves the
  // reader to the document element and infers that element, creating
  // a global element declaration first when none exists yet.
  // Throws ArgumentException when the reader content is not an element.
  private void Run ()
  {
      // XmlSchemaSet need to be compiled.
      schemas.Compile ();

      // move to top-level element
      source.MoveToContent ();
      if (source.NodeType != XmlNodeType.Element)
          throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

      QName qname = new QName (source.LocalName,
          source.NamespaceURI);
      Element el = GetGlobalElement (qname);
      // An element that had no declaration is inferred as "new";
      // an existing declaration is merged with the instance.
      bool isNew = el == null;
      if (isNew)
          el = CreateGlobalElement (qname);
      InferElement (el, qname.Namespace, isNew);

      // FIXME: compile again.
      // foreach (XmlSchema schema in schemas.Schemas ())
      //     schemas.Reprocess (schema);
  }
  // For every schema registered for namespace `current`, makes sure
  // there is an import whose Namespace equals `import`, adding one
  // when no existing include entry matches.
  private void AddImport (string current, string import)
  {
      foreach (XmlSchema schema in schemas.Schemas (current)) {
          bool alreadyImported = false;
          foreach (XmlSchemaExternal external in schema.Includes) {
              Import existing = external as Import;
              if (existing == null)
                  continue;
              if (existing.Namespace == import)
                  alreadyImported = true;
          }
          if (!alreadyImported) {
              Import added = new Import ();
              added.Namespace = import;
              schema.Includes.Add (added);
          }
      }
  }
  // Adds the schema for the xml: namespace to the schema set unless
  // a schema for that namespace is already present.
  private void IncludeXmlAttributes ()
  {
      if (schemas.Schemas (NamespaceXml).Count != 0)
          return;
      // FIXME: do it from resources.
      schemas.Add (NamespaceXml, "http://www.w3.org/2001/xml.xsd");
  }
  // Infers (or merges) the declaration `el` from the element the
  // reader is currently positioned on, and consumes that element.
  //   el    - declaration to update; a ref='...' element is redirected.
  //   ns    - namespace passed through to the attribute/content helpers.
  //   isNew - true when `el` was just created rather than pre-existing.
  private void InferElement (Element el, string ns, bool isNew)
  {
      // A reference is never inferred itself; the work is redirected
      // to the element it points at.
      if (el.RefName != QName.Empty) {
          Element target = GetGlobalElement (el.RefName);
          if (target != null)
              InferElement (target, ns, isNew);
          else {
              // NOTE(review): this uses CreateElement while Run uses
              // CreateGlobalElement for a missing global -- confirm
              // the created element gets registered somewhere.
              target = CreateElement (el.RefName);
              InferElement (target, ns, true);
          }
          return;
      }

      // Attributes first; the reader is moved back to the element
      // afterwards so content handling starts from the start tag.
      if (source.MoveToFirstAttribute ()) {
          InferAttributes (el, ns, isNew);
          source.MoveToElement ();
      }

      // Content: either way the reader ends up past this element.
      if (!source.IsEmptyElement) {
          InferContent (el, ns, isNew);
          source.ReadEndElement ();
      } else {
          InferAsEmptyElement (el, ns, isNew);
          source.Read ();
          source.MoveToContent ();
      }

      // Fall back to xs:string when nothing assigned a type.
      bool untyped = el.SchemaType == null &&
          el.SchemaTypeName == QName.Empty;
      if (untyped)
          el.SchemaTypeName = QNameString;
  }
  242. #region Attribute Inference
  // Builds a lookup table of the attribute definitions in attList.
  // Keys are QNames: the RefName for references, otherwise the local
  // Name with an empty namespace. Any item that is not an attribute
  // definition is rejected with the exception produced by Error.
  private Hashtable CollectAttrTable (SOMList attList)
  {
      Hashtable definitions = new Hashtable ();
      foreach (XmlSchemaObject item in attList) {
          Attr definition = item as Attr;
          if (definition == null)
              throw Error (item, String.Format ("Attribute inference only supports direct attribute definition. {0} is not supported.", item.GetType ()));

          QName key = definition.RefName != QName.Empty
              ? definition.RefName
              : new QName (definition.Name, "");
          definitions.Add (key, definition);
      }
      return definitions;
  }
  // Walks the attributes of the current element (the caller has
  // already moved the reader to the first one) and infers or merges
  // an attribute definition for each of them.
  private void InferAttributes (Element el, string ns, bool isNew)
  {
      // The complex type, its attribute list and the lookup table are
      // created lazily, only once an attribute that actually needs a
      // definition is met.
      ComplexType ct = null;
      SOMList attList = null;
      Hashtable table = null;

      do {
          string attributeNs = source.NamespaceURI;

          // xmlns declarations are not attributes to infer.
          if (attributeNs == NamespaceXmlns)
              continue;

          // xsi:nil marks the element nillable;
          // all other xsi:* atts are ignored
          if (attributeNs == XmlSchema.InstanceNamespace) {
              if (source.LocalName == "nil")
                  el.IsNillable = true;
              continue;
          }

          // xml:* attributes need the xml namespace schema in the set.
          if (attributeNs == NamespaceXml &&
              schemas.Schemas (NamespaceXml).Count == 0)
              IncludeXmlAttributes ();

          if (ct == null) {
              ct = ToComplexType (el);
              attList = GetAttributes (ct);
              table = CollectAttrTable (attList);
          }

          QName attrName = new QName (
              source.LocalName, source.NamespaceURI);
          Attr known = table [attrName] as Attr;
          if (known == null) {
              attList.Add (InferNewAttribute (
                  attrName, isNew, ns));
              continue;
          }

          // Seen in this instance, so drop it from the table: whatever
          // stays in the table did not appear on this element.
          table.Remove (attrName);
          bool isReference = known.RefName != null &&
              known.RefName != QName.Empty;
          if (!isReference)
              InferMergedAttribute (known);
      } while (source.MoveToNextAttribute ());

      // mark all attr definitions that did not appear
      // as optional.
      if (table != null)
          foreach (Attr missing in table.Values)
              missing.Use = Use.Optional;
  }
  // Creates the attribute particle to add to a type for the attribute
  // the reader is positioned on.
  //   - Namespaced attribute: the global definition is created or
  //     merged, and a reference to it is returned.
  //   - Unqualified attribute: a local definition is returned, typed
  //     from the current value.
  // The result is "required" only under strict occurrence inference,
  // and only for a new type definition or a merged global attribute
  // that is itself required.
  private XmlSchemaAttribute InferNewAttribute (
      QName attrName, bool isNewTypeDefinition, string ns)
  {
      bool mergedRequired = false;
      Attr result;

      if (attrName.Namespace.Length == 0) {
          // local attribute
          result = new Attr ();
          result.Name = attrName.Name;
          result.SchemaTypeName = InferSimpleType (source.Value);
      } else {
          // global attribute; might be already defined.
          Attr globalAttr = GetGlobalAttribute (attrName) as Attr;
          if (globalAttr != null) {
              InferMergedAttribute (globalAttr);
              mergedRequired = globalAttr.Use == Use.Required;
          } else {
              globalAttr = CreateGlobalAttribute (attrName);
              globalAttr.SchemaTypeName =
                  InferSimpleType (source.Value);
          }

          // What goes into the type is only a reference.
          result = new Attr ();
          result.RefName = attrName;
          AddImport (ns, attrName.Namespace);
      }

      bool required = !laxOccurrence &&
          (isNewTypeDefinition || mergedRequired);
      result.Use = required ? Use.Required : Use.Optional;
      return result;
  }
  // Checks the current attribute value against the type already
  // recorded on attr; if the value does not fit, the type name is
  // relaxed. Any inline type on the attribute is dropped afterwards.
  private void InferMergedAttribute (Attr attr)
  {
      QName merged = InferMergedType (source.Value,
          attr.SchemaTypeName);
      attr.SchemaTypeName = merged;
      attr.SchemaType = null;
  }
  // Returns a built-in type name that accepts `value`: typeName itself
  // when the value parses as that type, otherwise the closest base
  // type up the built-in chain that parses it. Falls back to xs:string
  // when typeName is not a built-in simple type or the chain runs out.
  private QName InferMergedType (string value, QName typeName)
  {
      SimpleType candidate = XmlSchemaType.GetBuiltInSimpleType (
          typeName);
      if (candidate == null) // not a built-in type
          return QNameString;

      while (true) {
          try {
              candidate.Datatype.ParseValue (value,
                  source.NameTable,
                  source as IXmlNamespaceResolver);
              return typeName;
          } catch {
              // Any parse failure means "does not fit"; retry
              // with the base type.
              candidate = candidate.BaseXmlSchemaType as XmlSchemaSimpleType;
              typeName = candidate != null ? candidate.QualifiedName : QName.Empty;
          }
          if (typeName == QName.Empty)
              break;
      }
      return QNameString;
  }
  // Returns the attribute collection that actually holds the
  // attributes of ct: the type's own list when it has no content
  // model, otherwise the list on the extension/restriction inside its
  // simple or complex content model.
  // Throws (via Error) when the content model has neither an
  // extension nor a restriction, or is of an unknown kind.
  private SOMList GetAttributes (ComplexType ct)
  {
      if (ct.ContentModel == null)
          return ct.Attributes;

      SimpleModel sc = ct.ContentModel as SimpleModel;
      if (sc != null) {
          SimpleExt sce = sc.Content as SimpleExt;
          if (sce != null)
              return sce.Attributes;
          SimpleRst scr = sc.Content as SimpleRst;
          if (scr != null)
              return scr.Attributes;
          throw Error (sc, "Invalid simple content model.");
      }

      ComplexModel cc = ct.ContentModel as ComplexModel;
      if (cc != null) {
          ComplexExt cce = cc.Content as ComplexExt;
          if (cce != null)
              return cce.Attributes;
          ComplexRst ccr = cc.Content as ComplexRst;
          if (ccr != null)
              return ccr.Attributes;
          // This branch is about complex content; the message used
          // to say "simple".
          throw Error (cc, "Invalid complex content model.");
      }

      // cc is always null here, so report against the type itself
      // instead of handing a null schema object to Error.
      throw Error (ct, "Invalid complexType. Should not happen.");
  }
  // Returns a complex type for el, converting the element's current
  // typing when it is not complex yet:
  //   1. inline complex type          -> returned as is
  //   2. name of a global complexType -> that global type is returned
  //   3. xs:anyType or no type at all -> new empty inline complexType
  //   4. inline simple type           -> simple content restricting it
  //   5. built-in simple type name    -> simple content extending it
  //   6. global simpleType name       -> simple content extending it
  // From case 3 on, the element's inline type is replaced and its
  // type name cleared. An unresolvable type name ends in Error.
  private ComplexType ToComplexType (Element el)
  {
      QName typeName = el.SchemaTypeName;
      XmlSchemaType inlineType = el.SchemaType;

      // 1. element type is complex.
      ComplexType inlineComplex = inlineType as ComplexType;
      if (inlineComplex != null)
          return inlineComplex;

      // 2. reference to global complexType.
      XmlSchemaType globalType = schemas.GlobalTypes [typeName]
          as XmlSchemaType;
      ComplexType globalComplex = globalType as ComplexType;
      if (globalComplex != null)
          return globalComplex;

      ComplexType created = new ComplexType ();
      el.SchemaType = created;
      el.SchemaTypeName = QName.Empty;

      // 3. base type name is xs:anyType or no specification.
      // <xs:complexType />
      bool untyped = inlineType == null && typeName == QName.Empty;
      if (typeName == QNameAnyType || untyped)
          return created;

      SimpleModel model = new SimpleModel ();
      created.ContentModel = model;

      // 4. type is simpleType
      // -> simple content restriction based on that inline type.
      SimpleType inlineSimple = inlineType as SimpleType;
      if (inlineSimple != null) {
          SimpleRst restriction = new SimpleRst ();
          restriction.BaseType = inlineSimple;
          model.Content = restriction;
          return created;
      }

      SimpleExt extension = new SimpleExt ();
      model.Content = extension;

      // 5. type name points to primitive type, or
      // 6. type name points to global simpleType
      // -> simple extension of that named type.
      bool resolvable =
          XmlSchemaType.GetBuiltInSimpleType (typeName) != null ||
          globalType is SimpleType;
      if (!resolvable)
          throw Error (el, "Unexpected schema component that contains simpleTypeName that could not be resolved.");

      extension.BaseTypeName = typeName;
      return created;
  }
  448. #endregion
  449. #region Element Type
  // Adjusts the declaration el so that an empty instance of the
  // element is acceptable. Only inline types on the element are
  // touched; an element with no inline type is left unchanged.
  private void InferAsEmptyElement (Element el, string ns,
      bool isNew)
  {
      ComplexType complex = el.SchemaType as ComplexType;
      if (complex == null) {
          SimpleType simple = el.SchemaType as SimpleType;
          if (simple == null)
              return;

          simple = MakeBaseTypeAsEmptiable (simple);
          string typeNs = simple.QualifiedName.Namespace;
          // A built-in result is referenced by name; anything else
          // stays as the element's inline type.
          if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace)
              el.SchemaTypeName = simple.QualifiedName;
          else
              el.SchemaType = simple;
          return;
      }

      SimpleModel simpleModel = complex.ContentModel as SimpleModel;
      if (simpleModel != null) {
          ToEmptiableSimpleContent (simpleModel, isNew);
          return;
      }

      ComplexModel complexModel = complex.ContentModel as ComplexModel;
      if (complexModel != null) {
          ToEmptiableComplexContent (complexModel, isNew);
          return;
      }

      // No content model: make the type's own particle optional.
      if (complex.Particle != null)
          complex.Particle.MinOccurs = 0;
  }
  // Returns a simple type, derived from st, that should accept an
  // empty string:
  //   - a type in the XML Schema / XDT namespace is replaced by the
  //     built-in xs:string type;
  //   - a restriction loses its length/minLength facets and has its
  //     base relaxed (recursively for an inline base type, otherwise
  //     by pointing the base type name at xs:string);
  //   - anything else (list/union) is returned unchanged.
  // Restrictions are modified in place and st itself is returned.
  private SimpleType MakeBaseTypeAsEmptiable (SimpleType st)
  {
      switch (st.QualifiedName.Namespace) {
      case XmlSchema.Namespace:
      case XdtNamespace:
          // If a primitive type
          return XmlSchemaType.GetBuiltInSimpleType (
              XmlTypeCode.String);
      }

      SimpleTypeRst str = st.Content as SimpleTypeRst;
      if (str == null)
          return st; // union/list can have empty string value.

      // Collect first, remove afterwards: the facet collection must
      // not be modified while it is being enumerated. The list is
      // always created, so a restriction without any length facet no
      // longer ends in a NullReferenceException.
      ArrayList lengthFacets = new ArrayList ();
      foreach (SchemaFacet facet in str.Facets) {
          if (facet is LengthFacet || facet is MinLengthFacet)
              lengthFacets.Add (facet);
      }
      foreach (SchemaFacet facet in lengthFacets)
          str.Facets.Remove (facet);

      if (str.BaseType != null)
          // Recurse into the inline base type. Passing st again (as
          // the previous code did) never terminates.
          str.BaseType =
              MakeBaseTypeAsEmptiable (str.BaseType);
      else
          // It might have a reference to an
          // external simple type, but there is
          // no assurance that any of those
          // external types allow an empty
          // string. So just set base type as
          // xs:string.
          str.BaseTypeName = QNameString;

      return st;
  }
  // Rebases a simple content model on xs:string. An extension only
  // gets a new base type name; a restriction additionally loses its
  // inline base type. Any other content is reported through Error.
  private void ToEmptiableSimpleContent (
      SimpleModel sm, bool isNew)
  {
      SimpleExt extension = sm.Content as SimpleExt;
      if (extension != null) {
          extension.BaseTypeName = QNameString;
          return;
      }

      SimpleRst restriction = sm.Content as SimpleRst;
      if (restriction == null)
          throw Error (sm, "Invalid simple content model was passed.");
      restriction.BaseTypeName = QNameString;
      restriction.BaseType = null;
  }
  // Makes a complex content model accept empty content by setting
  // minOccurs=0 on the particle of its extension or restriction.
  // When there is no particle, a base type name other than
  // xs:anyType cannot be handled and is reported through Error, as
  // is content that is neither an extension nor a restriction.
  private void ToEmptiableComplexContent (
      ComplexModel cm, bool isNew)
  {
      ComplexExt ce = cm.Content
          as ComplexExt;
      if (ce != null) {
          if (ce.Particle != null)
              ce.Particle.MinOccurs = 0;
          else if (ce.BaseTypeName != null &&
              ce.BaseTypeName != QName.Empty &&
              ce.BaseTypeName != QNameAnyType)
              throw Error (ce, "Complex type content extension has a reference to an external component that is not supported.");
          return;
      }

      ComplexRst cr = cm.Content
          as ComplexRst;
      if (cr == null)
          throw Error (cm, "Invalid complex content model was passed.");
      if (cr.Particle != null)
          cr.Particle.MinOccurs = 0;
      else if (cr.BaseTypeName != null &&
          cr.BaseTypeName != QName.Empty &&
          cr.BaseTypeName != QNameAnyType)
          // This is the restriction branch; the message used to
          // say "extension".
          throw Error (cr, "Complex type content restriction has a reference to an external component that is not supported.");
  }
  // Reads past the start tag of the current element and dispatches on
  // the first content node: nothing (end tag), child elements, or
  // text. Text that is followed by an element continues as complex
  // content. Node types not listed below are left untouched.
  private void InferContent (Element el, string ns, bool isNew)
  {
      while (true) {
          source.Read ();
          source.MoveToContent ();

          switch (source.NodeType) {
          case XmlNodeType.Whitespace:
              continue; // skip and retry
          case XmlNodeType.EndElement:
              InferAsEmptyElement (el, ns, isNew);
              return;
          case XmlNodeType.Element:
              InferComplexContent (el, ns, isNew);
              return;
          case XmlNodeType.Text:
          case XmlNodeType.CDATA:
          case XmlNodeType.SignificantWhitespace:
              InferTextContent (el, isNew);
              source.MoveToContent ();
              if (source.NodeType == XmlNodeType.Element)
                  InferComplexContent (el, ns, isNew);
              return;
          default:
              return;
          }
      }
  }
  // Infers the child content of el. The element's type is first
  // turned into a complex type without simple content; then every
  // child node up to the end tag is processed: elements go to the
  // sequence (or to the lax choice when the sequence already wraps
  // one), text marks the type as mixed.
  private void InferComplexContent (Element el, string ns,
      bool isNew)
  {
      ComplexType ct = ToComplexType (el);
      ToComplexContentType (ct);

      // Cursor into the sequence, shared with ProcessSequence.
      int position = 0;
      bool consumed = false;

      for (;;) {
          XmlNodeType nodeType = source.NodeType;

          if (nodeType == XmlNodeType.EndElement)
              return; // finished
          if (nodeType == XmlNodeType.None)
              throw new NotImplementedException ("Internal Error: Should not happen.");

          if (nodeType == XmlNodeType.Element) {
              Sequence sequence = PopulateSequence (ct);
              Choice lax = null;
              if (sequence.Items.Count > 0)
                  lax = sequence.Items [0] as Choice;

              if (lax == null)
                  ProcessSequence (ct, sequence, ns,
                      ref position,
                      ref consumed,
                      isNew);
              else
                  ProcessLax (lax, ns);
              source.MoveToContent ();
          } else if (nodeType == XmlNodeType.Text ||
              nodeType == XmlNodeType.CDATA ||
              nodeType == XmlNodeType.SignificantWhitespace) {
              MarkAsMixed (ct);
              source.ReadString ();
              source.MoveToContent ();
          }
          // NOTE(review): any other node type leaves the reader in
          // place, so this loop would repeat without progress --
          // confirm such nodes cannot reach this point.
      }
  }
  // Consumes the text at the current reader position and folds it
  // into the typing of el:
  //   - no type information: infer a type name (new element) or use
  //     xs:string (existing element);
  //   - built-in type name: relax it until the text fits;
  //   - other type name: mark a global complexType as mixed,
  //     otherwise fall back to xs:string;
  //   - inline simpleType: replaced by xs:string;
  //   - inline complexType: relax the simple content base type, or
  //     mark the type as mixed when it has no simple content.
  private void InferTextContent (Element el, bool isNew)
  {
      string text = source.ReadString ();

      if (el.SchemaType == null) {
          QName declared = el.SchemaTypeName;

          if (declared == QName.Empty) {
              // no type information -> infer type
              el.SchemaTypeName = isNew
                  ? InferSimpleType (text)
                  : QNameString;
              return;
          }

          string typeNs = declared.Namespace;
          if (typeNs == XmlSchema.Namespace || typeNs == XdtNamespace) {
              // existing primitive type
              el.SchemaTypeName = InferMergedType (text, declared);
              return;
          }

          // If it is complex, then just set
          // mixed='true' (type cannot be set.)
          // If it is simple, then we cannot
          // make sure that string value is
          // valid. So just set as xs:string.
          ComplexType globalComplex =
              schemas.GlobalTypes [declared] as ComplexType;
          if (globalComplex == null)
              el.SchemaTypeName = QNameString;
          else
              MarkAsMixed (globalComplex);
          return;
      }

      // simpleType: validity of the text cannot be assured either,
      // so the inline type gives way to xs:string.
      if (el.SchemaType is SimpleType) {
          el.SchemaType = null;
          el.SchemaTypeName = QNameString;
          return;
      }

      // complexType
      ComplexType inlineComplex = el.SchemaType as ComplexType;
      SimpleModel simpleModel =
          inlineComplex.ContentModel as SimpleModel;
      if (simpleModel == null) {
          // - ComplexContent
          MarkAsMixed (inlineComplex);
          return;
      }

      // - SimpleContent
      SimpleExt extension = simpleModel.Content as SimpleExt;
      if (extension != null)
          extension.BaseTypeName = InferMergedType (text,
              extension.BaseTypeName);

      SimpleRst restriction = simpleModel.Content as SimpleRst;
      if (restriction != null) {
          restriction.BaseTypeName = InferMergedType (text,
              restriction.BaseTypeName);
          restriction.BaseType = null;
      }
  }
  // Sets the mixed flag on the complex content model of ct when it
  // has one, otherwise on the type itself.
  private void MarkAsMixed (ComplexType ct)
  {
      ComplexModel model = ct.ContentModel as ComplexModel;
      if (model == null) {
          ct.IsMixed = true;
          return;
      }
      model.IsMixed = true;
  }
  698. #endregion
  699. #region Particles
  // Handles the current element against a lax (choice) term: the
  // first element particle of c that matches is merged with the
  // instance; when none matches, a new element particle (a local
  // declaration, or a reference plus import for a foreign namespace)
  // is inferred and appended to c.
  // A non-element particle inside c is reported through Error.
  private void ProcessLax (Choice c, string ns)
  {
      foreach (Particle p in c.Items) {
          Element el = p as Element;
          if (el == null)
              // Pass the offending particle so that the {0}
              // placeholder can be filled; without an argument
              // String.Format itself throws FormatException.
              throw Error (c, String.Format ("Target schema item contains unacceptable particle {0}. Only element is allowed here.", p));
          if (ElementMatches (el, ns)) {
              InferElement (el, ns, false);
              return;
          }
      }

      // append a new element particle to lax term.
      Element nel = new Element ();
      if (source.NamespaceURI == ns)
          nel.Name = source.LocalName;
      else {
          nel.RefName = new QName (source.LocalName,
              source.NamespaceURI);
          AddImport (ns, source.NamespaceURI);
      }
      InferElement (nel, source.NamespaceURI, true);
      c.Items.Add (nel);
  }
  // Tells whether the element particle el describes the element the
  // reader is positioned on. A reference is compared by its RefName;
  // a local declaration by its Name together with the namespace ns.
  private bool ElementMatches (Element el, string ns)
  {
      if (el.RefName != QName.Empty)
          return el.RefName.Name == source.LocalName &&
              el.RefName.Namespace == source.NamespaceURI;

      return el.Name == source.LocalName &&
          ns == source.NamespaceURI;
  }
  // Handles the current element against the sequence s.
  //   position - index of the sequence item currently expected.
  //   consumed - whether that item was already used in this instance.
  // Outcomes:
  //   - the element matches an item before `position`: ordering cannot
  //     be kept, so the sequence is turned into a lax choice;
  //   - `position` is past the end: a new item is inferred and added;
  //   - the element matches the expected item: it is merged (and made
  //     unbounded when the item was already consumed), then following
  //     siblings are processed;
  //   - it does not match: advance to the next item if this one was
  //     consumed, otherwise fall back to the lax choice.
  private void ProcessSequence (ComplexType ct, Sequence s,
      string ns, ref int position, ref bool consumed,
      bool isNew)
  {
      // Sequence element type violation
      // might happen (might not, but we
      // cannot backtrack here). So switch
      // to sequence of choice* here.
      for (int index = 0; index < position; index++) {
          Element earlier = s.Items [index] as Element;
          if (!ElementMatches (earlier, ns))
              continue;
          ProcessLax (ToSequenceOfChoice (s), ns);
          return;
      }

      // Nothing declared at this position yet: add a new item.
      if (s.Items.Count <= position) {
          QName name = new QName (source.LocalName,
              source.NamespaceURI);
          Element created = CreateElement (name);
          if (laxOccurrence)
              created.MinOccurs = 0;
          InferElement (created, ns, true);

          if (ns == name.Namespace)
              s.Items.Add (created);
          else {
              // Foreign namespace: the sequence only gets a
              // reference, plus the import it needs.
              Element reference = new Element ();
              if (laxOccurrence)
                  reference.MinOccurs = 0;
              reference.RefName = name;
              AddImport (ns, name.Namespace);
              s.Items.Add (reference);
          }
          consumed = true;
          return;
      }

      Element expected = s.Items [position] as Element;
      if (expected == null)
          throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Items [position]));

      if (!ElementMatches (expected, ns)) {
          if (!consumed) {
              ProcessLax (ToSequenceOfChoice (s), ns);
              return;
          }
          position++;
          consumed = false;
          ProcessSequence (ct, s, ns,
              ref position, ref consumed,
              isNew);
          return;
      }

      // NOTE(review): `consumed` is not set on this path, only read --
      // confirm that a first match of an existing item is not meant
      // to mark it as consumed.
      if (consumed)
          expected.MaxOccursString = "unbounded";
      InferElement (expected, source.NamespaceURI, false);
      source.MoveToContent ();

      // After text or whitespace has been read (or at node type
      // None) the node type is looked at once more, and only a
      // following element continues the sequence.
      bool recheck = false;
      switch (source.NodeType) {
      case XmlNodeType.Text:
      case XmlNodeType.CDATA:
      case XmlNodeType.SignificantWhitespace:
          MarkAsMixed (ct);
          source.ReadString ();
          recheck = true;
          break;
      case XmlNodeType.Whitespace:
          source.ReadString ();
          recheck = true;
          break;
      case XmlNodeType.None:
          recheck = true;
          break;
      case XmlNodeType.Element:
          ProcessSequence (ct, s, ns, ref position,
              ref consumed, isNew);
          break;
      case XmlNodeType.EndElement:
          break;
      default:
          source.Read ();
          break;
      }

      if (recheck && source.NodeType == XmlNodeType.Element)
          ProcessSequence (ct, s, ns, ref position,
              ref consumed, isNew);
  }
  // Rewrites s in place so that its only item is an unbounded choice
  // holding all former items of the sequence (minOccurs=0 as well
  // under lax occurrence). Note that the return value is the new
  // choice, not the changed sequence.
  private Choice ToSequenceOfChoice (Sequence s)
  {
      Choice wrapper = new Choice ();
      if (laxOccurrence)
          wrapper.MinOccurs = 0;
      wrapper.MaxOccursString = "unbounded";

      foreach (Particle member in s.Items)
          wrapper.Items.Add (member);

      s.Items.Clear ();
      s.Items.Add (wrapper);
      return wrapper;
  }
  // Strips a simple content model from the complex type: the
  // attributes held by the model are copied onto the type itself,
  // the content model is removed and the type is marked as mixed.
  // A type without simple content is left untouched.
  private void ToComplexContentType (ComplexType type)
  {
      if (!(type.ContentModel is SimpleModel))
          return;

      SOMList modelAttributes = GetAttributes (type);
      foreach (SOMObject attribute in modelAttributes)
          type.Attributes.Add (attribute);

      // FIXME: need to copy AnyAttribute.
      // (though not considered right now)
      type.ContentModel = null;
      type.IsMixed = true;
  }
  // Returns the particle obtained from PopulateParticle() as a
  // sequence. Any other kind of particle is reported through an
  // inference exception.
  private Sequence PopulateSequence (ComplexType ct)
  {
      Particle particle = PopulateParticle (ct);
      Sequence sequence = particle as Sequence;
      if (sequence == null)
          throw Error (ct, String.Format ("Target complexType contains unacceptable type of particle {0}", particle));
      return sequence;
  }
  // Creates an empty sequence. Under lax occurrence inference
  // its minOccurs is set to 0.
  private Sequence CreateSequence ()
  {
      Sequence created = new Sequence ();
      if (!laxOccurrence)
          return created;

      created.MinOccurs = 0;
      return created;
  }
  // Returns the particle slot of the given complexType, filling it
  // with a new sequence from CreateSequence() when it is still null.
  // The slot is looked up on the type itself when there is no
  // content model, or on the extension / restriction of a complex
  // content model. Any other content model is an internal error.
  private Particle PopulateParticle (ComplexType ct)
  {
      // No content model: the particle belongs to the type itself.
      if (ct.ContentModel == null) {
          if (ct.Particle == null)
              ct.Particle = CreateSequence ();
          return ct.Particle;
      }

      ComplexModel complex = ct.ContentModel as ComplexModel;
      if (complex != null) {
          ComplexExt extension = complex.Content as ComplexExt;
          ComplexRst restriction = complex.Content as ComplexRst;

          if (extension != null) {
              if (extension.Particle == null)
                  extension.Particle = CreateSequence ();
              return extension.Particle;
          }

          if (restriction != null) {
              if (restriction.Particle == null)
                  restriction.Particle = CreateSequence ();
              return restriction.Particle;
          }
      }

      throw Error (ct, "Schema inference internal error. The complexType should have been converted to have a complex content.");
  }
  889. #endregion
  890. #region String Value
  // Primitive type inference for a text value.
  // When running lax type inference, it just returns xs:string.
  // Otherwise the candidates are tried in this order and the first
  // one that accepts the value wins: boolean literals, integers
  // (sized by range), unsigned long, decimal, float/double,
  // dateTime, duration and finally xs:string.
  private QName InferSimpleType (string value)
  {
      if (laxTypeInference)
          return QNameString;

      // Only the literal words are treated as boolean here;
      // "0" and "1" fall through to the integer checks below.
      if (value == "true" || value == "false")
          return QNameBoolean;

      try {
          long integer = XmlConvert.ToInt64 (value);
          // Narrowest range first, unsigned before signed.
          if (integer >= byte.MinValue && integer <= byte.MaxValue)
              return QNameUByte;
          if (integer >= sbyte.MinValue && integer <= sbyte.MaxValue)
              return QNameByte;
          if (integer >= ushort.MinValue && integer <= ushort.MaxValue)
              return QNameUShort;
          if (integer >= short.MinValue && integer <= short.MaxValue)
              return QNameShort;
          if (integer >= uint.MinValue && integer <= uint.MaxValue)
              return QNameUInt;
          if (integer >= int.MinValue && integer <= int.MaxValue)
              return QNameInt;
          return QNameLong;
      } catch (Exception) {
          // Not a signed 64-bit integer; try the next candidate.
      }

      try {
          XmlConvert.ToUInt64 (value);
          return QNameULong;
      } catch (Exception) {
          // Not an unsigned 64-bit integer either.
      }

      try {
          XmlConvert.ToDecimal (value);
          return QNameDecimal;
      } catch (Exception) {
          // Not a decimal.
      }

      try {
          double number = XmlConvert.ToDouble (value);
          // Kept as a positive range test so that a value failing
          // both comparisons (e.g. NaN) is reported as double.
          bool fitsFloat = float.MinValue <= number &&
              number <= float.MaxValue;
          return fitsFloat ? QNameFloat : QNameDouble;
      } catch (Exception) {
          // Not a floating point number.
      }

      try {
          // FIXME: also try DateTimeSerializationMode
          // and gYearMonth
          XmlConvert.ToDateTime (value);
          return QNameDateTime;
      } catch (Exception) {
          // Not a dateTime.
      }

      try {
          XmlConvert.ToTimeSpan (value);
          return QNameDuration;
      } catch (Exception) {
          // Not a duration.
      }

      // Nothing more specific matched: xs:string
      return QNameString;
  }
  956. #endregion
  957. #region Utilities
  // Looks up a global element by qualified name: first among the
  // entries of newElements, then among the global elements of the
  // schema set. Returns null when neither has it.
  private Element GetGlobalElement (QName name)
  {
      Element pending = newElements [name] as Element;
      if (pending != null)
          return pending;

      return schemas.GlobalElements [name] as Element;
  }
  // Looks up a global attribute by qualified name: first among the
  // entries of newAttributes (where CreateGlobalAttribute() registers
  // the attributes it creates), then among the global attributes of
  // the schema set. Returns null when neither has it.
  private Attr GetGlobalAttribute (QName name)
  {
      // Attributes are registered in newAttributes, not newElements;
      // searching the element table would never find them.
      Attr a = newAttributes [name] as Attr;
      if (a == null)
          a = schemas.GlobalAttributes [name] as Attr;
      return a;
  }
  // Creates a new element declaration whose name is the local
  // part of the given qualified name. The element is not added
  // to any schema here.
  private Element CreateElement (QName name)
  {
      Element created = new Element ();
      created.Name = name.Name;
      return created;
  }
  // Creates an element for the given qualified name, adds it to the
  // items of the schema returned by PopulateSchema() for the name's
  // namespace, and records it in newElements.
  private Element CreateGlobalElement (QName name)
  {
      Element element = CreateElement (name);
      PopulateSchema (name.Namespace).Items.Add (element);
      newElements.Add (name, element);
      return element;
  }
  // Creates an attribute named after the local part of the given
  // qualified name, adds it to the items of the schema returned by
  // PopulateSchema() for the name's namespace, and records it in
  // newAttributes.
  private Attr CreateGlobalAttribute (QName name)
  {
      Attr created = new Attr ();
      created.Name = name.Name;

      XmlSchema owner = PopulateSchema (name.Namespace);
      owner.Items.Add (created);

      newAttributes.Add (name, created);
      return created;
  }
  // Returns a schema for the given namespace: the first one the
  // schema set already holds for it, or else a newly created schema
  // (qualified element form, unqualified attribute form) that is
  // added to the set.
  // Note that the return value never assures that all the
  // components in the parameter ns must reside in it.
  private XmlSchema PopulateSchema (string ns)
  {
      // Reuse the first schema already registered for the namespace.
      foreach (XmlSchema existing in schemas.Schemas (ns))
          return existing;

      XmlSchema created = new XmlSchema ();
      if (!String.IsNullOrEmpty (ns))
          created.TargetNamespace = ns;
      created.ElementFormDefault = Form.Qualified;
      created.AttributeFormDefault = Form.Unqualified;
      schemas.Add (created);
      return created;
  }
  // Convenience overload, mainly for schema component errors:
  // builds the exception without any reader information.
  private XmlSchemaInferenceException Error (XmlSchemaObject sourceObj, string message)
  {
      return Error (sourceObj, false, message);
  }
  // Builds (but does not throw) an inference exception.
  //   sourceObj: related schema component, or null. When present,
  //     its source URI, line number and line position are appended
  //     to the message.
  //   useReader: when true, the reader's base URI is appended to
  //     the message and, if the reader implements IXmlLineInfo, its
  //     line number and position are passed to the exception.
  //   message: the leading part of the exception message.
  private XmlSchemaInferenceException Error (
      XmlSchemaObject sourceObj,
      bool useReader,
      string message)
  {
      // The line number and position used to be passed to
      // String.Format without matching format items, so they never
      // showed up in the message.
      string componentInfo = sourceObj != null ?
          String.Format (". Related schema component is {0} (line {1}, position {2})",
              sourceObj.SourceUri,
              sourceObj.LineNumber,
              sourceObj.LinePosition) :
          String.Empty;
      string readerInfo = useReader ?
          String.Format (". {0}", source.BaseURI) :
          String.Empty;
      string msg = String.Concat (message, componentInfo, readerInfo);

      IXmlLineInfo li = source as IXmlLineInfo;
      if (useReader && li != null)
          return new XmlSchemaInferenceException (
              msg, null, li.LineNumber,
              li.LinePosition);
      else
          return new XmlSchemaInferenceException (msg);
  }
  1044. #endregion
  1045. }
  1046. }
  1047. #endif