Parcourir la source

2004-01-28 Atsushi Enomoto <[email protected]>

	* DTDReader.cs : Added Normalization.  Parameter Entity declaration
	  should block invalid characters. (GEDecl should be fixed as well.)
	* XmlConstructs.cs : Should allow surrogate chars.
	* XmlNotation.cs : When prefix is empty (i.e. almost all cases)
	  Name should not add ':'. Patch by Boris Kirzner.
	* XmlTextReader.cs : Modified private ReadCharacterReference() to
	  return the character (or -1 when nothing should be returned).
	  Text character references now have their character range checked.
	  Set DTDReader.Normalization as well as the reader's own.

svn path=/trunk/mcs/; revision=22580
Atsushi Eno il y a 22 ans
Parent
commit
fa84bb251d

+ 12 - 0
mcs/class/System.XML/System.Xml/ChangeLog

@@ -1,3 +1,15 @@
+2004-01-28  Atsushi Enomoto <[email protected]>
+
+	* DTDReader.cs : Added Normalization.  Parameter Entity declaration
+	  should block invalid characters. (GEDecl should be fixed as well.)
+	* XmlConstructs.cs : Should allow surrogate chars.
+	* XmlNotation.cs : When prefix is empty (i.e. almost all cases) 
+	  Name should not add ':'. Patch by Boris Kirzner.
+	* XmlTextReader.cs : Modified private ReadCharacterReference() to
+	  return the character (or -1 when nothing should be returned).
+	  Text character references now have their character range checked.
+	  Set DTDReader.Normalization as well as the reader's own.
+
 2004-01-28  Atsushi Enomoto <[email protected]>
 
 	* DTDReader.cs : Make sure that parameter entities are not allowed

+ 29 - 13
mcs/class/System.XML/System.Xml/DTDReader.cs

@@ -40,6 +40,8 @@ namespace System.Xml
 		// Parameter entity placeholder
 		private int dtdIncludeSect;
 
+		private bool normalization;
+
 		private bool processingInternalSubset;
 
 		string cachedPublicId;
@@ -65,6 +67,11 @@ namespace System.Xml
 			get { return currentInput.BaseURI; }
 		}
 
+		public bool Normalization {
+			get { return normalization; }
+			set { normalization = value; }
+		}
+
 		// A buffer for ReadContent for ReadOuterXml
 		private StringBuilder CurrentTag {
 			get {
@@ -124,7 +131,6 @@ namespace System.Xml
 			}
 			StringCollection sc = new StringCollection ();
 
-//			/*
 			// Entity recursion check.
 			foreach (DTDEntityDeclaration ent in DTD.EntityDecls.Values) {
 				if (ent.NotationName != null) {
@@ -132,7 +138,6 @@ namespace System.Xml
 					sc.Clear ();
 				}
 			}
-//			*/
 
 			return DTD;
 		}
@@ -553,41 +558,42 @@ namespace System.Xml
 				ClearValueBuffer ();
 				bool loop = true;
 				while (loop) {
-					int c = PeekChar ();
+					int c = ReadChar ();
 					switch (c) {
 					case -1:
 						throw new XmlException ("unexpected end of stream in entity value definition.");
 					case '"':
-						ReadChar ();
 						if (quoteChar == '"')
 							loop = false;
 						else
 							AppendValueChar ('"');
 						break;
 					case '\'':
-						ReadChar ();
 						if (quoteChar == '\'')
 							loop = false;
 						else
 							AppendValueChar ('\'');
 						break;
 					case '&':
-						ReadChar ();
 						if (PeekChar () == '#') {
 							ReadChar ();
-							ReadCharacterReference ();
+							c = ReadCharacterReference ();
+							if (XmlConstructs.IsInvalid (c))
+								throw new XmlException (this as IXmlLineInfo, "Invalid character was used to define parameter entity.");
+
 						}
 						else
 							AppendValueChar ('&');
 						break;
 					case '%':
-						ReadChar ();
 						string peName = ReadName ();
 						Expect (';');
 						valueBuffer.Append (GetPEValue (peName));
 						break;
 					default:
-						AppendValueChar (ReadChar ());
+						if (XmlConstructs.IsInvalid (c))
+							throw new XmlException (this as IXmlLineInfo, "Invalid character was used to define parameter entity.");
+						AppendValueChar (c);
 						break;
 					}
 				}
@@ -693,9 +699,14 @@ namespace System.Xml
 			ClearValueBuffer ();
 
 			while (PeekChar () != quoteChar) {
-				switch (PeekChar ()) {
+				int ch = ReadChar ();
+				/*
+				FIXME: The validity of character reference ranges
+				should be checked here, but we also need to consider
+				how to handle cases such as &#38;amp;
+				*/
+				switch (ch) {
 				case '%':
-					ReadChar ();
 					string name = ReadName ();
 					Expect (';');
 					if (decl.IsInternalSubset)
@@ -706,7 +717,9 @@ namespace System.Xml
 				case -1:
 					throw new XmlException ("unexpected end of stream.");
 				default:
-					AppendValueChar (ReadChar ());
+					if (this.normalization && XmlConstructs.IsInvalid (ch))
+						throw new XmlException (this as IXmlLineInfo, "Invalid character was found in the entity declaration.");
+					AppendValueChar (ch);
 					break;
 				}
 			}
@@ -1376,7 +1389,9 @@ namespace System.Xml
 			Expect ("?>");
 		}
 
-		private void ReadCharacterReference ()
+		// Note that this method now behaves differently from the one in
+		// XmlTextReader: it calls AppendValueChar() internally.
+		private int ReadCharacterReference ()
 		{
 			int value = 0;
 
@@ -1421,6 +1436,7 @@ namespace System.Xml
 				throw new XmlException (this as IXmlLineInfo,
 					"Referenced character was not allowed in XML.");
 			AppendValueChar (value);
+			return value;
 		}
 
 		private void AppendNameChar (int ch)

+ 2 - 2
mcs/class/System.XML/System.Xml/XmlConstructs.cs

@@ -322,12 +322,12 @@ namespace System.Xml
 		/// <param name="c">The character to check.</param>
 		public static bool IsValid(char c) 
 		{
-			return (CHARS[c] & VALID) != 0;
+			return c > 0 && ((int) c > 0xffff || ((CHARS[c] & VALID) != 0));
 		}
 
 		public static bool IsValid(int c) 
 		{
-			return c > 0 && c < CHARS.Length && (CHARS[c] & VALID) != 0;
+			return c > 0 && ((int) c > 0xffff || (CHARS[c] & VALID) != 0);
 		}
 
 		/// <summary>

+ 1 - 1
mcs/class/System.XML/System.Xml/XmlNotation.cs

@@ -60,7 +60,7 @@ namespace System.Xml
 		}
 
 		public override string Name {
-			get { return prefix + ":" + localName; }
+			get { return (prefix != String.Empty) ? (prefix + ":" + localName) : localName; }
 		}
 
 		public override XmlNodeType NodeType {

+ 37 - 26
mcs/class/System.XML/System.Xml/XmlTextReader.cs

@@ -1486,22 +1486,28 @@ namespace System.Xml
 			while (ch != '<' && ch != -1) {
 				if (ch == '&') {
 					ReadChar ();
-					if (ReadReference (false))
+					ch = ReadReference (false);
+					if (returnEntityReference) // Returns -1 if char validation should not be done
 						break;
-				} else {
-					if (normalization && XmlConstructs.IsInvalid (ch))
-						throw new XmlException (this as IXmlLineInfo,
-							"Not allowed character was found.");
-					AppendValueChar (ReadChar ());
-					if (ch == ']') {
-						if (previousCloseBracketColumn == LinePosition - 1 &&
-							previousCloseBracketLine == LineNumber)
-							if (PeekChar () == '>')
-								throw new XmlException (this as IXmlLineInfo,
-									"Inside text content, character sequence ']]>' is not allowed.");
-						previousCloseBracketColumn = LinePosition;
-						previousCloseBracketLine = LineNumber;
-					}
+				}
+				else
+					ch = ReadChar ();
+
+				if (normalization && XmlConstructs.IsInvalid (ch))
+					throw new XmlException (this as IXmlLineInfo,
+						"Not allowed character was found.");
+				AppendValueChar (ch);
+
+				// Block "]]>"
+				if (ch == ']') {
+					if (previousCloseBracketColumn == LinePosition - 1 &&
+						previousCloseBracketLine == LineNumber)
+						if (PeekChar () == '>')
+							throw new XmlException (this as IXmlLineInfo,
+								"Inside text content, character sequence ']]>' is not allowed.");
+					// This tricky style is required to check "] ]]>"
+					previousCloseBracketColumn = LinePosition;
+					previousCloseBracketLine = LineNumber;
 				}
 				ch = PeekChar ();
 				notWhitespace = true;
@@ -1527,18 +1533,16 @@ namespace System.Xml
 		// character reference or one of the predefined entities.
 		// This allows the ReadText method to break so that the
 		// next call to Read will return the EntityReference node.
-		private bool ReadReference (bool ignoreEntityReferences)
+		private int ReadReference (bool ignoreEntityReferences)
 		{
 			if (PeekChar () == '#') {
 				ReadChar ();
-				ReadCharacterReference ();
+				return ReadCharacterReference ();
 			} else
-				ReadEntityReference (ignoreEntityReferences);
-
-			return returnEntityReference;
+				return ReadEntityReference (ignoreEntityReferences);
 		}
 
-		private void ReadCharacterReference ()
+		private int ReadCharacterReference ()
 		{
 			int value = 0;
 
@@ -1582,10 +1586,12 @@ namespace System.Xml
 			if (normalization && value < 0xffff && !XmlConstructs.IsValid (value))
 				throw new XmlException (this as IXmlLineInfo,
 					"Referenced character was not allowed in XML.");
-			AppendValueChar (value);
+			return value;
 		}
 
-		private void ReadEntityReference (bool ignoreEntityReferences)
+		// Returns -1 if it should not be validated.
+		// Real EOF must not be detected here.
+		private int ReadEntityReference (bool ignoreEntityReferences)
 		{
 			nameLength = 0;
 
@@ -1605,7 +1611,8 @@ namespace System.Xml
 
 			char predefined = XmlChar.GetPredefinedEntity (name);
 			if (predefined != 0)
-				AppendValueChar (predefined);
+//				AppendValueChar (predefined);
+				return predefined;
 			else {
 				if (ignoreEntityReferences) {
 					AppendValueChar ('&');
@@ -1620,6 +1627,7 @@ namespace System.Xml
 					entityReferenceName = name;
 				}
 			}
+			return -1;
 		}
 
 		// The reader is positioned on the first character of
@@ -1754,7 +1762,8 @@ namespace System.Xml
 					int startPosition = currentTag.Length - 1;
 					if (PeekChar () == '#') {
 						ReadChar ();
-						this.ReadCharacterReference ();
+						ch = ReadCharacterReference ();
+						AppendValueChar (ch);
 						break;
 					}
 					// Check XML 1.0 section 3.1 WFC.
@@ -2190,7 +2199,9 @@ namespace System.Xml
 			DTD.LineNumber = line;
 			DTD.LinePosition = column;
 
-			return new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn).GenerateDTDObjectModel ();
+			DTDReader dr = new DTDReader (DTD, intSubsetStartLine, intSubsetStartColumn);
+			dr.Normalization = this.normalization;
+			return dr.GenerateDTDObjectModel ();
 		}
 
 		private enum DtdInputState