Преглед на файлове

escape entities in haxe.xml.Parser (fixed issue #1431)

Simon Krajewski преди 12 години
родител
ревизия
468fb6c7ef
променени са 6 файла, в които са добавени 77 реда и са изтрити 35 реда
  1. 1 1
      std/cpp/_std/Xml.hx
  2. 44 3
      std/haxe/xml/Parser.hx
  3. 1 1
      std/js/_std/Xml.hx
  4. 1 1
      std/neko/_std/Xml.hx
  5. 14 27
      std/php/_std/Xml.hx
  6. 16 2
      tests/unit/TestXML.hx

+ 1 - 1
std/cpp/_std/Xml.hx

@@ -380,7 +380,7 @@ enum XmlType {
 			s.add(_nodeName);
 			s.addChar(">".code);
 		case Xml.PCData:
-			s.add(_nodeValue);
+			s.add(StringTools.htmlEscape(_nodeValue));
 		case Xml.CData:
 			s.add("<![CDATA[");
 			s.add(_nodeValue);

+ 44 - 3
std/haxe/xml/Parser.hx

@@ -43,10 +43,22 @@ extern private class S {
 	public static inline var COMMENT		= 15;
 	public static inline var DOCTYPE		= 16;
 	public static inline var CDATA			= 17;
+	public static inline var ESCAPE			= 18;
 }
 
 class Parser
 {
+	static var escapes = { // named-entity table: entity name (text between '&' and ';') -> replacement string
+		var h = new haxe.ds.StringMap();
+		h.set("lt", "<");
+		h.set("gt", ">");
+		h.set("amp", "&");
+		h.set("quot", '"');
+		h.set("apos", "'");
+		h.set("nbsp", String.fromCharCode(160)); // the character with code 160
+		h; // value of the block expression, i.e. what the static field is initialised with
+	}
+	
 	static public function parse(str:String)
 	{
 		var doc = Xml.createDocument();
@@ -64,7 +76,7 @@ class Parser
 		var nsubs = 0;
 		var nbrackets = 0;
 		var c = str.fastCodeAt(p);
-
+		var buf = new StringBuf();
 		while (!c.isEof())
 		{
 			switch(state)
@@ -95,12 +107,25 @@ class Parser
 				case S.PCDATA:
 					if (c == '<'.code)
 					{
-						var child = Xml.createPCData(str.substr(start, p - start));
+						#if php
+						var child = Xml.createPCDataFromCustomParser(buf.toString() + str.substr(start, p - start));
+						#else
+						var child = Xml.createPCData(buf.toString() + str.substr(start, p - start));
+						#end
+						buf = new StringBuf();
 						parent.addChild(child);
 						nsubs++;
 						state = S.IGNORE_SPACES;
 						next = S.BEGIN_NODE;
 					}
+					#if !flash9
+					else if (c == '&'.code) {
+						buf.addSub(str, start, p - start);
+						state = S.ESCAPE;
+						next = S.PCDATA;
+						start = p + 1;
+					}
+					#end
 				case S.CDATA:
 					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code)
 					{
@@ -279,6 +304,22 @@ class Parser
 						parent.addChild(Xml.createProlog(str));
 						state = S.BEGIN;
 					}
+				case S.ESCAPE:
+					if (c == ';'.code)
+					{
+						var s = str.substr(start, p - start);
+						if (s.fastCodeAt(0) == '#'.code) {
+							var i = s.fastCodeAt(1) == 'x'.code
+								? Std.parseInt("0" +s.substr(1, s.length - 1))
+								: Std.parseInt(s.substr(1, s.length - 1));
+							buf.add(String.fromCharCode(i));
+						} else if (!escapes.exists(s))
+							buf.add('&$s;');
+						else
+							buf.add(escapes.get(s));
+						start = p + 1;
+						state = next;
+					}
 			}
 			c = str.fastCodeAt(++p);
 		}
@@ -292,7 +333,7 @@ class Parser
 		if (state == S.PCDATA)
 		{
 			if (p != start || nsubs == 0)
-				parent.addChild(Xml.createPCData(str.substr(start, p - start)));
+				parent.addChild(Xml.createPCData(buf.toString() + str.substr(start, p - start)));
 			return p;
 		}
 		

+ 1 - 1
std/js/_std/Xml.hx

@@ -280,7 +280,7 @@ enum XmlType {
 
 	public function toString() : String {
 		if( nodeType == Xml.PCData )
-			return _nodeValue;
+			return StringTools.htmlEscape(_nodeValue);
 		if( nodeType == Xml.CData )
 			return "<![CDATA["+_nodeValue+"]]>";
 		if( nodeType == Xml.Comment )

+ 1 - 1
std/neko/_std/Xml.hx

@@ -387,7 +387,7 @@ enum XmlType {
 			s.add(_nodeName);
 			s.addChar(">".code);
 		case Xml.PCData:
-			s.add(_nodeValue);
+			s.add(StringTools.htmlEscape(_nodeValue));
 		case Xml.CData:
 			s.add("<![CDATA[");
 			s.add(_nodeValue);

+ 14 - 27
std/php/_std/Xml.hx

@@ -20,30 +20,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 import php.Lib;
-/*
- * Copyright (c) 2005, The haXe Project Contributors
- * All rights reserved.
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *   - Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   - Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- */
 
 enum XmlType {
 }
@@ -68,7 +44,8 @@ enum XmlType {
 	var _attributes : haxe.ds.StringMap<String>;
 	var _children : Array<Xml>;
 	var _parent : Xml;
-
+	var _fromCustomParser:Bool;
+	
 	private static var build : Xml;
 	private static function __start_element_handler(parser : Dynamic, name : String, attribs : ArrayAccess<String>) : Void {
 		var node = createElement(name);
@@ -154,8 +131,18 @@ enum XmlType {
 		return build;
 	}
 
-	private function new() : Void {}
+	private function new(fromCustomParser:Bool = false) : Void { // defaults to false, so argument-less `new Xml()` calls keep working
+		_fromCustomParser = fromCustomParser; // consulted by toString() when printing a PCData node
+	}
 
+	@:allow(haxe.xml.Parser) // grants haxe.xml.Parser access to this factory
+	static function createPCDataFromCustomParser( data : String ) : Xml { // builds a PCData node flagged as coming from the custom parser
+		var r = new Xml(true); // true sets _fromCustomParser, which makes toString() html-escape the node value
+		r.nodeType = Xml.PCData;
+		r.set_nodeValue( data );
+		return r;
+	}
+	
 	public static function createElement( name : String ) : Xml {
 		var r = new Xml();
 		r.nodeType = Xml.Element;
@@ -320,7 +307,7 @@ enum XmlType {
 
 	public function toString() : String {
 		if( nodeType == Xml.PCData )
-			return _nodeValue;
+			return _fromCustomParser ? StringTools.htmlEscape(_nodeValue) : _nodeValue;
 
 		var s = "";
 

+ 16 - 2
tests/unit/TestXML.hx

@@ -60,7 +60,7 @@ class TestXML extends Test {
 		#end
 		#if flash9
 		eq( Xml.parse('&quot; &lt; &gt;').toString(), '" &lt; &gt;' ); // some entities are resolved but not escaped on printing
-		#else
+		#elseif
 		eq( Xml.parse('&quot; &lt; &gt;').toString(), '&quot; &lt; &gt;' );
 		#end
 	}
@@ -194,7 +194,7 @@ class TestXML extends Test {
 	function testEntities() {
 		var entities = ["&lt;", "&gt;", "&quot;", "&amp;", "&apos;", "&nbsp;", "&euro;", "&#64;", "&#244;", "&#x3F;", "&#xFF;"];
 		var values = entities.copy();
-		#if flash
+		#if (flash || js)
 		// flash parser does support XML + some HTML entities (nbsp only ?) + character codes entities
 		values = ['<', '>', '"', '&', "'", String.fromCharCode(160), '&euro;', '@', 'ô', '?', 'ÿ'];
 		#end
@@ -218,6 +218,20 @@ class TestXML extends Test {
 		}
 	}
 	
+	function testCustomXmlParser() { // exercises haxe.xml.Parser.parse directly: entity decoding, then a parse/print round trip
+		var entities = ["&lt;", "&gt;", "&quot;", "&amp;", "&apos;", "&euro;", "&#64;", "&#244;", "&#x3F;", "&#xFF;"]; // inputs, each parsed on its own
+		var values = ['<', '>', '"', '&', "'", '&euro;', '@', String.fromCharCode(244), String.fromCharCode(0x3F), String.fromCharCode(0xFF)]; // expected nodeValue per input; "&euro;" is expected back unchanged
+		
+		for( i in 0...entities.length ) {
+			infos(entities[i]);
+			eq( haxe.xml.Parser.parse(entities[i]).firstChild().nodeValue, values[i] );
+		}
+		
+		var s = "<a>&gt;<b>&lt;</b>&lt;&gt;<b>&gt;&lt;</b>\"</a>"; // round-trip fixture: escaped '<' / '>' plus one literal double quote
+		var xml = haxe.xml.Parser.parse(s);
+		eq(s, xml.toString()); // printing the parsed tree must reproduce the input text exactly
+	}
+	
 	function testMore() {
 		var doc = Xml.parse("<a>A</a><i>I</i>");
 		var aElement = doc.elementsNamed('a').next();