Преглед на файлове

slightly relax non strict mode, fix encoding for &# attributes on some platforms

Nicolas Cannasse преди 10 години
родител
ревизия
f31f0dd74d
променени са 1 файла, в които са добавени 38 реда и са изтрити 12 реда
  1. 38 12
      std/haxe/xml/Parser.hx

+ 38 - 12
std/haxe/xml/Parser.hx

@@ -58,14 +58,17 @@ class Parser
 		h;
 	}
 
-	static public function parse(str:String)
+	/**
+		Parses `str` into a new XML document node and returns it. When `strict` is true, unescaped `<` or `>` in attribute values, undefined entities and invalid characters inside an entity throw an error; when false (the default) they are tolerated and kept as literal text.
+	**/
+	static public function parse(str:String,strict=false)
 	{
 		var doc = Xml.createDocument();
-		doParse(str, 0, doc);
+		doParse(str, 0, strict, doc);
 		return doc;
 	}
 
-	static function doParse(str:String, p:Int = 0, ?parent:Xml):Int
+	static function doParse(str:String, p:Int = 0, strict:Bool, ?parent:Xml):Int
 	{
 		var xml:Xml = null;
 		var state = S.BEGIN;
@@ -239,7 +242,8 @@ class Parser
 							state = S.ESCAPE;
 							escapeNext = S.ATTRIB_VAL;
 							start = p + 1;
-						case '>'.code | '<'.code:
+						case '>'.code | '<'.code if( strict ):
+							// HTML allows these in attribute values, so they are only rejected in strict mode
 							throw "Invalid unescaped " + String.fromCharCode(c) + " in attribute value";
 						case _ if (c == attrValQuote):
 							var val = buf.toString() + str.substr(start, p - start);
@@ -249,7 +253,7 @@ class Parser
 							next = S.BODY;
 					}
 				case S.CHILDS:
-					p = doParse(str, p, xml);
+					p = doParse(str, p, strict, xml);
 					start = p;
 					state = S.BEGIN;
 				case S.WAIT_END:
@@ -314,23 +318,45 @@ class Parser
 					{
 						var s = str.substr(start, p - start);
 						if (s.fastCodeAt(0) == '#'.code) {
-							var i = s.fastCodeAt(1) == 'x'.code
+							var c = s.fastCodeAt(1) == 'x'.code
 								? Std.parseInt("0" +s.substr(1, s.length - 1))
 								: Std.parseInt(s.substr(1, s.length - 1));
-							buf.add(String.fromCharCode(i));
+							#if (neko || cpp)
+							if( c >= 128 ) {
+								// UTF8-encode it
+								if( c <= 0x7FF ) {
+									buf.addChar(0xC0 | (c >> 6));
+									buf.addChar(0x80 | (c & 63));
+								} else if( c <= 0xFFFF ) {
+									buf.addChar(0xE0 | (c >> 12));
+									buf.addChar(0x80 | ((c >> 6) & 63));
+									buf.addChar(0x80 | (c & 63));
+								} else if( c <= 0x10FFFF ) {
+									buf.addChar(0xF0 | (c >> 18));
+									buf.addChar(0x80 | ((c >> 12) & 63));
+									buf.addChar(0x80 | ((c >> 6) & 63));
+									buf.addChar(0x80 | (c & 63));
+								} else
+									throw "Cannot encode UTF8-char " + c;
+							} else
+							#end
+							buf.addChar(c);
 						} else if (!escapes.exists(s)) {
-							#if xml_strict
-							throw 'Undefined entity: $s';
-							#else
+							if( strict )
+								throw 'Undefined entity: $s';
 							buf.add('&$s;');
-							#end
 						} else {
 							buf.add(escapes.get(s));
 						}
 						start = p + 1;
 						state = escapeNext;
 					} else if (!isValidChar(c) && c != "#".code) {
-						throw 'Invalid character in entity: ' + String.fromCharCode(c);
+						if( strict )
+							throw 'Invalid character in entity: ' + String.fromCharCode(c);
+						buf.add("&" + str.substr(start, p - start)); // not an entity after all: keep "&" and the text read so far as literal
+						p--; // step back so the loop's ++p re-reads the offending char in the restored state
+						start = p + 1; // pending text restarts at that char; everything before it is already in buf (start = p would duplicate one char)
+						state = escapeNext;
 					}
 			}
 			c = str.fastCodeAt(++p);