瀏覽代碼

use more efficient string caching.
string lengths are now in utf8 char count.

Nicolas Cannasse 19 年之前
父節點
當前提交
4a950b43ae
共有 2 個文件被更改,包括 49 次插入60 次删除
  1. 32 37
      std/haxe/Serializer.hx
  2. 17 23
      std/haxe/Unserializer.hx

+ 32 - 37
std/haxe/Serializer.hx

@@ -28,10 +28,24 @@ class Serializer {
 
 	var buf : StringBuf;
 	var cache : Array<Dynamic>;
+	var shash : Hash<Int>;
+	var scount : Int;
 
 	public function new() {
 		buf = new StringBuf();
 		cache = new Array();
+		shash = new Hash();
+		scount = 0;
+	}
+
+	/**
+		Deactivate object caching. If you are sure that your value
+		does not contain multiple references to the same object or
+		circular references between objects, this should speed up
+		serialization.
+	**/
+	public function dontUseCache() {
+		cache = null;
 	}
 
 	public function toString() {
@@ -49,8 +63,8 @@ class Serializer {
 		k : NaN
 		m : -Inf
 		p : +Inf
-		s : string
-		j : utf8 string
+		s : utf8 string
+		j : utf8 escaped string
 		a : array
 		u : array nulls
 		h : array end
@@ -61,50 +75,31 @@ class Serializer {
 		w : enum
 	*/
 
-	public function bytes(s : String) {
-		#if neko
-		return s.length;
-		#else true
-		var b = s.length;
-		for( i in 0...s.length ) {
-			var c = s.charCodeAt(i);
-			if( c < 0x7F )
-				continue;
-			if( c < 0x7FF ) {
-				b++;
-				continue;
-			}
-			if( c < 0xFFFF ) {
-				b += 2;
-				continue;
-			}
-			b += 3;
-		}
-		return b;
-		#end
-	}
-
 	function serializeString( s : String ) {
-		if( serializeRef(s) )
+		var x = shash.get(s);
+		if( x != null ) {
+			buf.add("R");
+			buf.add(x);
 			return;
-		for( i in 0...s.length ) {
-			var c = s.charCodeAt(i);
-			if( c > 0x7F || c == 13 || c == 10 ) {
-				s = s.split("\\").join("\\\\").split("\n").join("\\n").split("\r").join("\\r");
-				buf.add("j");
-				buf.add(bytes(s));
-				buf.add(":");
-				buf.add(s);
-				return;
-			}
 		}
-		buf.add("s");
+		shash.set(s,scount++);
+		if( s.indexOf("\n") != -1 || s.indexOf("\r") != -1 ) {
+			buf.add("j");
+			s = s.split("\\").join("\\\\").split("\n").join("\\n").split("\r").join("\\r");
+		} else
+			buf.add("s");
+		#if neko
+		buf.add(neko.Utf8.length(s));
+		#else true
 		buf.add(s.length);
+		#end
 		buf.add(":");
 		buf.add(s);
 	}
 
 	function serializeRef(v) {
+		if( cache == null )
+			return false;
 		#if js
 		var vt = untyped __js__("typeof")(v);
 		#end

+ 17 - 23
std/haxe/Unserializer.hx

@@ -38,12 +38,14 @@ class Unserializer {
  	var pos : Int;
  	var length : Int;
  	var cache : Array<Dynamic>;
+ 	var scache : Array<String>;
  	var resolver : TypeResolver;
 
  	public function new( buf : String ) {
  		this.buf = buf;
  		length = buf.length;
  		pos = 0;
+ 		scache = new Array();
  		cache = new Array();
  		setResolver(DEFAULT_RESOLVER);
  	}
@@ -136,9 +138,14 @@ class Unserializer {
  			var len = readDigits();
  			if( buf.charAt(pos++) != ":" || length - pos < len )
 				throw "Invalid string length";
+			#if neko
+			var s = neko.Utf8.sub(buf,pos,len);
+			pos += s.length;
+			#else true
  			var s = buf.substr(pos,len);
  			pos += len;
-			cache.push(s);
+ 			#end
+			scache.push(s);
 			return s;
  		case 106: // j
  			var len = readDigits();
@@ -147,32 +154,14 @@ class Unserializer {
  			#if neko
 			if( length - pos < len )
 				throw "Invalid string length";
+ 			var s = neko.Utf8.sub(buf,pos,len);
+ 			pos += s.length;
+ 			#else true
  			var s = buf.substr(pos,len);
  			pos += len;
- 			#else true
- 			var old = pos;
- 			var max = pos + len;
- 			while( pos < max ) {
-				var c = buf.charCodeAt(pos++);
-				if( c < 0x7F )
-					continue;
-				if( c < 0x7FF ) {
-					max--;
-					continue;
-				}
-				if( c < 0xFFFF ) {
-					max -= 2;
-					continue;
-				}
-				max -= 3;
-			}
-			len = max - old;
-			if( pos != max || length - old < len )
-				throw "Invalid string length";
-			var s = buf.substr(old,len);
  			#end
  			s = s.split("\\r").join("\r").split("\\n").join("\n").split("\\\\").join("\\");
- 			cache.push(s);
+ 			scache.push(s);
  			return s;
  		case 97: // a
  			var a = new Array<Dynamic>();
@@ -205,6 +194,11 @@ class Unserializer {
  			if( n < 0 || n >= cache.length )
  				throw "Invalid reference";
  			return cache[n];
+ 		case 82: // R
+			var n = readDigits();
+			if( n < 0 || n >= scache.length )
+				throw "Invalid string reference";
+			return scache[n];
  		case 120: // x
 			throw unserialize();
 		case 99: // c