Преглед изворног кода

[php] optimize String unicode iterators even more

Alexander Kuzmenko пре 7 година
родитељ
комит
aeb751213b

+ 16 - 4
std/php/_std/haxe/iterators/StringIteratorUnicode.hx

@@ -22,11 +22,12 @@
 package haxe.iterators;
 
 import php.Global.*;
+import php.NativeString;
 
 class StringIteratorUnicode {
 	var byteOffset:Int = 0;
 	var totalBytes:Int;
-	var s:String;
+	var s:NativeString;
 
 	public inline function new(s:String) {
 		this.s = s;
@@ -38,9 +39,20 @@ class StringIteratorUnicode {
 	}
 
 	public inline function next() {
-		var char = mb_substr(substr(s, byteOffset, 4), 0, 1, 'UTF-8');
-		byteOffset += strlen(char);
-		return mb_ord(char, 'UTF-8');
+		var code = ord(s[byteOffset]); // Lead byte at the current offset; the branches below decode one UTF-8 sequence by hand and advance byteOffset past it.
+		if(code < 0xC0) { // Below 0xC0: consumed as a single byte and returned as-is (stray continuation bytes are not rejected).
+			byteOffset++;
+		} else if(code < 0xE0) { // 0xC0..0xDF: two-byte sequence, 5 payload bits from the lead byte + 6 from the next byte.
+			code = ((code - 0xC0) << 6) + ord(s[byteOffset + 1]) - 0x80;
+			byteOffset += 2;
+		} else if(code < 0xF0) { // 0xE0..0xEF: three-byte sequence, 4 + 6 + 6 payload bits.
+			code = ((code - 0xE0) << 12) + ((ord(s[byteOffset + 1]) - 0x80) << 6) + ord(s[byteOffset + 2]) - 0x80;
+			byteOffset += 3;
+		} else { // 0xF0 and above: handled as a four-byte sequence, 3 + 6 + 6 + 6 payload bits. NOTE(review): following bytes are read without a bounds check in this method.
+			code = ((code - 0xF0) << 18) + ((ord(s[byteOffset + 1]) - 0x80) << 12) + ((ord(s[byteOffset + 2]) - 0x80) << 6) + ord(s[byteOffset + 3]) - 0x80;
+			byteOffset += 4;
+		}
+		return code; // The code point assembled above.
 	}
 
 	static public inline function unicodeIterator(s:String) {

+ 16 - 4
std/php/_std/haxe/iterators/StringKeyValueIteratorUnicode.hx

@@ -22,12 +22,13 @@
 package haxe.iterators;
 
 import php.Global.*;
+import php.NativeString;
 
 class StringKeyValueIteratorUnicode {
 	var charOffset:Int = 0;
 	var byteOffset:Int = 0;
 	var totalBytes:Int;
-	var s:String;
+	var s:NativeString;
 
 	public inline function new(s:String) {
 		this.s = s;
@@ -39,9 +40,20 @@ class StringKeyValueIteratorUnicode {
 	}
 
 	public inline function next() {
-		var char = mb_substr(substr(s, byteOffset, 4), 0, 1, 'UTF-8');
-		byteOffset += strlen(char);
-		return { key: charOffset++, value: mb_ord(char, 'UTF-8') };
+		var code = ord(s[byteOffset]); // Lead byte at the current offset; the branches below decode one UTF-8 sequence by hand and advance byteOffset past it.
+		if(code < 0xC0) { // Below 0xC0: consumed as a single byte and returned as-is (stray continuation bytes are not rejected).
+			byteOffset++;
+		} else if(code < 0xE0) { // 0xC0..0xDF: two-byte sequence, 5 payload bits from the lead byte + 6 from the next byte.
+			code = ((code - 0xC0) << 6) + ord(s[byteOffset + 1]) - 0x80;
+			byteOffset += 2;
+		} else if(code < 0xF0) { // 0xE0..0xEF: three-byte sequence, 4 + 6 + 6 payload bits.
+			code = ((code - 0xE0) << 12) + ((ord(s[byteOffset + 1]) - 0x80) << 6) + ord(s[byteOffset + 2]) - 0x80;
+			byteOffset += 3;
+		} else { // 0xF0 and above: handled as a four-byte sequence, 3 + 6 + 6 + 6 payload bits. NOTE(review): following bytes are read without a bounds check in this method.
+			code = ((code - 0xF0) << 18) + ((ord(s[byteOffset + 1]) - 0x80) << 12) + ((ord(s[byteOffset + 2]) - 0x80) << 6) + ord(s[byteOffset + 3]) - 0x80;
+			byteOffset += 4;
+		}
+		return { key: charOffset++, value: code }; // key: character index (post-incremented per call); value: the code point assembled above.
 	}
 
 	static public inline function unicodeKeyValueIterator(s:String) {

+ 16 - 0
tests/benchs/src/cases/StringIterator.hx

@@ -52,4 +52,20 @@ class StringIterator extends TestCase {
 		);
 		return suite.run();
 	}
+
+	function measure4BytesL100() { // Benchmark case: two ways of walking a string built from the 4-byte character "𠜎".
+		var s = "".lpad("𠜎", 100); // Left-pads the empty string with "𠜎" up to a target length of 100 (presumably StringTools.lpad via static extension — confirm).
+		var suite = new Suite("length 100 of 4-bytes characters");
+		suite.add("0...length + fastCodeAt", // Case 1: index loop over 0...s.length reading each position with fastCodeAt.
+			for (key in 0...s.length) {
+				var value = s.fastCodeAt(key);
+			}
+		);
+		suite.add("StringKeyValueIteratorUnicode", // Case 2: key => value iteration through StringKeyValueIteratorUnicode; the loop body is intentionally empty.
+			for (key => value in new StringKeyValueIteratorUnicode(s)) {
+
+			}
+		);
+		return suite.run(); // Runs both cases and returns the suite's result.
+	}
 }