Jelajahi Sumber

[php] fix EReg.map with "global" flag for unicode strings (closes #8861)

Aleksandr Kuzmenko 5 tahun lalu
induk
melakukan
9d86bd480d
2 file diubah dengan 35 tambahan dan 25 penghapusan
  1. 24 25
      std/php/_std/EReg.hx
  2. 11 0
      tests/unit/src/unit/issues/Issue8861.hx

+ 24 - 25
std/php/_std/EReg.hx

@@ -41,7 +41,11 @@ import php.*;
 	}
 
 	public function match(s:String):Bool {
-		var p = Global.preg_match(reUnicode, s, matches, Const.PREG_OFFSET_CAPTURE);
+		return matchFromByte(s, 0);
+	}
+
+	inline function matchFromByte(s:String, bytesOffset:Int):Bool {
+		var p = Global.preg_match(reUnicode, s, matches, Const.PREG_OFFSET_CAPTURE, bytesOffset);
 		if (p == false) {
 			handlePregError();
 			p = Global.preg_match(re, s, matches, Const.PREG_OFFSET_CAPTURE);
@@ -65,9 +69,9 @@ import php.*;
 		} else if (e == Const.PREG_JIT_STACKLIMIT_ERROR) {
 			throw 'failed due to limited JIT stack space';
 		}
-		// else if(e == PREG_BAD_UTF8_ERROR) {
+		// else if(e == Const.PREG_BAD_UTF8_ERROR) {
 		// 	throw 'EReg: malformed UTF8';
-		// } else if(e == PREG_BAD_UTF8_OFFSET_ERROR) {
+		// } else if(e == Const.PREG_BAD_UTF8_OFFSET_ERROR) {
 		// 	throw 'EReg: the offset didn\'t correspond to the begin of a valid UTF-8 code point';
 		// }
 	}
@@ -75,7 +79,7 @@ import php.*;
 	public function matched(n:Int):String {
 		if (matches == null || n < 0)
 			throw "EReg::matched";
-		// we can't differenciate between optional groups at the end of a match
+		// we can't differentiate between optional groups at the end of a match
 		// that have not been matched and invalid groups
 		if (n >= Global.count(matches))
 			return null;
@@ -143,30 +147,25 @@ import php.*;
 	}
 
 	public function map(s:String, f:EReg->String):String {
-		var offset = 0;
-		var buf = new StringBuf();
-		var length = s.length;
+		if(!matchFromByte(s, 0)) {
+			return s;
+		}
+		var result = '';
+		var bytesOffset = 0;
+		var bytesTotal = Global.strlen(s);
 		do {
-			if (offset >= length) {
-				break;
-			} else if (!matchSub(s, offset)) {
-				buf.add(s.substr(offset));
-				break;
-			}
-			var p = matchedPos();
-			buf.add(s.substr(offset, p.pos - offset));
-			buf.add(f(this));
-			if (p.len == 0) {
-				buf.add(s.substr(p.pos, 1));
-				offset = p.pos + 1;
+			result += Global.substr(s, bytesOffset, matches[0][1] - bytesOffset);
+			result += f(this);
+			bytesOffset = matches[0][1];
+			if(matches[0][0] == '') {
+				result += Global.substr(s, bytesOffset, 1);
+				bytesOffset++;
 			} else {
-				offset = p.pos + p.len;
+				bytesOffset += Global.strlen(matches[0][0]);
 			}
-		} while (global);
-		if (!global && offset > 0 && offset < length) {
-			buf.add(s.substr(offset));
-		}
-		return buf.toString();
+		} while(global && bytesOffset < bytesTotal && matchFromByte(s, bytesOffset));
+		result += Global.substr(s, bytesOffset);
+		return result;
 	}
 
 	public static inline function escape(s:String):String {

+ 11 - 0
tests/unit/src/unit/issues/Issue8861.hx

@@ -0,0 +1,11 @@
+package unit.issues;
+
+class Issue8861 extends Test {
+	function test() {
+		var str = ~/[äöü]/gu.map('väter, söhne, mütter', rgx -> switch rgx.matched(0) {
+			case 'ä' : 'ae'; case 'ö' : 'oe';
+			case 'ü' : 'ue'; case _ : '';
+		});
+		eq('vaeter, soehne, muetter', str);
+	}
+}