Browse Source

more EReg fixes and tests (fixed issue #1293)

Simon Krajewski 12 years ago
parent
commit
27c1783929
6 changed files with 174 additions and 58 deletions
  1. 14 4
      std/cpp/_std/EReg.hx
  2. 45 13
      std/flash/_std/EReg.hx
  3. 31 11
      std/js/_std/EReg.hx
  4. 44 19
      std/neko/_std/EReg.hx
  5. 14 5
      std/php/_std/EReg.hx
  6. 26 6
      tests/unit/TestEReg.hx

+ 14 - 4
std/cpp/_std/EReg.hx

@@ -150,14 +150,24 @@
 		var offset = 0;
 		var buf = new StringBuf();
 		do {
-			if (!matchSub(s, offset))
+			if (offset >= s.length)
 				break;
-			var p = matchedPos();
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
+				break;
+			}
+			var p = regexp_matched_pos(r,0);
 			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			offset = p.pos + p.len;
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
+			}
+			else
+				offset = p.pos + p.len;
 		} while (global);
-		buf.add(s.substr(offset));
+		if (!global && offset < s.length)
+			buf.add(s.substr(offset));		
 		return buf.toString();
 	}
 

+ 45 - 13
std/flash/_std/EReg.hx

@@ -57,12 +57,22 @@
 	}
 	
 	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
-		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
-		if (b) {
-			result.input = s;
-			result.index += pos;
+		return if (r.global) {
+			r.lastIndex = pos;
+			result = r.exec(len < 0 ? s : s.substr(0, pos + len));
+			var b = result != null;
+			if (b) {
+				result.input = s;
+			}
+			b;
+		} else {
+			var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
+			if (b) {
+				result.input = s;
+				result.index += pos;
+			}
+			b;
 		}
-		return b;
 	}
 
 	public function split( s : String ) : Array<String> {
@@ -79,14 +89,27 @@
 	public function map( s : String, f : EReg -> String ) : String {
 		var offset = 0;
 		var buf = new StringBuf();
+		var first = true;
 		do {
-			if (!matchSub(s, offset))
+			if (offset >= s.length)
 				break;
-			buf.add(s.substr(offset, result.index - offset));
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
+				break;
+			}
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			offset = result.index + result[0].length;
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
+			}
+			else
+				offset = p.pos + p.len;
+			first = false;
 		} while (r.global);
-		buf.add(s.substr(offset));
+		if (!r.global && offset < s.length)
+			buf.add(s.substr(offset));
 		return buf.toString();
 	}
 
@@ -95,13 +118,22 @@
 		var offset = 0;
 		var buf = new StringBuf();
 		do {
-			if (!matchSub(s, offset))
+			if (offset >= s.length)
+				break;
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
 				break;
-			buf.add(s.substr(offset, result.index - offset));
+			}
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			offset = result.index + result[0].length;
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
+			}
+			else
+				offset = p.pos + p.len;
 		} while (true);
-		buf.add(s.substr(offset));
 		return buf.toString();
 	}
 	#end

+ 31 - 11
std/js/_std/EReg.hx

@@ -56,13 +56,24 @@
 	}
 
 	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
-		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
-		if (b) {
-			r.s = s;
-			r.m.index += pos;
+		return if (r.global) {
+			r.lastIndex = pos;
+			r.m = r.exec(len < 0 ? s : s.substr(0, pos + len));
+			var b = r.m != null;
+			if (b) {
+				r.s = s;
+			}
+			b;
+		} else {
+			// TODO: check ^/$ corner cases: this branch matches against a substring of s, so the anchors apply to that substring rather than to s itself
+			var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
+			if (b) {
+				r.s = s;
+				r.m.index += pos;
+			}
+			b;
 		}
-		return b;
-	}
+	}	
 	
 	public function split( s : String ) : Array<String> {
 		// we can't use directly s.split because it's ignoring the 'g' flag
@@ -78,15 +89,24 @@
 		var offset = 0;
 		var buf = new StringBuf();
 		do {
-			if (!matchSub(s, offset))
+			if (offset >= s.length)
+				break;
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
 				break;
+			}
 			var p = matchedPos();
-			buf.add(s.substr(offset, cast(p.pos,Int) - offset));
+			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			var p = matchedPos();
-			offset = p.pos + p.len;
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
+			}
+			else
+				offset = p.pos + p.len;
 		} while (r.global);
-		buf.add(s.substr(offset));
+		if (!r.global && offset < s.length)
+			buf.add(s.substr(offset));
 		return buf.toString();
 	}
 

+ 44 - 19
std/neko/_std/EReg.hx

@@ -146,30 +146,55 @@
 		return b.toString();
 	}
 
+	//public function map( s : String, f : EReg -> String ) : String {
+		//var b = new StringBuf();
+		//var pos = 0;
+		//var len = s.length;
+		//var first = true;
+		//last = s;
+		//do {
+			//if( !regexp_match(r,untyped s.__s,pos,len) )
+				//break;
+			//var p = regexp_matched_pos(r,0);
+			//if( p.len == 0 && !first ) {
+				//if( p.pos == s.length )
+					//break;
+				//p.pos += 1;
+			//}
+			//b.addSub(s,pos,p.pos-pos);
+			//b.add(f(this));
+			//var tot = p.pos + p.len - pos;
+			//pos += tot;
+			//len -= tot;
+			//first = false;
+		//} while( global );
+		//b.addSub(s,pos,len);
+		//return b.toString();
+	//}
+	
 	public function map( s : String, f : EReg -> String ) : String {
-		var b = new StringBuf();
-		var pos = 0;
-		var len = s.length;
-		var first = true;
-		last = s;
+		var offset = 0;
+		var buf = new StringBuf();
 		do {
-			if( !regexp_match(r,untyped s.__s,pos,len) )
+			if (offset >= s.length)
+				break;
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
 				break;
+			}
 			var p = regexp_matched_pos(r,0);
-			if( p.len == 0 && !first ) {
-				if( p.pos == s.length )
-					break;
-				p.pos += 1;
+			buf.add(s.substr(offset, p.pos - offset));
+			buf.add(f(this));
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
 			}
-			b.addSub(s,pos,p.pos-pos);
-			b.add(f(this));
-			var tot = p.pos + p.len - pos;
-			pos += tot;
-			len -= tot;
-			first = false;
-		} while( global );
-		b.addSub(s,pos,len);
-		return b.toString();
+			else
+				offset = p.pos + p.len;
+		} while (global);
+		if (!global && offset < s.length)
+			buf.add(s.substr(offset));		
+		return buf.toString();
 	}
 
 	static var regexp_new_options = neko.Lib.load("regexp","regexp_new_options",2);

+ 14 - 5
std/php/_std/EReg.hx

@@ -74,9 +74,8 @@
 	}
 	
 	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
-		var p : Int = untyped __call__("preg_match", re, len < 0 ? s.substr(pos) : s.substr(pos,len), matches, __php__("PREG_OFFSET_CAPTURE"));
+		var p : Int = untyped __call__("preg_match", re, len < 0 ? s : s.substr(0,pos + len), matches, __php__("PREG_OFFSET_CAPTURE"), pos);
 		if(p > 0) {
-			untyped __php__("$this->matches[0][1] += $pos");
 			last = s;
 		}
 		else
@@ -99,14 +98,24 @@
 		var offset = 0;
 		var buf = new StringBuf();
 		do {
-			if (!matchSub(s, offset))
+			if (offset >= s.length)
 				break;
+			else if (!matchSub(s, offset)) {
+				buf.add(s.substr(offset));
+				break;
+			}
 			var p = matchedPos();
 			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			offset = p.pos + p.len;
+			if (p.len == 0) {
+				buf.add(s.substr(p.pos, 1));
+				offset = p.pos + 1;
+			}
+			else
+				offset = p.pos + p.len;
 		} while (global);
-		buf.add(s.substr(offset));
+		if (!global && offset < s.length)
+			buf.add(s.substr(offset));		
 		return buf.toString();
 	}
 	

+ 26 - 6
tests/unit/TestEReg.hx

@@ -69,7 +69,7 @@ class TestEReg extends Test {
 		
 		// we need to change our default customReplace implementation to fix that case
 		// the best is to add a matchSub(s,pos,len)
-		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx" );
+		eq( ~/a+/g.map("aaabacxa", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx[aaabacx]" );
 		
 		// subsequent tests
 		var r = ~/a+/g;
@@ -88,19 +88,39 @@ class TestEReg extends Test {
 		eq(r.matchedRight(), "bab");
 		t(r.matchSub("abab", 1));
 		eq(r.matchedRight(), "b");
-		eq(r.matchedLeft(), "ab");		
+		eq(r.matchedLeft(), "ab");
 		// length
-		f(r.matchSub("bbaa", 0, 1)); 
-		f(r.matchSub("bbaa", 0, 2)); 
+		f(r.matchSub("bbaa", 0, 1));
+		f(r.matchSub("bbaa", 0, 2));
 		f(r.matchSub("bbaa", 1, 1)); 
 		t(r.matchSub("bbaa", 2, 1));
 		eq(r.matchedLeft(), "bb");
 		eq(r.matchedRight(), "a");
+
+		eq( ~/x?/g.map("aaabacx", function(r) return "[" + r.matched(0)+ "]") , "[]a[]a[]a[]b[]a[]c[x]" );
+		
+		var f = function(x) return "([" +x.matchedLeft() + "]" + "[" +x.matched(0) + "]" + "[" +x.matchedRight() + "])b";
+		var r = ~/$/mg;
+		eq(r.map("\n", f), "([][][\n])b\n");
+		eq(r.map("a", f), "a([a][][])b");
+		eq(r.map("aa\na", f), "aa([aa][][\na])b\na([aa\na][][])b");
+		//eq(r.map("", f, "")); // let's ignore this case
 		
-		// this one creates infinite loops on too most of the platforms ! TOFIX !
-		// eq( ~/x?/g.customReplace("aaabacx", function(r) return "[" + r.matched(0)+ "]") , "[]a[]a[]a[]b[]a[]c[][x]" );
+		var r = ~/^/mg;
+		eq(r.map("\n", f), "([][][\n])b\n");
+		eq(r.map("a", f), "([][][a])ba");
+		eq(r.map("aa\na", f), "([][][aa\na])baa\n([aa\n][][a])ba");
 		
+		var r = ~/$/m;
+		eq(r.map("\n", f), "([][][\n])b\n");
+		eq(r.map("a", f), "a([a][][])b");
+		eq(r.map("aa\na", f), "aa([aa][][\na])b\na");
+		//eq(r.map("", f, "")); // let's ignore this case
 		
+		var r = ~/^/m;
+		eq(r.map("\n", f), "([][][\n])b\n");
+		eq(r.map("a", f), "([][][a])ba");
+		eq(r.map("aa\na", f), "([][][aa\na])baa\na");		
 		#end
 	}