Browse Source

added EReg.map and EReg.matchSub (fixed issue #1178)

Simon Krajewski 12 years ago
parent
commit
d003bc068d
9 changed files with 163 additions and 41 deletions
  1. 12 1
      std/EReg.hx
  2. 21 7
      std/cpp/_std/EReg.hx
  3. 8 1
      std/cs/_std/EReg.hx
  4. 20 7
      std/flash/_std/EReg.hx
  5. 8 1
      std/java/_std/EReg.hx
  6. 22 7
      std/js/_std/EReg.hx
  7. 14 2
      std/neko/_std/EReg.hx
  8. 27 10
      std/php/_std/EReg.hx
  9. 31 5
      tests/unit/TestEReg.hx

+ 12 - 1
std/EReg.hx

@@ -74,6 +74,14 @@ class EReg {
 	public function matchedPos() : { pos : Int, len : Int } {
 	public function matchedPos() : { pos : Int, len : Int } {
 		return null;
 		return null;
 	}
 	}
+	
+	/**
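+		Tells if the regular expression matches the part of the String `s` that
+		starts at `pos` and is `len` characters long.
+		A negative `len` makes the implemented targets match up to the end of `s`.
+		Updates the internal state accordingly.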
+	**/
+	public function matchSub( s : String, pos : Int, len : Int = 0):Bool {
+		return false;
+	}
 
 
 	/**
 	/**
 		Split a string by using the regular expression to match
 		Split a string by using the regular expression to match
@@ -97,7 +105,7 @@ class EReg {
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 	**/
 	**/
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		var buf = new StringBuf();
 		while( true ) {
 		while( true ) {
 			if( !match(s) )
 			if( !match(s) )
@@ -110,4 +118,7 @@ class EReg {
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 21 - 7
std/cpp/_std/EReg.hx

@@ -62,6 +62,15 @@
 			return regexp_matched_pos(r,0);
 			return regexp_matched_pos(r,0);
 	}
 	}
 
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+			var p = regexp_match(r, s, pos, len < 0 ? s.length - pos : len);
+			if (p)
+				last = s;
+			else
+				last = null;
+			return p;
+	}
+	
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 			var pos = 0;
 			var pos = 0;
 			var len = s.length;
 			var len = s.length;
@@ -137,16 +146,18 @@
 			return b.toString();
 			return b.toString();
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
 				break;
-			buf.add(matchedLeft());
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			offset = p.pos + p.len;
+		} while (global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
@@ -155,4 +166,7 @@
 	static var regexp_matched : Dynamic -> Int -> Dynamic = cpp.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched : Dynamic -> Int -> Dynamic = cpp.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = cpp.Lib.load("regexp","regexp_matched_pos",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = cpp.Lib.load("regexp","regexp_matched_pos",2);
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 8 - 1
std/cs/_std/EReg.hx

@@ -71,6 +71,10 @@ import cs.system.text.regularExpressions.Regex;
 		return { pos : m.Index, len : m.Length };
 		return { pos : m.Index, len : m.Length };
 	}
 	}
 
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		return throw "not implemented yet";
+	}	
+	
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 		if (isGlobal)
 		if (isGlobal)
 			return cs.Lib.array(regex.Split(s));
 			return cs.Lib.array(regex.Split(s));
@@ -85,7 +89,7 @@ import cs.system.text.regularExpressions.Regex;
 		return untyped (s.Substring(0, m.Index) + by + s.Substring(m.Index + m.Length));
 		return untyped (s.Substring(0, m.Index) + by + s.Substring(m.Index + m.Length));
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		var buf = new StringBuf();
 		while (true)
 		while (true)
 		{
 		{
@@ -99,4 +103,7 @@ import cs.system.text.regularExpressions.Regex;
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 20 - 7
std/flash/_std/EReg.hx

@@ -55,6 +55,15 @@
 		if( result == null ) throw "No string matched";
 		if( result == null ) throw "No string matched";
 		return { pos : result.index, len : result[0].length };
 		return { pos : result.index, len : result[0].length };
 	}
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
+		if (b) {
+			result.input = s;
+			result.index += pos;
+		}
+		return b;
+	}
 
 
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 		// we can't use directly s.split because it's ignoring the 'g' flag
 		// we can't use directly s.split because it's ignoring the 'g' flag
@@ -67,17 +76,21 @@
 		return untyped s.replace(r,by);
 		return untyped s.replace(r,by);
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
 				break;
-			buf.add(matchedLeft());
+			buf.add(s.substr(offset, result.index - offset));
 			buf.add(f(this));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			offset = result.index + result[0].length;
+		} while (r.global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 8 - 1
std/java/_std/EReg.hx

@@ -161,6 +161,10 @@ class EReg {
 		return { pos : start, len : matcher.end() - start };
 		return { pos : start, len : matcher.end() - start };
 	}
 	}
 
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		return throw "not implemented yet";
+	}	
+	
 	/**
 	/**
 		Split a string by using the regular expression to match
 		Split a string by using the regular expression to match
 		the separators.
 		the separators.
@@ -207,7 +211,7 @@ class EReg {
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 	**/
 	**/
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		var buf = new StringBuf();
 		while( true ) {
 		while( true ) {
 			if( !match(s) )
 			if( !match(s) )
@@ -220,4 +224,7 @@ class EReg {
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 22 - 7
std/js/_std/EReg.hx

@@ -55,6 +55,15 @@
 		return { pos : r.m.index, len : r.m[0].length };
 		return { pos : r.m.index, len : r.m[0].length };
 	}
 	}
 
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) );
+		if (b) {
+			r.s = s;
+			r.m.index += pos;
+		}
+		return b;
+	}
+	
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 		// we can't use directly s.split because it's ignoring the 'g' flag
 		// we can't use directly s.split because it's ignoring the 'g' flag
 		var d = "#__delim__#";
 		var d = "#__delim__#";
@@ -65,17 +74,23 @@
 		return untyped s.replace(r,by);
 		return untyped s.replace(r,by);
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
 				break;
-			buf.add(matchedLeft());
+			var p = matchedPos();
+			buf.add(s.substr(offset, cast(p.pos,Int) - offset));
 			buf.add(f(this));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			var p = matchedPos();
+			offset = p.pos + p.len;
+		} while (r.global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 14 - 2
std/neko/_std/EReg.hx

@@ -61,6 +61,15 @@
 	public function matchedPos() : { pos : Int, len : Int } {
 	public function matchedPos() : { pos : Int, len : Int } {
 		return regexp_matched_pos(r,0);
 		return regexp_matched_pos(r,0);
 	}
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		var p = regexp_match(r, untyped s.__s, pos, len < 0 ? s.length - pos : len);
+		if( p )
+			last = s;
+		else
+			last = null;
+		return p;
+	}		
 
 
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 		var pos = 0;
 		var pos = 0;
@@ -137,7 +146,7 @@
 		return b.toString();
 		return b.toString();
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var b = new StringBuf();
 		var b = new StringBuf();
 		var pos = 0;
 		var pos = 0;
 		var len = s.length;
 		var len = s.length;
@@ -158,7 +167,7 @@
 			pos += tot;
 			pos += tot;
 			len -= tot;
 			len -= tot;
 			first = false;
 			first = false;
-		} while( true );
+		} while( global );
 		b.addSub(s,pos,len);
 		b.addSub(s,pos,len);
 		return b.toString();
 		return b.toString();
 	}
 	}
@@ -168,4 +177,7 @@
 	static var regexp_matched = neko.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched = neko.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = neko.Lib.load("regexp","regexp_matched_pos",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = neko.Lib.load("regexp","regexp_matched_pos",2);
 
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }
 }

+ 27 - 10
std/php/_std/EReg.hx

@@ -72,6 +72,17 @@
 	public function matchedPos() : { pos : Int, len : Int } {
 	public function matchedPos() : { pos : Int, len : Int } {
 		return untyped { pos : __php__("$this->matches[0][1]"), len : __call__("strlen",__php__("$this->matches[0][0]")) };
 		return untyped { pos : __php__("$this->matches[0][1]"), len : __call__("strlen",__php__("$this->matches[0][0]")) };
 	}
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		var p : Int = untyped __call__("preg_match", re, len < 0 ? s.substr(pos) : s.substr(pos,len), matches, __php__("PREG_OFFSET_CAPTURE"));
+		if(p > 0) {
+			untyped __php__("$this->matches[0][1] += $pos");
+			last = s;
+		}
+		else
+			last = null;
+		return p > 0;
+	}	
 
 
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
 		return untyped __php__("new _hx_array(preg_split($this->re, $s, $this->hglobal ? -1 : 2))");
 		return untyped __php__("new _hx_array(preg_split($this->re, $s, $this->hglobal ? -1 : 2))");
@@ -84,16 +95,22 @@
 		return untyped __call__("preg_replace", re, by, s, global ? -1 : 1);
 		return untyped __call__("preg_replace", re, by, s, global ? -1 : 1);
 	}
 	}
 
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
-		var buf = "";
-		while( true ) {
-			if( !match(s) )
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
+		var buf = new StringBuf();
+		do {
+			if (!matchSub(s, offset))
 				break;
 				break;
-			buf += matchedLeft();
-			buf += f(this);
-			s = matchedRight();
-		}
-		buf += s;
-		return buf;
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
+			buf.add(f(this));
+			offset = p.pos + p.len;
+		} while (global);
+		buf.add(s.substr(offset));
+		return buf.toString();
 	}
 	}
+	
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end
 }
 }

+ 31 - 5
tests/unit/TestEReg.hx

@@ -61,15 +61,41 @@ class TestEReg extends Test {
 		eq( '"' + block.split(test).join('","') + '"', '"","test",".blah","something:someval",""' );
 		eq( '"' + block.split(test).join('","') + '"', '"","test",".blah","something:someval",""' );
 		
 		
 		// test custom replace
 		// test custom replace
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" );
-		eq( ~/a+/.customReplace("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" ); // same without 'g'
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" );
+		eq( ~/a+/.map("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]bacx" ); // without 'g', only the first match is replaced
 		
 		
-		eq( ~/a+(b*)/g.customReplace("aaabacx", function(r) return "[" + r.matched(1) + "]") , "[b][]cx" );
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matchedRight() + "]") , "[bacx]b[cx]cx" );
+		eq( ~/a+(b*)/g.map("aaabacx", function(r) return "[" + r.matched(1) + "]") , "[b][]cx" );
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matchedRight() + "]") , "[bacx]b[cx]cx" );
 		
 		
 		// we need to change our default customReplace implementation to fix that case
 		// we need to change our default customReplace implementation to fix that case
 		// the best is to add a matchSub(s,pos,len)
 		// the best is to add a matchSub(s,pos,len)
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx" );
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx" );
+		
+		// subsequent tests
+		var r = ~/a+/g;
+		eq(r.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx");
+		eq(r.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx");
+		
+		// matchSub
+		var r = ~/a+/;
+		t(r.matchSub("abab", 0));
+		eq(r.matchedRight(), "bab");
+		t(r.matchSub("abab", 1));
+		eq(r.matchedRight(), "b");
+		eq(r.matchedLeft(), "ab");
+		// same again to make sure state is correct
+		t(r.matchSub("abab", 0));
+		eq(r.matchedRight(), "bab");
+		t(r.matchSub("abab", 1));
+		eq(r.matchedRight(), "b");
+		eq(r.matchedLeft(), "ab");		
+		// length
+		f(r.matchSub("bbaa", 0, 1)); 
+		f(r.matchSub("bbaa", 0, 2)); 
+		f(r.matchSub("bbaa", 1, 1)); 
+		t(r.matchSub("bbaa", 2, 1));
+		eq(r.matchedLeft(), "bb");
+		eq(r.matchedRight(), "a");
 		
 		
 		// this one creates infinite loops on most of the platforms! TOFIX!
 		// this one creates infinite loops on most of the platforms! TOFIX!
 		// eq( ~/x?/g.customReplace("aaabacx", function(r) return "[" + r.matched(0)+ "]") , "[]a[]a[]a[]b[]a[]c[][x]" );
 		// eq( ~/x?/g.customReplace("aaabacx", function(r) return "[" + r.matched(0)+ "]") , "[]a[]a[]a[]b[]a[]c[][x]" );