瀏覽代碼

added EReg.map and EReg.matchSub (fixed issue #1178)

Simon Krajewski 12 年之前
父節點
當前提交
d003bc068d
共有 9 個文件被更改,包括 163 次插入41 次删除
  1. 12 1
      std/EReg.hx
  2. 21 7
      std/cpp/_std/EReg.hx
  3. 8 1
      std/cs/_std/EReg.hx
  4. 20 7
      std/flash/_std/EReg.hx
  5. 8 1
      std/java/_std/EReg.hx
  6. 22 7
      std/js/_std/EReg.hx
  7. 14 2
      std/neko/_std/EReg.hx
  8. 27 10
      std/php/_std/EReg.hx
  9. 31 5
      tests/unit/TestEReg.hx

+ 12 - 1
std/EReg.hx

@@ -74,6 +74,14 @@ class EReg {
 	public function matchedPos() : { pos : Int, len : Int } {
 		return null;
 	}
+	
+	/**
+		Tells if the regular expression matches the part of s between pos and pos + len; a negative len (the default) means up to the end of s.
+		Updates the internal state accordingly, so that matched positions refer to the whole of s.
+	**/
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		return false;
+	}
 
 	/**
 		Split a string by using the regular expression to match
@@ -97,7 +105,7 @@ class EReg {
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 	**/
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		while( true ) {
 			if( !match(s) )
@@ -110,4 +118,7 @@ class EReg {
 		return buf.toString();
 	}
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 21 - 7
std/cpp/_std/EReg.hx

@@ -62,6 +62,15 @@
 			return regexp_matched_pos(r,0);
 	}
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool { // matches s inside the window that starts at pos; returns true on success
+			var p = regexp_match(r, s, pos, len < 0 ? s.length - pos : len); // a negative len means "from pos up to the end of s"
+			if (p)
+				last = s; // remember the full subject string after a successful match
+			else
+				last = null; // a failed match clears the remembered subject
+			return p;
+	}
+	
 	public function split( s : String ) : Array<String> {
 			var pos = 0;
 			var len = s.length;
@@ -137,16 +146,18 @@
 			return b.toString();
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
-			buf.add(matchedLeft());
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			offset = p.pos + p.len;
+		} while (global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 	}
 
@@ -155,4 +166,7 @@
 	static var regexp_matched : Dynamic -> Int -> Dynamic = cpp.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = cpp.Lib.load("regexp","regexp_matched_pos",2);
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 8 - 1
std/cs/_std/EReg.hx

@@ -71,6 +71,10 @@ import cs.system.text.regularExpressions.Regex;
 		return { pos : m.Index, len : m.Length };
 	}
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		return throw "not implemented yet"; // placeholder: every call throws, no matching is performed on this target yet
+	}	
+	
 	public function split( s : String ) : Array<String> {
 		if (isGlobal)
 			return cs.Lib.array(regex.Split(s));
@@ -85,7 +89,7 @@ import cs.system.text.regularExpressions.Regex;
 		return untyped (s.Substring(0, m.Index) + by + s.Substring(m.Index + m.Length));
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		while (true)
 		{
@@ -99,4 +103,7 @@ import cs.system.text.regularExpressions.Regex;
 		return buf.toString();
 	}
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 20 - 7
std/flash/_std/EReg.hx

@@ -55,6 +55,15 @@
 		if( result == null ) throw "No string matched";
 		return { pos : result.index, len : result[0].length };
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool { // matches only the part of s selected by pos/len; returns true on success
+		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) ); // a negative len takes everything from pos to the end of s
+		if (b) {
+			result.input = s; // report the full string, not the extracted substring, as the match input
+			result.index += pos; // shift the match index so it is relative to s again
+		}
+		return b;
+	}
 
 	public function split( s : String ) : Array<String> {
 		// we can't use directly s.split because it's ignoring the 'g' flag
@@ -67,17 +76,21 @@
 		return untyped s.replace(r,by);
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
-			buf.add(matchedLeft());
+			buf.add(s.substr(offset, result.index - offset));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			offset = result.index + result[0].length;
+		} while (r.global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 	}
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 8 - 1
std/java/_std/EReg.hx

@@ -161,6 +161,10 @@ class EReg {
 		return { pos : start, len : matcher.end() - start };
 	}
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool {
+		return throw "not implemented yet"; // placeholder: every call throws, no matching is performed on this target yet
+	}	
+	
 	/**
 		Split a string by using the regular expression to match
 		the separators.
@@ -207,7 +211,7 @@ class EReg {
 		can return the string that needs to be replaced. All occurrences are matched anyway,
 		and setting the [g] flag might cause some incorrect behavior on some platforms.
 	**/
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		while( true ) {
 			if( !match(s) )
@@ -220,4 +224,7 @@ class EReg {
 		return buf.toString();
 	}
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 22 - 7
std/js/_std/EReg.hx

@@ -55,6 +55,15 @@
 		return { pos : r.m.index, len : r.m[0].length };
 	}
 
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool { // matches only the part of s selected by pos/len; returns true on success
+		var b = match( len < 0 ? s.substr(pos) : s.substr(pos,len) ); // a negative len takes everything from pos to the end of s
+		if (b) {
+			r.s = s; // store the full string, not the extracted substring, as the matched subject
+			r.m.index += pos; // shift the match index so it is relative to s again
+		}
+		return b;
+	}
+	
 	public function split( s : String ) : Array<String> {
 		// we can't use directly s.split because it's ignoring the 'g' flag
 		var d = "#__delim__#";
@@ -65,17 +74,23 @@
 		return untyped s.replace(r,by);
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		do {
+			if (!matchSub(s, offset))
 				break;
-			buf.add(matchedLeft());
+			var p = matchedPos();
+			buf.add(s.substr(offset, cast(p.pos,Int) - offset));
 			buf.add(f(this));
-			s = matchedRight();
-		}
-		buf.add(s);
+			var p = matchedPos();
+			offset = p.pos + p.len;
+		} while (r.global);
+		buf.add(s.substr(offset));
 		return buf.toString();
 	}
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 14 - 2
std/neko/_std/EReg.hx

@@ -61,6 +61,15 @@
 	public function matchedPos() : { pos : Int, len : Int } {
 		return regexp_matched_pos(r,0);
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool { // matches s inside the window that starts at pos; returns true on success
+		var p = regexp_match(r, untyped s.__s, pos, len < 0 ? s.length - pos : len); // a negative len means "from pos up to the end of s"; NOTE(review): s.__s is presumably the underlying native string - confirm
+		if( p )
+			last = s; // remember the full subject string after a successful match
+		else
+			last = null; // a failed match clears the remembered subject
+		return p;
+	}		
 
 	public function split( s : String ) : Array<String> {
 		var pos = 0;
@@ -137,7 +146,7 @@
 		return b.toString();
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
+	public function map( s : String, f : EReg -> String ) : String {
 		var b = new StringBuf();
 		var pos = 0;
 		var len = s.length;
@@ -158,7 +167,7 @@
 			pos += tot;
 			len -= tot;
 			first = false;
-		} while( true );
+		} while( global );
 		b.addSub(s,pos,len);
 		return b.toString();
 	}
@@ -168,4 +177,7 @@
 	static var regexp_matched = neko.Lib.load("regexp","regexp_matched",2);
 	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = neko.Lib.load("regexp","regexp_matched_pos",2);
 
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end	
 }

+ 27 - 10
std/php/_std/EReg.hx

@@ -72,6 +72,17 @@
 	public function matchedPos() : { pos : Int, len : Int } {
 		return untyped { pos : __php__("$this->matches[0][1]"), len : __call__("strlen",__php__("$this->matches[0][0]")) };
 	}
+	
+	public function matchSub( s : String, pos : Int, len : Int = -1):Bool { // matches only the part of s selected by pos/len; returns true on success
+		var p : Int = untyped __call__("preg_match", re, len < 0 ? s.substr(pos) : s.substr(pos,len), matches, __php__("PREG_OFFSET_CAPTURE")); // runs preg_match on the extracted substring; a negative len takes everything from pos to the end of s
+		if(p > 0) {
+			untyped __php__("$this->matches[0][1] += $pos"); // shift the whole-match offset (entry 0) so it is relative to s; only that entry is adjusted here
+			last = s; // remember the full subject string after a successful match
+		}
+		else
+			last = null; // a failed match clears the remembered subject
+		return p > 0;
+	}	
 
 	public function split( s : String ) : Array<String> {
 		return untyped __php__("new _hx_array(preg_split($this->re, $s, $this->hglobal ? -1 : 2))");
@@ -84,16 +95,22 @@
 		return untyped __call__("preg_replace", re, by, s, global ? -1 : 1);
 	}
 
-	public function customReplace( s : String, f : EReg -> String ) : String {
-		var buf = "";
-		while( true ) {
-			if( !match(s) )
+	public function map( s : String, f : EReg -> String ) : String {
+		var offset = 0;
+		var buf = new StringBuf();
+		do {
+			if (!matchSub(s, offset))
 				break;
-			buf += matchedLeft();
-			buf += f(this);
-			s = matchedRight();
-		}
-		buf += s;
-		return buf;
+			var p = matchedPos();
+			buf.add(s.substr(offset, p.pos - offset));
+			buf.add(f(this));
+			offset = p.pos + p.len;
+		} while (global);
+		buf.add(s.substr(offset));
+		return buf.toString();
 	}
+	
+	#if !haxe3
+	public inline function customReplace( s : String, f : EReg -> String ) : String return map(s, f)
+	#end
 }

+ 31 - 5
tests/unit/TestEReg.hx

@@ -61,15 +61,41 @@ class TestEReg extends Test {
 		eq( '"' + block.split(test).join('","') + '"', '"","test",".blah","something:someval",""' );
 		
 		// test custom replace
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" );
-		eq( ~/a+/.customReplace("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" ); // same without 'g'
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]b[]cx" );
+		eq( ~/a+/.map("aaabacx", function(r) return "[" + r.matched(0).substr(1) + "]") , "[aa]bacx" ); // without 'g' only the first match is replaced
 		
-		eq( ~/a+(b*)/g.customReplace("aaabacx", function(r) return "[" + r.matched(1) + "]") , "[b][]cx" );
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matchedRight() + "]") , "[bacx]b[cx]cx" );
+		eq( ~/a+(b*)/g.map("aaabacx", function(r) return "[" + r.matched(1) + "]") , "[b][]cx" );
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matchedRight() + "]") , "[bacx]b[cx]cx" );
 		
 		// we need to change our default customReplace implementation to fix that case
 		// the best is to add a matchSub(s,pos,len)
-		eq( ~/a+/g.customReplace("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx" );
+		eq( ~/a+/g.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx" );
+		
+		// subsequent tests
+		var r = ~/a+/g;
+		eq(r.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx");
+		eq(r.map("aaabacx", function(r) return "[" + r.matchedLeft() + "]") , "[]b[aaab]cx");
+		
+		// matchSub
+		var r = ~/a+/;
+		t(r.matchSub("abab", 0));
+		eq(r.matchedRight(), "bab");
+		t(r.matchSub("abab", 1));
+		eq(r.matchedRight(), "b");
+		eq(r.matchedLeft(), "ab");
+		// same again to make sure state is correct
+		t(r.matchSub("abab", 0));
+		eq(r.matchedRight(), "bab");
+		t(r.matchSub("abab", 1));
+		eq(r.matchedRight(), "b");
+		eq(r.matchedLeft(), "ab");		
+		// length
+		f(r.matchSub("bbaa", 0, 1)); 
+		f(r.matchSub("bbaa", 0, 2)); 
+		f(r.matchSub("bbaa", 1, 1)); 
+		t(r.matchSub("bbaa", 2, 1));
+		eq(r.matchedLeft(), "bb");
+		eq(r.matchedRight(), "a");
 		
 		// this one creates infinite loops on most of the platforms! TOFIX!
 		// eq( ~/x?/g.customReplace("aaabacx", function(r) return "[" + r.matched(0)+ "]") , "[]a[]a[]a[]b[]a[]c[][x]" );