Browse Source

[java/cs] EReg initial implementation

Caue Waneck 13 years ago
parent
commit
8f34f5f6ad

+ 44 - 100
std/cs/_std/EReg.hx

@@ -23,128 +23,77 @@
  * DAMAGE.
  * DAMAGE.
  */
  */
 
 
-@:core_api class EReg {
+import system.text.regularExpressions.Regex;
 
 
-	var r : Dynamic;
-	var last : String;
-	var global : Bool;
+@:core_api class EReg {
 
 
+	private var regex : Regex;
+	private var m : Match;
+	private var isGlobal : Bool;
+	private var cur : String;
+	
 	public function new( r : String, opt : String ) : Void {
 	public function new( r : String, opt : String ) : Void {
-			var a = opt.split("g");
-			global = a.length > 1;
-			if( global )
-				opt = a.join("");
-			this.r = regexp_new_options(r, opt);
+		// Translate the Haxe option letters into .NET RegexOptions flags.
+		// CultureInvariant is always part of the option set.
+		var opts:Int = cast CultureInvariant;
+		for (i in 0...opt.length) untyped {
+			switch(cast(opt[i], Int))
+			{
+				case 'i'.code:
+					opts |= cast(IgnoreCase, Int);
+				case 'g'.code:
+					// not a RegexOptions flag: remembered so split/replace process every match
+					isGlobal = true;
+				case 'm'.code:
+					opts |= cast(Multiline, Int);
+				case 's'.code:
+					// '.' also matches newlines; keeps this target in line with the
+					// Java EReg, which maps 's' to Pattern.DOTALL
+					opts |= cast(Singleline, Int);
+				case 'c'.code:
+					opts |= cast(Compiled, Int);
+			}
+		}
+		
+		this.regex = new Regex(r, cast(opts, RegexOptions));
 	}
 	}
 
 
 	public function match( s : String ) : Bool {
 	public function match( s : String ) : Bool {
-			var p = regexp_match(r,s,0,s.length);
-			if( p )
-				last = s;
-			else
-				last = null;
-			return p;
+		// Run the compiled Regex against s and keep both the resulting Match and the
+		// input string; matched(), matchedLeft(), matchedRight() and matchedPos() read them.
+		m = regex.Match(s);
+		cur = s;
+		return m.Success;
 	}
 	}
 
 
 	public function matched( n : Int ) : String {
 	public function matched( n : Int ) : String {
-			var m = regexp_matched(r,n);
-			return m;
+		// Valid group indices are 0 .. Groups.Count - 1, so n == Count must be rejected too.
+		// The unsigned cast is presumably there so that a negative n fails the same check.
+		if (m == null || cast(n, UInt) >= m.Groups.Count)
+			throw "EReg::matched";
+		return m.Groups[n].Value;
 	}
 	}
 
 
 	public function matchedLeft() : String {
 	public function matchedLeft() : String {
-			var p = regexp_matched_pos(r,0);
-			return last.substr(0,p.pos);
+		// Part of the last matched input that precedes the match. `untyped` bypasses the
+		// Haxe typer, so the native method name must be spelled exactly: Substring.
+		return untyped cur.Substring(0, m.Index);
 	}
 	}
 
 
 	public function matchedRight() : String {
 	public function matchedRight() : String {
-			var p = regexp_matched_pos(r,0);
-			var sz = p.pos+p.len;
-			return last.substr(sz,last.length-sz);
+		// Part of the last matched input that follows the match. `untyped` bypasses the
+		// Haxe typer, so the native method name must be spelled exactly: Substring.
+		return untyped cur.Substring(m.Index + m.Length);
 	}
 	}
 
 
 	public function matchedPos() : { pos : Int, len : Int } {
 	public function matchedPos() : { pos : Int, len : Int } {
-			return regexp_matched_pos(r,0);
+		// Start index and length of the Match stored by the last call to match().
+		return { pos : m.Index, len : m.Length };
 	}
 	}
 
 
 	public function split( s : String ) : Array<String> {
 	public function split( s : String ) : Array<String> {
-			var pos = 0;
-			var len = s.length;
-			var a = new Array();
-			var first = true;
-			do {
-				if( !regexp_match(r,s,pos,len) )
-					break;
-				var p = regexp_matched_pos(r,0);
-				if( p.len == 0 && !first ) {
-					if( p.pos == s.length )
-						break;
-					p.pos += 1;
-				}
-				a.push(s.substr(pos,p.pos - pos));
-				var tot = p.pos + p.len - pos;
-				pos += tot;
-				len -= tot;
-				first = false;
-			} while( global );
-			a.push(s.substr(pos,len));
-			return a;
+		// With the 'g' option every match is a separator and the native Split does the work.
+		if (isGlobal)
+			return Array.ofNative(regex.Split(s));
+		// Otherwise only the first match splits the input.
+		var m = regex.Match(s);
+		// A failed .NET Match reports Index 0 / Length 0, which would produce ["", s];
+		// return the input as the single element instead.
+		if (!m.Success)
+			return [s];
+		return untyped [s.Substring(0, m.Index), s.Substring(m.Index + m.Length)];
 	}
 	}
 
 
 	public function replace( s : String, by : String ) : String {
 	public function replace( s : String, by : String ) : String {
-			var b = new StringBuf();
-			var pos = 0;
-			var len = s.length;
-			var a = by.split("$");
-			var first = true;
-			do {
-				if( !regexp_match(r,s,pos,len) )
-					break;
-				var p = regexp_matched_pos(r,0);
-				if( p.len == 0 && !first ) {
-					if( p.pos == s.length )
-						break;
-					p.pos += 1;
-				}
-				b.addSub(s,pos,p.pos-pos);
-				if( a.length > 0 )
-					b.add(a[0]);
-				var i = 1;
-				while( i < a.length ) {
-					var k = a[i];
-					var c = k.charCodeAt(0);
-					// 1...9
-					if( c >= 49 && c <= 57 ) {
-						var p = try regexp_matched_pos(r,Std.int(c)-48) catch( e : String ) null;
-						if( p == null ){
-							b.add("$");
-							b.add(k);
-						}else{
-						b.addSub(s,p.pos,p.len);
-						b.addSub(k,1,k.length - 1);
-						}
-					} else if( c == null ) {
-						b.add("$");
-						i++;
-						var k2 = a[i];
-						if( k2 != null && k2.length > 0 )
-							b.add(k2);
-					} else
-						b.add("$"+k);
-					i++;
-				}
-				var tot = p.pos + p.len - pos;
-				pos += tot;
-				len -= tot;
-				first = false;
-			} while( global );
-			b.addSub(s,pos,len);
-			return b.toString();
+		// With the 'g' option every match is replaced by the native Replace.
+		if (isGlobal)
+			return regex.Replace(s, by);
+		// Otherwise only the first match is replaced.
+		var m = regex.Match(s);
+		// Nothing matched: return the input unchanged rather than prepending `by`
+		// (a failed .NET Match reports Index 0 / Length 0).
+		if (!m.Success)
+			return s;
+		return untyped (s.Substring(0, m.Index) + by + s.Substring(m.Index + m.Length));
 	}
 	}
 
 
 	public function customReplace( s : String, f : EReg -> String ) : String {
 	public function customReplace( s : String, f : EReg -> String ) : String {
 		var buf = new StringBuf();
 		var buf = new StringBuf();
-		while( true ) {
-			if( !match(s) )
+		while (true)
+		{
+			if (!match(s))
 				break;
 				break;
 			buf.add(matchedLeft());
 			buf.add(matchedLeft());
 			buf.add(f(this));
 			buf.add(f(this));
@@ -154,9 +103,4 @@
 		return buf.toString();
 		return buf.toString();
 	}
 	}
 
 
-	static var regexp_new_options : String -> String -> Dynamic = null; //cpp.Lib.load("regexp","regexp_new_options",2);
-	static var regexp_match : Dynamic -> String -> Int -> Int -> Dynamic = null;// cpp.Lib.load("regexp","regexp_match",4);
-	static var regexp_matched : Dynamic -> Int -> Dynamic = null; //cpp.Lib.load("regexp","regexp_matched",2);
-	static var regexp_matched_pos : Dynamic -> Int -> { pos : Int, len : Int } = null;// cpp.Lib.load("regexp","regexp_matched_pos",2);
-
-}
+}

+ 2 - 0
std/cs/_std/String.hx

@@ -97,5 +97,7 @@ extern class String implements ArrayAccess<Char16> {
 	private function EndsWith(value:String):Bool;
 	private function EndsWith(value:String):Bool;
 	private function TrimStart():String;
 	private function TrimStart():String;
 	private function TrimEnd():String;
 	private function TrimEnd():String;
+	// Binding for the native Substring; the one-argument form is exposed through @:overload.
+	@:overload(function(startIndex:Int):String {})
+	private function Substring(startIndex:Int, length:Int):String;
 
 
 }
 }

+ 52 - 0
std/cs/_std/system/text/regularExpressions/Regex.hx

@@ -0,0 +1,52 @@
+package system.text.regularExpressions;
+import cs.NativeArray;
+
+/**
+	Extern binding for System.Text.RegularExpressions.Regex.
+	Declares only the constructor and the Match / Split / Replace methods.
+**/
+@:native('System.Text.RegularExpressions.Regex') extern class Regex 
+{
+	function new(pattern:String, options:RegexOptions):Void;
+	function Match(input:String):Match;
+	function Split(input:String):NativeArray<String>;
+	function Replace(input:String, replacement:String):String;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.RegexOptions.
+	The C# EReg constructor casts these constructors to Int and ORs them together.
+**/
+@:native("System.Text.RegularExpressions.RegexOptions") extern enum RegexOptions
+{
+	None;
+	IgnoreCase;
+	Multiline;
+	ExplicitCapture;
+	Compiled;
+	Singleline;
+	IgnorePatternWhitespace;
+	RightToLeft;
+	ECMAScript;
+	CultureInvariant;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.Capture.
+	All fields are read-only from Haxe.
+**/
+@:native("System.Text.RegularExpressions.Capture") extern class Capture
+{
+	var Index(default, null):Int;
+	var Length(default, null):Int;
+	var Value(default, null):String;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.Group; adds Success on top of Capture.
+**/
+@:native("System.Text.RegularExpressions.Group") extern class Group extends Capture
+{
+	var Success(default, null):Bool;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.Match, the result type of Regex.Match.
+	Exposes the capture and group collections in addition to the inherited Group fields.
+**/
+@:native("System.Text.RegularExpressions.Match") extern class Match extends Group
+{
+	var Captures(default, null):CaptureCollection;
+	var Groups(default, null):GroupCollection;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.CaptureCollection.
+	Indexable with array access; Count gives the number of entries.
+**/
+@:native("System.Text.RegularExpressions.CaptureCollection") extern class CaptureCollection implements ArrayAccess<Capture>
+{
+	var Count(default, null):Int;
+}
+
+/**
+	Extern binding for System.Text.RegularExpressions.GroupCollection.
+	Indexable with array access; Count gives the number of entries.
+**/
+@:native("System.Text.RegularExpressions.GroupCollection") extern class GroupCollection implements ArrayAccess<Group>
+{
+	var Count(default, null):Int;
+}

+ 77 - 11
std/java/_std/EReg.hx

@@ -34,16 +34,60 @@ class EReg {
 	private var pattern:String;
 	private var pattern:String;
 	private var matcher:Matcher;
 	private var matcher:Matcher;
 	private var cur:String;
 	private var cur:String;
+	private var isGlobal:Bool;
 	
 	
 	/**
 	/**
 		Creates a new regular expression with pattern [r] and
 		Creates a new regular expression with pattern [r] and
 		options [opt].
 		options [opt].
 	**/
 	**/
 	public function new( r : String, opt : String ) {
 	public function new( r : String, opt : String ) {
-		//FIXME opt is ignored by now
-		matcher = Pattern.compile(r).matcher("");
+		// Map each Haxe option letter to the matching java.util.regex.Pattern flag;
+		// letters without a case below are ignored.
+		var flags = 0;
+		for (i in 0...opt.length)
+		{
+			switch(StringTools.fastCodeAt(opt, i))
+			{
+				case 'i'.code:
+					flags |= Pattern.CASE_INSENSITIVE;
+				case 'm'.code:
+					flags |= Pattern.MULTILINE;
+				case 's'.code:
+					flags |= Pattern.DOTALL;
+				case 'g'.code:
+					// not a Pattern flag: recorded so split/replace handle every match
+					isGlobal = true;
+			}
+		}
+		
+		// The matcher is created on an empty input and re-targeted with reset() on each use.
+		matcher = Pattern.compile(convert(r), flags).matcher("");
 		pattern = r;
 		pattern = r;
 	}
 	}
+	
+	/**
+		Placeholder for translating a pattern into Java regex syntax.
+		The conversion is not implemented yet (the draft below is commented out),
+		so the pattern [r] is returned unchanged.
+	**/
+	private static function convert(r:String):String
+	{
+		//some references of the implementation:
+		//http://stackoverflow.com/questions/809647/java-vs-javascript-regex-problem
+		//http://stackoverflow.com/questions/4788413/how-to-convert-javascript-regex-to-safe-java-regex
+		//Some necessary changes:
+		//
+		// \0  -> \x00
+		// \v  -> \x0b
+		// [^] -> [\s\S]
+		// unescaped ', " -> \', \"
+		/* FIXME
+		var pat = new StringBuf();
+		var len = r.length;
+		var i = 0;
+		while (i < len)
+		{
+			var c = StringTools.fastCodeAt(r, i++);
+			switch(c)
+			{
+				case '\\'.code: //escape-sequence
+					
+			}
+		}
+		*/
+		return r;
+	}
 
 
 	/**
 	/**
 		Tells if the regular expression matches the String.
 		Tells if the regular expression matches the String.
@@ -52,7 +96,14 @@ class EReg {
 	public function match( s : String ) : Bool {
 	public function match( s : String ) : Bool {
 		cur = s;
 		cur = s;
 		matcher = matcher.reset(s);
 		matcher = matcher.reset(s);
-		return matcher.find();
+		// Advance to the first non-empty match; zero-length matches are skipped.
+		var found = matcher.find();
+		//FIXME look into why find() sometimes returns a 0-length match
+		while (found && matcher.start() == matcher.end())
+			found = matcher.find();
+		
+		return found;
 	}
 	}
 
 
 	/**
 	/**
@@ -62,7 +113,10 @@ class EReg {
 	**/
 	**/
 	public function matched( n : Int ) : String 
 	public function matched( n : Int ) : String 
 	{
 	{
-		return matcher.group(n);
+		// Group 0 is fetched through the argument-less group() overload.
+		return (n == 0) ? matcher.group() : matcher.group(n);
 	}
 	}
 
 
 	/**
 	/**
@@ -71,7 +125,7 @@ class EReg {
 	**/
 	**/
 	public function matchedLeft() : String 
 	public function matchedLeft() : String 
 	{
 	{
-		return cur.substr(0, matcher.start());
+		// untyped: calls the native substring(begin, end) on the last matched input,
+		// from its start up to where the match begins.
+		return untyped cur.substring(0, matcher.start());
 	}
 	}
 
 
 	/**
 	/**
@@ -80,7 +134,7 @@ class EReg {
 	**/
 	**/
 	public function matchedRight() : String 
 	public function matchedRight() : String 
 	{
 	{
-		return cur.substr(matcher.end());
+		// untyped: calls the native substring(begin, end) on the last matched input,
+		// from the end of the match to the end of the string.
+		return untyped cur.substring(matcher.end(), cur.length);
 	}
 	}
 
 
 	/**
 	/**
@@ -96,12 +150,17 @@ class EReg {
 		Split a string by using the regular expression to match
 		Split a string by using the regular expression to match
 		the separators.
 		the separators.
 	**/
 	**/
-	@:functionBody('
-		return new Array<String>(s.split(this.pattern));
-	')
 	public function split( s : String ) : Array<String> 
 	public function split( s : String ) : Array<String> 
 	{
 	{
-		return null;
+		if (isGlobal)
+		{
+			// every match is a separator: delegate to the native Pattern.split
+			return Array.ofNative(matcher.pattern().split(s));
+		} else {
+			// only the first match splits the input
+			var m = matcher;
+			m.reset(s);
+			// start()/end() throw IllegalStateException after a failed find(),
+			// so return the input as the single element when nothing matches
+			if (!m.find())
+				return [s];
+			return untyped [s.substring(0, m.start()), s.substring(m.end(), s.length)];
+		}
 	}
 	}
 
 
 	/**
 	/**
@@ -110,8 +169,15 @@ class EReg {
 		while replacing. [$$] means the [$] character.
 		while replacing. [$$] means the [$] character.
 	**/
 	**/
 	public function replace( s : String, by : String ) : String {
 	public function replace( s : String, by : String ) : String {
+		var matcher = matcher;
 		matcher.reset(s);
 		matcher.reset(s);
-		return matcher.replaceAll(by);
+		if (isGlobal)
+		{
+			// every match is replaced by the native replaceAll
+			return matcher.replaceAll(by);
+		} else {
+			// only the first match is replaced; start()/end() throw IllegalStateException
+			// after a failed find(), so return the input unchanged when nothing matches
+			if (!matcher.find())
+				return s;
+			return untyped (s.substring(0, matcher.start()) + by + s.substring(matcher.end(), s.length));
+		}
 	}
 	}
 
 
 	/**
 	/**

+ 2 - 0
std/java/_std/String.hx

@@ -98,6 +98,8 @@ extern class String {
 	private function endsWith( str : String ) : Bool;
 	private function endsWith( str : String ) : Bool;
 	
 	
 	private function replace( sub : String, by : String ) : String;
 	private function replace( sub : String, by : String ) : String;
+	
+	// Binding for the native two-argument substring; private, so Haxe code reaches it through untyped.
+	private function substring( begin : Int, end : Int ) : String;
 
 
 	static function fromCharCode( code : Int ) : String;
 	static function fromCharCode( code : Int ) : String;
 
 

+ 15 - 6
std/java/util/regex/Regex.hx

@@ -1,15 +1,20 @@
 package java.util.regex;
 package java.util.regex;
-
-/**
- * ...
- * @author waneck
- */
+import java.NativeArray;
 
 
 extern class Pattern
 extern class Pattern
 {
 {
-	static function compile(regex:String):Pattern;
+	static function compile(regex:String, flags:Int):Pattern;
 	
 	
 	function matcher(input:String):Matcher;
 	function matcher(input:String):Matcher;
+	// Splits input around matches of this pattern; the result is a native array.
+	function split(input:String):NativeArray<String>;
+	
+	// Flag constants for the flags argument of compile(); combined with bitwise OR.
+	static var CANON_EQ(default, null):Int;
+	static var CASE_INSENSITIVE(default, null):Int;
+	static var COMMENTS(default, null):Int;
+	static var DOTALL(default, null):Int;
+	static var MULTILINE(default, null):Int;
+	static var UNICODE_CASE(default, null):Int;
+	static var UNIX_LINES (default, null):Int;
 }
 }
 
 
 
 
@@ -18,6 +23,7 @@ extern interface MatchResult
 	@:overload(function(group:Int):Int {})
 	@:overload(function(group:Int):Int {})
 	function end():Int;
 	function end():Int;
 	
 	
+	@:overload(function():String {})
 	function group(group:Int):String;
 	function group(group:Int):String;
 	
 	
 	function groupCount():Int;
 	function groupCount():Int;
@@ -33,6 +39,7 @@ extern class Matcher implements MatchResult
 	@:overload(function(group:Int):Int {})
 	@:overload(function(group:Int):Int {})
 	function end():Int;
 	function end():Int;
 	
 	
+	@:overload(function():String {})
 	function group(group:Int):String;
 	function group(group:Int):String;
 	
 	
 	function groupCount():Int;
 	function groupCount():Int;
@@ -43,4 +50,6 @@ extern class Matcher implements MatchResult
 	function find():Bool;
 	function find():Bool;
 	
 	
 	function replaceAll(replacement:String):String;
 	function replaceAll(replacement:String):String;
+	
+	function pattern():Pattern;
 }
 }