Browse Source

added EReg specification (part 1)

Simon Krajewski 12 years ago
parent
commit
30a94fc78b
3 changed files with 159 additions and 22 deletions
  1. 86 20
      std/EReg.hx
  2. 5 2
      tests/unit/TestEReg.hx
  3. 68 0
      tests/unit/unitstd/EReg.unit.hx

+ 86 - 20
std/EReg.hx

@@ -19,73 +19,139 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
 /**
-	Regular expressions are a way to find regular patterns into
-	Strings. Have a look at the tutorial on haXe website to learn
-	how to use them.
+	The EReg class represents regular expressions.
+	
+	While basic usage and patterns consistently work across platforms, some more
+	complex operations may yield different results. This is a necessary
+	trade-off to retain a certain level of performance.
+	
+	EReg instances can be created by calling the constructor, or with the
+	special syntax ~/pattern/modifier
+	
+	EReg instances maintain an internal state, which is affected by several of
+	its methods.
+	
+	A detailed explanation of the supported operations is available at
+	http://haxe.org/doc/cross/regexp
 **/
 class EReg {
 
 	/**
-		Creates a new regular expression with pattern [r] and
-		options [opt].
+		Creates a new regular expression with pattern [r] and modifiers [opt].
+		
+		This is equivalent to the shorthand syntax ~/r/opt
+		
+		If [r] or [opt] are null, the result is unspecified.
 	**/
 	public function new( r : String, opt : String ) {
 		throw "Regular expressions are not implemented for this platform";
 	}
 
 	/**
-		Tells if the regular expression matches the String.
-		Updates the internal state accordingly.
+		Tells if [this] regular expression matches String [s].
+		
+		This method modifies the internal state.
+		
+		If [s] is null, the result is unspecified.
 	**/
 	public function match( s : String ) : Bool {
 		return false;
 	}
 
 	/**
-		Returns a matched group or throw an expection if there
-		is no such group. If [n = 0], the whole matched substring
-		is returned.
+		Returns the matched sub-group [n] of [this] EReg.
+		
+		This method should only be called after [this].match() or
+		[this].matchSub(), and then operates on the String of that operation.
+		
+		The index [n] corresponds to the n-th set of parentheses in the pattern
+		of [this] EReg. If no such sub-group exists, an exception is thrown.
+		
+		If [n] equals 0, the whole matched substring is returned.
 	**/
 	public function matched( n : Int ) : String {
 		return null;
 	}
 
 	/**
-		Returns the part of the string that was as the left of
-		of the matched substring.
+		Returns the part to the left of the last matched substring.
+		
+		If the most recent call to [this].match() or [this].matchSub() did not
+		match anything, the result is unspecified.
+		
+		If the global g modifier was in place for the matching, only the
+		substring to the left of the leftmost match is returned.
+		
+		The result does not include the matched part.
 	**/
 	public function matchedLeft() : String {
 		return null;
 	}
 
 	/**
-		Returns the part of the string that was at the right of
-		of the matched substring.
+		Returns the part to the right of the last matched substring.
+		
+		If the most recent call to [this].match() or [this].matchSub() did not
+		match anything, the result is unspecified.
+		
+		If the global g modifier was in place for the matching, only the
+		substring to the right of the leftmost match is returned.
+		
+		The result does not include the matched part.
 	**/
 	public function matchedRight() : String {
 		return null;
 	}
 
 	/**
-		Returns the position of the matched substring within the
-		original matched string.
+		Returns the position and length of the last matched substring, within
+		the String which was last used as argument to [this].match() or
+		[this].matchSub().
+		
+		If the most recent call to [this].match() or [this].matchSub() did not
+		match anything, the result is unspecified.
+		
+		If the global g modifier was in place for the matching, the position and
+		length of the leftmost matched substring are returned.
 	**/
 	public function matchedPos() : { pos : Int, len : Int } {
 		return null;
 	}
 
 	/**
-		Tells if the regular expression matches the String between pos and pos + len.
-		Updates the internal state accordingly.
+		Tells if [this] regular expression matches a substring of String [s].
+		
+		This function expects [pos] and [len] to describe a valid substring of
+		[s], or else the result is unspecified. To get more robust behavior,
+		[this].match(s.substr(pos,len)) can be used instead.
+		
+		This method modifies the internal state.
+		
+		If [s] is null, the result is unspecified.
 	**/
 	public function matchSub( s : String, pos : Int, len : Int = 0):Bool {
 		return false;
 	}
 
 	/**
-		Split a string by using the regular expression to match
-		the separators.
+		Splits String [s] at all substrings [this] EReg matches.
+		
+		If a match is found at the start of [s], the result contains a leading
+		empty String "" entry.
+		
+		If a match is found at the end of [s], the result contains a trailing
+		empty String "" entry.
+		
+		If two matching substrings appear next to each other, the result
+		contains the empty String "" between them.
+		
+		By default, this method splits [s] into two parts at the first matched
+		substring. If the global g modifier is in place, [s] is split at each
+		matched substring.
+		
+		If [s] is null, the result is unspecified.
 	**/
 	public function split( s : String ) : Array<String> {
 		return null;

+ 5 - 2
tests/unit/TestEReg.hx

@@ -92,7 +92,7 @@ class TestEReg extends Test {
 		// length
 		f(r.matchSub("bbaa", 0, 1));
 		f(r.matchSub("bbaa", 0, 2));
-		f(r.matchSub("bbaa", 1, 1)); 
+		f(r.matchSub("bbaa", 1, 1));
 		t(r.matchSub("bbaa", 2, 1));
 		eq(r.matchedLeft(), "bb");
 		eq(r.matchedRight(), "a");
@@ -120,7 +120,10 @@ class TestEReg extends Test {
 		var r = ~/^/m;
 		eq(r.map("\n", f), "([][][\n])b\n");
 		eq(r.map("a", f), "([][][a])ba");
-		eq(r.map("aa\na", f), "([][][aa\na])baa\na");		
+		eq(r.map("aa\na", f), "([][][aa\na])baa\na");
+		
+		var r = ~/a/;
+		exc(function() r.matched(0));
 		#end
 	}
 

+ 68 - 0
tests/unit/unitstd/EReg.unit.hx

@@ -0,0 +1,68 @@
+#if !flash8
+var r = ~/a/;
+var rg = ~/a/g;
+var rg2 = ~/aa/g;
+r.match("") == false;
+r.match("b") == false;
+r.match("a") == true;
+r.matched(0) == "a";
+r.matchedLeft() == "";
+r.matchedRight() == "";
+var pos = r.matchedPos();
+pos.pos == 0;
+pos.len == 1;
+
+r.match("aa") == true;
+r.matched(0) == "a";
+r.matchedLeft() == "";
+r.matchedRight() == "a";
+var pos = r.matchedPos();
+pos.pos == 0;
+pos.len == 1;
+
+rg.match("aa") == true;
+rg.matched(0) == "a";
+rg.matchedLeft() == "";
+rg.matchedRight() == "a";
+var pos = rg.matchedPos();
+pos.pos == 0;
+pos.len == 1;
+
+rg2.match("aa") == true;
+rg2.matched(0) == "aa";
+rg2.matchedLeft() == "";
+rg2.matchedRight() == "";
+var pos = rg2.matchedPos();
+pos.pos == 0;
+pos.len == 2;
+
+rg2.match("AaaBaaC") == true;
+rg2.matched(0) == "aa";
+rg2.matchedLeft() == "A";
+rg2.matchedRight() == "BaaC";
+var pos = rg2.matchedPos();
+pos.pos == 1;
+pos.len == 2;
+
+// split
+~/a/.split("") == [""];
+~/a/.split("a") == ["",""];
+~/a/.split("aa") == ["","a"];
+~/a/.split("b") == ["b"];
+~/a/.split("ab") == ["","b"];
+~/a/.split("ba") == ["b",""];
+~/a/.split("aba") == ["","ba"];
+~/a/.split("bab") == ["b","b"];
+~/a/.split("baba") == ["b","ba"];
+
+// split + g
+~/a/g.split("") == [""];
+~/a/g.split("a") == ["",""];
+~/a/g.split("aa") == ["","",""];
+~/a/g.split("b") == ["b"];
+~/a/g.split("ab") == ["","b"];
+~/a/g.split("ba") == ["b",""];
+~/a/g.split("aba") == ["","b",""];
+~/a/g.split("bab") == ["b","b"];
+~/a/g.split("baba") == ["b","b",""];
+#end