Browse Source

Fix several problems with regex extensions and their accepted parameters

mingodad 8 years ago
parent
commit
879c3de062

+ 9 - 10
SquiLu-ext/sq_pcre.cpp

@@ -351,20 +351,15 @@ static SQRESULT sq_pcre_gsub(HSQUIRRELVM v)
 	SQ_FUNC_VARS(v);
     GET_pcre_INSTANCE();
     SQ_GET_STRING(v, 2, str);
-    SQ_OPT_INTEGER(v, 4, start_offset, 0);
-    SQ_OPT_INTEGER(v, 5, options, 0);
-    SQ_OPT_INTEGER(v, 6, max_len, 0);
-
-    if(max_len)
-    {
-        str_size = calc_new_size_by_max_len(start_offset, max_len, str_size);
-    }
+    SQ_OPT_INTEGER(v, 4, options, 0);
+    SQ_OPT_INTEGER(v, 5, max_sub, 0);
+    if(max_sub < 0) return sq_throwerror(v, _SC("max substitutions can't be less than zero"));
 
 	SQBlob blob(0,8192);
 	const int replacement_idx = 3;
 	SQObjectType ptype = sq_gettype(v, replacement_idx);
     const SQChar *replacement;
-    SQInteger replacement_size;
+    SQInteger replacement_size, start_offset=0;
 
     SQInteger rc;
     bool isFirst = true;
@@ -492,6 +487,10 @@ static SQRESULT sq_pcre_gsub(HSQUIRRELVM v)
                 return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
 	    }
 		start_offset = self->ovector[(rc*2)-1]; //the last match + 1
+        if(max_sub)
+        {
+            if(--max_sub == 0) break;
+        }
 	}
 
     if(rc < -2) //only no matching errore
@@ -532,7 +531,7 @@ static SQRegFunction sq_pcre_methods[] =
 	_DECL_FUNC(exec,-3,_SC("xsannn")),
 	_DECL_FUNC(match,-2,_SC("xsnnn")),
 	_DECL_FUNC(gmatch,-3,_SC("xscnnn")),
-	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nnn")),
+	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nn")),
 	_DECL_FUNC(_typeof,1,_SC("x")),
     _DECL_FUNC(version,1,_SC(".")),
     _DECL_FUNC(loadlib,2,_SC(".s")),

+ 16 - 17
SquiLu-ext/sq_pcre2.cpp

@@ -294,8 +294,8 @@ static SQRESULT sq_pcre2_exec(HSQUIRRELVM v)
 
     int rc = dlpcre2_match(
         self->re,             /* the compiled pattern */
-        (PCRE2_SPTR)subject,                  /* the subject string */
-        subject_size,             /* the length of the subject */
+        (PCRE2_SPTR)subject,  /* the subject string */
+        subject_size,         /* the length of the subject */
         start_offset,         /* start at offset 0 in the subject */
         options,              /* 0 = default options */
         self->match_data,     /* block for storing the result */
@@ -342,8 +342,8 @@ static SQRESULT sq_pcre2_match(HSQUIRRELVM v)
 
     int rc = dlpcre2_match(
         self->re,             /* the compiled pattern */
-        (PCRE2_SPTR)subject,                  /* the subject string */
-        subject_size,             /* the length of the subject */
+        (PCRE2_SPTR)subject,  /* the subject string */
+        subject_size,         /* the length of the subject */
         start_offset,         /* start at offset 0 in the subject */
         options,              /* 0 = default options */
         self->match_data,     /* block for storing the result */
@@ -382,8 +382,8 @@ static SQRESULT sq_pcre2_gmatch(HSQUIRRELVM v)
 
     while( (rc = dlpcre2_match(
         self->re,             /* the compiled pattern */
-        (PCRE2_SPTR)subject,                  /* the subject string */
-        subject_size,             /* the length of the subject */
+        (PCRE2_SPTR)subject,  /* the subject string */
+        subject_size,         /* the length of the subject */
         start_offset,         /* start at offset 0 in the subject */
         options,              /* 0 = default options */
         self->match_data,     /* block for storing the result */
@@ -432,27 +432,22 @@ static SQRESULT sq_pcre2_gsub(HSQUIRRELVM v)
 	SQ_FUNC_VARS(v);
     GET_pcre2_INSTANCE();
     SQ_GET_STRING(v, 2, str);
-    SQ_OPT_INTEGER(v, 4, start_offset, 0);
-    SQ_OPT_INTEGER(v, 5, options, 0);
-    SQ_OPT_INTEGER(v, 6, max_len, 0);
-
-    if(max_len)
-    {
-        str_size = calc_new_size_by_max_len(start_offset, max_len, str_size);
-    }
+    SQ_OPT_INTEGER(v, 4, options, 0);
+    SQ_OPT_INTEGER(v, 5, max_sub, 0);
+    if(max_sub < 0) return sq_throwerror(v, _SC("max substitutions can't be less than zero"));
 
 	SQBlob blob(0,8192);
 	const int replacement_idx = 3;
 	SQObjectType ptype = sq_gettype(v, replacement_idx);
     const SQChar *replacement;
-    SQInteger replacement_size;
+    SQInteger replacement_size, start_offset=0;
 
     SQInteger rc;
     bool isFirst = true;
 
     while( (rc = dlpcre2_match(
         self->re,             /* the compiled pattern */
-        (PCRE2_SPTR)str,                  /* the subject string */
+        (PCRE2_SPTR)str,      /* the subject string */
         str_size,             /* the length of the subject */
         start_offset,         /* start at offset 0 in the subject */
         options,              /* 0 = default options */
@@ -572,6 +567,10 @@ static SQRESULT sq_pcre2_gsub(HSQUIRRELVM v)
                 return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
 	    }
 		start_offset = self->ovector[(rc*2)-1]; //the last match + 1
+        if(max_sub)
+        {
+            if(--max_sub == 0) break;
+        }
 	}
 
     if(rc < -2) //only no matching errore
@@ -754,7 +753,7 @@ static SQRegFunction sq_pcre2_methods[] =
 	_DECL_FUNC(exec,-3,_SC("xsannn")),
 	_DECL_FUNC(match,-2,_SC("xsnnn")),
 	_DECL_FUNC(gmatch,-3,_SC("xscnnn")),
-	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nnn")),
+	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nn")),
 	_DECL_FUNC(_typeof,1,_SC("x")),
     _DECL_FUNC(version,1,_SC(".")),
     _DECL_FUNC(loadlib,2,_SC(".s")),

+ 59 - 79
SquiLu-ext/sq_tre.cpp

@@ -139,22 +139,6 @@ if(!dltre_version) return false;
 
 #endif // SQ_USE_TRE_STATIC
 
-static SQInteger calc_new_size_by_max_len(SQInteger start_pos, SQInteger max_len, SQInteger curr_size)
-{
-    SQInteger new_size;
-    if(start_pos < 0)
-    {
-        new_size = curr_size + start_pos;
-        start_pos = new_size < 0 ? 0 : new_size;
-    }
-    if(max_len > 0) new_size = start_pos + max_len;
-    else new_size = curr_size + max_len;
-    if( (new_size < curr_size) && (new_size > start_pos) )
-    {
-        return new_size;
-    }
-    return curr_size;
-}
 
 struct sqtre_st {
     regex_t re;
@@ -231,27 +215,30 @@ static SQRESULT sq_tre_constructor(HSQUIRRELVM v)
 	return 0;
 }
 
+#define GET_EXTRA_MATCH_PARAMS(stack_pos) \
+    SQ_OPT_INTEGER(v, stack_pos, start_offset, 0); \
+    SQ_OPT_INTEGER(v, stack_pos+1, eflags, 0); \
+    SQ_OPT_INTEGER(v, stack_pos+2, max_len, 0); \
+    \
+    if(start_offset < 0) return sq_throwerror(v, _SC("start_offset can't be negative")); \
+    SQInteger subject_str_size = subject_size - start_offset; \
+    if(subject_str_size < 0) return sq_throwerror(v, _SC("str size - start_offset can't be negative")); \
+    const SQChar *subject_str = subject + start_offset; \
+    if(max_len && (max_len < subject_str_size)) subject_str_size = max_len;
 static SQRESULT sq_tre_exec(HSQUIRRELVM v)
 {
 	SQ_FUNC_VARS(v);
     GET_tre_INSTANCE();
     SQ_GET_STRING(v, 2, subject);
-    SQ_OPT_INTEGER(v, 4, start_offset, 0);
-    SQ_OPT_INTEGER(v, 5, eflags, 0);
-    SQ_OPT_INTEGER(v, 6, max_len, 0);
-
-    if(max_len)
-    {
-        subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
-    }
+    GET_EXTRA_MATCH_PARAMS(4);
 
     int rc = dltre_reganexec(
-        &self->re,             /* the compiled pattern */
-        subject+start_offset, /* the subject string */
-        subject_size,         /* the length of the subject */
+        &self->re,          /* the compiled pattern */
+        subject_str,        /* the subject string */
+        subject_str_size,   /* the length of the subject */
         &self->amatch,
         self->aparams,
-        eflags);             /* 0 = default eflags */
+        eflags);            /* 0 = default eflags */
 
     const int array_pos = 3;
     SQInteger rtype = sq_gettype(v, array_pos);
@@ -265,10 +252,10 @@ static SQRESULT sq_tre_exec(HSQUIRRELVM v)
             sq_clear(v, array_pos);
             for (int i = 0; i < nelms; i++)
             {
-                SQInteger pos = pmatch[i].rm_so;
+                SQInteger pos = pmatch[i].rm_so + start_offset;
                 sq_pushinteger(v, pos);
                 sq_arrayappend(v, array_pos);
-                pos = pmatch[i].rm_eo;
+                pos = pmatch[i].rm_eo + start_offset;
                 sq_pushinteger(v, pos);
                 sq_arrayappend(v, array_pos);
             }
@@ -284,29 +271,22 @@ static SQRESULT sq_tre_match(HSQUIRRELVM v)
     SQ_FUNC_VARS(v);
     GET_tre_INSTANCE();
     SQ_GET_STRING(v, 2, subject);
-    SQ_OPT_INTEGER(v, 3, start_offset, 0);
-    SQ_OPT_INTEGER(v, 4, eflags, 0);
-    SQ_OPT_INTEGER(v, 5, max_len, 0);
-
-    if(max_len)
-    {
-        subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
-    }
+    GET_EXTRA_MATCH_PARAMS(3);
 
     int rc = dltre_reganexec(
-        &self->re,             /* the compiled pattern */
-        subject+start_offset, /* the subject string */
-        subject_size,         /* the length of the subject */
+        &self->re,          /* the compiled pattern */
+        subject_str,        /* the subject string */
+        subject_str_size,   /* the length of the subject */
         &self->amatch,
         self->aparams,
-        eflags);             /* 0 = default eflags */
+        eflags);            /* 0 = default eflags */
 
     if(rc == 0)
     {
         regmatch_t *pmatch = self->amatch.pmatch;
         SQInteger start_pos = pmatch[0].rm_so, end_pos = pmatch[0].rm_eo;
-        if(start_pos == end_pos) sq_pushinteger(v, start_pos); //empty match return it's position
-        else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
+        if(start_pos == end_pos) sq_pushinteger(v, start_pos + start_offset); //empty match return it's position
+        else sq_pushstring(v, subject_str + start_pos, end_pos - start_pos);
         return 1;
     }
     if(rc == REG_ESPACE) //only no matching errore
@@ -321,28 +301,22 @@ static SQRESULT sq_tre_gmatch(HSQUIRRELVM v)
     SQ_FUNC_VARS(v);
     GET_tre_INSTANCE();
     SQ_GET_STRING(v, 2, subject);
-    SQ_OPT_INTEGER(v, 4, start_offset, 0);
-    SQ_OPT_INTEGER(v, 5, eflags, 0);
-    SQ_OPT_INTEGER(v, 6, max_len, 0);
+    GET_EXTRA_MATCH_PARAMS(4);
 
     SQInteger rc;
     bool isFirst = true;
 
-    if(max_len)
-    {
-        subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
-    }
 
     regmatch_t *pmatch = self->amatch.pmatch;
     SQInteger nmatch = self->amatch.nmatch;
 
     while( (rc = dltre_reganexec(
         &self->re,             /* the compiled pattern */
-        subject+start_offset, /* the subject string */
-        subject_size,         /* the length of the subject */
+        subject_str,           /* the subject string */
+        subject_str_size,      /* the length of the subject */
         &self->amatch,
         self->aparams,
-        eflags)) == 0)           /* use default match context */
+        eflags)) == 0)         /* use default match context */
     {
         if(isFirst)
         {
@@ -350,13 +324,13 @@ static SQRESULT sq_tre_gmatch(HSQUIRRELVM v)
             isFirst = false;
         }
         sq_pushroottable(v); //this
-        SQInteger start_pos, end_pos, i = 0,
+        SQInteger start_pos, end_pos = 0, i = 0,
             param_count = 1; //root table already on the stack
         for(;i < nmatch; i++) {
             start_pos = pmatch[i].rm_so;
             end_pos = pmatch[i].rm_eo;
-            if(start_pos == end_pos) sq_pushinteger(v, start_pos); //empty match return it's position
-            else sq_pushstring(v, subject + start_offset + start_pos, end_pos - start_pos);
+            if(start_pos == end_pos) sq_pushinteger(v, start_pos + start_offset); //empty match return it's position
+            else sq_pushstring(v, subject_str + start_pos, end_pos - start_pos);
             ++param_count;
         }
         i = sq_call(v, param_count, SQTrue, SQTrue);
@@ -370,7 +344,11 @@ static SQRESULT sq_tre_gmatch(HSQUIRRELVM v)
 
         if(!keep_matching) break;
 
-        start_offset += pmatch[nmatch-1].rm_eo; //the last match + 1
+        end_pos = pmatch[0].rm_eo;
+        subject_str_size -= end_pos;
+        if(subject_str_size <= 0) break;
+        start_offset += end_pos;
+        subject_str += end_pos; //the last match + 1
     }
     if(rc == REG_ESPACE) //only no matching errore
     {
@@ -385,20 +363,15 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
 	SQ_FUNC_VARS(v);
     GET_tre_INSTANCE();
     SQ_GET_STRING(v, 2, subject);
-    SQ_OPT_INTEGER(v, 4, start_offset, 0);
-    SQ_OPT_INTEGER(v, 5, eflags, 0);
-    SQ_OPT_INTEGER(v, 6, max_len, 0);
-
-    if(max_len)
-    {
-        subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
-    }
+    SQ_OPT_INTEGER(v, 4, eflags, 0);
+    SQ_OPT_INTEGER(v, 5, max_sub, 0);
+    if(max_sub < 0) return sq_throwerror(v, _SC("max substitutions can't be less than zero"));
 
 	SQBlob blob(0,8192);
 	const int replacement_idx = 3;
 	SQObjectType ptype = sq_gettype(v, replacement_idx);
     const SQChar *replacement;
-    SQInteger replacement_size;
+    SQInteger replacement_size, start_offset=0;
 
     SQInteger rc;
     bool isFirst = true;
@@ -410,15 +383,15 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
     }
 
     while( (rc = dltre_reganexec(
-        &self->re,             /* the compiled pattern */
-        subject+start_offset, /* the subject string */
-        subject_size,         /* the length of the subject */
+        &self->re,                  /* the compiled pattern */
+        subject,                /* the subject string */
+        subject_size,  /* the length of the subject */
         &self->amatch,
         self->aparams,
-        eflags)) == 0)           /* use default match context */
+        eflags)) == 0)              /* use default match context */
     {
         SQInteger i, start_pos, end_pos;
-	    blob.Write(subject+start_offset, pmatch[0].rm_so);
+	    blob.Write(subject, pmatch[0].rm_so);
 	    switch(ptype){
 	        case OT_CLOSURE:{
                 if(isFirst)
@@ -431,8 +404,8 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
                 for(i=0; i < nmatch; i++) {
                     start_pos = pmatch[i].rm_so;
                     end_pos = pmatch[i].rm_eo;
-                    if(start_pos == end_pos) sq_pushinteger(v, start_pos);
-                    else sq_pushstring(v, subject + start_offset + start_pos, end_pos - start_pos);
+                    if(start_pos == end_pos) sq_pushinteger(v, start_pos + start_offset);
+                    else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
                     ++param_count;
                 }
                 i = sq_call(v, param_count, SQTrue, SQTrue);
@@ -461,7 +434,7 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
                 for(i=0; i < nmatch; i++) {
                     start_pos = pmatch[i].rm_so;
                     end_pos = pmatch[i].rm_eo;
-                    sq_pushstring(v, subject + start_offset + start_pos, end_pos - start_pos);
+                    sq_pushstring(v, subject + start_pos, end_pos - start_pos);
                     if(SQ_SUCCEEDED(sq_get(v, replacement_idx)) &&
                             SQ_SUCCEEDED(sq_getstr_and_size(v, -1, &replacement, &replacement_size))){
                         blob.Write(replacement, replacement_size);
@@ -485,7 +458,7 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
                                 {
                                     start_pos = pmatch[j].rm_so;
                                     end_pos = pmatch[j].rm_eo;
-                                    blob.Write(subject+start_offset+start_pos, end_pos-start_pos);
+                                    blob.Write(subject + start_pos, end_pos-start_pos);
                                     break;
                                 }
                                 ++match_idx;
@@ -518,7 +491,14 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
 	        default:
                 return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
 	    }
-		start_offset += pmatch[nmatch-1].rm_eo; //the last match + 1
+        end_pos = pmatch[0].rm_eo;
+        subject_size -= end_pos;
+        if(subject_size <= 0) break;
+        subject += end_pos; //the last match + 1
+        if(max_sub)
+        {
+            if(--max_sub == 0) break;
+        }
 	}
 
     if(rc == REG_ESPACE) //only no matching errore
@@ -526,7 +506,7 @@ static SQRESULT sq_tre_gsub(HSQUIRRELVM v)
         return sq_throwerror(v, _SC("tre_match error %d"), (int)rc);
     }
 
-    if(subject_size) blob.Write(subject+start_offset, subject_size-start_offset);
+    if(subject_size > 0) blob.Write(subject+start_offset, subject_size-start_offset);
 	sq_pushstring(v, (const SQChar *)blob.GetBuf(), blob.Len());
 	return 1;
 }
@@ -659,7 +639,7 @@ static SQRegFunction sq_tre_methods[] =
 	_DECL_FUNC(exec,-3,_SC("xsannn")),
 	_DECL_FUNC(match,-2,_SC("xsnnn")),
 	_DECL_FUNC(gmatch,-3,_SC("xscnnn")),
-	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nnn")),
+	_DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nn")),
 	_DECL_FUNC(_typeof,1,_SC("x")),
     _DECL_FUNC(version,1,_SC(".")),
 	_DECL_FUNC(have_approx,1,_SC("x")),

+ 1 - 1
SquiLu/include/sqstdstring.h

@@ -16,7 +16,7 @@ typedef struct {
 
 SQUIRREL_API SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error);
 SQUIRREL_API void sqstd_rex_free(SQRex *exp);
-SQUIRREL_API SQBool sqstd_rex_match(SQRex* exp,const SQChar* text);
+SQUIRREL_API SQBool sqstd_rex_match(SQRex* exp,const SQChar* text, SQInteger text_size);
 SQUIRREL_API SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end);
 SQUIRREL_API SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end);
 SQUIRREL_API SQInteger sqstd_rex_getsubexpcount(SQRex* exp);

+ 43 - 23
SquiLu/sqstdlib/sqstdrex.cpp

@@ -38,6 +38,7 @@ static const SQChar *g_nnames[] =
 #define OP_BOL			(MAX_CHAR+12)
 #define OP_WB			(MAX_CHAR+13)
 #define OP_MB           (MAX_CHAR+14) //match balanced
+#define OP_EMPTY        (MAX_CHAR+15) //match position
 
 #define SQREX_SYMBOL_ANY_CHAR ('.')
 #define SQREX_SYMBOL_GREEDY_ONE_OR_MORE ('+')
@@ -81,7 +82,7 @@ static SQInteger sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)
 	SQRexNode n;
 	n.type = type;
 	n.next = n.right = n.left = -1;
-	if(type == OP_EXPR)
+	if((type == OP_EXPR) || (type == OP_EMPTY))
 		n.right = exp->_nsubexpr++;
 	if(exp->_nallocated < (exp->_nsize + 1)) {
 		SQInteger oldsize = exp->_nallocated;
@@ -172,7 +173,8 @@ static SQInteger sqstd_rex_charnode(SQRex *exp,SQBool isclass)
 				return sqstd_rex_newnode(exp,t);
 		}
 	}
-	else if(!scisprint(*exp->_p)) {
+	//else if(!scisprint(*exp->_p)) {
+	else if(((SQUChar)*exp->_p) < ' ') {
 
 		sqstd_rex_error(exp,_SC("letter expected"));
 	}
@@ -254,14 +256,26 @@ static SQInteger sqstd_rex_element(SQRex *exp)
 			sqstd_rex_expect(exp,':');
 			expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);
 		}
+		else if(*exp->_p ==')')
+        {
+            exp->_p++;
+			expr = sqstd_rex_newnode(exp,OP_EMPTY);
+			if(*exp->_p !='\0')
+            {
+                SQInteger newn = sqstd_rex_list(exp);
+                exp->_nodes[expr].next = newn;
+            }
+            ret = expr;
+            break;
+        }
 		else
 			expr = sqstd_rex_newnode(exp,OP_EXPR);
-		SQInteger newn = sqstd_rex_list(exp);
-		exp->_nodes[expr].left = newn;
-		ret = expr;
-		sqstd_rex_expect(exp,')');
-			  }
-			  break;
+            SQInteger newn = sqstd_rex_list(exp);
+            exp->_nodes[expr].left = newn;
+            ret = expr;
+            sqstd_rex_expect(exp,')');
+        }
+        break;
 	case '[':
 		exp->_p++;
 		ret = sqstd_rex_class(exp);
@@ -287,19 +301,19 @@ static SQInteger sqstd_rex_element(SQRex *exp)
 			p0 = (unsigned short)sqstd_rex_parsenumber(exp);
 			/*******************************/
 			switch(*exp->_p) {
-		case '}':
-			p1 = p0; exp->_p++;
-			break;
-		case ',':
-			exp->_p++;
-			p1 = 0xFFFF;
-			if(isdigit(*exp->_p)){
-				p1 = (unsigned short)sqstd_rex_parsenumber(exp);
-			}
-			sqstd_rex_expect(exp,'}');
-			break;
-		default:
-			sqstd_rex_error(exp,_SC(", or } expected"));
+                case '}':
+                    p1 = p0; exp->_p++;
+                    break;
+                case ',':
+                    exp->_p++;
+                    p1 = 0xFFFF;
+                    if(isdigit(*exp->_p)){
+                        p1 = (unsigned short)sqstd_rex_parsenumber(exp);
+                    }
+                    sqstd_rex_expect(exp,'}');
+                    break;
+                default:
+                    sqstd_rex_error(exp,_SC(", or } expected"));
 			}
 			/*******************************/
 			isgreedy = SQTrue;
@@ -466,6 +480,7 @@ static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar
 			return NULL;
 			break;
 	}
+	case OP_EMPTY: //zero length capture
 	case OP_EXPR:
 	case OP_NOCAPEXPR:{
 			SQRexNode *n = &exp->_nodes[node->left];
@@ -475,6 +490,11 @@ static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar
 				capture = exp->_currsubexp;
 				exp->_matches[capture].begin = cur;
 				exp->_currsubexp++;
+				if(type == OP_EMPTY)
+                {
+                    exp->_matches[capture].len = -1;
+                    return cur;
+                }
 			}
 			SQInteger tempcap = exp->_currsubexp;
 			do {
@@ -614,11 +634,11 @@ void sqstd_rex_free(SQRex *exp)
 	}
 }
 
-SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)
+SQBool sqstd_rex_match(SQRex* exp,const SQChar* text, SQInteger text_size)
 {
 	const SQChar* res = NULL;
 	exp->_bol = text;
-	exp->_eol = text + scstrlen(text);
+	exp->_eol = text + ((text_size < 0) ? scstrlen(text) : text_size);
 	exp->_currsubexp = 0;
 	res = sqstd_rex_matchnode(exp,exp->_nodes,text,NULL);
 	if(res == NULL || res != exp->_eol)

+ 66 - 29
SquiLu/sqstdlib/sqstdstring.cpp

@@ -329,43 +329,65 @@ static SQRESULT _rexobj_releasehook(SQUserPointer p, SQInteger /*size*/, void */
 	return 1;
 }
 
+#define GET_EXTRA_MATCH_PARAMS(stack_pos) \
+    SQ_OPT_INTEGER(v, stack_pos, start_offset, 0); \
+    SQ_OPT_INTEGER(v, stack_pos+1, max_len, 0); \
+    \
+    if(start_offset < 0) return sq_throwerror(v, _SC("start_offset can't be negative")); \
+    SQInteger subject_str_size = subject_size - start_offset; \
+    if(subject_str_size < 0) return sq_throwerror(v, _SC("str size - start_offset can't be negative")); \
+    const SQChar *subject_str = subject + start_offset; \
+    if(max_len && (max_len < subject_str_size)) subject_str_size = max_len;
 static SQRESULT _regexp_match(HSQUIRRELVM v)
 {
+    SQ_FUNC_VARS(v);
 	SETUP_REX(v);
-	const SQChar *str;
-	sq_getstring(v,2,&str);
-	if(sqstd_rex_match(self,str) == SQTrue)
-	{
-		sq_pushbool(v,SQTrue);
-		return 1;
+	SQ_GET_STRING(v, 2, subject);
+	GET_EXTRA_MATCH_PARAMS(3);
+
+	const SQChar *begin,*end;
+	if(sqstd_rex_searchrange(self,subject_str, subject_str + subject_str_size,&begin,&end)){
+	    SQInteger n = sqstd_rex_getsubexpcount(self);
+	    SQRexMatch match;
+        sqstd_rex_getsubexp(self, (n ? 1 : 0),&match);
+        if(match.len == -1) sq_pushinteger(v, match.begin - subject); //on empty capture push position
+        else sq_pushstring(v, match.begin, match.len);
 	}
-	sq_pushbool(v,SQFalse);
+	else sq_pushnull(v);
 	return 1;
 }
 
 static SQRESULT _regexp_gmatch(HSQUIRRELVM v)
 {
+    SQ_FUNC_VARS(v);
 	SETUP_REX(v);
-	const SQChar *str;
-	SQInteger str_size;
-	sq_getstring(v,2,&str);
-	str_size = sq_getsize(v, 2);
+	SQ_GET_STRING(v, 2, subject);
+	GET_EXTRA_MATCH_PARAMS(4);
+
+    bool isFirst = true;
 	const SQChar *begin,*end;
-	while(sqstd_rex_searchrange(self,str, str+str_size,&begin,&end)){
+	while(sqstd_rex_searchrange(self,subject_str, subject_str + subject_str_size,&begin,&end)){
 	    SQInteger n = sqstd_rex_getsubexpcount(self);
 	    SQRexMatch match;
+        if(isFirst)
+        {
+            sq_push(v, 3); //push the function
+            isFirst = false;
+        }
 	    sq_pushroottable(v); //this
 	    SQInteger i = 0;
 	    for(;i < n; i++) {
             sqstd_rex_getsubexp(self,i,&match);
             if(i > 0){ //skip whole match
-                sq_pushstring(v, match.begin, match.len);
+                if(match.len == -1) sq_pushinteger(v, match.begin - subject); //on empty capture push position
+                else sq_pushstring(v, match.begin, match.len);
             }
 		}
 		i = sq_call(v, n, SQFalse, SQTrue);
 		if(i < 0) return i;
-		str_size -= end-str;
-		str = end;
+		subject_str_size -= end-subject_str;
+		//if(subject_str_size <= 0) break;
+		subject_str = end;
 	}
 	sq_pushbool(v,SQFalse);
 	return 1;
@@ -374,28 +396,40 @@ static SQRESULT _regexp_gmatch(HSQUIRRELVM v)
 #include "sqstdblobimpl.h"
 static SQRESULT _regexp_gsub(HSQUIRRELVM v)
 {
+    SQ_FUNC_VARS(v);
 	SETUP_REX(v);
-	const SQChar *str;
-	SQInteger str_size;
-	sq_getstring(v,2,&str);
-	str_size = sq_getsize(v, 2);
+	SQ_GET_STRING(v, 2, subject);
+    SQ_OPT_INTEGER(v, 4, max_sub, 0);
+    if(max_sub < 0) return sq_throwerror(v, _SC("max substitutions can't be less than zero"));
+
+    bool isFirst = true;
 	const SQChar *begin,*end;
 	SQBlob blob(0,8192);
 	SQObjectType ptype = sq_gettype(v, 3);
     const SQChar *replacement;
     SQInteger replacement_size;
-	while(sqstd_rex_searchrange(self,str, str+str_size,&begin,&end)){
-	    blob.Write(str, begin-str);
+    if(ptype == OT_STRING)
+    {
+        sq_getstr_and_size(v, 3, &replacement, &replacement_size);
+    }
+	while(sqstd_rex_searchrange(self,subject, subject+subject_size,&begin,&end)){
+	    blob.Write(subject, begin-subject);
 	    SQInteger n = sqstd_rex_getsubexpcount(self);
 	    SQRexMatch match;
         SQInteger i;
 	    switch(ptype){
 	        case OT_CLOSURE:{
+                if(isFirst)
+                {
+                    sq_push(v, 3); //push the function
+                    isFirst = false;
+                }
                 sq_pushroottable(v); //this
                 for(i=0; i < n; i++) {
                     sqstd_rex_getsubexp(self,i,&match);
                     if(i > 0){ //skip whole match
-                        sq_pushstring(v, match.begin, match.len);
+                        if(match.len == 0) sq_pushinteger(v, match.begin - subject); //on empty capture push position
+                        else sq_pushstring(v, match.begin, match.len);
                     }
                 }
                 i = sq_call(v, n, SQTrue, SQTrue);
@@ -437,7 +471,6 @@ static SQRESULT _regexp_gsub(HSQUIRRELVM v)
 	        }
 	        break;
 	        case OT_STRING:{
-	            sq_getstr_and_size(v, -1, &replacement, &replacement_size);
 
                 for(i=0; i < replacement_size; i++) {
                     SQInteger c = replacement[i];
@@ -481,10 +514,14 @@ static SQRESULT _regexp_gsub(HSQUIRRELVM v)
 	        default:
                 return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
 	    }
-		str_size -= end-str;
-		str = end;
+		subject_size -= end-subject;
+		subject = end;
+        if(max_sub)
+        {
+            if(--max_sub == 0) break;
+        }
 	}
-    if(str_size) blob.Write(str, str_size);
+    if(subject_size > 0) blob.Write(subject, subject_size);
 	sq_pushstring(v, (const SQChar *)blob.GetBuf(), blob.Len());
 	return 1;
 }
@@ -601,9 +638,9 @@ static SQRESULT _regexp__typeof(HSQUIRRELVM v)
 static SQRegFunction rexobj_funcs[]={
 	_DECL_REX_FUNC(constructor,2,_SC(".s")),
 	_DECL_REX_FUNC(search,-2,_SC("xsn")),
-	_DECL_REX_FUNC(match,2,_SC("xs")),
-	_DECL_REX_FUNC(gmatch,3,_SC("xsc")),
-	_DECL_REX_FUNC(gsub,3,_SC("xs s|c|a|t")),
+	_DECL_REX_FUNC(match,-2,_SC("xsnn")),
+	_DECL_REX_FUNC(gmatch,-3,_SC("xscnn")),
+	_DECL_REX_FUNC(gsub,-3,_SC("xs s|c|a|t n")),
 	_DECL_REX_FUNC(capture,-2,_SC("xsn")),
 	_DECL_REX_FUNC(xcapture,-2,_SC("xsn")),
 	_DECL_REX_FUNC(getxcapture,4,_SC("xsna")),