浏览代码

new implementation for '*' in patterns + new option '+'

Roberto Ierusalimschy 26 年之前
父节点
当前提交
7808ea3a5f
共有 3 个文件被更改,包括 156 次插入113 次删除
  1. 14 12
      liolib.c
  2. 139 99
      lstrlib.c
  3. 3 2
      lualib.h

+ 14 - 12
liolib.c

@@ -1,5 +1,5 @@
 /*
-** $Id: liolib.c,v 1.37 1999/04/05 19:47:05 roberto Exp roberto $
+** $Id: liolib.c,v 1.38 1999/04/14 20:40:32 roberto Exp $
 ** Standard I/O (and system) library
 ** See Copyright Notice in lua.h
 */
@@ -244,23 +244,25 @@ static int read_pattern (FILE *f, char *p) {
         p++;
         continue;
       default: {
-        char *ep;  /* get what is next */
+        char *ep = luaI_classend(p);  /* get what is next */
         int m;  /* match result */
         if (c == NEED_OTHER) c = getc(f);
-        if (c != EOF)
-          m = luaI_singlematch(c, p, &ep);
-        else {
-          luaI_singlematch(0, p, &ep);  /* to set "ep" */
-          m = 0;  /* EOF matches no pattern */
-        }
+        m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
         if (m) {
           if (!inskip) luaL_addchar(c);
           c = NEED_OTHER;
         }
         switch (*ep) {
-          case '*':  /* repetition */
-            if (!m) p = ep+1;  /* else stay in (repeat) the same item */
-            continue;
+          case '+':  /* repetition (1 or more) */
+            if (!m) goto break_while;  /* pattern fails? */
+            /* else go through */
+          case '*':  /* repetition (0 or more) */
+            while (m) {  /* reads the same item until it fails */
+              c = getc(f);
+              m = (c==EOF) ? 0 : luaI_singlematch(c, p, ep);
+              if (m && !inskip) luaL_addchar(c);
+            }
+            /* go through to continue reading the pattern */
           case '?':  /* optional */
             p = ep+1;  /* continues reading the pattern */
             continue;
@@ -336,7 +338,7 @@ static void io_read (void) {
         success = 1; /* always success */
         break;
       case 4:  /* word */
-        success = read_pattern(f, "{%s*}%S%S*");
+        success = read_pattern(f, "{%s*}%S+");
         break;
       default:
         success = read_pattern(f, p);

+ 139 - 99
lstrlib.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lstrlib.c,v 1.28 1999/02/26 15:49:53 roberto Exp roberto $
+** $Id: lstrlib.c,v 1.29 1999/04/30 14:12:05 roberto Exp roberto $
 ** Standard library for strings and pattern-matching
 ** See Copyright Notice in lua.h
 */
@@ -130,7 +130,7 @@ struct Capture {
 
 
 #define ESC	'%'
-#define SPECIALS  "^$*?.([%-"
+#define SPECIALS  "^$*+?.([%-"
 
 
 static void push_captures (struct Capture *cap) {
@@ -160,8 +160,21 @@ static int capture_to_close (struct Capture *cap) {
 }
 
 
-static char *bracket_end (char *p) {
-  return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
+char *luaI_classend (char *p) {  /* returns a pointer to just past the single pattern item that starts at p */
+  switch (*p++) {
+    case ESC:  /* item is ESC plus the char that follows it */
+      if (*p == '\0')
+        luaL_verror("incorrect pattern (ends with `%c')", ESC);
+      return p+1;
+    case '[':  /* item is a bracketed set; locate its closing ']' */
+      if (*p == '^') p++;  /* step over a leading '^' */
+      if (*p == ']') p++;  /* a ']' in first position is stepped over, so strchr looks for a later one */
+      p = strchr(p, ']');
+      if (!p) lua_error("incorrect pattern (missing `]')");
+      return p+1;  /* one past the ']' found by strchr */
+    default:  /* any other char is a one-char item */
+      return p;  /* p was already advanced by the switch expression */
+  }
 }
 
 
@@ -184,48 +197,55 @@ static int matchclass (int c, int cl) {
 }
 
 
-int luaI_singlematch (int c, char *p, char **ep) {
+
+static int matchbracketclass (int c, char *p, char *end) {  /* tests c against the set "[...]" starting at p; luaI_singlematch passes ep-1 as end */
+  int sig = 1;  /* value returned when c is found in the set */
+  if (*(p+1) == '^') {
+    sig = 0;  /* complemented set: finding c now means "no match" */
+    p++;  /* skip the '^' */
+  }
+  while (++p < end) {
+    if (*p == ESC) {
+      p++;  /* look at the char after ESC */
+      if ((p < end) && matchclass(c, (unsigned char)*p))
+        return sig;
+    }
+    else if ((*(p+1) == '-') && (p+2 < end)) {  /* range "x-y", taken only when y lies before end */
+      p+=2;
+      if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
+        return sig;
+    }
+    else if ((unsigned char)*p == c) return sig;  /* single literal char */
+  }
+  return !sig;  /* c was not found among the set's elements */
+}
+
+
+
+int luaI_singlematch (int c, char *p, char *ep) {  /* tests char c against the single item at p; ep (the item's end) is used only by the '[' case */
   switch (*p) {
     case '.':  /* matches any char */
-      *ep = p+1;
       return 1;
-    case '\0':  /* end of pattern; matches nothing */
-      *ep = p;
-      return 0;
     case ESC:
-      if (*(++p) == '\0')
-        luaL_verror("incorrect pattern (ends with `%c')", ESC);
-      *ep = p+1;
-      return matchclass(c, (unsigned char)*p);
-    case '[': {
-      char *end = bracket_end(p+1);
-      int sig = *(p+1) == '^' ? (p++, 0) : 1;
-      if (end == NULL) lua_error("incorrect pattern (missing `]')");
-      *ep = end+1;
-      while (++p < end) {
-        if (*p == ESC) {
-          if (((p+1) < end) && matchclass(c, (unsigned char)*++p))
-            return sig;
-        }
-        else if ((*(p+1) == '-') && (p+2 < end)) {
-          p+=2;
-          if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
-            return sig;
-        }
-        else if ((unsigned char)*p == c) return sig;
-      }
-      return !sig;
-    }
+      return matchclass(c, (unsigned char)*(p+1));  /* the class is named by the char after ESC */
+    case '[':
+      return matchbracketclass(c, p, ep-1);  /* ep-1 is handed over as the end of the set */
     default:
-      *ep = p+1;
       return ((unsigned char)*p == c);
   }
 }
 
 
-static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
-  if (*s != b) return NULL;
+static char *match (char *s, char *p, struct Capture *cap);
+
+
+static char *matchbalance (char *s, char *p, struct Capture *cap) {
+  if (*p == 0 || *(p+1) == 0)
+    lua_error("unbalanced pattern");
+  if (*s != *p) return NULL;
   else {
+    int b = *p;
+    int e = *(p+1);
     int cont = 1;
     while (++s < cap->src_end) {
       if (*s == e) {
@@ -238,89 +258,109 @@ static char *matchbalance (char *s, int b, int e, struct Capture *cap) {
 }
 
 
-static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) {
-  if (*p == ESC) {
-    p++;
-    if (isdigit((unsigned char)*p)) {  /* capture */
-      int l = check_cap(*p, cap);
-      int len = cap->capture[l].len;
-      *ep = p+1;
-      if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0)
-        return s+len;
-      else return NULL;
-    }
-    else if (*p == 'b') {  /* balanced string */
-      p++;
-      if (*p == 0 || *(p+1) == 0)
-        lua_error("unbalanced pattern");
-      *ep = p+2;
-      return matchbalance(s, *p, *(p+1), cap);
-    }
-    else p--;  /* and go through */
+static char *max_expand (char *s, char *p, char *ep, struct Capture *cap) {  /* matches the item at p as many times as possible, then the rest of the pattern (ep+1) */
+  int i = 0;  /* counts maximum expand for item */
+  while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
+    i++;
+  /* keeps trying to match the rest of the pattern, starting with the maximum repetitions */
+  while (i>=0) {
+    char *res = match((s+i), ep+1, cap);
+    if (res) return res;
+    i--;  /* else didn't match; reduce 1 repetition to try again */
   }
-  /* "luaI_singlematch" sets "ep" (so must be called even at the end of "s" */
-  return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ?
-                    s+1 : NULL;
+  return NULL;  /* the rest of the pattern failed for every repetition count, down to zero */
+}
+
+
+static char *min_expand (char *s, char *p, char *ep, struct Capture *cap) {  /* matches the item at p as few times as possible, then the rest of the pattern (ep+1) */
+  for (;;) {
+    char *res = match(s, ep+1, cap);  /* first try the rest of the pattern at the current position */
+    if (res != NULL)
+      return res;
+    else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
+      s++;  /* try with one more repetition */
+    else return NULL;  /* the item cannot consume another char, so the match fails */
+  }
+}
+
+
+static char *start_capt (char *s, char *p, struct Capture *cap) {  /* opens a new capture at s, then matches the pattern that follows p */
+  char *res;
+  int level = cap->level;  /* index of the capture slot about to be opened */
+  if (level >= MAX_CAPT) lua_error("too many captures");
+  cap->capture[level].init = s;
+  cap->capture[level].len = -1;  /* -1: capture has no length yet (end_capt stores it) */
+  cap->level = level+1;
+  if ((res=match(s, p+1, cap)) == NULL)  /* match failed? */
+    cap->level--;  /* undo capture */
+  return res;
+}
+
+
+static char *end_capt (char *s, char *p, struct Capture *cap) {  /* closes at s the capture picked by capture_to_close, then matches the pattern that follows p */
+  int l = capture_to_close(cap);
+  char *res;
+  cap->capture[l].len = s - cap->capture[l].init;  /* close capture */
+  if ((res = match(s, p+1, cap)) == NULL)  /* match failed? */
+    cap->capture[l].len = -1;  /* undo capture */
+  return res;
+}
+
+
+static char *match_capture (char *s, int level, struct Capture *cap) {  /* matches at s the text of an earlier capture; returns the position after it, or NULL */
+  int l = check_cap(level, cap);  /* level is the pattern char after ESC (match passes *(p+1)) */
+  int len = cap->capture[l].len;
+  if (cap->src_end-s >= len &&  /* enough subject left to hold the captured text? */
+      memcmp(cap->capture[l].init, s, len) == 0)
+    return s+len;
+  else return NULL;
 }
 
 
 static char *match (char *s, char *p, struct Capture *cap) {
   init: /* using goto's to optimize tail recursion */
   switch (*p) {
-    case '(': {  /* start capture */
-      char *res;
-      if (cap->level >= MAX_CAPT) lua_error("too many captures");
-      cap->capture[cap->level].init = s;
-      cap->capture[cap->level].len = -1;
-      cap->level++;
-      if ((res=match(s, p+1, cap)) == NULL)  /* match failed? */
-        cap->level--;  /* undo capture */
-      return res;
-    }
-    case ')': {  /* end capture */
-      int l = capture_to_close(cap);
-      char *res;
-      cap->capture[l].len = s - cap->capture[l].init;  /* close capture */
-      if ((res = match(s, p+1, cap)) == NULL)  /* match failed? */
-        cap->capture[l].len = -1;  /* undo capture */
-      return res;
-    }
+    case '(':  /* start capture */
+      return start_capt(s, p, cap);
+    case ')':  /* end capture */
+      return end_capt(s, p, cap);
+    case ESC:  /* may be %[0-9] or %b */
+      if (isdigit((unsigned char)(*(p+1)))) {  /* capture? */
+        s = match_capture(s, *(p+1), cap);
+        if (s == NULL) return NULL;
+        p+=2; goto init;  /* else return match(s, p+2, cap) */
+      }
+      else if (*(p+1) == 'b') {  /* balanced string? */
+        s = matchbalance(s, p+2, cap);
+        if (s == NULL) return NULL;
+        p+=4; goto init;  /* else return match(s, p+4, cap); */
+      }
+      else goto dflt;  /* case default */
     case '\0':  /* end of pattern */
       return s;  /* match succeeded */
     case '$':
       if (*(p+1) == '\0')  /* is the '$' the last char in pattern? */
         return (s == cap->src_end) ? s : NULL;  /* check end of string */
-      /* else is a regular '$'; go through */
-    default: {  /* it is a pattern item */
-      char *ep;  /* will point to what is next */
-      char *s1 = matchitem(s, p, cap, &ep);
+      else goto dflt;
+    default: dflt: {  /* it is a pattern item */
+      char *ep = luaI_classend(p);  /* points to what is next */
+      int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
       switch (*ep) {
-        case '*': {  /* repetition */
-          char *res;
-          if (s1 && s1>s && ((res=match(s1, p, cap)) != NULL))
-            return res;
-          p=ep+1; goto init;  /* else return match(s, ep+1, cap); */
-        }
         case '?': {  /* optional */
           char *res;
-          if (s1 && ((res=match(s1, ep+1, cap)) != NULL))
+          if (m && ((res=match(s+1, ep+1, cap)) != NULL))
             return res;
           p=ep+1; goto init;  /* else return match(s, ep+1, cap); */
         }
-        case '-': {  /* repetition */
-          char *res;
-          if ((res = match(s, ep+1, cap)) != NULL)
-            return res;
-          else if (s1 && s1>s) {
-            s = s1;
-            goto init;  /* return match(s1, p, cap); */
-          }
-          else
-            return NULL;
-        }
+        case '*':  /* 0 or more repetitions */
+          return max_expand(s, p, ep, cap);
+        case '+':  /* 1 or more repetitions */
+          return (m ? max_expand(s+1, p, ep, cap) : NULL);
+        case '-':  /* 0 or more repetitions (minimum) */
+          return min_expand(s, p, ep, cap);
         default:
-          if (s1) { s=s1; p=ep; goto init; }  /* return match(s1, ep, cap); */
-          else return NULL;
+          if (!m) return NULL;
+          s++; p=ep; goto init;  /* else return match(s+1, ep, cap); */
       }
     }
   }

+ 3 - 2
lualib.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lualib.h,v 1.4 1998/06/19 16:14:09 roberto Exp roberto $
+** $Id: lualib.h,v 1.5 1999/01/08 16:47:44 roberto Exp roberto $
 ** Lua standard libraries
 ** See Copyright Notice in lua.h
 */
@@ -29,7 +29,8 @@ void lua_userinit (void);
 
 /* Auxiliary functions (private) */
 
-int luaI_singlematch (int c, char *p, char **ep);
+char *luaI_classend (char *p);
+int luaI_singlematch (int c, char *p, char *ep);
 
 #endif