Преглед на файлове

Standard library for strings and pattern-matching

Roberto Ierusalimschy преди 28 години
родител
ревизия
75ac0d2172
променени са 2 файла, в които са добавени 127 реда и са изтрити 147 реда
  1. 127 134
      lstrlib.c
  2. 0 13
      strlib.h

+ 127 - 134
strlib.c → lstrlib.c

@@ -1,17 +1,17 @@
 /*
-** strlib.c
-** String library to LUA
+** $Id: lstrlib.c,v 1.1 1997/08/14 19:47:57 roberto Exp roberto $
+** Standard library for strings and pattern-matching
+** See Copyright Notice in lua.h
 */
 
-char *rcs_strlib="$Id: strlib.c,v 1.46 1997/06/19 18:49:40 roberto Exp roberto $";
 
-#include <string.h>
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <ctype.h>
+#include <string.h>
 
+#include "lauxlib.h"
 #include "lua.h"
-#include "auxlib.h"
 #include "lualib.h"
 
 
@@ -37,12 +37,14 @@ static char *strbuffer (unsigned long size)
   return lbuffer.b;
 }
 
+
 static char *openspace (unsigned long size)
 {
   char *buff = strbuffer(lbuffer.size+size);
   return buff+lbuffer.size;
 }
 
+
 char *luaI_addchar (int c)
 {
   if (lbuffer.size >= lbuffer.max)
@@ -51,6 +53,7 @@ char *luaI_addchar (int c)
   return lbuffer.b;
 }
 
+
 void luaI_emptybuff (void)
 {
   lbuffer.size = 0;  /* prepare for next string */
@@ -64,23 +67,19 @@ static void addnchar (char *s, int n)
   lbuffer.size += n;
 }
 
+
 static void addstr (char *s)
 {
   addnchar(s, strlen(s));
 }
 
 
-/*
-** Return the string length
-*/
 static void str_len (void)
 {
  lua_pushnumber(strlen(luaL_check_string(1)));
 }
 
-/*
-** Return the substring of a string
-*/
+
 static void str_sub (void)
 {
   char *s = luaL_check_string(1);
@@ -97,9 +96,7 @@ static void str_sub (void)
   else lua_pushstring("");
 }
 
-/*
-** Convert a string to lower case.
-*/
+
 static void str_lower (void)
 {
   char *s;
@@ -109,9 +106,7 @@ static void str_lower (void)
   lua_pushstring(luaI_addchar(0));
 }
 
-/*
-** Convert a string to upper case.
-*/
+
 static void str_upper (void)
 {
   char *s;
@@ -131,9 +126,7 @@ static void str_rep (void)
   lua_pushstring(luaI_addchar(0));
 }
 
-/*
-** get ascii value of a character in a string
-*/
+
 static void str_ascii (void)
 {
   char *s = luaL_check_string(1);
@@ -143,36 +136,69 @@ static void str_ascii (void)
 }
 
 
-/* pattern matching */
+
+/*
+** =======================================================
+** PATTERN MATCHING
+** =======================================================
+*/
+
+#define MAX_CAPT 9
+
+static struct {
+  char *init;
+  int len;  /* -1 signals unfinished capture */
+} capture[MAX_CAPT];
+
+static int num_captures;  /* only valid after a successful call to match */
+
 
 #define ESC	'%'
 #define SPECIALS  "^$*?.([%-"
 
-static char *bracket_end (char *p)
+
+static void push_captures (void)
 {
-  return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
+  int i;
+  for (i=0; i<num_captures; i++) {
+    int l = capture[i].len;
+    char *buff = openspace(l+1);
+    if (l == -1) lua_error("unfinished capture");
+    strncpy(buff, capture[i].init, l);
+    buff[l] = 0;
+    lua_pushstring(buff);
+  }
 }
 
-char *luaL_item_end (char *p)
+
+static int check_cap (int l, int level)
 {
-  switch (*p++) {
-    case '\0': return p-1;
-    case ESC:
-      if (*p == 0) luaL_verror("incorrect pattern (ends with `%c')", ESC);
-      return p+1;
-    case '[': {
-      char *end = bracket_end(p);
-      if (end == NULL) lua_error("incorrect pattern (missing `]')");
-      return end+1;
-    }
-    default:
-      return p;
-  }
+  l -= '1';
+  if (!(0 <= l && l < level && capture[l].len != -1))
+    lua_error("invalid capture index");
+  return l;
+}
+
+
+static int capture_to_close (int level)
+{
+  for (level--; level>=0; level--)
+    if (capture[level].len == -1) return level;
+  lua_error("invalid pattern capture");
+  return 0;  /* to avoid warnings */
 }
 
+
+static char *bracket_end (char *p)
+{
+  return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
+}
+
+
 static int matchclass (int c, int cl)
 {
   int res;
+  if (c == 0) return 0;
   switch (tolower((unsigned char)cl)) {
     case 'a' : res = isalpha((unsigned char)c); break;
     case 'c' : res = iscntrl((unsigned char)c); break;
@@ -187,15 +213,27 @@ static int matchclass (int c, int cl)
   return (islower((unsigned char)cl) ? res : !res);
 }
 
-int luaL_singlematch (int c, char *p)
+
+int luaI_singlematch (int c, char *p, char **ep)
 {
-  if (c == 0) return 0;
   switch (*p) {
-    case '.': return 1;
-    case ESC: return matchclass(c, *(p+1));
+    case '\0':
+      *ep = p;
+      return 0;
+    case '.':
+      *ep = p+1;
+      return (c != 0);
+    case ESC:
+      if (*(++p) == '\0')
+        luaL_verror("incorrect pattern (ends with `%c')", ESC);
+      *ep = p+1;
+      return matchclass(c, *p);
     case '[': {
       char *end = bracket_end(p+1);
       int sig = *(p+1) == '^' ? (p++, 0) : 1;
+      if (end == NULL) lua_error("incorrect pattern (missing `]')");
+      *ep = end+1;
+      if (c == 0) return 0;
       while (++p < end) {
         if (*p == ESC) {
           if (((p+1) < end) && matchclass(c, *++p)) return sig;
@@ -208,48 +246,12 @@ int luaL_singlematch (int c, char *p)
       }
       return !sig;
     }
-    default: return (*p == c);
+    default:
+      *ep = p+1;
+      return (*p == c);
   }
 }
 
-#define MAX_CAPT 9
-
-static struct {
-  char *init;
-  int len;  /* -1 signals unfinished capture */
-} capture[MAX_CAPT];
-
-static int num_captures;  /* only valid after a sucessful call to match */
-
-
-static void push_captures (void)
-{
-  int i;
-  for (i=0; i<num_captures; i++) {
-    int l = capture[i].len;
-    char *buff = openspace(l+1);
-    if (l == -1) lua_error("unfinished capture");
-    strncpy(buff, capture[i].init, l);
-    buff[l] = 0;
-    lua_pushstring(buff);
-  }
-}
-
-static int check_cap (int l, int level)
-{
-  l -= '1';
-  if (!(0 <= l && l < level && capture[l].len != -1))
-    lua_error("invalid capture index");
-  return l;
-}
-
-static int capture_to_close (int level)
-{
-  for (level--; level>=0; level--)
-    if (capture[level].len == -1) return level;
-  lua_error("invalid pattern capture");
-  return 0;  /* to avoid warnings */
-}
 
 static char *matchbalance (char *s, int b, int e)
 {
@@ -266,6 +268,31 @@ static char *matchbalance (char *s, int b, int e)
   return NULL;  /* string ends out of balance */
 }
 
+
+static char *matchitem (char *s, char *p, int level, char **ep)
+{
+  if (*p == ESC) {
+    p++;
+    if (isdigit((unsigned char)*p)) {  /* capture */
+      int l = check_cap(*p, level);
+      *ep = p+1;
+      if (strncmp(capture[l].init, s, capture[l].len) == 0)
+        return s+capture[l].len;
+      else return NULL;
+    }
+    else if (*p == 'b') {  /* balanced string */
+      p++;
+      if (*p == 0 || *(p+1) == 0)
+        lua_error("bad balanced pattern specification");
+      *ep = p+2;
+      return matchbalance(s, *p, *(p+1));
+    }
+    else p--;  /* and go through */
+  }
+  return (luaI_singlematch(*s, p, ep) ? s+1 : NULL);
+}
+
+
 static char *match (char *s, char *p, int level)
 {
   init: /* using goto's to optimize tail recursion */
@@ -283,38 +310,19 @@ static char *match (char *s, char *p, int level)
         capture[l].len = -1;  /* undo capture */
       return res;
     }
-    case ESC:
-      if (isdigit((unsigned char)(*(p+1)))) {  /* capture */
-        int l = check_cap(*(p+1), level);
-        if (strncmp(capture[l].init, s, capture[l].len) == 0) {
-          /* return match(p+2, s+capture[l].len, level); */
-          p+=2; s+=capture[l].len; goto init;
-        }
-        else return NULL;
-      }
-      else if (*(p+1) == 'b') {  /* balanced string */
-        if (*(p+2) == 0 || *(p+3) == 0)
-          lua_error("bad balanced pattern specification");
-        s = matchbalance(s, *(p+2), *(p+3));
-        if (s == NULL) return NULL;
-        else {  /* return match(p+4, s, level); */
-          p+=4; goto init;
-        }
-      }
-      else goto dflt;
     case '\0': case '$':  /* (possibly) end of pattern */
       if (*p == 0 || (*(p+1) == 0 && *s == 0)) {
         num_captures = level;
         return s;
       }
-      else goto dflt;
-    default: dflt: {  /* it is a pattern item */
-      int m = luaL_singlematch(*s, p);
-      char *ep = luaL_item_end(p);  /* get what is next */
+      /* else go through */
+    default: {  /* it is a pattern item */
+      char *ep;  /* get what is next */
+      char *s1 = matchitem(s, p, level, &ep);
       switch (*ep) {
         case '*': {  /* repetition */
           char *res;
-          if (m && (res = match(s+1, p, level)))
+          if (s1 && (res = match(s1, p, level)))
             return res;
           p=ep+1; goto init;  /* else return match(s, ep+1, level); */
         }
@@ -322,34 +330,35 @@ static char *match (char *s, char *p, int level)
           char *res;
           if ((res = match(s, ep+1, level)) != 0)
             return res;
-          else if (m) {
-            s++;
-            goto init;  /* return match(s+1, p, level); */
+          else if (s1) {
+            s = s1;
+            goto init;  /* return match(s1, p, level); */
           }
           else
             return NULL;
         }
         case '?': {  /* optional */
           char *res;
-          if (m && (res = match(s+1, ep+1, level)))
+          if (s1 && (res = match(s1, ep+1, level)))
             return res;
           p=ep+1; goto init;  /* else return match(s, ep+1, level); */
         }
         default:
-          if (m) { s++; p=ep; goto init; }  /* return match(s+1, ep, level); */
+          if (s1) { s=s1; p=ep; goto init; }  /* return match(s1, ep, level); */
           else return NULL;
       }
     }
   }
 }
 
+
 static void str_find (void)
 {
   char *s = luaL_check_string(1);
   char *p = luaL_check_string(2);
   long init = (long)luaL_opt_number(3, 1) - 1;
   luaL_arg_check(0 <= init && init <= strlen(s), 3, "out of range");
-  if (lua_getparam(4) != LUA_NOOBJECT || 
+  if (lua_getparam(4) != LUA_NOOBJECT ||
       strpbrk(p, SPECIALS) == NULL) {  /* no special caracters? */
     char *s2 = strstr(s+init, p);
     if (s2) {
@@ -372,7 +381,8 @@ static void str_find (void)
   }
 }
 
-static void add_s (lua_Object newp, lua_Object table, int n)
+
+static void add_s (lua_Object newp)
 {
   if (lua_isstring(newp)) {
     char *news = lua_getstring(newp);
@@ -390,12 +400,8 @@ static void add_s (lua_Object newp, lua_Object table, int n)
     struct lbuff oldbuff;
     int status;
     lua_beginblock();
-    if (lua_istable(table)) {
-      lua_pushobject(table);
-      lua_pushnumber(n);
-    }
     push_captures();
-    /* function may use lbuffer, so save it and create a new one */
+    /* function may use lbuffer, so save it and create a new one */
     oldbuff = lbuffer;
     lbuffer.b = NULL; lbuffer.max = lbuffer.size = 0;
     status = lua_callfunction(newp);
@@ -411,13 +417,13 @@ static void add_s (lua_Object newp, lua_Object table, int n)
   else luaL_arg_check(0, 3, NULL);
 }
 
+
 static void str_gsub (void)
 {
   char *src = luaL_check_string(1);
   char *p = luaL_check_string(2);
   lua_Object newp = lua_getparam(3);
-  lua_Object table = lua_getparam(4);
-  int max_s = (int)luaL_opt_number(lua_istable(table)?5:4, strlen(src)+1);
+  int max_s = (int)luaL_opt_number(4, strlen(src)+1);
   int anchor = (*p == '^') ? (p++, 1) : 0;
   int n = 0;
   luaI_emptybuff();
@@ -425,7 +431,7 @@ static void str_gsub (void)
     char *e = match(src, p, 0);
     if (e) {
       n++;
-      add_s(newp, table, n);
+      add_s(newp);
     }
     if (e && e>src) /* non empty match? */
       src = e;  /* skip it */
@@ -439,18 +445,6 @@ static void str_gsub (void)
   lua_pushnumber(n);  /* number of substitutions */
 }
 
-static void str_set (void)
-{
-  char *item = luaL_check_string(1);
-  int i;
-  luaL_arg_check(*luaL_item_end(item) == 0, 1, "wrong format");
-  luaI_emptybuff();
-  for (i=1; i<256; i++)  /* 0 cannot be part of a set */
-    if (luaL_singlematch(i, item))
-      luaI_addchar(i);
-  lua_pushstring(luaI_addchar(0));
-}
-
 
 void luaI_addquoted (char *s)
 {
@@ -505,7 +499,7 @@ static void str_format (void)
         case 'u':  case 'x':  case 'X':
           sprintf(buff, form, (int)luaL_check_number(arg));
           break;
-        case 'e':  case 'E': case 'f': case 'g':
+        case 'e':  case 'E': case 'f': case 'g': case 'G':
           sprintf(buff, form, luaL_check_number(arg));
           break;
         default:  /* also treat cases 'pnLlh' */
@@ -521,7 +515,6 @@ static void str_format (void)
 static struct luaL_reg strlib[] = {
 {"strlen", str_len},
 {"strsub", str_sub},
-{"strset", str_set},
 {"strlower", str_lower},
 {"strupper", str_upper},
 {"strrep", str_rep},

+ 0 - 13
strlib.h

@@ -1,13 +0,0 @@
-/*
-** String library to LUA
-** TeCGraf - PUC-Rio
-** $Id: $
-*/
-
-
-#ifndef strlib_h
-
-void strlib_open (void);
-
-#endif
-