2
0
Эх сурвалжийг харах

Small optimizations in 'string.gsub'

Avoid creating extra strings when possible:

- avoid creating new resulting string when subject was not modified
(instead, return the subject itself);

- avoid creating strings representing the captured substrings when
handling replacements like '%1' (instead, add the substring directly
to the buffer).
Roberto Ierusalimschy 6 жил өмнө
parent
commit
b0810c51c3
3 өөрчлөгдсөн 115 нэмэгдсэн , 47 устгасан
  1. 84 46
      lstrlib.c
  2. 1 1
      testes/gc.lua
  3. 30 0
      testes/pm.lua

+ 84 - 46
lstrlib.c

@@ -660,25 +660,46 @@ static const char *lmemfind (const char *s1, size_t l1,
 }
 
 
-static void push_onecapture (MatchState *ms, int i, const char *s,
-                                                    const char *e) {
+/*
+** get information about the i-th capture. If there are no captures
+** and 'i==0', return information about the whole match, which
+** is the range 's'..'e'. If the capture is a string, return
+** its length and put its address in '*cap'. If it is an integer
+** (a position), push it on the stack and return CAP_POSITION.
+*/
+static size_t get_onecapture (MatchState *ms, int i, const char *s,
+                              const char *e, const char **cap) {
   if (i >= ms->level) {
-    if (i == 0)  /* ms->level == 0, too */
-      lua_pushlstring(ms->L, s, e - s);  /* add whole match */
-    else
+    if (i != 0)
       luaL_error(ms->L, "invalid capture index %%%d", i + 1);
+    *cap = s;
+    return e - s;
   }
   else {
-    ptrdiff_t l = ms->capture[i].len;
-    if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
-    if (l == CAP_POSITION)
+    ptrdiff_t capl = ms->capture[i].len;
+    *cap = ms->capture[i].init;
+    if (capl == CAP_UNFINISHED)
+      luaL_error(ms->L, "unfinished capture");
+    else if (capl == CAP_POSITION)
       lua_pushinteger(ms->L, (ms->capture[i].init - ms->src_init) + 1);
-    else
-      lua_pushlstring(ms->L, ms->capture[i].init, l);
+    return capl;
   }
 }
 
 
+/*
+** Push the i-th capture on the stack.
+*/
+static void push_onecapture (MatchState *ms, int i, const char *s,
+                                                    const char *e) {
+  const char *cap;
+  ptrdiff_t l = get_onecapture(ms, i, s, e, &cap);
+  if (l != CAP_POSITION)
+    lua_pushlstring(ms->L, cap, l);
+  /* else position was already pushed */
+}
+
+
 static int push_captures (MatchState *ms, const char *s, const char *e) {
   int i;
   int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
@@ -817,60 +838,72 @@ static int gmatch (lua_State *L) {
 
 static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
                                                    const char *e) {
-  size_t l, i;
+  size_t l;
   lua_State *L = ms->L;
   const char *news = lua_tolstring(L, 3, &l);
-  for (i = 0; i < l; i++) {
-    if (news[i] != L_ESC)
-      luaL_addchar(b, news[i]);
-    else {
-      i++;  /* skip ESC */
-      if (!isdigit(uchar(news[i]))) {
-        if (news[i] != L_ESC)
-          luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
-        luaL_addchar(b, news[i]);
-      }
-      else if (news[i] == '0')
-          luaL_addlstring(b, s, e - s);
-      else {
-        push_onecapture(ms, news[i] - '1', s, e);
-        luaL_tolstring(L, -1, NULL);  /* if number, convert it to string */
-        lua_remove(L, -2);  /* remove original value */
-        luaL_addvalue(b);  /* add capture to accumulated result */
-      }
+  const char *p;
+  while ((p = (char *)memchr(news, L_ESC, l)) != NULL) {
+    luaL_addlstring(b, news, p - news);
+    p++;  /* skip ESC */
+    if (*p == L_ESC)  /* '%%' */
+      luaL_addchar(b, *p);
+    else if (*p == '0')  /* '%0' */
+        luaL_addlstring(b, s, e - s);
+    else if (isdigit(uchar(*p))) {  /* '%n' */
+      const char *cap;
+      ptrdiff_t resl = get_onecapture(ms, *p - '1', s, e, &cap);
+      if (resl == CAP_POSITION)
+        luaL_addvalue(b);  /* add position to accumulated result */
+      else
+        luaL_addlstring(b, cap, resl);
     }
+    else
+      luaL_error(L, "invalid use of '%c' in replacement string", L_ESC);
+    l -= p + 1 - news;
+    news = p + 1;
   }
+  luaL_addlstring(b, news, l);
 }
 
 
-static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
-                                       const char *e, int tr) {
+/*
+** Add the replacement value to the string buffer 'b'.
+** Return true if the original string was changed. (Function calls and
+** table indexing resulting in nil or false do not change the subject.)
+*/
+static int add_value (MatchState *ms, luaL_Buffer *b, const char *s,
+                                      const char *e, int tr) {
   lua_State *L = ms->L;
   switch (tr) {
-    case LUA_TFUNCTION: {
+    case LUA_TFUNCTION: {  /* call the function */
       int n;
-      lua_pushvalue(L, 3);
-      n = push_captures(ms, s, e);
-      lua_call(L, n, 1);
+      lua_pushvalue(L, 3);  /* push the function */
+      n = push_captures(ms, s, e);  /* all captures as arguments */
+      lua_call(L, n, 1);  /* call it */
       break;
     }
-    case LUA_TTABLE: {
-      push_onecapture(ms, 0, s, e);
+    case LUA_TTABLE: {  /* index the table */
+      push_onecapture(ms, 0, s, e);  /* first capture is the index */
       lua_gettable(L, 3);
       break;
     }
     default: {  /* LUA_TNUMBER or LUA_TSTRING */
-      add_s(ms, b, s, e);
-      return;
+      add_s(ms, b, s, e);  /* add value to the buffer */
+      return 1;  /* something changed */
     }
   }
   if (!lua_toboolean(L, -1)) {  /* nil or false? */
-    lua_pop(L, 1);
-    lua_pushlstring(L, s, e - s);  /* keep original text */
+    lua_pop(L, 1);  /* remove value */
+    luaL_addlstring(b, s, e - s);  /* keep original text */
+    return 0;  /* no changes */
   }
   else if (!lua_isstring(L, -1))
-    luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
-  luaL_addvalue(b);  /* add result to accumulator */
+    return luaL_error(L, "invalid replacement value (a %s)",
+                         luaL_typename(L, -1));
+  else {
+    luaL_addvalue(b);  /* add result to accumulator */
+    return 1;  /* something changed */
+  }
 }
 
 
@@ -883,6 +916,7 @@ static int str_gsub (lua_State *L) {
   lua_Integer max_s = luaL_optinteger(L, 4, srcl + 1);  /* max replacements */
   int anchor = (*p == '^');
   lua_Integer n = 0;  /* replacement count */
+  int changed = 0;  /* change flag */
   MatchState ms;
   luaL_Buffer b;
   luaL_argexpected(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
@@ -898,7 +932,7 @@ static int str_gsub (lua_State *L) {
     reprepstate(&ms);  /* (re)prepare state for new match */
     if ((e = match(&ms, src, p)) != NULL && e != lastmatch) {  /* match? */
       n++;
-      add_value(&ms, &b, src, e, tr);  /* add replacement to buffer */
+      changed = add_value(&ms, &b, src, e, tr) | changed;
       src = lastmatch = e;
     }
     else if (src < ms.src_end)  /* otherwise, skip one character */
@@ -906,8 +940,12 @@ static int str_gsub (lua_State *L) {
     else break;  /* end of subject */
     if (anchor) break;
   }
-  luaL_addlstring(&b, src, ms.src_end-src);
-  luaL_pushresult(&b);
+  if (!changed)  /* no changes? */
+    lua_pushvalue(L, 1);  /* return original string */
+  else {  /* something changed */
+    luaL_addlstring(&b, src, ms.src_end-src);
+    luaL_pushresult(&b);  /* create and return new string */
+  }
   lua_pushinteger(L, n);  /* number of substitutions */
   return 2;
 }

+ 1 - 1
testes/gc.lua

@@ -113,7 +113,7 @@ do
   contCreate = 0
   while contCreate <= limit do
     a = contCreate .. "b";
-    a = string.gsub(a, '(%d%d*)', string.upper)
+    a = string.gsub(a, '(%d%d*)', "%1 %1")
     a = "a"
     contCreate = contCreate+1
   end

+ 30 - 0
testes/pm.lua

@@ -387,5 +387,35 @@ assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
 assert(string.find("abc\0\0","\0.") == 4)
 assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
 
+
+do   -- test reuse of original string in gsub
+  local s = string.rep("a", 100)
+  local r = string.gsub(s, "b", "c")   -- no match
+  assert(string.format("%p", s) == string.format("%p", r))
+
+  r = string.gsub(s, ".", {x = "y"})   -- no substitutions
+  assert(string.format("%p", s) == string.format("%p", r))
+
+  local count = 0
+  r = string.gsub(s, ".", function (x)
+                            assert(x == "a")
+                            count = count + 1
+                            return nil    -- no substitution
+                          end)
+  r = string.gsub(r, ".", {b = 'x'})   -- "a" is not a key; no subst.
+  assert(count == 100)
+  assert(string.format("%p", s) == string.format("%p", r))
+
+  count = 0
+  r = string.gsub(s, ".", function (x)
+                            assert(x == "a")
+                            count = count + 1
+                            return x    -- substitution...
+                          end)
+  assert(count == 100)
+  -- no reuse in this case
+  assert(r == s and string.format("%p", s) ~= string.format("%p", r))
+end
+
 print('OK')