Browse Source

External strings

Strings can use external buffers to store their contents.
Roberto Ierusalimschy 1 year ago
parent
commit
024f9064f1
9 changed files with 195 additions and 14 deletions
  1. 14 0
      lapi.c
  2. 3 1
      lgc.c
  3. 8 0
      lobject.h
  4. 75 3
      lstring.c
  5. 3 7
      lstring.h
  6. 33 0
      ltests.c
  7. 2 0
      lua.h
  8. 34 0
      manual/manual.of
  9. 23 3
      testes/strings.lua

+ 14 - 0
lapi.c

@@ -535,6 +535,20 @@ LUA_API const char *lua_pushlstring (lua_State *L, const char *s, size_t len) {
 }
 }
 
 
 
 
+LUA_API const char *lua_pushextlstring (lua_State *L,  /* push ext. string */
+	        const char *s, size_t len, lua_Alloc falloc, void *ud) {
+  TString *ts;
+  lua_lock(L);
+  api_check(L, s[len] == '\0', "string not ending with zero");
+  ts = luaS_newextlstr (L, s, len, falloc, ud);  /* may call 'falloc' */
+  setsvalue2s(L, L->top.p, ts);  /* new string goes to the stack top */
+  api_incr_top(L);
+  luaC_checkGC(L);
+  lua_unlock(L);
+  return getstr(ts);  /* content of the string that was pushed */
+}
+
+
 LUA_API const char *lua_pushstring (lua_State *L, const char *s) {
 LUA_API const char *lua_pushstring (lua_State *L, const char *s) {
   lua_lock(L);
   lua_lock(L);
   if (s == NULL)
   if (s == NULL)

+ 3 - 1
lgc.c

@@ -813,7 +813,9 @@ static void freeobj (lua_State *L, GCObject *o) {
     }
     }
     case LUA_VLNGSTR: {
     case LUA_VLNGSTR: {
       TString *ts = gco2ts(o);
       TString *ts = gco2ts(o);
-      luaM_freemem(L, ts, sizestrlng(ts->u.lnglen));
+      if (ts->shrlen == LSTRMEM)  /* must free external string? */
+        (*ts->falloc)(ts->ud, ts->contents, ts->u.lnglen + 1, 0);
+      luaM_freemem(L, ts, luaS_sizelngstr(ts->u.lnglen, ts->shrlen));
       break;
       break;
     }
     }
     default: lua_assert(0);
     default: lua_assert(0);

+ 8 - 0
lobject.h

@@ -382,6 +382,12 @@ typedef struct GCObject {
 #define setsvalue2n	setsvalue
 #define setsvalue2n	setsvalue
 
 
 
 
+/* Kinds of long strings (stored in 'shrlen'; all values are negative) */
+#define LSTRREG		-1  /* regular long string (content follows header) */
+#define LSTRFIX		-2  /* fixed external long string (no 'falloc'/'ud') */
+#define LSTRMEM		-3  /* external long string, freed through 'falloc' */
+
+
 /*
 /*
 ** Header for a string value.
 ** Header for a string value.
 */
 */
@@ -395,6 +401,8 @@ typedef struct TString {
     struct TString *hnext;  /* linked list for hash table */
     struct TString *hnext;  /* linked list for hash table */
   } u;
   } u;
   char *contents;  /* pointer to content in long strings */
   char *contents;  /* pointer to content in long strings */
+  lua_Alloc falloc;  /* deallocation function for external strings */
+  void *ud;  /* user data for external strings */
 } TString;
 } TString;
 
 
 
 

+ 75 - 3
lstring.c

@@ -136,6 +136,20 @@ void luaS_init (lua_State *L) {
 }
 }
 
 
 
 
+/*
+** Size in bytes of the object for a long string of the given kind.
+** Only regular long strings ('LSTRREG') keep their content ('len'
+** characters plus the ending zero) inside the object; both kinds of
+** external strings need just a header, and only 'LSTRMEM' needs the
+** fields 'falloc' and 'ud' in that header.
+*/
+size_t luaS_sizelngstr (size_t len, int kind) {
+  size_t header = offsetof(TString, falloc);  /* without 'falloc'/'ud' */
+  if (kind == LSTRREG)  /* regular long string? */
+    return header + (len + 1) * sizeof(char);  /* header plus content */
+  if (kind == LSTRFIX)  /* fixed external long string? */
+    return header;
+  /* else external long string with deallocation */
+  lua_assert(kind == LSTRMEM);
+  return sizeof(TString);
+}
+
 
 
 /*
 /*
 ** creates a new string object
 ** creates a new string object
@@ -153,11 +167,11 @@ static TString *createstrobj (lua_State *L, size_t totalsize, int tag,
 
 
 
 
 TString *luaS_createlngstrobj (lua_State *L, size_t l) {
 TString *luaS_createlngstrobj (lua_State *L, size_t l) {
-  size_t totalsize = sizestrlng(l);
+  size_t totalsize = luaS_sizelngstr(l, LSTRREG);
   TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed);
   TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed);
   ts->u.lnglen = l;
   ts->u.lnglen = l;
-  ts->shrlen = -1;  /* signals that it is a long string */
-  ts->contents = cast_charp(ts) + sizeof(TString);
+  ts->shrlen = LSTRREG;  /* signals that it is a regular long string */
+  ts->contents = cast_charp(ts) + offsetof(TString, falloc);
   ts->contents[l] = '\0';  /* ending 0 */
   ts->contents[l] = '\0';  /* ending 0 */
   return ts;
   return ts;
 }
 }
@@ -275,3 +289,61 @@ Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue) {
   return u;
   return u;
 }
 }
 
 
+
+/*
+** Arguments and result for the functions that create external strings,
+** packed in a structure so that they can be passed through a single
+** 'void *' parameter.
+*/
+struct NewExt {
+  int kind;  /* kind of long string (set only for long strings) */
+  const char *s;  /* string content (set only for short strings) */
+  size_t len;  /* string length (set only for short strings) */
+  TString *ts;  /* output */
+};
+
+
+/*
+** Create the object for an external long string. No space is reserved
+** for the content (length 0 is used to compute the size); the size of
+** the object depends only on the kind stored in the 'NewExt' structure
+** received in 'ud'. The new object is returned in field 'ts'.
+*/
+static void f_newext (lua_State *L, void *ud) {
+  struct NewExt *args = cast(struct NewExt *, ud);
+  args->ts = createstrobj(L, luaS_sizelngstr(0, args->kind), LUA_VLNGSTR,
+                             G(L)->seed);
+}
+
+
+/*
+** Internalize the short string described by fields 's' and 'len' of
+** the 'NewExt' structure received in 'ud', leaving the resulting
+** string in field 'ts'.
+*/
+static void f_pintern (lua_State *L, void *ud) {
+  struct NewExt *args = cast(struct NewExt *, ud);
+  const char *str = args->s;
+  size_t l = args->len;
+  args->ts = internshrstr(L, str, l);
+}
+
+
+TString *luaS_newextlstr (lua_State *L,  /* new string over buffer 's' */
+	          const char *s, size_t len, lua_Alloc falloc, void *ud) {
+  struct NewExt ne;
+  if (len <= LUAI_MAXSHORTLEN) {  /* short string? */
+    ne.s = s; ne.len = len;
+    if (!falloc)
+      f_pintern(L, &ne);  /* just internalize string */
+    else {  /* buffer must be freed whether or not interning succeeds */
+      int status = luaD_rawrunprotected(L, f_pintern, &ne);
+      (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* free external string */
+      if (status != LUA_OK)  /* memory error? */
+        luaM_error(L);  /* re-raise memory error */
+    }
+    return ne.ts;  /* the internalized string */
+  }
+  /* "normal" case: long strings (content stays in the external buffer) */
+  if (!falloc) {  /* no buffer to free: no need for a protected call */
+    ne.kind = LSTRFIX;
+    f_newext(L, &ne);  /* just create header */
+  }
+  else {  /* buffer must be freed if the header cannot be created */
+    ne.kind = LSTRMEM;
+    if (luaD_rawrunprotected(L, f_newext, &ne) != LUA_OK) {  /* mem. error? */
+      (*falloc)(ud, cast_voidp(s), len + 1, 0);  /* free external string */
+      luaM_error(L);  /* re-raise memory error */
+    }
+    ne.ts->falloc = falloc;  /* only 'LSTRMEM' headers have these fields */
+    ne.ts->ud = ud;
+  }
+  ne.ts->shrlen = ne.kind;  /* kind of long string goes in 'shrlen' */
+  ne.ts->u.lnglen = len;
+  ne.ts->contents = cast_charp(s);  /* use the external buffer; no copy */
+  return ne.ts;
+}
+
+

+ 3 - 7
lstring.h

@@ -26,12 +26,6 @@
 #define sizestrshr(l)  \
 #define sizestrshr(l)  \
 	(offsetof(TString, contents) + ((l) + 1) * sizeof(char))
 	(offsetof(TString, contents) + ((l) + 1) * sizeof(char))
 
 
-/*
-** Size of a long TString: Size of the header plus space for the string
-** itself (including final '\0').
-*/
-#define sizestrlng(l)	(sizeof(TString) + ((l) + 1) * sizeof(char))
-
 
 
 #define luaS_newliteral(L, s)	(luaS_newlstr(L, "" s, \
 #define luaS_newliteral(L, s)	(luaS_newlstr(L, "" s, \
                                  (sizeof(s)/sizeof(char))-1))
                                  (sizeof(s)/sizeof(char))-1))
@@ -60,6 +54,8 @@ LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue);
 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
 LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l);
 LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);
 LUAI_FUNC TString *luaS_new (lua_State *L, const char *str);
 LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
 LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l);
-
+LUAI_FUNC TString *luaS_newextlstr (lua_State *L,
+		const char *s, size_t len, lua_Alloc falloc, void *ud);
+LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind);
 
 
 #endif
 #endif

+ 33 - 0
ltests.c

@@ -1277,6 +1277,37 @@ static int checkpanic (lua_State *L) {
 }
 }
 
 
 
 
+/*
+** Create an external string without a deallocation function, giving
+** the content of the string argument as its buffer.
+** NOTE(review): the buffer belongs to the argument string; presumably
+** the caller must keep that string alive while the result is in use.
+*/
+static int externKstr (lua_State *L) {
+  size_t l;
+  const char *buff = luaL_checklstring(L, 1, &l);
+  lua_pushextlstring(L, buff, l, NULL, NULL);  /* no 'falloc', no 'ud' */
+  return 1;  /* the new string */
+}
+
+
+/*
+** Copy the content of the string argument to a fresh buffer and
+** create an external string over that buffer. The buffer is created
+** with the allocation function of the Lua state, and that same
+** function (with its user data) is given to Lua to free the buffer.
+*/
+static int externstr (lua_State *L) {
+  size_t len;
+  void *ud;
+  const char *s = luaL_checklstring(L, 1, &len);
+  lua_Alloc allocf = lua_getallocf(L, &ud);  /* state's allocator */
+  char *copy = cast_charp((*allocf)(ud, NULL, 0, len + 1));  /* new buffer */
+  if (copy != NULL) {  /* got the buffer? */
+    /* fill the buffer with the string content plus its ending 0 */
+    memcpy(copy, s, (len + 1) * sizeof(char));
+    /* Lua is now responsible for freeing the buffer */
+    lua_pushextlstring(L, copy, len, allocf, ud);
+    return 1;
+  }
+  /* else memory error */
+  lua_pushliteral(L, "not enough memory");
+  return lua_error(L);  /* raise a memory error */
+}
+
 
 
 /*
 /*
 ** {====================================================================
 ** {====================================================================
@@ -1949,6 +1980,8 @@ static const struct luaL_Reg tests_funcs[] = {
   {"udataval", udataval},
   {"udataval", udataval},
   {"unref", unref},
   {"unref", unref},
   {"upvalue", upvalue},
   {"upvalue", upvalue},
+  {"externKstr", externKstr},
+  {"externstr", externstr},
   {NULL, NULL}
   {NULL, NULL}
 };
 };
 
 

+ 2 - 0
lua.h

@@ -244,6 +244,8 @@ LUA_API void        (lua_pushnil) (lua_State *L);
 LUA_API void        (lua_pushnumber) (lua_State *L, lua_Number n);
 LUA_API void        (lua_pushnumber) (lua_State *L, lua_Number n);
 LUA_API void        (lua_pushinteger) (lua_State *L, lua_Integer n);
 LUA_API void        (lua_pushinteger) (lua_State *L, lua_Integer n);
 LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len);
 LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len);
+LUA_API const char *(lua_pushextlstring) (lua_State *L,
+		const char *s, size_t len, lua_Alloc falloc, void *ud);
 LUA_API const char *(lua_pushstring) (lua_State *L, const char *s);
 LUA_API const char *(lua_pushstring) (lua_State *L, const char *s);
 LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt,
 LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt,
                                                       va_list argp);
                                                       va_list argp);

+ 34 - 0
manual/manual.of

@@ -3908,6 +3908,40 @@ This function is equivalent to @Lid{lua_pushcclosure} with no upvalues.
 
 
 }
 }
 
 
+@APIEntry{const char *(lua_pushextlstring) (lua_State *L,
+                const char *s, size_t len, lua_Alloc falloc, void *ud);|
+@apii{0,1,m}
+
+Creates an @emphx{external string},
+that is, a string that uses memory not managed by Lua.
+The pointer @id{s} points to the external buffer
+holding the string content,
+and @id{len} is the length of the string.
+The string should have a zero at its end,
+that is, the condition @T{s[len] == '\0'} should hold.
+
+If @id{falloc} is different from @id{NULL},
+that function will be called by Lua
+when the external buffer is no longer needed.
+The contents of the buffer should not change before this call.
+The function will be called with the given @id{ud},
+the string @id{s} as the block,
+the length plus one (to account for the ending zero) as the old size,
+and 0 as the new size.
+
+Lua always @x{internalizes} strings with lengths up to 40 characters.
+So, for strings in that range,
+this function will immediately internalize the string
+and call @id{falloc} to free the buffer.
+
+Even when using an external buffer,
+Lua still has to allocate a header for the string.
+In case of a memory-allocation error,
+Lua will call @id{falloc} before raising the error.
+
+}
+
+
 @APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);|
 @APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);|
 @apii{0,1,v}
 @apii{0,1,v}
 
 

+ 23 - 3
testes/strings.lua

@@ -157,6 +157,12 @@ else   -- compatible coercion
   assert(tostring(-1203 + 0.0) == "-1203")
   assert(tostring(-1203 + 0.0) == "-1203")
 end
 end
 
 
+
+-- Textual form of the address of a value, as produced by the
+-- '%p' format option.
+local function topointer (s)
+  local address = string.format("%p", s)
+  return address
+end
+
+
 do  -- tests for '%p' format
 do  -- tests for '%p' format
   -- not much to test, as C does not specify what '%p' does.
   -- not much to test, as C does not specify what '%p' does.
   -- ("The value of the pointer is converted to a sequence of printing
   -- ("The value of the pointer is converted to a sequence of printing
@@ -180,18 +186,18 @@ do  -- tests for '%p' format
 
 
   do
   do
     local t1 = {}; local t2 = {}
     local t1 = {}; local t2 = {}
-    assert(string.format("%p", t1) ~= string.format("%p", t2))
+    assert(topointer(t1) ~= topointer(t2))
   end
   end
 
 
   do     -- short strings are internalized
   do     -- short strings are internalized
     local s1 = string.rep("a", 10)
     local s1 = string.rep("a", 10)
     local s2 = string.rep("aa", 5)
     local s2 = string.rep("aa", 5)
-  assert(string.format("%p", s1) == string.format("%p", s2))
+  assert(topointer(s1) == topointer(s2))
   end
   end
 
 
   do     -- long strings aren't internalized
   do     -- long strings aren't internalized
     local s1 = string.rep("a", 300); local s2 = string.rep("a", 300)
     local s1 = string.rep("a", 300); local s2 = string.rep("a", 300)
-    assert(string.format("%p", s1) ~= string.format("%p", s2))
+    assert(topointer(s1) ~= topointer(s2))
   end
   end
 end
 end
 
 
@@ -521,6 +527,20 @@ else
   testpfs("P", str, {})
   testpfs("P", str, {})
 end
 end
 
 
+if T == nil then
+  (Message or print)('\n >>> testC not active: skipping external strings tests <<<\n')
+else
+  print("testing external strings")
+  -- short strings: each result gets its own local, instead of
+  -- declaring the same local twice in one scope
+  local kshort = T.externKstr("hello")   -- external fixed short string
+  assert(kshort == "hello")
+  local mshort = T.externstr("hello")   -- external allocated short string
+  assert(mshort == "hello")
+  -- long strings: 'x' stays alive, as 'y' is created from its content
+  local x = string.rep("a", 100)   -- long string
+  local y = T.externKstr(x)   -- external fixed long string
+  assert(y == x)
+  local z = T.externstr(x)   -- external allocated long string
+  assert(z == y)
+end
 
 
 print('OK')
 print('OK')