Browse Source

'dumpint' and related functions replaced by 'string.pack'/'string.unpack'

Roberto Ierusalimschy 10 năm trước cách đây
mục cha
commit
c172a4f7c2
1 tập tin đã thay đổi với 340 bổ sung và 157 xóa
  1. 340 157
      lstrlib.c

+ 340 - 157
lstrlib.c

@@ -1,5 +1,5 @@
 /*
 /*
-** $Id: lstrlib.c,v 1.201 2014/08/20 22:06:41 roberto Exp roberto $
+** $Id: lstrlib.c,v 1.202 2014/10/01 11:54:56 roberto Exp roberto $
 ** Standard library for string operations and pattern-matching
 ** Standard library for string operations and pattern-matching
 ** See Copyright Notice in lua.h
 ** See Copyright Notice in lua.h
 */
 */
@@ -950,214 +950,399 @@ static int str_format (lua_State *L) {
 */
 */
 
 
 
 
+/* maximum size for the binary representation of an integer */
+#define MAXINTSIZE	16
+
 /* number of bits in a character */
 /* number of bits in a character */
 #define NB	CHAR_BIT
 #define NB	CHAR_BIT
 
 
 /* mask for one character (NB 1's) */
 /* mask for one character (NB 1's) */
 #define MC	((1 << NB) - 1)
 #define MC	((1 << NB) - 1)
 
 
-/* mask for one character without sign bit ((NB - 1) 1's) */
-#define SM	(MC >> 1)
-
 /* size of a lua_Integer */
 /* size of a lua_Integer */
 #define SZINT	((int)sizeof(lua_Integer))
 #define SZINT	((int)sizeof(lua_Integer))
 
 
-/* maximum size for the binary representation of an integer */
-#define MAXINTSIZE	12
+/* mask with all ones in the most significant byte of a lua_Integer */
+#define HIGHERBYTE	((lua_Unsigned)MC << (NB * (SZINT - 1)))
 
 
 
 
-static union {
+/* dummy union to get native endianness */
+static const union {
   int dummy;
   int dummy;
   char little;  /* true iff machine is little endian */
   char little;  /* true iff machine is little endian */
-} const nativeendian = {1};
+} nativeendian = {1};
 
 
 
 
-static int getendian (lua_State *L, int arg) {
-  const char *endian = luaL_optstring(L, arg,
-                             (nativeendian.little ? "l" : "b"));
-  if (*endian == 'n')  /* native? */
-    return nativeendian.little;
-  luaL_argcheck(L, *endian == 'l' || *endian == 'b', arg,
-                   "endianness must be 'l'/'b'/'n'");
-  return (*endian == 'l');
-}
+/* dummy structure to get native alignment requirements */
+struct cD {
+  char c;
+  union { double d; void *p; lua_Integer i; lua_Number n; } u;
+};
 
 
+#define MAXALIGN	(offsetof(struct cD, u))
 
 
-static int getintsize (lua_State *L, int arg) {
-  lua_Integer size = luaL_optinteger(L, arg, 0);
-  if (size == 0) size = SZINT;
-  luaL_argcheck(L, 1 <= size && size <= MAXINTSIZE, arg,
-                   "integer size out of valid range");
-  return (int)size;
-}
 
 
+/*
+** Union for serializing floats
+*/
+typedef union Ftypes {
+  float f;
+  double d;
+  lua_Number n;
+  char buff[5 * sizeof(lua_Number)];  /* enough for any float type */
+} Ftypes;
 
 
-/* mask for all ones in last byte in a lua Integer */
-#define HIGHERBYTE    ((lua_Unsigned)MC << (NB * (SZINT - 1)))
 
 
+/*
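+** State shared while parsing a pack/unpack format: the Lua state
+** (used to raise errors), the endianness, and the maximum alignment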
+*/
+typedef struct Header {
+  lua_State *L;
+  int islittle;
+  int maxalign;
+} Header;
 
 
-static int dumpint (char *buff, lua_Integer m, int littleendian, int size) {
-  int i;
-  lua_Unsigned n = (lua_Unsigned)m;
-  lua_Unsigned mask = (m >= 0) ? 0 : HIGHERBYTE;  /*  sign extension */
-  if (littleendian) {
-    for (i = 0; i < size - 1; i++) {
-      buff[i] = (n & MC);
-      n = (n >> NB) | mask;
-    }
-  }
+
+typedef enum KOption {Kint, Kuint, Kfloat, Kchar, Kstring, Kstring0,
+                      Kspace, Kpadding, Kpaddalig} KOption;
+
+
+/*
+** Read an integer numeral from string 'fmt' or return 'df' if
+** there is no numeral
+*/
+static int digit (int c) { return '0' <= c && c <= '9'; }
+
+static int getnum (const char **fmt, int df) {
+  if (!digit(**fmt))  /* no number? */
+    return df;  /* return default value */
   else {
   else {
-    for (i = size - 1; i > 0; i--) {
-      buff[i] = (n & MC);
-      n = (n >> NB) | mask;
-    }
-  }
-  buff[i] = (n & MC);  /* last byte */
-  if (size < SZINT) {  /* need test for overflow? */
-    /* OK if there are only zeros left in higher bytes,
-       or only ones left (excluding non-signal bits in last byte) */
-    return ((n & ~(lua_Unsigned)MC) == 0 ||
-            (n | SM) == ~(lua_Unsigned)0);
+    int a = 0;
+    do {
+      a = a*10 + *((*fmt)++) - '0';
+    } while (digit(**fmt) && a < (INT_MAX/10 - 10));
+    return a;
   }
   }
-  else return 1;  /* no overflow can occur with full size */
 }
 }
 
 
 
 
-static int dumpint_l (lua_State *L) {
-  char buff[MAXINTSIZE];
-  lua_Integer n = luaL_checkinteger(L, 1);
-  int size = getintsize(L, 2);
-  int endian = getendian(L, 3);
-  if (dumpint(buff, n, endian, size))
-    lua_pushlstring(L, buff, size);
-  else
-    luaL_error(L, "integer does not fit into given size (%d)", size);
-  return 1;
+/*
+** Read an integer numeral and raise an error if it is outside
+** the range [1, MAXINTSIZE].
+*/
+static int getnumlimit (Header *h, const char **fmt, int df) {
+  int sz = getnum(fmt, df);
+  if (sz > MAXINTSIZE || sz <= 0)
+    luaL_error(h->L, "integral size (%d) out of limits [1,%d]", sz, MAXINTSIZE);
+  return sz;
 }
 }
 
 
 
 
-/* mask to check higher-order byte + signal bit of next (lower) byte */
-#define HIGHERBYTE1   (HIGHERBYTE | (HIGHERBYTE >> 1))
+/*
+** Read an optional endianness indication ('<'/'>') and
+** return true if the operation should use little endian.
+*/
+static int getendian (const char **fmt) {
+  if (**fmt == '<' || **fmt == '>')  /* explicit endianness? */
+    return (*((*fmt)++) == '<');  /* true iff little endian */
+  else  /* no endian indication */
+    return nativeendian.little;  /* use native */
+}
 
 
 
 
-static int undumpint (const char *buff, lua_Integer *res,
-                      int littleendian, int size) {
-  lua_Unsigned n = 0;
-  int i;
-  for (i = 0; i < size; i++) {
-    if (i >= SZINT) {  /* will throw away a byte? */
-      /* check for overflow: it is OK to throw away leading zeros for a
-         positive number, leading ones for a negative number, and a
-         leading zero byte to allow unsigned integers with a 1 in
-         its "signal bit" */
-      if (!((n & HIGHERBYTE1) == 0 ||  /* zeros for positive number */
-          (n & HIGHERBYTE1) == HIGHERBYTE1 ||  /* ones for negative number */
-          (i == size - 1 && (n & HIGHERBYTE) == 0)))  /* leading zero */
-        return 0;  /* overflow */
-    }
-    n <<= NB;
-    n |= (lua_Unsigned)(unsigned char)buff[littleendian ? size - 1 - i : i];
-  }
-  if (size < SZINT) {  /* need sign extension? */
-    lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1);
-    *res = (lua_Integer)((n ^ mask) - mask);  /* do sign extension */
+/*
+** Read and return maximum alignment to be used
+*/
+static int getalignment (Header *h, const char **fmt) {
+  if (**fmt == '!') {  /* explicit alignment? */
+    (*fmt)++;  /* skip '!' */
+    return getnumlimit(h, fmt, MAXALIGN);
   }
   }
   else
   else
-    *res = (lua_Integer)n;
-  return 1;
+    return 1;  /* default is no alignment */
 }
 }
 
 
 
 
-static int undumpint_l (lua_State *L) {
-  lua_Integer res;
-  size_t len;
-  const char *s = luaL_checklstring(L, 1, &len);
-  lua_Integer pos = posrelat(luaL_optinteger(L, 2, 1), len);
-  int size = getintsize(L, 3);
-  int endian = getendian(L, 4);
-  luaL_argcheck(L, 1 <= pos && (size_t)pos + size - 1 <= len, 1,
-                   "string too short");
-  if(undumpint(s + pos - 1, &res, endian, size))
-    lua_pushinteger(L, res);
-  else
-    luaL_error(L, "result does not fit into a Lua integer");
-  return 1;
+/*
+** Read optional endianness and alignment indications
+*/
+static void getheader (lua_State *L, Header *h, const char **fmt) {
+  h->L = L;
+  h->islittle = getendian(fmt);
+  h->maxalign = getalignment(h, fmt);
 }
 }
 
 
 
 
-static void correctendianness (lua_State *L, char *b, int size, int endianarg) {
-  int endian = getendian(L, endianarg);
-  if (endian != nativeendian.little) {  /* not native endianness? */
-    int i = 0;
-    while (i < --size) {
-      char temp = b[i];
-      b[i++] = b[size];
-      b[size] = temp;
+/*
+** Read and classify next option. 'size' is filled with option's size.
+*/
+static KOption getoption (Header *h, const char **fmt, int *size) {
+  int opt = *((*fmt)++);
+  switch (opt) {
+    case 'b': *size = sizeof(char); return Kint;
+    case 'B': *size = sizeof(char); return Kuint;
+    case 'h': *size = sizeof(short); return Kint;
+    case 'H': *size = sizeof(short); return Kuint;
+    case 'l': *size = sizeof(long); return Kint;
+    case 'L': *size = sizeof(long); return Kuint;
+    case 'j': *size = sizeof(lua_Integer); return Kint;
+    case 'J': *size = sizeof(lua_Integer); return Kuint;
+    case 'T': *size = sizeof(size_t); return Kuint;
+    case 'f': *size = sizeof(float); return Kfloat;
+    case 'd': *size = sizeof(double); return Kfloat;
+    case 'n': *size = sizeof(lua_Number); return Kfloat;
+    case 'i': *size = getnumlimit(h, fmt, sizeof(int)); return Kint;
+    case 'I': *size = getnumlimit(h, fmt, sizeof(int)); return Kuint;
+    case 's': *size = getnumlimit(h, fmt, sizeof(size_t)); return Kstring;
+    case 'c': *size = getnum(fmt, 1); return Kchar;
+    case 'z': *size = 0; return Kstring0;
+    case 'x': *size = 1; return Kpadding;
+    case 'X': *size = 0; return Kpaddalig;
+    case ' ': *size = 0; return Kspace;
+    default: {
+      *size = 0;  /* to avoid warnings */
+      luaL_error(h->L, "invalid format option '%c'", opt);
+      return (KOption)0;
     }
     }
   }
   }
 }
 }
 
 
 
 
-static int getfloatsize (lua_State *L, int arg) {
-  const char *size = luaL_optstring(L, arg, "n");
-  if (*size == 'n') return sizeof(lua_Number);
-  luaL_argcheck(L, *size == 'd' || *size == 'f', arg,
-                   "size must be 'f'/'d'/'n'");
-  return (*size == 'd' ? sizeof(double) : sizeof(float));
+/*
+** Read, classify, and fill other details about the next option.
+** 'psize' is filled with option's size, 'notoalign' with its
+** alignment requirements.
+** Local variable 'size' gets the size to be aligned. (Kpadal option
+** always gets its full alignment, other options are limited by 
+** the maximum alignment ('maxalign). Kchar option needs no aligment
+** despite its size.
+*/
+static KOption getdetails (Header *h, size_t totalsize,
+                           const char **fmt, int *psize, int *ntoalign) {
+  KOption opt = getoption(h, fmt, psize);
+  int align = *psize;  /* usually, alignment follows size */
+  if (opt == Kpaddalig) {
+    if (**fmt == '\0' || strchr("Xc ", **fmt) != NULL)
+      luaL_argerror(h->L, 1, "invalid next option for option 'X'");
+    getoption(h, fmt, &align);  /* get next element's size for alignment */
+  }
+  if (align <= 1 || opt == Kchar)  /* need no alignment? */
+    *ntoalign = 0;
+  else {
+    if (align > h->maxalign)
+      align = h->maxalign;
+    if ((align & (align - 1)) != 0)  /* is 'align' not a power of 2? */
+      luaL_argerror(h->L, 1, "format asks for alignment not power of 2");
+    *ntoalign = (align - (int)(totalsize & (align - 1))) & (align - 1);
+  }
+  return opt;
 }
 }
 
 
 
 
-static int dumpfloat_l (lua_State *L) {
-  float f;  double d;
-  char *pn;  /* pointer to number */
-  lua_Number n = luaL_checknumber(L, 1);
-  int size = getfloatsize(L, 2);
-  if (size == sizeof(lua_Number))
-    pn = (char*)&n;
-  else if (size == sizeof(float)) {
-    f = (float)n;
-    pn = (char*)&f;
-  }  
-  else {  /* native lua_Number may be neither float nor double */
-    lua_assert(size == sizeof(double));
-    d = (double)n;
-    pn = (char*)&d;
+static void packint (luaL_Buffer *b, lua_Unsigned n,
+                     int islittle, int size, lua_Unsigned mask) {
+  char *buff = luaL_prepbuffsize(b, size);
+  int i;
+  for (i = 0; i < size - 1; i++) {
+    buff[islittle ? i : size - 1 - i] = (n & MC);
+    n = (n >> NB) | mask;
   }
   }
-  correctendianness(L, pn, size, 3);
-  lua_pushlstring(L, pn, size);
-  return 1;
+  buff[islittle ? i : size - 1 - i] = (n & MC);
+  luaL_addsize(b, size);  /* add result to buffer */
 }
 }
 
 
 
 
-static int undumpfloat_l (lua_State *L) {
-  lua_Number res;
-  size_t len;
-  const char *s = luaL_checklstring(L, 1, &len);
-  lua_Integer pos = posrelat(luaL_optinteger(L, 2, 1), len);
-  int size = getfloatsize(L, 3);
-  luaL_argcheck(L, 1 <= pos && (size_t)pos + size - 1 <= len, 1,
-                   "string too short");
-  if (size == sizeof(lua_Number)) {
-    memcpy(&res, s + pos - 1, size); 
-    correctendianness(L, (char*)&res, size, 4);
+/*
+** Copy 'size' bytes from 'src' to 'dest', correcting endianness if
+** given 'islittle' is different from native endianness.
+*/
+static void copywithendian (volatile char *dest, volatile const char *src,
+                            int size, int islittle) {
+  if (islittle == nativeendian.little) {
+    while (size-- != 0)
+      *(dest++) = *(src++);
   }
   }
-  else if (size == sizeof(float)) {
-    float f;
-    memcpy(&f, s + pos - 1, size); 
-    correctendianness(L, (char*)&f, size, 4);
-    res = (lua_Number)f;
-  }  
-  else {  /* native lua_Number may be neither float nor double */
-    double d;
-    lua_assert(size == sizeof(double));
-    memcpy(&d, s + pos - 1, size); 
-    correctendianness(L, (char*)&d, size, 4);
-    res = (lua_Number)d;
+  else {
+    dest += size - 1;
+    while (size-- != 0)
+      *(dest--) = *(src++);
+  }
+}
+
+
+static int str_pack (lua_State *L) {
+  luaL_Buffer b;
+  Header h;
+  const char *fmt = luaL_checkstring(L, 1);  /* format string */
+  int arg = 1;  /* current argument to pack */
+  size_t totalsize = 0;  /* accumulate total size of result */
+  getheader(L, &h, &fmt);
+  lua_pushnil(L);  /* mark to separate arguments from string buffer */
+  luaL_buffinit(L, &b);
+  while (*fmt != '\0') {
+    int size, ntoalign;
+    KOption opt = getdetails(&h, totalsize, &fmt, &size, &ntoalign);
+    totalsize += ntoalign + size;
+    while (ntoalign-- > 0) luaL_addchar(&b, '\0');  /* fill alignment */
+    arg++;
+    switch (opt) {
+      case Kint: {  /* signed integers */
+        lua_Integer n = luaL_checkinteger(L, arg);
+        lua_Unsigned mask = (n < 0) ? HIGHERBYTE : 0;  /*  sign extension */
+        if (size < SZINT) {  /* need overflow check? */
+          lua_Integer lim = (lua_Integer)1 << ((size * NB) - 1);
+          luaL_argcheck(L, -lim <= n && n < lim, arg, "integer overflow");
+        }
+        packint(&b, (lua_Unsigned)n, h.islittle, size, mask);
+        break;
+      }
+      case Kuint: {  /* unsigned integers */
+        lua_Integer n = luaL_checkinteger(L, arg);
+        if (size < SZINT)  /* need overflow check? */
+          luaL_argcheck(L, (lua_Unsigned)n < ((lua_Unsigned)1 << (size * NB)),
+                           arg, "unsigned overflow");
+        packint(&b, (lua_Unsigned)n, h.islittle, size, 0);
+        break;
+      }
+      case Kfloat: {  /* floating-point options */
+        volatile Ftypes u;
+        char *buff = luaL_prepbuffsize(&b, size);
+        lua_Number n = luaL_checknumber(L, arg);  /* get argument */
+        if (size == sizeof(u.f)) u.f = (float)n;  /* copy it into 'u' */
+        else if (size == sizeof(u.d)) u.d = (double)n;
+        else u.n = n;
+        /* move 'u' to final result, correcting endianness if needed */
+        copywithendian(buff, u.buff, size, h.islittle);
+        luaL_addsize(&b, size);
+        break;
+      }
+      case Kchar: {  /* fixed-size string */
+        size_t len;
+        const char *s = luaL_checklstring(L, arg, &len);
+        luaL_argcheck(L, len == (size_t)size, arg, "wrong length");
+        luaL_addlstring(&b, s, size);
+        break;
+      }
+      case Kstring: {  /* strings with length count */
+        size_t len;
+        const char *s = luaL_checklstring(L, arg, &len);
+        luaL_argcheck(L, size >= (int)sizeof(size_t) ||
+                         len < ((size_t)1 << (size * NB)),
+                         arg, "string length does not fit in given size");
+        packint(&b, (lua_Unsigned)len, h.islittle, size, 0);  /* pack length */
+        luaL_addlstring(&b, s, len);
+        totalsize += len;
+        break;
+      }
+      case Kstring0: {  /* zero-terminated string */
+        size_t len;
+        const char *s = luaL_checklstring(L, arg, &len);
+        luaL_argcheck(L, strlen(s) == len, arg, "string contains zeros");
+        luaL_addlstring(&b, s, len);
+        luaL_addchar(&b, '\0');  /* add zero at the end */
+        totalsize += len + 1;
+        break;
+      }
+      case Kpadding: luaL_addchar(&b, '\0');  /* go through */
+      case Kpaddalig: case Kspace:
+        arg--;  /* undo increment */
+        break;
+    }
   }
   }
-  lua_pushnumber(L, res);
+  luaL_pushresult(&b);
   return 1;
   return 1;
 }
 }
 
 
+
+static lua_Integer unpackint (lua_State *L, const char *str,
+                              int islittle, int size, int issigned) {
+  lua_Unsigned res = 0;
+  int i;
+  int limit = (size  <= SZINT) ? size : SZINT;
+  for (i = limit - 1; i >= 0; i--) {
+    res <<= NB;
+    res |= (lua_Unsigned)(unsigned char)str[islittle ? i : size - 1 - i];
+  }
+  if (size < SZINT) {  /* real size smaller than lua_Integer? */
+    if (issigned) {  /* needs sign extension? */
+      lua_Unsigned mask = (lua_Unsigned)1 << (size*NB - 1);
+      res = ((res ^ mask) - mask);  /* do sign extension */
+    }
+  }
+  else {  /* must check unread bytes */
+    int mask = (!issigned || (lua_Integer)res >= 0) ? 0 : MC;
+    for (i = limit; i < size; i++) {
+      if ((unsigned char)str[islittle ? i : size - 1 - i] != mask)
+        luaL_error(L, "%d-bit integer does not fit into Lua Integer", size);
+    }
+  }
+  return (lua_Integer)res;
+}
+
+
+static int str_unpack (lua_State *L) {
+  Header h;
+  const char *fmt = luaL_checkstring(L, 1);
+  size_t ld;
+  const char *data = luaL_checklstring(L, 2, &ld);
+  size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1;
+  int n = 0;  /* number of results */
+  luaL_argcheck(L, pos <= ld, 3, "initial position out of string");
+  getheader(L, &h, &fmt);
+  while (*fmt) {
+    int size, ntoalign;
+    KOption opt = getdetails(&h, pos, &fmt, &size, &ntoalign);
+    if ((size_t)ntoalign + size > ~pos || pos + ntoalign + size > ld)
+      luaL_argerror(L, 2, "data string too short");
+    pos += ntoalign;
+    /* stack space for item + next position */
+    luaL_checkstack(L, 2, "too many results");
+    n++;
+    switch (opt) {
+      case Kint:
+      case Kuint: {
+        lua_Integer res = unpackint(L, data + pos, h.islittle, size,
+                                       (opt == Kint));
+        lua_pushinteger(L, res);
+        break;
+      }
+      case Kfloat: {
+        volatile Ftypes u;
+        lua_Number num;
+        copywithendian(u.buff, data + pos, size, h.islittle);
+        if (size == sizeof(u.f)) num = (lua_Number)u.f;
+        else if (size == sizeof(u.d)) num = (lua_Number)u.d;
+        else num = u.n;
+        lua_pushnumber(L, num);
+        break;
+      }
+      case Kchar: {
+        lua_pushlstring(L, data + pos, size);
+        break;
+      }
+      case Kstring: {
+        size_t len = (size_t)unpackint(L, data + pos, h.islittle, size, 0);
+        luaL_argcheck(L, pos + len + size <= ld, 2, "data string too short");
+        lua_pushlstring(L, data + pos + size, len);
+        pos += len;
+        break;
+      }
+      case Kstring0: {
+        size_t len = (int)strlen(data + pos);
+        lua_pushlstring(L, data + pos, len);
+        pos += len + 1;  /* skip final '\0' */
+        break;
+      }
+      case Kpaddalig: case Kpadding: case Kspace:
+        n--;  /* undo increment */
+        break;
+    }
+    pos += size;
+  }
+  lua_pushinteger(L, pos + 1);  /* next position */
+  return n + 1;
+}
+
 /* }====================================================== */
 /* }====================================================== */
 
 
 
 
@@ -1176,10 +1361,8 @@ static const luaL_Reg strlib[] = {
   {"reverse", str_reverse},
   {"reverse", str_reverse},
   {"sub", str_sub},
   {"sub", str_sub},
   {"upper", str_upper},
   {"upper", str_upper},
-  {"dumpfloat", dumpfloat_l},
-  {"dumpinteger", dumpint_l},
-  {"undumpfloat", undumpfloat_l},
-  {"undumpinteger", undumpint_l},
+  {"pack", str_pack},
+  {"unpack", str_unpack},
   {NULL, NULL}
   {NULL, NULL}
 };
 };