Преглед изворног кода

UTF-8 encoding exported as format '%U' in 'lua_pushfstring'

Roberto Ierusalimschy пре 11 година
родитељ
комит
4ea60463f5
3 измењена фајла са 37 додавања и 21 уклањањем
  1. 7 18
      llex.c
  2. 26 2
      lobject.c
  3. 4 1
      lobject.h

+ 7 - 18
llex.c

@@ -1,5 +1,5 @@
 /*
-** $Id: llex.c,v 2.71 2014/01/31 15:14:22 roberto Exp roberto $
+** $Id: llex.c,v 2.72 2014/02/04 18:57:34 roberto Exp roberto $
 ** Lexical Analyzer
 ** See Copyright Notice in lua.h
 */
@@ -359,22 +359,11 @@ static unsigned int readutf8esc (LexState *ls) {
 }
 
 
-static void utf8esc (LexState *ls, unsigned int r) {
-  if (r < 0x80)  /* ascii? */
-    save(ls, r);
-  else {  /* need continuation bytes */
-    int buff[4];  /* to store continuation bytes */
-    int n = 0;  /* number of continuation bytes */
-    unsigned int mfb = 0x3f;  /* maximum that fits in first byte */
-    do {
-      buff[n++] = 0x80 | (r & 0x3f);  /* add continuation byte */
-      r >>= 6;  /* remove added bits */
-      mfb >>= 1;  /* now there is one less bit in first byte */
-    } while (r > mfb);  /* needs continuation byte? */
-    save(ls, (~mfb << 1) | r);  /* add first byte */
-    while (n-- > 0)  /* add 'buff' to string, reversed */
-      save(ls, buff[n]);
-  }
+static void utf8esc (LexState *ls) {  /* save UTF-8 bytes of escape value */
+  char buff[UTF8BUFFSZ];  /* filled from its end by 'luaO_utf8esc' */
+  int n = luaO_utf8esc(buff, readutf8esc(ls));  /* 'n' bytes were written */
+  for (; n > 0; n--)  /* add 'buff' to string */
+    save(ls, buff[UTF8BUFFSZ - n]);  /* first byte is at UTF8BUFFSZ - n */
 }
 
 
@@ -414,7 +403,7 @@ static void read_string (LexState *ls, int del, SemInfo *seminfo) {
           case 't': c = '\t'; goto read_save;
           case 'v': c = '\v'; goto read_save;
           case 'x': c = readhexaesc(ls); goto read_save;
-          case 'u': utf8esc(ls, readutf8esc(ls));  goto no_save;
+          case 'u': utf8esc(ls);  goto no_save;
           case '\n': case '\r':
             inclinenumber(ls); c = '\n'; goto only_save;
           case '\\': case '\"': case '\'':

+ 26 - 2
lobject.c

@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.c,v 2.71 2013/12/30 20:47:58 roberto Exp roberto $
+** $Id: lobject.c,v 2.72 2014/01/27 13:34:32 roberto Exp roberto $
 ** Some generic functions over Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -284,12 +284,30 @@ int luaO_str2int (const char *s, size_t len, lua_Integer *result) {
 }
 
 
+int luaO_utf8esc (char *buff, unsigned int x) {  /* encode 'x' at end of 'buff' */
+  int n = 1;  /* number of bytes put in buffer (backwards) */
+  if (x < 0x80)  /* ascii? */
+    buff[UTF8BUFFSZ - 1] = x;  /* single byte goes in the last position */
+  else {  /* need continuation bytes */
+    unsigned int mfb = 0x3f;  /* maximum that fits in first byte */
+    do {
+      buff[UTF8BUFFSZ - (n++)] = 0x80 | (x & 0x3f);  /* add continuation byte */
+      x >>= 6;  /* remove added bits */
+      mfb >>= 1;  /* now there is one less bit available in first byte */
+    } while (x > mfb);  /* still needs continuation byte? */
+    buff[UTF8BUFFSZ - n] = (~mfb << 1) | x;  /* add first byte */
+  }
+  return n;  /* result occupies buff[UTF8BUFFSZ - n .. UTF8BUFFSZ - 1] */
+}
+
+
 static void pushstr (lua_State *L, const char *str, size_t l) {
   setsvalue2s(L, L->top++, luaS_newlstr(L, str, l));
 }
 
 
-/* this function handles only `%d', `%c', %f, %p, and `%s' formats */
+/* this function handles only '%d', '%c', '%f', '%p', and '%s' 
+   conventional formats, plus Lua-specific '%L' and '%U' */
 const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
   int n = 0;
   for (;;) {
@@ -328,6 +346,12 @@ const char *luaO_pushvfstring (lua_State *L, const char *fmt, va_list argp) {
         pushstr(L, buff, l);
         break;
       }
+      case 'U': {
+        char buff[UTF8BUFFSZ];
+        int l = luaO_utf8esc(buff, va_arg(argp, int));
+        pushstr(L, buff + UTF8BUFFSZ - l, l);
+        break;
+      }
       case '%': {
         pushstr(L, "%", 1);
         break;

+ 4 - 1
lobject.h

@@ -1,5 +1,5 @@
 /*
-** $Id: lobject.h,v 2.82 2013/09/05 19:31:49 roberto Exp roberto $
+** $Id: lobject.h,v 2.83 2013/12/04 12:15:22 roberto Exp roberto $
 ** Type definitions for Lua objects
 ** See Copyright Notice in lua.h
 */
@@ -479,9 +479,12 @@ typedef struct Table {
 
 LUAI_DDEC const TValue luaO_nilobject_;
 
+/* size of buffer for 'luaO_utf8esc' function */
+#define UTF8BUFFSZ	8
 
 LUAI_FUNC int luaO_int2fb (unsigned int x);
 LUAI_FUNC int luaO_fb2int (int x);
+LUAI_FUNC int luaO_utf8esc (char *buff, unsigned int x);
 LUAI_FUNC int luaO_ceillog2 (unsigned int x);
 LUAI_FUNC void luaO_arith (lua_State *L, int op, const TValue *p1,
                            const TValue *p2, TValue *res);