Browse Source

several changes in 'utf8.offset'

Roberto Ierusalimschy 11 years ago
parent
commit
144afa4d47
1 changed file with 27 additions and 22 deletions
  1. 27 22
      lutf8lib.c

+ 27 - 22
lutf8lib.c

@@ -1,5 +1,5 @@
 /*
 /*
-** $Id: lutf8lib.c,v 1.5 2014/04/01 14:39:55 roberto Exp roberto $
+** $Id: lutf8lib.c,v 1.6 2014/04/02 17:01:22 roberto Exp roberto $
 ** Standard library for UTF-8 manipulation
 ** Standard library for UTF-8 manipulation
 ** See Copyright Notice in lua.h
 ** See Copyright Notice in lua.h
 */
 */
@@ -150,41 +150,46 @@ static int utfchar (lua_State *L) {
 
 
 
 
 /*
 /*
-** offset(s, n, [i])  -> index where n-th character *after*
+** offset(s, n, [i])  -> index where n-th character counting from
 **   position 'i' starts; 0 means character at 'i'.
 **   position 'i' starts; 0 means character at 'i'.
 */
 */
 static int byteoffset (lua_State *L) {
 static int byteoffset (lua_State *L) {
   size_t len;
   size_t len;
   const char *s = luaL_checklstring(L, 1, &len);
   const char *s = luaL_checklstring(L, 1, &len);
   int n  = luaL_checkint(L, 2);
   int n  = luaL_checkint(L, 2);
-  lua_Integer posi = u_posrelat(luaL_optinteger(L, 3, 1), len) - 1;
-  luaL_argcheck(L, 0 <= posi && posi <= (lua_Integer)len, 3,
+  lua_Integer posi = (n >= 0) ? 1 : len + 1;
+  posi = u_posrelat(luaL_optinteger(L, 3, posi), len);
+  luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 3,
                    "position out of range");
                    "position out of range");
   if (n == 0) {
   if (n == 0) {
     /* find beginning of current byte sequence */
     /* find beginning of current byte sequence */
     while (posi > 0 && iscont(s + posi)) posi--;
     while (posi > 0 && iscont(s + posi)) posi--;
   }
   }
-  else if (n < 0) {
-    while (n < 0 && posi > 0) {  /* move back */
-      do {  /* find beginning of previous character */
-        posi--;
-      } while (posi > 0 && iscont(s + posi));
-      n++;
-    }
-  }
   else {
   else {
-    n--;  /* do not move for 1st character */
-    while (n > 0 && posi < (lua_Integer)len) {
-      do {  /* find beginning of next character */
-        posi++;
-      } while (iscont(s + posi));  /* ('\0' is not continuation) */
-      n--;
-    }
+    if (iscont(s + posi))
+      luaL_error(L, "initial position is a continuation byte");
+    if (n < 0) {
+       while (n < 0 && posi > 0) {  /* move back */
+         do {  /* find beginning of previous character */
+           posi--;
+         } while (posi > 0 && iscont(s + posi));
+         n++;
+       }
+     }
+     else {
+       n--;  /* do not move for 1st character */
+       while (n > 0 && posi < (lua_Integer)len) {
+         do {  /* find beginning of next character */
+           posi++;
+         } while (iscont(s + posi));  /* (cannot pass final '\0') */
+         n--;
+       }
+     }
   }
   }
-  if (n == 0)
+  if (n == 0)  /* did it find given character? */
     lua_pushinteger(L, posi + 1);
     lua_pushinteger(L, posi + 1);
-  else
-    lua_pushnil(L);  /* no such position */
+  else  /* no such character */
+    lua_pushnil(L);
   return 1;  
   return 1;  
 }
 }