Browse Source

String split/loop are now Unicode-savvy. Unit tests added.

Marco Bambini 6 years ago
parent
commit
3941a4345c

+ 14 - 10
src/runtime/gravity_core.c

@@ -2451,11 +2451,13 @@ static bool string_split (gravity_vm *vm, gravity_value_t *args, uint16_t nargs,
     char *original = string->s;
     uint32_t slen = string->len;
 
-    // If the separator is empty, then we split the string at every character
+    // if the separator is empty, then we split the string at every character
     if (seplen == 0) {
-    for (uint32_t i=0; i<slen; ++i) {
-            marray_push(gravity_value_t, list->array, VALUE_FROM_STRING(vm, original, 1));
-            original += 1;
+        for (uint32_t i=0; i<slen;) {
+            uint32_t n = utf8_charbytes(original, 0);
+            marray_push(gravity_value_t, list->array, VALUE_FROM_STRING(vm, original, n));
+            original += n;
+            i += n;
         }
         gravity_gc_setenabled(vm, true);
         RETURN_VALUE(VALUE_FROM_OBJECT(list), rindex);
@@ -2536,22 +2538,24 @@ static bool string_iterator (gravity_vm *vm, gravity_value_t *args, uint16_t nar
     if (!VALUE_ISA_INT(value)) RETURN_ERROR("Iterator expects a numeric value here.");
 
     // compute new value
-    gravity_int_t n = value.n;
-    if (n+1 < string->len) {
-        ++n;
+    gravity_int_t index = value.n;
+    if (index+1 < string->len) {
+        uint32_t n = utf8_charbytes(string->s + index, 0);
+        index += n;
     } else {
         RETURN_VALUE(VALUE_FROM_FALSE, rindex);
     }
 
     // return new iterator
-    RETURN_VALUE(VALUE_FROM_INT(n), rindex);
+    RETURN_VALUE(VALUE_FROM_INT(index), rindex);
 }
 
 static bool string_iterator_next (gravity_vm *vm, gravity_value_t *args, uint16_t nargs, uint32_t rindex) {
     #pragma unused(vm, nargs)
     gravity_string_t *string = VALUE_AS_STRING(GET_VALUE(0));
-    register int32_t index = (int32_t)VALUE_AS_INT(GET_VALUE(1));
-    RETURN_VALUE(VALUE_FROM_STRING(vm, string->s + index, 1), rindex);
+    int32_t index = (int32_t)VALUE_AS_INT(GET_VALUE(1));
+    uint32_t n = utf8_charbytes(string->s + index, 0);
+    RETURN_VALUE(VALUE_FROM_STRING(vm, string->s + index, n), rindex);
 }
 
 static bool string_exec (gravity_vm *vm, gravity_value_t *args, uint16_t nargs, uint32_t rindex) {

+ 2 - 2
src/shared/gravity_value.h

@@ -66,8 +66,8 @@
 extern "C" {
 #endif
 
-#define GRAVITY_VERSION						"0.6.9"     // git tag 0.6.9
-#define GRAVITY_VERSION_NUMBER				0x000609    // git push --tags
+#define GRAVITY_VERSION						"0.7.0"     // git tag 0.7.0
+#define GRAVITY_VERSION_NUMBER				0x000700    // git push --tags
 #define GRAVITY_BUILD_DATE                  __DATE__
 
 #ifndef GRAVITY_ENABLE_DOUBLE

+ 2 - 2
src/utils/gravity_utils.c

@@ -480,8 +480,8 @@ inline uint32_t utf8_charbytes (const char *s, uint32_t i) {
 
 uint32_t utf8_nbytes (uint32_t n) {
     if (n <= 0x7f) return 1;        // 127
-    if (n <= 0x7ff) return 2;        // 2047
-    if (n <= 0xffff) return 3;        // 65535
+    if (n <= 0x7ff) return 2;       // 2047
+    if (n <= 0xffff) return 3;      // 65535
     if (n <= 0x10ffff) return 4;    // 1114111
 
     return 0;

+ 10 - 0
test/unittest/split_unicode.gravity

@@ -0,0 +1,10 @@
+#unittest {
+    name: "Unicode string split.";
+    result: "£";
+};
+
+func main() {
+    var s = "£99.89";
+    var r = s.split("");
+    return r[0];
+}

+ 13 - 0
test/unittest/string_unicode_loop.gravity

@@ -0,0 +1,13 @@
+#unittest {
+    name: "Unicode string for loop.";
+    result: "£99£89£";
+};
+
+func main() {
+    var s = "£99£89£";
+    var s2 = "";
+    for (var c in s) {
+        s2 += c;
+    }
+    return s2;
+}