Browse Source

Switch to json_scanf

PUBLISHED_FROM=866b06f99f2d0b459fad1367f684541ba743163c
Sergey Lyubka 9 years ago
parent
commit
7b66e94f0f
3 changed files with 39 additions and 260 deletions
  1. 15 134
      frozen.c
  2. 4 8
      frozen.h
  3. 20 118
      unit_test.c

+ 15 - 134
frozen.c

@@ -60,14 +60,6 @@ typedef unsigned _int64 uint64_t;
 #define va_copy(x, y) x = y
 #endif
 
-#ifndef FROZEN_REALLOC
-#define FROZEN_REALLOC realloc
-#endif
-
-#ifndef FROZEN_FREE
-#define FROZEN_FREE free
-#endif
-
 #ifndef JSON_MAX_PATH_LEN
 #define JSON_MAX_PATH_LEN 60
 #endif
@@ -75,10 +67,6 @@ typedef unsigned _int64 uint64_t;
 struct frozen {
   const char *end;
   const char *cur;
-  struct json_token *tokens;
-  int max_tokens;
-  int num_tokens;
-  int do_realloc;
 
   /* For callback API */
   char path[JSON_MAX_PATH_LEN];
@@ -100,7 +88,7 @@ struct fstate {
 #define CALL_BACK(fr)                                                       \
   do {                                                                      \
     struct json_token __t = {                                               \
-        fstate.ptr, (fr)->cur - (const char *) fstate.ptr, 0, fstate.type}; \
+        fstate.ptr, (fr)->cur - (const char *) fstate.ptr, fstate.type};    \
     truncate_path((fr), fstate.path_len);                                   \
     if ((fr)->callback &&                                                   \
         ((fr)->path_len == 0 || (fr)->path[(fr)->path_len - 1] != '.'))     \
@@ -203,41 +191,15 @@ static int get_escape_len(const char *s, int len) {
   }
 }
 
-static int capture_ptr(struct frozen *f, const char *ptr, enum json_type type) {
-  if (f->do_realloc && f->num_tokens >= f->max_tokens) {
-    int new_size = f->max_tokens == 0 ? 100 : f->max_tokens * 2;
-    void *p = FROZEN_REALLOC(f->tokens, new_size * sizeof(f->tokens[0]));
-    if (p == NULL) return JSON_TOKEN_ARRAY_TOO_SMALL;
-    f->max_tokens = new_size;
-    f->tokens = (struct json_token *) p;
-  }
-  if (f->tokens == NULL || f->max_tokens == 0) return 0;
-  if (f->num_tokens >= f->max_tokens) return JSON_TOKEN_ARRAY_TOO_SMALL;
-  f->tokens[f->num_tokens].ptr = ptr;
-  f->tokens[f->num_tokens].type = type;
-  f->num_tokens++;
-  return 0;
-}
-
-static int capture_len(struct frozen *f, int token_index, const char *ptr) {
-  if (f->tokens == 0 || f->max_tokens == 0) return 0;
-  EXPECT(token_index >= 0 && token_index < f->max_tokens, JSON_STRING_INVALID);
-  f->tokens[token_index].len = ptr - f->tokens[token_index].ptr;
-  f->tokens[token_index].num_desc = (f->num_tokens - 1) - token_index;
-  return 0;
-}
-
 /* identifier = letter { letter | digit | '_' } */
 static int parse_identifier(struct frozen *f) {
   EXPECT(is_alpha(cur(f)), JSON_STRING_INVALID);
   {
     SET_STATE(f, f->cur, JSON_TYPE_STRING, "", 0);
-    TRY(capture_ptr(f, f->cur, JSON_TYPE_STRING));
     while (f->cur < f->end &&
            (*f->cur == '_' || is_alpha(*f->cur) || is_digit(*f->cur))) {
       f->cur++;
     }
-    capture_len(f, f->num_tokens - 1, f->cur);
     CALL_BACK(f);
   }
   return 0;
@@ -261,7 +223,6 @@ static int parse_string(struct frozen *f) {
   TRY(test_and_skip(f, '"'));
   {
     SET_STATE(f, f->cur, JSON_TYPE_STRING, "", 0);
-    TRY(capture_ptr(f, f->cur, JSON_TYPE_STRING));
     for (; f->cur < f->end; f->cur += len) {
       ch = *(unsigned char *) f->cur;
       len = get_utf8_char_len((unsigned char) ch);
@@ -271,7 +232,6 @@ static int parse_string(struct frozen *f) {
         EXPECT((n = get_escape_len(f->cur + 1, left(f))) > 0, n);
         len += n;
       } else if (ch == '"') {
-        capture_len(f, f->num_tokens - 1, f->cur);
         CALL_BACK(f);
         f->cur++;
         break;
@@ -285,7 +245,6 @@ static int parse_string(struct frozen *f) {
 static int parse_number(struct frozen *f) {
   int ch = cur(f);
   SET_STATE(f, f->cur, JSON_TYPE_NUMBER, "", 0);
-  TRY(capture_ptr(f, f->cur, JSON_TYPE_NUMBER));
   if (ch == '-') f->cur++;
   EXPECT(f->cur < f->end, JSON_STRING_INCOMPLETE);
   EXPECT(is_digit(f->cur[0]), JSON_STRING_INVALID);
@@ -304,20 +263,17 @@ static int parse_number(struct frozen *f) {
     EXPECT(is_digit(f->cur[0]), JSON_STRING_INVALID);
     while (f->cur < f->end && is_digit(f->cur[0])) f->cur++;
   }
-  capture_len(f, f->num_tokens - 1, f->cur);
   CALL_BACK(f);
   return 0;
 }
 
 /* array = '[' [ value { ',' value } ] ']' */
 static int parse_array(struct frozen *f) {
-  int i = 0, ind, current_path_len;
+  int i = 0, current_path_len;
   char buf[20];
   TRY(test_and_skip(f, '['));
   {
     SET_STATE(f, f->cur - 1, JSON_TYPE_ARRAY, "", 0);
-    TRY(capture_ptr(f, f->cur - 1, JSON_TYPE_ARRAY));
-    ind = f->num_tokens - 1;
     while (cur(f) != ']') {
       snprintf(buf, sizeof(buf), "[%d]", i);
       i++;
@@ -327,29 +283,20 @@ static int parse_array(struct frozen *f) {
       if (cur(f) == ',') f->cur++;
     }
     TRY(test_and_skip(f, ']'));
-    capture_len(f, ind, f->cur);
     CALL_BACK(f);
   }
   return 0;
 }
 
-static int compare(const char *s, const char *str, int len) {
-  int i = 0;
-  while (i < len && s[i] == str[i]) i++;
-  return i == len ? 1 : 0;
-}
-
 static int expect(struct frozen *f, const char *s, int len, enum json_type t) {
   int i, n = left(f);
 
   SET_STATE(f, f->cur, t, "", 0);
-  TRY(capture_ptr(f, f->cur, t));
   for (i = 0; i < len; i++) {
     if (i >= n) return JSON_STRING_INCOMPLETE;
     if (f->cur[i] != s[i]) return JSON_STRING_INVALID;
   }
   f->cur += len;
-  TRY(capture_len(f, f->num_tokens - 1, f->cur));
   CALL_BACK(f);
 
   return 0;
@@ -432,18 +379,14 @@ static int parse_pair(struct frozen *f) {
 
 /* object = '{' pair { ',' pair } '}' */
 static int parse_object(struct frozen *f) {
-  int ind;
   TRY(test_and_skip(f, '{'));
   {
     SET_STATE(f, f->cur - 1, JSON_TYPE_OBJECT, ".", 1);
-    TRY(capture_ptr(f, f->cur - 1, JSON_TYPE_OBJECT));
-    ind = f->num_tokens - 1;
     while (cur(f) != '}') {
       TRY(parse_pair(f));
       if (cur(f) == ',') f->cur++;
     }
     TRY(test_and_skip(f, '}'));
-    capture_len(f, ind, f->cur);
     CALL_BACK(f);
   }
   return 0;
@@ -463,81 +406,6 @@ static int doit(struct frozen *f) {
     return ret;
   }
 
-  TRY(capture_ptr(f, f->cur, JSON_TYPE_EOF));
-  capture_len(f, f->num_tokens, f->cur);
-  return 0;
-}
-
-/* json = object */
-int parse_json(const char *s, int s_len, struct json_token *arr, int arr_len) {
-  struct frozen frozen;
-
-  memset(&frozen, 0, sizeof(frozen));
-  frozen.end = s + s_len;
-  frozen.cur = s;
-  frozen.tokens = arr;
-  frozen.max_tokens = arr_len;
-
-  TRY(doit(&frozen));
-
-  return frozen.cur - s;
-}
-
-struct json_token *parse_json2(const char *s, int s_len) {
-  struct frozen frozen;
-
-  memset(&frozen, 0, sizeof(frozen));
-  frozen.end = s + s_len;
-  frozen.cur = s;
-  frozen.do_realloc = 1;
-
-  if (doit(&frozen) < 0) {
-    FROZEN_FREE((void *) frozen.tokens);
-    frozen.tokens = NULL;
-  }
-  return frozen.tokens;
-}
-
-static int path_part_len(const char *p) {
-  int i = 0;
-  while (p[i] != '\0' && p[i] != '[' && p[i] != '.') i++;
-  return i;
-}
-
-struct json_token *find_json_token(struct json_token *toks, const char *path) {
-  while (path != 0 && path[0] != '\0') {
-    int i, ind2 = 0, ind = -1, skip = 2, n = path_part_len(path);
-    if (path[0] == '[') {
-      if (toks->type != JSON_TYPE_ARRAY || !is_digit(path[1])) return 0;
-      for (ind = 0, n = 1; path[n] != ']' && path[n] != '\0'; n++) {
-        if (!is_digit(path[n])) return 0;
-        ind *= 10;
-        ind += path[n] - '0';
-      }
-      if (path[n++] != ']') return 0;
-      skip = 1; /* In objects, we skip 2 elems while iterating, in arrays 1. */
-    } else if (toks->type != JSON_TYPE_OBJECT)
-      return 0;
-    toks++;
-    for (i = 0; i < toks[-1].num_desc; i += skip, ind2++) {
-      /* ind == -1 indicated that we're iterating an array, not object */
-      if (ind == -1 && toks[i].type != JSON_TYPE_STRING) return 0;
-      if (ind2 == ind ||
-          (ind == -1 && toks[i].len == n && compare(path, toks[i].ptr, n))) {
-        i += skip - 1;
-        break;
-      };
-      if (toks[i - 1 + skip].type == JSON_TYPE_ARRAY ||
-          toks[i - 1 + skip].type == JSON_TYPE_OBJECT) {
-        i += toks[i - 1 + skip].num_desc;
-      }
-    }
-    if (i == toks[-1].num_desc) return 0;
-    path += n;
-    if (path[0] == '.') path++;
-    if (path[0] == '\0') return &toks[i];
-    toks += i;
-  }
   return 0;
 }
 
@@ -813,6 +681,15 @@ static void json_scanf_cb_bool(void *callback_data, const char *path,
   }
 }
 
+static void json_scanf_cb_tok(void *callback_data, const char *path,
+                              const struct json_token *tok) {
+  struct json_scanf_info *info = (struct json_scanf_info *) callback_data;
+  if (strcmp(path, info->path) == 0) {
+    info->num_conversions++;
+    *(struct json_token *) info->target = *tok;
+  }
+}
+
 static void json_scanf_cb_str(void *callback_data, const char *path,
                               const struct json_token *tok) {
   struct json_scanf_info *info = (struct json_scanf_info *) callback_data;
@@ -868,6 +745,10 @@ int json_vscanf(const char *s, int len, const char *fmt, va_list ap) {
           json_parse(s, len, json_scanf_cb_func, &info);
           i += 2;
           break;
+        case 'T':
+          json_parse(s, len, json_scanf_cb_tok, &info);
+          i += 2;
+          break;
         default: {
           const char *delims = ", \t\r\n]}";
           int conv_len = strcspn(fmt + i + 1, delims) + 1;

+ 4 - 8
frozen.h

@@ -29,7 +29,7 @@ extern "C" {
 #include <stdio.h>
 
 enum json_type {
-  JSON_TYPE_EOF = 0, /* End of parsed tokens marker */
+  JSON_TYPE_INVALID = 0,
   JSON_TYPE_STRING = 1,
   JSON_TYPE_NUMBER = 2,
   JSON_TYPE_OBJECT = 3,
@@ -42,19 +42,14 @@ enum json_type {
 struct json_token {
   const char *ptr;     /* Points to the beginning of the token */
   int len;             /* Token length */
-  int num_desc;        /* For arrays and object, total number of descendants */
   enum json_type type; /* Type of the token, possible values above */
 };
 
+#define JSON_INVALID_TOKEN {0, 0, JSON_TYPE_INVALID}
+
 /* Error codes */
 #define JSON_STRING_INVALID -1
 #define JSON_STRING_INCOMPLETE -2
-#define JSON_TOKEN_ARRAY_TOO_SMALL -3
-
-int parse_json(const char *json_string, int json_string_length,
-               struct json_token *tokens_array, int size_of_tokens_array);
-struct json_token *parse_json2(const char *json_string, int string_length);
-struct json_token *find_json_token(struct json_token *toks, const char *path);
 
 /* Callback-based API */
 typedef void (*json_parse_callback_t)(void *callback_data, const char *path,
@@ -136,6 +131,7 @@ int json_printf_array(struct json_out *, va_list *ap);
  *       string is malloc-ed, caller must free() the string.
  *    - %M: consumes custom scanning function pointer and
  *       `void *user_data` parameter - see json_scanner_t definition.
+ *    - %T: consumes `struct json_token *`, fills it out with matched token.
  *
  * Return number of elements successfully scanned & converted.
  * Negative number means scan error.

+ 20 - 118
unit_test.c

@@ -53,18 +53,7 @@
 
 static int static_num_tests = 0;
 
-static int cmp_token(const struct json_token *tok, const char *str,
-                     enum json_type type) {
-#if 0
-  printf("[%.*s] [%s]\n", tok->len, tok->ptr, str);
-#endif
-  return tok->type == type && (int) strlen(str) == tok->len &&
-         memcmp(tok->ptr, str, tok->len) == 0;
-}
-
 static const char *test_errors(void) {
-  struct json_token ar[100];
-  int size = ARRAY_SIZE(ar);
   /* clang-format off */
   static const char *invalid_tests[] = {
       "1",        "a:3",           "\x01",         "{:",
@@ -121,122 +110,27 @@ static const char *test_errors(void) {
   const char *s1 =
       " { a: 1, b: \"hi there\", c: true, d: false, "
       " e : null, f: [ 1, -2, 3], g: { \"1\": [], h: [ 7 ] } } ";
-  const char *s2 =
-      "{ a: 1, b: \"hi there\", c: true, d: false, "
-      " e : null, f: [ 1, -2, 3], g: { \"1\": [], h: [ 7 ] } }";
-  const char *s3 = "{ \"1\": [], h: [ 7 ] }";
   int i;
 
-  ASSERT(parse_json(NULL, 0, NULL, 0) == JSON_STRING_INVALID);
+  ASSERT(json_parse(NULL, 0, NULL, 0) == JSON_STRING_INVALID);
   for (i = 0; invalid_tests[i] != NULL; i++) {
-    ASSERT(parse_json(invalid_tests[i], strlen(invalid_tests[i]), ar, size) ==
-           JSON_STRING_INVALID);
+    ASSERT(json_parse(invalid_tests[i], strlen(invalid_tests[i]), NULL,
+                      NULL) == JSON_STRING_INVALID);
   }
 
   for (i = 0; incomplete_tests[i] != NULL; i++) {
-    ASSERT(parse_json(incomplete_tests[i], strlen(incomplete_tests[i]), ar,
-                      size) == JSON_STRING_INCOMPLETE);
+    ASSERT(json_parse(incomplete_tests[i], strlen(incomplete_tests[i]), NULL,
+                      NULL) == JSON_STRING_INCOMPLETE);
   }
 
   for (i = 0; success_tests[i].str != NULL; i++) {
-    ASSERT(parse_json(success_tests[i].str, strlen(success_tests[i].str), ar,
-                      size) == success_tests[i].expected_len);
-  }
-
-  ASSERT(parse_json("{}", 2, ar, 1) == JSON_TOKEN_ARRAY_TOO_SMALL);
-  ASSERT(parse_json("{}", 2, ar, 2) == 2);
-  ASSERT(cmp_token(&ar[0], "{}", JSON_TYPE_OBJECT));
-  ASSERT(ar[1].type == JSON_TYPE_EOF);
-
-  ASSERT(parse_json(s1, strlen(s1), NULL, 0) > 0);
-  ASSERT(parse_json(s1, strlen(s1), ar, 10) == JSON_TOKEN_ARRAY_TOO_SMALL);
-  ASSERT(parse_json(s1, strlen(s1), ar, size) > 0);
-  ASSERT(cmp_token(&ar[0], s2, JSON_TYPE_OBJECT));
-  ASSERT(cmp_token(&ar[1], "a", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[2], "1", JSON_TYPE_NUMBER));
-  ASSERT(cmp_token(&ar[3], "b", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[4], "hi there", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[5], "c", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[6], "true", JSON_TYPE_TRUE));
-  ASSERT(cmp_token(&ar[7], "d", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[8], "false", JSON_TYPE_FALSE));
-  ASSERT(cmp_token(&ar[9], "e", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[10], "null", JSON_TYPE_NULL));
-  ASSERT(cmp_token(&ar[11], "f", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[12], "[ 1, -2, 3]", JSON_TYPE_ARRAY));
-  ASSERT(cmp_token(&ar[13], "1", JSON_TYPE_NUMBER));
-  ASSERT(cmp_token(&ar[14], "-2", JSON_TYPE_NUMBER));
-  ASSERT(cmp_token(&ar[15], "3", JSON_TYPE_NUMBER));
-  ASSERT(cmp_token(&ar[16], "g", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[17], s3, JSON_TYPE_OBJECT));
-  ASSERT(cmp_token(&ar[18], "1", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[19], "[]", JSON_TYPE_ARRAY));
-  ASSERT(cmp_token(&ar[20], "h", JSON_TYPE_STRING));
-  ASSERT(cmp_token(&ar[21], "[ 7 ]", JSON_TYPE_ARRAY));
-  ASSERT(cmp_token(&ar[22], "7", JSON_TYPE_NUMBER));
-  ASSERT(ar[23].type == JSON_TYPE_EOF);
-
-  ASSERT(find_json_token(ar, "a") == &ar[2]);
-  ASSERT(find_json_token(ar, "f") == &ar[12]);
-  ASSERT(find_json_token(ar, "g.h") == &ar[21]);
-  ASSERT(find_json_token(ar, "g.h[0]") == &ar[22]);
-  ASSERT(find_json_token(ar, "g.h[1]") == NULL);
-  ASSERT(find_json_token(ar, "g.h1") == NULL);
-  ASSERT(find_json_token(ar, "") == NULL);
-  ASSERT(find_json_token(ar, NULL) == NULL);
-
-  return NULL;
-}
-
-static const char *test_config(void) {
-  static const char *config_str = "{ ports: [ 80, 443 ] } ";
-  struct json_token tokens[100];
-  int tokens_size = sizeof(tokens) / sizeof(tokens[0]);
-
-  ASSERT(parse_json(config_str, strlen(config_str), tokens, tokens_size) > 0);
-  ASSERT(tokens[0].type == JSON_TYPE_OBJECT);
-  ASSERT(tokens[1].type == JSON_TYPE_STRING);
-  ASSERT(tokens[2].type == JSON_TYPE_ARRAY);
-  ASSERT(tokens[3].type == JSON_TYPE_NUMBER);
-  ASSERT(tokens[4].type == JSON_TYPE_NUMBER);
-  ASSERT(tokens[5].type == JSON_TYPE_EOF);
-
-  ASSERT(find_json_token(tokens, "ports") == &tokens[2]);
-  ASSERT(find_json_token(tokens, "ports[0]") == &tokens[3]);
-  ASSERT(find_json_token(tokens, "ports[1]") == &tokens[4]);
-  ASSERT(find_json_token(tokens, "ports[3]") == NULL);
-  ASSERT(find_json_token(tokens, "foo.bar") == NULL);
-
-  return NULL;
-}
-
-static const char *test_nested(void) {
-  struct json_token ar[100];
-  const char *s = "{ a : [ [1, 2, { b : 2 } ] ] }";
-  enum json_type types[] = {
-      JSON_TYPE_OBJECT, JSON_TYPE_STRING, JSON_TYPE_ARRAY,  JSON_TYPE_ARRAY,
-      JSON_TYPE_NUMBER, JSON_TYPE_NUMBER, JSON_TYPE_OBJECT, JSON_TYPE_STRING,
-      JSON_TYPE_NUMBER, JSON_TYPE_EOF};
-  int i, ar_size = ARRAY_SIZE(ar), types_size = ARRAY_SIZE(types);
-
-  ASSERT(parse_json(s, strlen(s), ar, ar_size) == (int) strlen(s));
-  for (i = 0; i < types_size; i++) {
-    ASSERT(ar[i].type == types[i]);
+    ASSERT(json_parse(success_tests[i].str, strlen(success_tests[i].str), NULL,
+                      NULL) == success_tests[i].expected_len);
   }
-  ASSERT(find_json_token(ar, "a[0]") == &ar[3]);
-  ASSERT(find_json_token(ar, "a[0][0]") == &ar[4]);
-  ASSERT(find_json_token(ar, "a[0][1]") == &ar[5]);
-  ASSERT(find_json_token(ar, "a[0][2]") == &ar[6]);
-  ASSERT(find_json_token(ar, "a[0][2].b") == &ar[8]);
 
-  return NULL;
-}
+  ASSERT(json_parse("{}", 2, NULL, NULL) == 2);
+  ASSERT(json_parse(s1, strlen(s1), NULL, 0) > 0);
 
-static const char *test_realloc(void) {
-  struct json_token *p;
-  ASSERT(parse_json2("{ foo: 2 }", 2) == NULL);
-  ASSERT((p = parse_json2("{ foo: 2 }", 10)) != NULL);
-  free(p);
   return NULL;
 }
 
@@ -437,15 +331,23 @@ static const char *test_scanf(void) {
     ASSERT(x == 123);
   }
 
+  {
+    /* Test that %T fills in the matched token (type, ptr, len) */
+    const char *str = "{a: 123, b: [1,2,3]}";
+    struct json_token t;
+    memset(&t, 0, sizeof(t));
+    ASSERT(json_scanf(str, strlen(str), "{b: %T}", &t) == 1);
+    ASSERT(t.type == JSON_TYPE_ARRAY);
+    ASSERT(t.len == 7);
+    ASSERT(strncmp(t.ptr, "[1,2,3]", t.len) == 0);
+  }
+
   return NULL;
 }
 
 static const char *run_all_tests(void) {
   RUN_TEST(test_scanf);
   RUN_TEST(test_errors);
-  RUN_TEST(test_config);
-  RUN_TEST(test_nested);
-  RUN_TEST(test_realloc);
   RUN_TEST(test_json_printf);
   RUN_TEST(test_system);
   RUN_TEST(test_callback_api);