Browse Source

Improve json_walk API

Each composite type (object and array) now has two token types: _START and
_END. This is useful for the `json_walk()` API, but somewhat confusing for the
scanf-based API: the value is only available at the _END event, so client
code should use `JSON_TYPE_OBJECT_END` and `JSON_TYPE_ARRAY_END` instead of
`JSON_TYPE_OBJECT` and `JSON_TYPE_ARRAY`.

Maybe I should add something like:

```
#define JSON_TYPE_OBJECT JSON_TYPE_OBJECT_END
#define JSON_TYPE_ARRAY JSON_TYPE_ARRAY_END
```

Let me know if I should.

PUBLISHED_FROM=6587579d1f9ac24c89e733af957e5229187ba400
Dmitry Frank 9 years ago
parent
commit
cf03b9c54a
4 changed files with 185 additions and 66 deletions
  1. 3 2
      Makefile
  2. 88 39
      frozen.c
  3. 45 16
      frozen.h
  4. 49 9
      unit_test.c

+ 3 - 2
Makefile

@@ -1,5 +1,6 @@
 PROF = -fprofile-arcs -ftest-coverage -g -O0
-CFLAGS = -W -Wall -pedantic -O3 $(PROF) $(CFLAGS_EXTRA)
+CFLAGS = -W -Wall -pedantic -O3 $(PROF) $(CFLAGS_EXTRA) -std=c99
+CXXFLAGS = -W -Wall -pedantic -O3 $(PROF) $(CFLAGS_EXTRA)
 
 .PHONY: clean all
 
@@ -13,7 +14,7 @@ c: clean
 
 c++: clean
 	rm -rf *.gc*
-	g++ unit_test.c -o unit_test $(CFLAGS) && ./unit_test
+	g++ unit_test.c -o unit_test $(CXXFLAGS) && ./unit_test
 	gcov -a unit_test.c
 
 w:

+ 88 - 39
frozen.c

@@ -68,6 +68,9 @@ struct frozen {
   const char *end;
   const char *cur;
 
+  const char *cur_name;
+  size_t cur_name_len;
+
   /* For callback API */
   char path[JSON_MAX_PATH_LEN];
   int path_len;
@@ -77,22 +80,30 @@ struct frozen {
 
 struct fstate {
   const char *ptr;
-  enum json_type type;
   int path_len;
 };
 
-#define SET_STATE(fr, ptr, type, str, len)                \
-  struct fstate fstate = {(ptr), (type), (fr)->path_len}; \
+#define SET_STATE(fr, ptr, str, len)                \
+  struct fstate fstate = {(ptr), (fr)->path_len};   \
   append_to_path((fr), (str), (len));
 
-#define CALL_BACK(fr)                                                         \
-  do {                                                                        \
-    struct json_token t = {fstate.ptr, (fr)->cur - (const char *) fstate.ptr, \
-                           fstate.type};                                      \
-    truncate_path((fr), fstate.path_len);                                     \
-    if ((fr)->callback &&                                                     \
-        ((fr)->path_len == 0 || (fr)->path[(fr)->path_len - 1] != '.'))       \
-      (fr)->callback((fr)->callback_data, (fr)->path, &t);                    \
+#define CALL_BACK(fr, tok, value, len)                                    \
+  do {                                                                    \
+    if ((fr)->callback &&                                                 \
+        ((fr)->path_len == 0 || (fr)->path[(fr)->path_len - 1] != '.')){  \
+                                                                          \
+      struct json_token t = {(value), (len), (tok)};                      \
+                                                                          \
+      /* Call the callback with the given value and current name */       \
+      (fr)->callback(                                                     \
+          (fr)->callback_data,                                            \
+          (fr)->cur_name, (fr)->cur_name_len, (fr)->path, &t              \
+          );                                                              \
+                                                                          \
+      /* Reset the name */                                                \
+      (fr)->cur_name = NULL;                                              \
+      (fr)->cur_name_len = 0;                                             \
+    }                                                                     \
   } while (0)
 
 static int append_to_path(struct frozen *f, const char *str, int size) {
@@ -116,11 +127,13 @@ static int parse_value(struct frozen *f);
   do {                              \
     if (!(cond)) return (err_code); \
   } while (0)
+
 #define TRY(expr)          \
   do {                     \
     int _n = expr;         \
     if (_n < 0) return _n; \
   } while (0)
+
 #define END_OF_STRING (-1)
 
 static int left(const struct frozen *f) {
@@ -187,12 +200,13 @@ static int get_escape_len(const char *s, int len) {
 static int parse_identifier(struct frozen *f) {
   EXPECT(is_alpha(cur(f)), JSON_STRING_INVALID);
   {
-    SET_STATE(f, f->cur, JSON_TYPE_STRING, "", 0);
+    SET_STATE(f, f->cur, "", 0);
     while (f->cur < f->end &&
            (*f->cur == '_' || is_alpha(*f->cur) || is_digit(*f->cur))) {
       f->cur++;
     }
-    CALL_BACK(f);
+    truncate_path(f, fstate.path_len);
+    CALL_BACK(f, JSON_TYPE_STRING, fstate.ptr, f->cur - fstate.ptr);
   }
   return 0;
 }
@@ -214,7 +228,7 @@ static int parse_string(struct frozen *f) {
   int n, ch = 0, len = 0;
   TRY(test_and_skip(f, '"'));
   {
-    SET_STATE(f, f->cur, JSON_TYPE_STRING, "", 0);
+    SET_STATE(f, f->cur, "", 0);
     for (; f->cur < f->end; f->cur += len) {
       ch = *(unsigned char *) f->cur;
       len = get_utf8_char_len((unsigned char) ch);
@@ -224,7 +238,8 @@ static int parse_string(struct frozen *f) {
         EXPECT((n = get_escape_len(f->cur + 1, left(f))) > 0, n);
         len += n;
       } else if (ch == '"') {
-        CALL_BACK(f);
+        truncate_path(f, fstate.path_len);
+        CALL_BACK(f, JSON_TYPE_STRING, fstate.ptr, f->cur - fstate.ptr);
         f->cur++;
         break;
       };
@@ -236,7 +251,7 @@ static int parse_string(struct frozen *f) {
 /* number = [ '-' ] digit+ [ '.' digit+ ] [ ['e'|'E'] ['+'|'-'] digit+ ] */
 static int parse_number(struct frozen *f) {
   int ch = cur(f);
-  SET_STATE(f, f->cur, JSON_TYPE_NUMBER, "", 0);
+  SET_STATE(f, f->cur, "", 0);
   if (ch == '-') f->cur++;
   EXPECT(f->cur < f->end, JSON_STRING_INCOMPLETE);
   EXPECT(is_digit(f->cur[0]), JSON_STRING_INVALID);
@@ -255,7 +270,8 @@ static int parse_number(struct frozen *f) {
     EXPECT(is_digit(f->cur[0]), JSON_STRING_INVALID);
     while (f->cur < f->end && is_digit(f->cur[0])) f->cur++;
   }
-  CALL_BACK(f);
+  truncate_path(f, fstate.path_len);
+  CALL_BACK(f, JSON_TYPE_NUMBER, fstate.ptr, f->cur - fstate.ptr);
   return 0;
 }
 
@@ -265,31 +281,36 @@ static int parse_array(struct frozen *f) {
   char buf[20];
   TRY(test_and_skip(f, '['));
   {
-    SET_STATE(f, f->cur - 1, JSON_TYPE_ARRAY, "", 0);
+    CALL_BACK(f, JSON_TYPE_ARRAY_START, NULL, 0);
+    SET_STATE(f, f->cur - 1, "", 0);
     while (cur(f) != ']') {
       snprintf(buf, sizeof(buf), "[%d]", i);
       i++;
       current_path_len = append_to_path(f, buf, strlen(buf));
+      f->cur_name = f->path + strlen(f->path) - strlen(buf) + 1/*opening brace*/;
+      f->cur_name_len = strlen(buf) - 2/*braces*/;
       TRY(parse_value(f));
       truncate_path(f, current_path_len);
       if (cur(f) == ',') f->cur++;
     }
     TRY(test_and_skip(f, ']'));
-    CALL_BACK(f);
+    truncate_path(f, fstate.path_len);
+    CALL_BACK(f, JSON_TYPE_ARRAY_END, fstate.ptr, f->cur - fstate.ptr);
   }
   return 0;
 }
 
-static int expect(struct frozen *f, const char *s, int len, enum json_type t) {
+static int expect(struct frozen *f, const char *s, int len, enum json_token_type tok_type) {
   int i, n = left(f);
-
-  SET_STATE(f, f->cur, t, "", 0);
+  SET_STATE(f, f->cur, "", 0);
   for (i = 0; i < len; i++) {
     if (i >= n) return JSON_STRING_INCOMPLETE;
     if (f->cur[i] != s[i]) return JSON_STRING_INVALID;
   }
   f->cur += len;
-  CALL_BACK(f);
+  truncate_path(f, fstate.path_len);
+
+  CALL_BACK(f, tok_type, fstate.ptr, f->cur - fstate.ptr);
 
   return 0;
 }
@@ -360,9 +381,11 @@ static int parse_pair(struct frozen *f) {
   skip_whitespaces(f);
   tok = f->cur;
   TRY(parse_key(f));
-  current_path_len =
-      append_to_path(f, *tok == '"' ? tok + 1 : tok,
-                     *tok == '"' ? f->cur - tok - 2 : f->cur - tok);
+  {
+    f->cur_name = *tok == '"' ? tok + 1 : tok;
+    f->cur_name_len = *tok == '"' ? f->cur - tok - 2 : f->cur - tok;
+    current_path_len = append_to_path(f, f->cur_name, f->cur_name_len);
+  }
   TRY(test_and_skip(f, ':'));
   TRY(parse_value(f));
   truncate_path(f, current_path_len);
@@ -373,13 +396,15 @@ static int parse_pair(struct frozen *f) {
 static int parse_object(struct frozen *f) {
   TRY(test_and_skip(f, '{'));
   {
-    SET_STATE(f, f->cur - 1, JSON_TYPE_OBJECT, ".", 1);
+    CALL_BACK(f, JSON_TYPE_OBJECT_START, NULL, 0);
+    SET_STATE(f, f->cur - 1, ".", 1);
     while (cur(f) != '}') {
       TRY(parse_pair(f));
       if (cur(f) == ',') f->cur++;
     }
     TRY(test_and_skip(f, '}'));
-    CALL_BACK(f);
+    truncate_path(f, fstate.path_len);
+    CALL_BACK(f, JSON_TYPE_OBJECT_END, fstate.ptr, f->cur - fstate.ptr);
   }
   return 0;
 }
@@ -631,9 +656,15 @@ struct scan_array_info {
   struct json_token *token;
 };
 
-static void json_scanf_array_elem_cb(void *callback_data, const char *path,
+static void json_scanf_array_elem_cb(void *callback_data,
+                                     const char *name, size_t name_len,
+                                     const char *path,
                                      const struct json_token *token) {
   struct scan_array_info *info = (struct scan_array_info *) callback_data;
+
+  (void) name;
+  (void) name_len;
+
   if (strcmp(path, info->path) == 0) {
     *info->token = *token;
   }
@@ -658,15 +689,33 @@ struct json_scanf_info {
   int type;
 };
 
-static void json_scanf_cb(void *callback_data, const char *path,
-                          const struct json_token *tok) {
+static void json_scanf_cb(void *callback_data,
+                          const char *name, size_t name_len,
+                          const char *path,
+                          const struct json_token *token)
+{
   struct json_scanf_info *info = (struct json_scanf_info *) callback_data;
-  if (strcmp(path, info->path) != 0) return;
+
+  (void) name;
+  (void) name_len;
+
+  if (strcmp(path, info->path) != 0) {
+    /* It's not the path we're looking for, so, just ignore this callback */
+    return;
+  }
+
+  if (token->ptr == NULL) {
+    /*
+     * We're not interested here in the events for which we have no value;
+     * namely, JSON_TYPE_OBJECT_START and JSON_TYPE_ARRAY_START
+     */
+    return;
+  }
 
   switch (info->type) {
     case 'B':
       info->num_conversions++;
-      *(int *) info->target = (tok->type == JSON_TYPE_TRUE ? 1 : 0);
+      *(int *) info->target = (token->type == JSON_TYPE_TRUE ? 1 : 0);
       break;
     case 'M': {
       union {
@@ -674,26 +723,26 @@ static void json_scanf_cb(void *callback_data, const char *path,
         json_scanner_t f;
       } u = {info->target};
       info->num_conversions++;
-      u.f(tok->ptr, tok->len, info->user_data);
+      u.f(token->ptr, token->len, info->user_data);
       break;
     }
     case 'Q': {
       char **dst = (char **) info->target;
       info->num_conversions++;
       /* TODO(lsm): un-escape string */
-      *dst = (char *) malloc(tok->len + 1);
+      *dst = (char *) malloc(token->len + 1);
       if (*dst != NULL) {
-        strncpy(*dst, tok->ptr, tok->len);
-        (*dst)[tok->len] = '\0';
+        strncpy(*dst, token->ptr, token->len);
+        (*dst)[token->len] = '\0';
       }
       break;
     }
     case 'T':
       info->num_conversions++;
-      *(struct json_token *) info->target = *tok;
+      *(struct json_token *) info->target = *token;
       break;
     default:
-      info->num_conversions += sscanf(tok->ptr, info->fmt, info->target);
+      info->num_conversions += sscanf(token->ptr, info->fmt, info->target);
       break;
   }
 }

+ 45 - 16
frozen.h

@@ -28,21 +28,26 @@ extern "C" {
 #include <stddef.h>
 #include <stdio.h>
 
-enum json_type {
-  JSON_TYPE_INVALID = 0,
-  JSON_TYPE_STRING = 1,
-  JSON_TYPE_NUMBER = 2,
-  JSON_TYPE_OBJECT = 3,
-  JSON_TYPE_TRUE = 4,
-  JSON_TYPE_FALSE = 5,
-  JSON_TYPE_NULL = 6,
-  JSON_TYPE_ARRAY = 7
+enum json_token_type {
+  JSON_TYPE_STRING,
+  JSON_TYPE_NUMBER,
+  JSON_TYPE_TRUE,
+  JSON_TYPE_FALSE,
+  JSON_TYPE_NULL,
+  JSON_TYPE_OBJECT_START,
+  JSON_TYPE_OBJECT_END,
+  JSON_TYPE_ARRAY_START,
+  JSON_TYPE_ARRAY_END,
+
+  JSON_TYPES_CNT,
 };
 
+#define JSON_TYPE_INVALID JSON_TYPES_CNT
+
 struct json_token {
-  const char *ptr;     /* Points to the beginning of the token */
-  int len;             /* Token length */
-  enum json_type type; /* Type of the token, possible values above */
+  const char *ptr;           /* Points to the beginning of the value */
+  int len;                   /* Value length */
+  enum json_token_type type; /* Type of the token, possible values are above */
 };
 
 #define JSON_INVALID_TOKEN {0, 0, JSON_TYPE_INVALID}
@@ -51,13 +56,37 @@ struct json_token {
 #define JSON_STRING_INVALID -1
 #define JSON_STRING_INCOMPLETE -2
 
-/* Callback-based API */
-typedef void (*json_walk_callback_t)(void *callback_data, const char *path,
+/*
+ * Callback-based SAX-like API.
+ *
+ * Property name and length are given only when available, i.e. when the event
+ * is an object's property or an array element (named by its index). In other
+ * cases, `name` is `NULL`. For example, name is never given:
+ *   - For the first value in the JSON string;
+ *   - For events JSON_TYPE_OBJECT_END and JSON_TYPE_ARRAY_END
+ *
+ * E.g. for the input `{ "foo": 123, "bar": [ 1, 2, { "baz": true } ] }`,
+ * the sequence of callback invocations will be as follows:
+ *
+ * - type: JSON_TYPE_OBJECT_START, name: NULL, path: "", value: NULL
+ * - type: JSON_TYPE_NUMBER, name: "foo", path: ".foo", value: "123"
+ * - type: JSON_TYPE_ARRAY_START,  name: "bar", path: ".bar", value: NULL
+ * - type: JSON_TYPE_NUMBER, name: "0", path: ".bar[0]", value: "1"
+ * - type: JSON_TYPE_NUMBER, name: "1", path: ".bar[1]", value: "2"
+ * - type: JSON_TYPE_OBJECT_START, name: "2", path: ".bar[2]", value: NULL
+ * - type: JSON_TYPE_TRUE, name: "baz", path: ".bar[2].baz", value: "true"
+ * - type: JSON_TYPE_OBJECT_END, name: NULL, path: ".bar[2]", value: "{ \"baz\": true }"
+ * - type: JSON_TYPE_ARRAY_END, name: NULL, path: ".bar", value: "[ 1, 2, { \"baz\": true } ]"
+ * - type: JSON_TYPE_OBJECT_END, name: NULL, path: "", value: "{ \"foo\": 123, \"bar\": [ 1, 2, { \"baz\": true } ] }"
+ */
+typedef void (*json_walk_callback_t)(void *callback_data,
+                                     const char *name, size_t name_len,
+                                     const char *path,
                                      const struct json_token *token);
 
 /*
- * Parse `json_string`, invoking `callback` function for each JSON token.
- * Return number of bytes processed
+ * Parse `json_string`, invoking `callback` in a way similar to SAX parsers;
+ * see `json_walk_callback_t`.
  */
 int json_walk(const char *json_string, int json_string_length,
               json_walk_callback_t callback, void *callback_data);

+ 49 - 9
unit_test.c

@@ -32,6 +32,18 @@
 #include <stdlib.h>
 #include <string.h>
 
+const char *tok_type_names[] = {
+  "STRING",
+  "NUMBER",
+  "TRUE",
+  "FALSE",
+  "NULL",
+  "OBJECT_START",
+  "OBJECT_END",
+  "ARRAY_START",
+  "ARRAY_END",
+};
+
 #define FAIL(str, line)                           \
   do {                                            \
     printf("Fail on line %d: [%s]\n", line, str); \
@@ -280,18 +292,46 @@ static const char *test_system() {
   return NULL;
 }
 
-static void cb(void *data, const char *path, const struct json_token *token) {
+static void cb(void *data,
+               const char *name, size_t name_len,
+               const char *path,
+               const struct json_token *token) {
   char *buf = (char *) data;
-  sprintf(buf + strlen(buf), "%d->%s[%.*s] ", token->type, path, token->len,
-          token->ptr);
+
+  const char *snull = "<null>";
+
+  sprintf(buf + strlen(buf), "name:'%.*s', path:'%s', type:%s, val:'%.*s'\n",
+      name != NULL ? name_len : strlen(snull),
+      name != NULL ? name : snull,
+      path,
+      tok_type_names[token->type],
+      token->ptr != NULL ? token->len : strlen(snull),
+      token->ptr != NULL ? token->ptr : snull
+      );
 }
 
 static const char *test_callback_api() {
-  const char *s = "{\"c\":[{\"a\":9,\"b\":\"x\"}]}";
+  const char *s =
+    "{\"c\":[\"foo\", \"bar\", {\"a\":9, \"b\": \"x\"}], "
+    "\"mynull\": null, \"mytrue\": true, \"myfalse\": false}";
+
   const char *result =
-      "2->.c[0].a[9] 1->.c[0].b[x] 3->.c[0][{\"a\":9,\"b\":\"x\"}] "
-      "7->.c[[{\"a\":9,\"b\":\"x\"}]] 3->[{\"c\":[{\"a\":9,\"b\":\"x\"}]}] ";
-  char buf[200] = "";
+    "name:'<null>', path:'', type:OBJECT_START, val:'<null>'\n"
+    "name:'c', path:'.c', type:ARRAY_START, val:'<null>'\n"
+    "name:'0', path:'.c[0]', type:STRING, val:'foo'\n"
+    "name:'1', path:'.c[1]', type:STRING, val:'bar'\n"
+    "name:'2', path:'.c[2]', type:OBJECT_START, val:'<null>'\n"
+    "name:'a', path:'.c[2].a', type:NUMBER, val:'9'\n"
+    "name:'b', path:'.c[2].b', type:STRING, val:'x'\n"
+    "name:'<null>', path:'.c[2]', type:OBJECT_END, val:'{\"a\":9, \"b\": \"x\"}'\n"
+    "name:'<null>', path:'.c', type:ARRAY_END, val:'[\"foo\", \"bar\", {\"a\":9, \"b\": \"x\"}]'\n"
+    "name:'mynull', path:'.mynull', type:NULL, val:'null'\n"
+    "name:'mytrue', path:'.mytrue', type:TRUE, val:'true'\n"
+    "name:'myfalse', path:'.myfalse', type:FALSE, val:'false'\n"
+    "name:'<null>', path:'', type:OBJECT_END, val:'{\"c\":[\"foo\", \"bar\", {\"a\":9, \"b\": \"x\"}], \"mynull\": null, \"mytrue\": true, \"myfalse\": false}'\n"
+    ;
+
+  char buf[4096] = "";
   ASSERT(json_walk(s, strlen(s), cb, buf) == (int) strlen(s));
   ASSERT(strcmp(buf, result) == 0);
   return NULL;
@@ -345,7 +385,7 @@ static const char *test_scanf(void) {
     struct json_token t;
     memset(&t, 0, sizeof(t));
     ASSERT(json_scanf(str, strlen(str), "{b: %T}", &t) == 1);
-    ASSERT(t.type == JSON_TYPE_ARRAY);
+    ASSERT(t.type == JSON_TYPE_ARRAY_END);
     ASSERT(t.len == 7);
     ASSERT(strncmp(t.ptr, "[1,2,3]", t.len) == 0);
   }
@@ -376,7 +416,7 @@ static const char *test_scanf(void) {
     /* Scan each array element into a token */
     for (i = 0; json_scanf_array_elem(str, len, ".a", i, &t) > 0; i++) {
       /* Now scan each token */
-      ASSERT(t.type == JSON_TYPE_OBJECT);
+      ASSERT(t.type == JSON_TYPE_OBJECT_END);
       ASSERT(json_scanf(t.ptr, t.len, "{b: %d}", &value) == 1);
       ASSERT((size_t) i < sizeof(values) / sizeof(values[0]));
       ASSERT(values[i] == value);