浏览代码

Parser completed

Sergey Lyubka 11 年之前
父节点
当前提交
d98d0b3948
共有 3 个文件被更改,包括 173 次插入和 22 次删除
  1. 93 16
      frozen.c
  2. 2 1
      frozen.h
  3. 78 5
      unit_test.c

+ 93 - 16
frozen.c

@@ -17,9 +17,6 @@
 
 #include "frozen.h"
 
-
-#include <stdio.h>  // TODO:remove this
-
 struct frozen {
   const char *end;
   const char *cur;
@@ -29,13 +26,16 @@ struct frozen {
 };
 
 static int parse_object(struct frozen *f);
+static int parse_value(struct frozen *f);
 
 #define EXPECT(cond, err_code) do { if (!(cond)) return (err_code); } while (0)
 #define TRY(expr) do { int n = expr; if (n < 0) return n; } while (0)
-#define SKIP_SPACES(f) do { skip_whitespaces(f); \
-  if (f->cur >= f->end) return JSON_STRING_INCOMPLETE; } while (0)
 #define END_OF_STRING (-1)
 
+// Number of bytes between the cursor and the end of the input buffer.
+static int left(const struct frozen *f) {
+  const char *limit = f->end;
+  const char *pos = f->cur;
+  return (int) (limit - pos);
+}
+
 static int is_space(int ch) {
   return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
 };
@@ -75,40 +75,93 @@ static int get_escape_len(const char *s, int len) {
         is_hex_digit(s[2]) && is_hex_digit(s[3]) ? 4 : JSON_STRING_INVALID;
     case '"': case '\\': case '/': case 'b':
     case 'f': case 'n': case 'r': case 't':
-      return len < 2 ? JSON_STRING_INCOMPLETE : 2;
+      return len < 2 ? JSON_STRING_INCOMPLETE : 1;
     default:
       return JSON_STRING_INVALID;
   }
 }
 
+// Opens a new token of the given type that starts at `ptr`.
+// Returns 0 without recording anything when no token array was supplied,
+// and JSON_TOKEN_ARRAY_TOO_SMALL when the array is already full.
+static int capture_ptr(struct frozen *f, const char *ptr, int type) {
+  int idx;
+  if (f->tokens == 0 || f->max_tokens == 0) return 0;
+  idx = f->num_tokens;
+  if (idx >= f->max_tokens) return JSON_TOKEN_ARRAY_TOO_SMALL;
+  f->tokens[idx].type = type;
+  f->tokens[idx].ptr = ptr;
+  f->num_tokens = idx + 1;
+  return 0;
+}
+
+// Finalises the token at `token_index`: stores how many tokens were captured
+// after it and its byte length measured from its start up to `ptr`.
+// Returns 0 without doing anything when no token array was supplied, and
+// JSON_STRING_INVALID when `token_index` does not name a captured token.
+static int capture_len(struct frozen *f, int token_index, const char *ptr) {
+  if (f->tokens == 0 || f->max_tokens == 0) return 0;
+  // Only slots below num_tokens were filled in by capture_ptr(); a slot at
+  // or above it still has an indeterminate ptr and must not be read.
+  EXPECT(token_index >= 0 && token_index < f->num_tokens, JSON_STRING_INVALID);
+  f->tokens[token_index].num_children = (f->num_tokens - 1) - token_index;
+  f->tokens[token_index].len = ptr - f->tokens[token_index].ptr;
+  return 0;
+}
+
 // identifier = letter { letter | digit | '_' }
 // Consumes an unquoted key and records it as a JSON_TYPE_STRING token.
 // Returns 0 on success or a negative error code.
 static int parse_identifier(struct frozen *f) {
-  //printf("%s 1 [%.*s]\n", __func__, (int) (f->end - f->cur), f->cur);
   EXPECT(is_alpha(cur(f)), JSON_STRING_INVALID);
+  TRY(capture_ptr(f, f->cur, JSON_TYPE_STRING));  // token starts at f->cur
   while (f->cur < f->end &&
          (*f->cur == '_' || is_alpha(*f->cur) || is_digit(*f->cur))) {
     f->cur++;
   }
+  capture_len(f, f->num_tokens - 1, f->cur);  // close the token opened above
   return 0;
 }
 
 // string = '"' { quoted_printable_chars } '"'
 // Consumes a quoted string and records it as a JSON_TYPE_STRING token.
 // Returns 0 on success, JSON_STRING_INCOMPLETE when the input ends before
 // the closing quote, or another negative error code.
 static int parse_string(struct frozen *f) {
-  int n, ch;
+  int n, ch = 0;
   TRY(test_and_skip(f, '"'));
-  while (++f->cur < f->end) {
-    ch = cur(f);
-    EXPECT(ch > 32 && ch < 127, JSON_STRING_INVALID);
+  TRY(capture_ptr(f, f->cur, JSON_TYPE_STRING));  // presumably just past the opening quote
+  for (; f->cur < f->end; f->cur++) {
+    ch = * (unsigned char *) f->cur;  // unsigned: bytes >= 0x80 read as 128..255
+    EXPECT(ch >= 32 && ch <= 127, JSON_STRING_INVALID);  // only bytes 32..127 pass
     if (ch == '\\') {
-      EXPECT((n = get_escape_len(f->cur + 1, f->end - f->cur)) > 0, n);
+      EXPECT((n = get_escape_len(f->cur + 1, left(f))) > 0, n);  // NOTE(review): left(f) counts the backslash too - confirm
       f->cur += n;  // the loop's own f->cur++ then steps past the escape's last byte
     } else if (ch == '"') {
+      capture_len(f, f->num_tokens - 1, f->cur);  // length excludes the closing quote
+      f->cur++;
       break;
     };
   }
+  return ch == '"' ? 0 : JSON_STRING_INCOMPLETE;  // ch is the last byte examined
+}
+
+// number = [ '-' ] digit { digit }
+// Consumes an optional minus sign and a run of digits, recording the span
+// as a JSON_TYPE_NUMBER token. Returns 0 or a negative error code.
+static int parse_number(struct frozen *f) {
+  const int first = cur(f);
+  TRY(capture_ptr(f, f->cur, JSON_TYPE_NUMBER));
+  if (first == '-') {
+    f->cur++;
+  }
+  for (; f->cur < f->end; f->cur++) {
+    if (!is_digit(*f->cur)) break;
+  }
+  capture_len(f, f->num_tokens - 1, f->cur);
+  return 0;
+}
+
+// array = '[' [ value { ',' value } ] ']'
+// Records the whole array as a JSON_TYPE_ARRAY token, followed by one token
+// per nested value. A ',' after a value is consumed when present; the loop
+// does not insist on it.
+static int parse_array(struct frozen *f) {
+  int self;
+  TRY(test_and_skip(f, '['));
+  TRY(capture_ptr(f, f->cur - 1, JSON_TYPE_ARRAY));
+  self = f->num_tokens - 1;
+  for (;;) {
+    if (cur(f) == ']') break;
+    TRY(parse_value(f));
+    if (cur(f) != ',') continue;
+    f->cur++;
+  }
+  TRY(test_and_skip(f, ']'));
+  capture_len(f, self, f->cur);
   return 0;
 }
 
+// Returns 1 when the `len` bytes at the cursor equal the first `len` bytes
+// of `str`, and 0 otherwise (including when fewer than `len` bytes remain).
+static int compare(const struct frozen *f, const char *str, int len) {
+  int k;
+  if (left(f) < len) return 0;
+  for (k = 0; k < len; k++) {
+    if (f->cur[k] != str[k]) return 0;
+  }
+  return k == len;
+}
+
 // value = 'null' | 'true' | 'false' | number | string | array | object
 static int parse_value(struct frozen *f) {
   int ch = cur(f);
@@ -116,6 +169,23 @@ static int parse_value(struct frozen *f) {
     TRY(parse_string(f));
   } else if (ch == '{') {
     TRY(parse_object(f));
+  } else if (ch == '[') {
+    TRY(parse_array(f));
+  } else if (ch == 'n' && compare(f, "null", 4)) {
+    TRY(capture_ptr(f, f->cur, JSON_TYPE_NULL));
+    f->cur += 4;
+    capture_len(f, f->num_tokens - 1, f->cur);
+  } else if (ch == 't' && compare(f, "true", 4)) {
+    TRY(capture_ptr(f, f->cur, JSON_TYPE_TRUE));
+    f->cur += 4;
+    capture_len(f, f->num_tokens - 1, f->cur);
+  } else if (ch == 'f' && compare(f, "false", 5)) {
+    TRY(capture_ptr(f, f->cur, JSON_TYPE_FALSE));
+    f->cur += 5;
+    capture_len(f, f->num_tokens - 1, f->cur);
+  } else if (is_digit(ch) ||
+             (ch == '-' && f->cur + 1 < f->end && is_digit(f->cur[1]))) {
+    TRY(parse_number(f));
   } else {
     return ch == END_OF_STRING ? JSON_STRING_INCOMPLETE : JSON_STRING_INVALID;
   }
@@ -125,7 +195,9 @@ static int parse_value(struct frozen *f) {
 // key = identifier | string
 static int parse_key(struct frozen *f) {
   int ch = cur(f);
-  //printf("%s 1 [%.*s]\n", __func__, (int) (f->end - f->cur), f->cur);
+#if 0
+  printf("%s 1 [%.*s]\n", __func__, (int) (f->end - f->cur), f->cur);
+#endif
   if (is_alpha(ch)) {
     TRY(parse_identifier(f));
   } else if (ch == '"') {
@@ -144,24 +216,29 @@ static int parse_pair(struct frozen *f) {
   return 0;
 }
 
-
 // object = '{' pair { ',' pair } '}'
 // Records the whole object as a JSON_TYPE_OBJECT token, followed by the
 // tokens of its pairs. Returns 0 on success or a negative error code.
 static int parse_object(struct frozen *f) {
+  int ind;  // index of this object's own token
   TRY(test_and_skip(f, '{'));
+  TRY(capture_ptr(f, f->cur - 1, JSON_TYPE_OBJECT));  // presumably points at the '{' just consumed
+  ind = f->num_tokens - 1;  // saved now: nested captures advance num_tokens
   while (cur(f) != '}') {
     TRY(parse_pair(f));
+    if (cur(f) == ',') f->cur++;  // separator is consumed if present, not required
   }
   TRY(test_and_skip(f, '}'));
+  capture_len(f, ind, f->cur);  // called after the closing '}' has been matched
   return 0;
 }
 
-// number = [ '-' ] digit { digit }
-// array = '[' [ value { ',' value } ] ']'
 // json = object
 // Parses the `s_len` bytes at `s` as a single JSON object. When `arr` is
 // non-NULL and `arr_len` is non-zero, tokens are written to `arr`, ending
 // with a JSON_TYPE_EOF marker.
 // Returns the number of bytes consumed, or a negative error code.
 int parse_json(const char *s, int s_len, struct json_token *arr, int arr_len) {
   struct frozen frozen = { s + s_len, s, arr, arr_len, 0 };
   if (s == 0 || s_len < 0) return JSON_STRING_INVALID;
   if (s_len == 0) return JSON_STRING_INCOMPLETE;
   TRY(parse_object(&frozen));
+  TRY(capture_ptr(&frozen, frozen.cur, JSON_TYPE_EOF));
+  // capture_ptr() has already advanced num_tokens, so the EOF marker just
+  // written lives at num_tokens - 1; indexing num_tokens itself would touch
+  // the never-initialised slot after it.
+  capture_len(&frozen, frozen.num_tokens - 1, frozen.cur);
+
   return frozen.cur - s;
 }

+ 2 - 1
frozen.h

@@ -30,11 +30,12 @@ struct json_token {
 
 #define JSON_TYPE_EOF     0   // End of parsed tokens marker
 #define JSON_TYPE_STRING  1
-#define JSON_TYPE_NUMERIC 2
+#define JSON_TYPE_NUMBER  2
 #define JSON_TYPE_OBJECT  3
 #define JSON_TYPE_TRUE    4
 #define JSON_TYPE_FALSE   5
 #define JSON_TYPE_NULL    6
+#define JSON_TYPE_ARRAY   7
 };
 
 // Error codes

+ 78 - 5
unit_test.c

@@ -37,26 +37,42 @@
 
 static int static_num_tests = 0;
 
+static int cmp_token(const struct json_token *tok, const char *str, int type) {
 #if 0
-static int cmp_token(const struct json_token *tok, const char *str) {
-  return (int) strlen(str) == tok->len && memcmp(tok->ptr, str, tok->len) == 0;
-}
+  printf("[%.*s] [%s]\n", tok->len, tok->ptr, str);  // debug trace, compiled out
 #endif
+  return tok->type == type && (int) strlen(str) == tok->len &&
+    memcmp(tok->ptr, str, tok->len) == 0;  // non-zero only if type, length and bytes all match
+}
 
 static const char *test_errors(void) {
   struct json_token ar[100];
   int size = ARRAY_SIZE(ar);
   static const char *invalid_tests[] = {
-    "1", "a:3", "\x01", "{:", " { 1",
+    "1", "a:3", "\x01", "{:", " { 1", "{a:\"\n\"}",
     NULL
   };
   static const char *incomplete_tests[] = {
-    "", " \r\n\t", "{", " { a", "{a:", "{a:\"", " { a : \"xx",
+    "", " \r\n\t", "{", " { a", "{a:", "{a:\"", " { a : \"xx", "{a:12",
     NULL
   };
   static const struct { const char *str; int expected_len; } success_tests[] = {
+    { "{}", 2 },
+    { " { } ", 4 },
+    { "{a:1}", 5 },
+    { "{a:1}", 5 },
+    { "{a:\"\"}", 6 },
+    { "{a:\" \\n\\t\\r\"}", 13 },
+    { " {a:[1]} 123456", 8 },
+    { " {a:[]} 123456", 7 },
+    { " {a:[1,2]} 123456", 10 },
+    { "{a:1,b:2} xxxx", 9 },
+    { "{a:1,b:{},c:[{}]} xxxx", 17 },
+    { "{a:true,b:[false,null]} xxxx", 23 },
     { NULL, 0 }
   };
+  const char *s1 = " { a: 1, b: \"hi there\", c: true, d: false, "
+    " e : null, f: [ 1, -2, 3], g: { \"1\": [], h: {} } } ";
   int i;
 
   ASSERT(parse_json(NULL, 0, NULL, 0) == JSON_STRING_INVALID);
@@ -75,6 +91,63 @@ static const char *test_errors(void) {
                       ar, size) == success_tests[i].expected_len);
   }
 
+  ASSERT(parse_json("{}", 2, ar, 1) == JSON_TOKEN_ARRAY_TOO_SMALL);
+  ASSERT(parse_json("{}", 2, ar, 2) == 2);
+  ASSERT(cmp_token(&ar[0], "{}", JSON_TYPE_OBJECT));
+  ASSERT(ar[1].type == JSON_TYPE_EOF);
+
+  ASSERT(parse_json(s1, strlen(s1), NULL, 0) > 0);
+  ASSERT(parse_json(s1, strlen(s1), ar, 10) == JSON_TOKEN_ARRAY_TOO_SMALL);
+  ASSERT(parse_json(s1, strlen(s1), ar, size) > 0);
+  ASSERT(cmp_token(&ar[0], "{ a: 1, b: \"hi there\", c: true, d: false, "
+                   " e : null, f: [ 1, -2, 3], g: { \"1\": [], h: {} } }",
+                   JSON_TYPE_OBJECT));
+  ASSERT(cmp_token(&ar[1], "a", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[2], "1", JSON_TYPE_NUMBER));
+  ASSERT(cmp_token(&ar[3], "b", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[4], "hi there", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[5], "c", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[6], "true", JSON_TYPE_TRUE));
+  ASSERT(cmp_token(&ar[7], "d", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[8], "false", JSON_TYPE_FALSE));
+  ASSERT(cmp_token(&ar[9], "e", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[10], "null", JSON_TYPE_NULL));
+  ASSERT(cmp_token(&ar[11], "f", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[12], "[ 1, -2, 3]", JSON_TYPE_ARRAY));
+  ASSERT(cmp_token(&ar[13], "1", JSON_TYPE_NUMBER));
+  ASSERT(cmp_token(&ar[14], "-2", JSON_TYPE_NUMBER));
+  ASSERT(cmp_token(&ar[15], "3", JSON_TYPE_NUMBER));
+  ASSERT(cmp_token(&ar[16], "g", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[17], "{ \"1\": [], h: {} }" , JSON_TYPE_OBJECT));
+  ASSERT(cmp_token(&ar[18], "1", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[19], "[]", JSON_TYPE_ARRAY));
+  ASSERT(cmp_token(&ar[20], "h", JSON_TYPE_STRING));
+  ASSERT(cmp_token(&ar[21], "{}", JSON_TYPE_OBJECT));
+  ASSERT(ar[22].type == JSON_TYPE_EOF);
+
+  ASSERT(ar[0].num_children == 21);
+  ASSERT(ar[1].num_children == 0);
+  ASSERT(ar[2].num_children == 0);
+  ASSERT(ar[3].num_children == 0);
+  ASSERT(ar[4].num_children == 0);
+  ASSERT(ar[5].num_children == 0);
+  ASSERT(ar[6].num_children == 0);
+  ASSERT(ar[7].num_children == 0);
+  ASSERT(ar[8].num_children == 0);
+  ASSERT(ar[9].num_children == 0);
+  ASSERT(ar[10].num_children == 0);
+  ASSERT(ar[11].num_children == 0);
+  ASSERT(ar[12].num_children == 3);
+  ASSERT(ar[13].num_children == 0);
+  ASSERT(ar[14].num_children == 0);
+  ASSERT(ar[15].num_children == 0);
+  ASSERT(ar[16].num_children == 0);
+  ASSERT(ar[17].num_children == 4);
+  ASSERT(ar[18].num_children == 0);
+  ASSERT(ar[19].num_children == 0);
+  ASSERT(ar[20].num_children == 0);
+  ASSERT(ar[21].num_children == 0);
+
   return NULL;
 }