// jimp.h
// Prototype of an Immediate Deserialization idea. Expect this API to change a lot.
#ifndef JIMP_H_
#define JIMP_H_

#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// TODO: move all diagnostics reporting out of the library,
// so that the user has more options for how to report things.
  13. typedef enum {
  14. JIMP_INVALID,
  15. JIMP_EOF,
  16. // Puncts
  17. JIMP_OCURLY,
  18. JIMP_CCURLY,
  19. JIMP_OBRACKET,
  20. JIMP_CBRACKET,
  21. JIMP_COMMA,
  22. JIMP_COLON,
  23. // Symbols
  24. JIMP_TRUE,
  25. JIMP_FALSE,
  26. JIMP_NULL,
  27. // Values
  28. JIMP_STRING,
  29. JIMP_NUMBER,
  30. } Jimp_Token;
  31. typedef struct {
  32. const char *file_path;
  33. const char *start;
  34. const char *end;
  35. const char *point;
  36. Jimp_Token token;
  37. const char *token_start; // TODO: `token_start` is primarily used for diagnostics location. Rename it accordingly.
  38. char *string;
  39. size_t string_count;
  40. size_t string_capacity;
  41. double number;
  42. bool boolean;
  43. } Jimp;
  44. // TODO: how do null-s fit into this entire system?
  45. void jimp_begin(Jimp *jimp, const char *file_path, const char *input, size_t input_size);
  46. /// If succeeds puts the freshly parsed boolean into jimp->boolean.
  47. /// Any consequent calls to the jimp_* functions may invalidate jimp->boolean.
  48. bool jimp_bool(Jimp *jimp);
  49. /// If succeeds puts the freshly parsed number into jimp->number.
  50. /// Any consequent calls to the jimp_* functions may invalidate jimp->number.
  51. bool jimp_number(Jimp *jimp);
  52. /// If succeeds puts the freshly parsed string into jimp->string as a NULL-terminated string.
  53. /// Any consequent calls to the jimp_* functions may invalidate jimp->string.
  54. /// strdup it if you don't wanna lose it (memory management is on you at that point).
  55. bool jimp_string(Jimp *jimp);
  56. /// Parses the beginning of the object `{`
  57. bool jimp_object_begin(Jimp *jimp);
  58. /// If succeeds puts the key of the member into jimp->string as a NULL-terminated string.
  59. /// Any consequent calls to the jimp_* functions may invalidate jimp->string.
  60. /// strdup it if you don't wanna lose it (memory management is on you at that point).
  61. bool jimp_object_member(Jimp *jimp);
  62. /// Parses the end of the object `}`
  63. bool jimp_object_end(Jimp *jimp);
  64. /// Reports jimp->string as an unknown member. jimp->string is expected to be populated by
  65. /// jimp_object_member.
  66. void jimp_unknown_member(Jimp *jimp);
  67. /// Parses the beginning of the array `[`
  68. bool jimp_array_begin(Jimp *jimp);
  69. /// Checks whether there is any more items in the array.
  70. bool jimp_array_item(Jimp *jimp);
  71. /// Parses the end of the array `]`
  72. bool jimp_array_end(Jimp *jimp);
  73. /// Prints diagnostic at the current position of the parser.
  74. void jimp_diagf(Jimp *jimp, const char *fmt, ...);
  75. bool jimp_is_null_ahead(Jimp *jimp);
  76. bool jimp_is_bool_ahead(Jimp *jimp);
  77. bool jimp_is_number_ahead(Jimp *jimp);
  78. bool jimp_is_string_ahead(Jimp *jimp);
  79. bool jimp_is_array_ahead(Jimp *jimp);
  80. bool jimp_is_object_ahead(Jimp *jimp);
  81. #endif // JIMP_H_
  82. #ifdef JIMP_IMPLEMENTATION
  83. static bool jimp__expect_token(Jimp *jimp, Jimp_Token token);
  84. static bool jimp__get_and_expect_token(Jimp *jimp, Jimp_Token token);
  85. static const char *jimp__token_kind(Jimp_Token token);
  86. static bool jimp__get_token(Jimp *jimp);
  87. static void jimp__skip_whitespaces(Jimp *jimp);
  88. static void jimp__append_to_string(Jimp *jimp, char x);
  89. static void jimp__append_to_string(Jimp *jimp, char x)
  90. {
  91. if (jimp->string_count >= jimp->string_capacity) {
  92. if (jimp->string_capacity == 0) jimp->string_capacity = 1024;
  93. else jimp->string_capacity *= 2;
  94. jimp->string = realloc(jimp->string, jimp->string_capacity);
  95. }
  96. jimp->string[jimp->string_count++] = x;
  97. }
  98. static void jimp__skip_whitespaces(Jimp *jimp)
  99. {
  100. while (jimp->point < jimp->end && isspace(*jimp->point)) {
  101. jimp->point += 1;
  102. }
  103. }
  104. static Jimp_Token jimp__puncts[256] = {
  105. ['{'] = JIMP_OCURLY,
  106. ['}'] = JIMP_CCURLY,
  107. ['['] = JIMP_OBRACKET,
  108. [']'] = JIMP_CBRACKET,
  109. [','] = JIMP_COMMA,
  110. [':'] = JIMP_COLON,
  111. };
  112. static struct {
  113. Jimp_Token token;
  114. const char *symbol;
  115. } jimp__symbols[] = {
  116. { .token = JIMP_TRUE, .symbol = "true" },
  117. { .token = JIMP_FALSE, .symbol = "false" },
  118. { .token = JIMP_NULL, .symbol = "null" },
  119. };
  120. #define jimp__symbols_count (sizeof(jimp__symbols)/sizeof(jimp__symbols[0]))
  121. static bool jimp__get_token(Jimp *jimp)
  122. {
  123. jimp__skip_whitespaces(jimp);
  124. jimp->token_start = jimp->point;
  125. if (jimp->point >= jimp->end) {
  126. jimp->token = JIMP_EOF;
  127. return false;
  128. }
  129. jimp->token = jimp__puncts[(unsigned char)*jimp->point];
  130. if (jimp->token) {
  131. jimp->point += 1;
  132. return true;
  133. }
  134. for (size_t i = 0; i < jimp__symbols_count; ++i) {
  135. const char *symbol = jimp__symbols[i].symbol;
  136. if (*symbol == *jimp->point) {
  137. while (*symbol && jimp->point < jimp->end && *symbol++ == *jimp->point++) {}
  138. if (*symbol) {
  139. jimp->token = JIMP_INVALID;
  140. jimp_diagf(jimp, "ERROR: invalid symbol\n");
  141. return false;
  142. } else {
  143. jimp->token = jimp__symbols[i].token;
  144. return true;
  145. }
  146. }
  147. }
  148. char *endptr = NULL;
  149. jimp->number = strtod(jimp->point, &endptr); // TODO: This implies that jimp->end is a valid address and *jimp->end == 0
  150. if (jimp->point != endptr) {
  151. jimp->point = endptr;
  152. jimp->token = JIMP_NUMBER;
  153. return true;
  154. }
  155. if (*jimp->point == '"') {
  156. jimp->point++;
  157. jimp->string_count = 0;
  158. while (jimp->point < jimp->end) {
  159. // TODO: support all the JSON escape sequences defined in the spec
  160. // Yes, including those dumb suroggate pairs. Spec is spec.
  161. switch (*jimp->point) {
  162. case '\\': {
  163. jimp->point++;
  164. if (jimp->point >= jimp->end) {
  165. jimp->token_start = jimp->point;
  166. jimp_diagf(jimp, "ERROR: unfinished escape sequence\n");
  167. return false;
  168. }
  169. switch (*jimp->point) {
  170. case 'r':
  171. jimp->point++;
  172. jimp__append_to_string(jimp, '\r');
  173. break;
  174. case 'n':
  175. jimp->point++;
  176. jimp__append_to_string(jimp, '\n');
  177. break;
  178. case 't':
  179. jimp->point++;
  180. jimp__append_to_string(jimp, '\t');
  181. break;
  182. case '\\':
  183. jimp->point++;
  184. jimp__append_to_string(jimp, '\\');
  185. break;
  186. case '"':
  187. jimp->point++;
  188. jimp__append_to_string(jimp, '"');
  189. break;
  190. default:
  191. jimp->token_start = jimp->point;
  192. jimp_diagf(jimp, "ERROR: invalid escape sequence\n");
  193. return false;
  194. }
  195. break;
  196. }
  197. case '"': {
  198. jimp->point++;
  199. jimp__append_to_string(jimp, '\0');
  200. jimp->token = JIMP_STRING;
  201. return true;
  202. }
  203. default: {
  204. char x = *jimp->point++;
  205. jimp__append_to_string(jimp, x);
  206. }
  207. }
  208. }
  209. jimp->token = JIMP_INVALID;
  210. jimp_diagf(jimp, "ERROR: unfinished string\n");
  211. return false;
  212. }
  213. jimp->token = JIMP_INVALID;
  214. jimp_diagf(jimp, "ERROR: invalid token\n");
  215. return false;
  216. }
  217. void jimp_begin(Jimp *jimp, const char *file_path, const char *input, size_t input_size)
  218. {
  219. jimp->file_path = file_path;
  220. jimp->start = input;
  221. jimp->end = input + input_size;
  222. jimp->point = input;
  223. }
  224. void jimp_diagf(Jimp *jimp, const char *fmt, ...)
  225. {
  226. long line_number = 0;
  227. const char *line_start = jimp->start;
  228. const char *point = jimp->start;
  229. while (point < jimp->token_start) {
  230. char x = *point++;
  231. if (x == '\n') {
  232. line_start = point;
  233. line_number += 1;
  234. }
  235. }
  236. fprintf(stderr, "%s:%ld:%ld: ", jimp->file_path, line_number + 1, point - line_start + 1);
  237. va_list args;
  238. va_start(args, fmt);
  239. vfprintf(stderr, fmt, args);
  240. va_end(args);
  241. }
  242. static const char *jimp__token_kind(Jimp_Token token)
  243. {
  244. switch (token) {
  245. case JIMP_EOF: return "end of input";
  246. case JIMP_INVALID: return "invalid";
  247. case JIMP_OCURLY: return "{";
  248. case JIMP_CCURLY: return "}";
  249. case JIMP_OBRACKET: return "[";
  250. case JIMP_CBRACKET: return "]";
  251. case JIMP_COMMA: return ",";
  252. case JIMP_COLON: return ":";
  253. case JIMP_TRUE: return "true";
  254. case JIMP_FALSE: return "false";
  255. case JIMP_NULL: return "null";
  256. case JIMP_STRING: return "string";
  257. case JIMP_NUMBER: return "number";
  258. }
  259. assert(0 && "unreachable");
  260. return NULL;
  261. }
  262. bool jimp_array_begin(Jimp *jimp)
  263. {
  264. return jimp__get_and_expect_token(jimp, JIMP_OBRACKET);
  265. }
  266. bool jimp_array_end(Jimp *jimp)
  267. {
  268. return jimp__get_and_expect_token(jimp, JIMP_CBRACKET);
  269. }
  270. bool jimp_array_item(Jimp *jimp)
  271. {
  272. const char *point = jimp->point;
  273. if (!jimp__get_token(jimp)) return false;
  274. if (jimp->token == JIMP_COMMA) return true;
  275. if (jimp->token == JIMP_CBRACKET) {
  276. jimp->point = point;
  277. return false;
  278. }
  279. jimp->point = point;
  280. return true;
  281. }
  282. void jimp_unknown_member(Jimp *jimp)
  283. {
  284. jimp_diagf(jimp, "ERROR: unexpected object member `%s`\n", jimp->string);
  285. }
  286. bool jimp_object_begin(Jimp *jimp)
  287. {
  288. return jimp__get_and_expect_token(jimp, JIMP_OCURLY);
  289. }
  290. bool jimp_object_member(Jimp *jimp)
  291. {
  292. const char *point = jimp->point;
  293. if (!jimp__get_token(jimp)) return false;
  294. if (jimp->token == JIMP_COMMA) {
  295. if (!jimp__get_and_expect_token(jimp, JIMP_STRING)) return false;
  296. if (!jimp__get_and_expect_token(jimp, JIMP_COLON)) return false;
  297. return true;
  298. }
  299. if (jimp->token == JIMP_CCURLY) {
  300. jimp->point = point;
  301. return false;
  302. }
  303. if (!jimp__expect_token(jimp, JIMP_STRING)) return false;
  304. if (!jimp__get_and_expect_token(jimp, JIMP_COLON)) return false;
  305. return true;
  306. }
  307. bool jimp_object_end(Jimp *jimp)
  308. {
  309. return jimp__get_and_expect_token(jimp, JIMP_CCURLY);
  310. }
  311. bool jimp_string(Jimp *jimp)
  312. {
  313. return jimp__get_and_expect_token(jimp, JIMP_STRING);
  314. }
  315. bool jimp_bool(Jimp *jimp)
  316. {
  317. jimp__get_token(jimp);
  318. if (jimp->token == JIMP_TRUE) {
  319. jimp->boolean = true;
  320. } else if (jimp->token == JIMP_FALSE) {
  321. jimp->boolean = false;
  322. } else {
  323. jimp_diagf(jimp, "ERROR: expected boolean, but got `%s`\n", jimp__token_kind(jimp->token));
  324. return false;
  325. }
  326. return true;
  327. }
  328. bool jimp_number(Jimp *jimp)
  329. {
  330. return jimp__get_and_expect_token(jimp, JIMP_NUMBER);
  331. }
  332. bool jimp_is_null_ahead(Jimp *jimp)
  333. {
  334. const char *point = jimp->point;
  335. if (!jimp__get_token(jimp)) return false;
  336. jimp->point = point;
  337. return jimp->token == JIMP_NULL;
  338. }
  339. bool jimp_is_bool_ahead(Jimp *jimp)
  340. {
  341. const char *point = jimp->point;
  342. if (!jimp__get_token(jimp)) return false;
  343. jimp->point = point;
  344. return jimp->token == JIMP_TRUE || jimp->token == JIMP_FALSE;
  345. }
  346. bool jimp_is_number_ahead(Jimp *jimp)
  347. {
  348. const char *point = jimp->point;
  349. if (!jimp__get_token(jimp)) return false;
  350. jimp->point = point;
  351. return jimp->token == JIMP_NUMBER;
  352. }
  353. bool jimp_is_string_ahead(Jimp *jimp)
  354. {
  355. const char *point = jimp->point;
  356. if (!jimp__get_token(jimp)) return false;
  357. jimp->point = point;
  358. return jimp->token == JIMP_STRING;
  359. }
  360. bool jimp_is_array_ahead(Jimp *jimp)
  361. {
  362. const char *point = jimp->point;
  363. if (!jimp__get_token(jimp)) return false;
  364. jimp->point = point;
  365. return jimp->token == JIMP_OBRACKET;
  366. }
  367. bool jimp_is_object_ahead(Jimp *jimp)
  368. {
  369. const char *point = jimp->point;
  370. if (!jimp__get_token(jimp)) return false;
  371. jimp->point = point;
  372. return jimp->token == JIMP_OCURLY;
  373. }
  374. static bool jimp__get_and_expect_token(Jimp *jimp, Jimp_Token token)
  375. {
  376. if (!jimp__get_token(jimp)) return false;
  377. return jimp__expect_token(jimp, token);
  378. }
  379. static bool jimp__expect_token(Jimp *jimp, Jimp_Token token)
  380. {
  381. if (jimp->token != token) {
  382. jimp_diagf(jimp, "ERROR: expected %s, but got %s\n", jimp__token_kind(token), jimp__token_kind(jimp->token));
  383. return false;
  384. }
  385. return true;
  386. }
  387. #endif // JIMP_IMPLEMENTATION