lua_cjson.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256
  1. /* CJSON - JSON support for Lua
  2. *
  3. * Copyright (c) 2010-2011 Mark Pulford <[email protected]>
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining
  6. * a copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sublicense, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be
  14. * included in all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. */
  24. /* Caveats:
  25. * - JSON "null" values are represented as lightuserdata since Lua
  26. * tables cannot contain "nil". Compare with cjson.null.
  27. * - Invalid UTF-8 characters are not detected and will be passed
  28. * untouched.
  29. * - Javascript comments are not part of the JSON spec, and are not
  30. * supported.
  31. *
  32. * Note: Decoding is slower than encoding. Lua spends significant
  33. * time (30%) managing tables when parsing JSON since it is
  34. * difficult to know object/array sizes ahead of time.
  35. */
  36. #include <assert.h>
  37. #include <string.h>
  38. #include <math.h>
  39. #include <lua.h>
  40. #include <lauxlib.h>
  41. #include "strbuf.h"
  42. #define DEFAULT_SPARSE_CONVERT 0
  43. #define DEFAULT_SPARSE_RATIO 2
  44. #define DEFAULT_SPARSE_SAFE 10
  45. #define DEFAULT_MAX_DEPTH 20
  46. #define DEFAULT_ENCODE_REFUSE_BADNUM 1
  47. #define DEFAULT_DECODE_REFUSE_BADNUM 0
  48. #define DEFAULT_ENCODE_KEEP_BUFFER 1
  /* Token categories used by the JSON decoder.
   * json_token_type_name[] lists the same identifiers in the same order. */
  typedef enum {
      T_OBJ_BEGIN,    /* '{' */
      T_OBJ_END,      /* '}' */
      T_ARR_BEGIN,    /* '[' */
      T_ARR_END,      /* ']' */
      T_STRING,
      T_NUMBER,
      T_BOOLEAN,
      T_NULL,
      T_COLON,        /* ':' */
      T_COMMA,        /* ',' */
      T_END,          /* terminating '\0' of the input */
      T_WHITESPACE,   /* space, tab, newline, carriage return */
      T_ERROR,        /* byte that cannot start any token */
      T_UNKNOWN       /* byte that needs further inspection to classify */
  } json_token_type_t;
  65. static const char *json_token_type_name[] = {
  66. "T_OBJ_BEGIN",
  67. "T_OBJ_END",
  68. "T_ARR_BEGIN",
  69. "T_ARR_END",
  70. "T_STRING",
  71. "T_NUMBER",
  72. "T_BOOLEAN",
  73. "T_NULL",
  74. "T_COLON",
  75. "T_COMMA",
  76. "T_END",
  77. "T_WHITESPACE",
  78. "T_ERROR",
  79. "T_UNKNOWN",
  80. NULL
  81. };
  82. typedef struct {
  83. json_token_type_t ch2token[256];
  84. char escape2char[256]; /* Decoding */
  85. #if 0
  86. char escapes[35][8]; /* Pre-generated escape string buffer */
  87. char *char2escape[256]; /* Encoding */
  88. #endif
  89. strbuf_t encode_buf;
  90. int current_depth;
  91. int encode_sparse_convert;
  92. int encode_sparse_ratio;
  93. int encode_sparse_safe;
  94. int encode_max_depth;
  95. int encode_refuse_badnum;
  96. int decode_refuse_badnum;
  97. int encode_keep_buffer;
  98. } json_config_t;
  99. typedef struct {
  100. const char *data;
  101. int index;
  102. strbuf_t *tmp; /* Temporary storage for strings */
  103. json_config_t *cfg;
  104. } json_parse_t;
  105. typedef struct {
  106. json_token_type_t type;
  107. int index;
  108. union {
  109. const char *string;
  110. double number;
  111. int boolean;
  112. } value;
  113. int string_len;
  114. } json_token_t;
  /* Encoding table: JSON escape sequence for each byte value, or NULL when
   * the byte is emitted unchanged. Only the entries that need escaping are
   * listed; every other slot is implicitly NULL. */
  static const char *char2escape[256] = {
      /* Control characters 0x00 - 0x1f */
      [0x00] = "\\u0000", [0x01] = "\\u0001",
      [0x02] = "\\u0002", [0x03] = "\\u0003",
      [0x04] = "\\u0004", [0x05] = "\\u0005",
      [0x06] = "\\u0006", [0x07] = "\\u0007",
      [0x08] = "\\b",     [0x09] = "\\t",
      [0x0a] = "\\n",     [0x0b] = "\\u000b",
      [0x0c] = "\\f",     [0x0d] = "\\r",
      [0x0e] = "\\u000e", [0x0f] = "\\u000f",
      [0x10] = "\\u0010", [0x11] = "\\u0011",
      [0x12] = "\\u0012", [0x13] = "\\u0013",
      [0x14] = "\\u0014", [0x15] = "\\u0015",
      [0x16] = "\\u0016", [0x17] = "\\u0017",
      [0x18] = "\\u0018", [0x19] = "\\u0019",
      [0x1a] = "\\u001a", [0x1b] = "\\u001b",
      [0x1c] = "\\u001c", [0x1d] = "\\u001d",
      [0x1e] = "\\u001e", [0x1f] = "\\u001f",
      /* Printable characters with a dedicated escape */
      [0x22] = "\\\"",    /* double quote */
      [0x2f] = "\\/",     /* forward slash */
      [0x5c] = "\\\\",    /* backslash */
      /* DEL */
      [0x7f] = "\\u007f",
  };
  /* The address of this variable is the Lua registry key under which the
   * CJSON configuration userdata is looked up. */
  static int json_config_key;
  154. /* ===== CONFIGURATION ===== */
  155. static json_config_t *json_fetch_config(lua_State *l)
  156. {
  157. json_config_t *cfg;
  158. lua_pushlightuserdata(l, &json_config_key);
  159. lua_gettable(l, LUA_REGISTRYINDEX);
  160. cfg = lua_touserdata(l, -1);
  161. if (!cfg)
  162. luaL_error(l, "BUG: Unable to fetch CJSON configuration");
  163. lua_pop(l, 1);
  164. return cfg;
  165. }
  166. static void json_verify_arg_count(lua_State *l, int args)
  167. {
  168. luaL_argcheck(l, lua_gettop(l) <= args, args + 1,
  169. "found too many arguments");
  170. }
  171. /* Configures handling of extremely sparse arrays:
  172. * convert: Convert extremely sparse arrays into objects? Otherwise error.
  173. * ratio: 0: always allow sparse; 1: never allow sparse; >1: use ratio
  174. * safe: Always use an array when the max index <= safe */
  175. static int json_cfg_encode_sparse_array(lua_State *l)
  176. {
  177. json_config_t *cfg;
  178. int val;
  179. json_verify_arg_count(l, 3);
  180. cfg = json_fetch_config(l);
  181. switch (lua_gettop(l)) {
  182. case 3:
  183. val = luaL_checkinteger(l, 3);
  184. luaL_argcheck(l, val >= 0, 3, "expected integer >= 0");
  185. cfg->encode_sparse_safe = val;
  186. case 2:
  187. val = luaL_checkinteger(l, 2);
  188. luaL_argcheck(l, val >= 0, 2, "expected integer >= 0");
  189. cfg->encode_sparse_ratio = val;
  190. case 1:
  191. luaL_argcheck(l, lua_isboolean(l, 1), 1, "expected boolean");
  192. cfg->encode_sparse_convert = lua_toboolean(l, 1);
  193. }
  194. lua_pushboolean(l, cfg->encode_sparse_convert);
  195. lua_pushinteger(l, cfg->encode_sparse_ratio);
  196. lua_pushinteger(l, cfg->encode_sparse_safe);
  197. return 3;
  198. }
  199. /* Configures the maximum number of nested arrays/objects allowed when
  200. * encoding */
  201. static int json_cfg_encode_max_depth(lua_State *l)
  202. {
  203. json_config_t *cfg;
  204. int depth;
  205. json_verify_arg_count(l, 1);
  206. cfg = json_fetch_config(l);
  207. if (lua_gettop(l)) {
  208. depth = luaL_checkinteger(l, 1);
  209. luaL_argcheck(l, depth > 0, 1, "expected positive integer");
  210. cfg->encode_max_depth = depth;
  211. }
  212. lua_pushinteger(l, cfg->encode_max_depth);
  213. return 1;
  214. }
  215. /* Configures JSON encoding buffer persistence */
  216. static int json_cfg_encode_keep_buffer(lua_State *l)
  217. {
  218. json_config_t *cfg;
  219. json_verify_arg_count(l, 1);
  220. cfg = json_fetch_config(l);
  221. if (lua_gettop(l)) {
  222. luaL_checktype(l, 1, LUA_TBOOLEAN);
  223. cfg->encode_keep_buffer = lua_toboolean(l, 1);
  224. }
  225. lua_pushboolean(l, cfg->encode_keep_buffer);
  226. return 1;
  227. }
  228. /* On argument: decode enum and set config variables
  229. * **options must point to a NULL terminated array of 4 enums
  230. * Returns: current enum value */
  231. static void json_enum_option(lua_State *l, const char **options,
  232. int *opt1, int *opt2)
  233. {
  234. int setting;
  235. if (lua_gettop(l)) {
  236. if (lua_isboolean(l, 1))
  237. setting = lua_toboolean(l, 1) * 3;
  238. else
  239. setting = luaL_checkoption(l, 1, NULL, options);
  240. *opt1 = setting & 1 ? 1 : 0;
  241. *opt2 = setting & 2 ? 1 : 0;
  242. } else {
  243. setting = *opt1 | (*opt2 << 1);
  244. }
  245. if (setting)
  246. lua_pushstring(l, options[setting]);
  247. else
  248. lua_pushboolean(l, 0);
  249. }
  250. /* When enabled, rejects: NaN, Infinity, hexidecimal numbers */
  251. static int json_cfg_refuse_invalid_numbers(lua_State *l)
  252. {
  253. static const char *options_enc_dec[] = { "none", "encode", "decode",
  254. "both", NULL };
  255. json_config_t *cfg;
  256. json_verify_arg_count(l, 1);
  257. cfg = json_fetch_config(l);
  258. json_enum_option(l, options_enc_dec,
  259. &cfg->encode_refuse_badnum,
  260. &cfg->decode_refuse_badnum);
  261. return 1;
  262. }
  263. static int json_destroy_config(lua_State *l)
  264. {
  265. json_config_t *cfg;
  266. cfg = lua_touserdata(l, 1);
  267. if (cfg)
  268. strbuf_free(&cfg->encode_buf);
  269. cfg = NULL;
  270. return 0;
  271. }
  272. static void json_create_config(lua_State *l)
  273. {
  274. json_config_t *cfg;
  275. int i;
  276. cfg = lua_newuserdata(l, sizeof(*cfg));
  277. /* Create GC method to clean up strbuf */
  278. lua_newtable(l);
  279. lua_pushcfunction(l, json_destroy_config);
  280. lua_setfield(l, -2, "__gc");
  281. lua_setmetatable(l, -2);
  282. strbuf_init(&cfg->encode_buf, 0);
  283. cfg->encode_sparse_convert = DEFAULT_SPARSE_CONVERT;
  284. cfg->encode_sparse_ratio = DEFAULT_SPARSE_RATIO;
  285. cfg->encode_sparse_safe = DEFAULT_SPARSE_SAFE;
  286. cfg->encode_max_depth = DEFAULT_MAX_DEPTH;
  287. cfg->encode_refuse_badnum = DEFAULT_ENCODE_REFUSE_BADNUM;
  288. cfg->decode_refuse_badnum = DEFAULT_DECODE_REFUSE_BADNUM;
  289. cfg->encode_keep_buffer = DEFAULT_ENCODE_KEEP_BUFFER;
  290. /* Decoding init */
  291. /* Tag all characters as an error */
  292. for (i = 0; i < 256; i++)
  293. cfg->ch2token[i] = T_ERROR;
  294. /* Set tokens that require no further processing */
  295. cfg->ch2token['{'] = T_OBJ_BEGIN;
  296. cfg->ch2token['}'] = T_OBJ_END;
  297. cfg->ch2token['['] = T_ARR_BEGIN;
  298. cfg->ch2token[']'] = T_ARR_END;
  299. cfg->ch2token[','] = T_COMMA;
  300. cfg->ch2token[':'] = T_COLON;
  301. cfg->ch2token['\0'] = T_END;
  302. cfg->ch2token[' '] = T_WHITESPACE;
  303. cfg->ch2token['\t'] = T_WHITESPACE;
  304. cfg->ch2token['\n'] = T_WHITESPACE;
  305. cfg->ch2token['\r'] = T_WHITESPACE;
  306. /* Update characters that require further processing */
  307. cfg->ch2token['f'] = T_UNKNOWN; /* false? */
  308. cfg->ch2token['i'] = T_UNKNOWN; /* inf, ininity? */
  309. cfg->ch2token['I'] = T_UNKNOWN;
  310. cfg->ch2token['n'] = T_UNKNOWN; /* null, nan? */
  311. cfg->ch2token['N'] = T_UNKNOWN;
  312. cfg->ch2token['t'] = T_UNKNOWN; /* true? */
  313. cfg->ch2token['"'] = T_UNKNOWN; /* string? */
  314. cfg->ch2token['+'] = T_UNKNOWN; /* number? */
  315. cfg->ch2token['-'] = T_UNKNOWN;
  316. for (i = 0; i < 10; i++)
  317. cfg->ch2token['0' + i] = T_UNKNOWN;
  318. /* Lookup table for parsing escape characters */
  319. for (i = 0; i < 256; i++)
  320. cfg->escape2char[i] = 0; /* String error */
  321. cfg->escape2char['"'] = '"';
  322. cfg->escape2char['\\'] = '\\';
  323. cfg->escape2char['/'] = '/';
  324. cfg->escape2char['b'] = '\b';
  325. cfg->escape2char['t'] = '\t';
  326. cfg->escape2char['n'] = '\n';
  327. cfg->escape2char['f'] = '\f';
  328. cfg->escape2char['r'] = '\r';
  329. cfg->escape2char['u'] = 'u'; /* Unicode parsing required */
  330. #if 0
  331. /* Initialise separate storage for pre-generated escape codes.
  332. * Escapes 0-31 map directly, 34, 92, 127 follow afterwards to
  333. * save memory. */
  334. for (i = 0 ; i < 32; i++)
  335. sprintf(cfg->escapes[i], "\\u%04x", i);
  336. strcpy(cfg->escapes[8], "\b"); /* Override simpler escapes */
  337. strcpy(cfg->escapes[9], "\t");
  338. strcpy(cfg->escapes[10], "\n");
  339. strcpy(cfg->escapes[12], "\f");
  340. strcpy(cfg->escapes[13], "\r");
  341. strcpy(cfg->escapes[32], "\\\""); /* chr(34) */
  342. strcpy(cfg->escapes[33], "\\\\"); /* chr(92) */
  343. sprintf(cfg->escapes[34], "\\u%04x", 127); /* char(127) */
  344. /* Initialise encoding escape lookup table */
  345. for (i = 0; i < 32; i++)
  346. cfg->char2escape[i] = cfg->escapes[i];
  347. for (i = 32; i < 256; i++)
  348. cfg->char2escape[i] = NULL;
  349. cfg->char2escape[34] = cfg->escapes[32];
  350. cfg->char2escape[92] = cfg->escapes[33];
  351. cfg->char2escape[127] = cfg->escapes[34];
  352. #endif
  353. }
  354. /* ===== ENCODING ===== */
  355. static void json_encode_exception(lua_State *l, json_config_t *cfg, int lindex,
  356. const char *reason)
  357. {
  358. if (!cfg->encode_keep_buffer)
  359. strbuf_free(&cfg->encode_buf);
  360. luaL_error(l, "Cannot serialise %s: %s",
  361. lua_typename(l, lua_type(l, lindex)), reason);
  362. }
  363. /* json_append_string args:
  364. * - lua_State
  365. * - JSON strbuf
  366. * - String (Lua stack index)
  367. *
  368. * Returns nothing. Doesn't remove string from Lua stack */
  369. static void json_append_string(lua_State *l, strbuf_t *json, int lindex)
  370. {
  371. const char *escstr;
  372. int i;
  373. const char *str;
  374. size_t len;
  375. str = lua_tolstring(l, lindex, &len);
  376. /* Worst case is len * 6 (all unicode escapes).
  377. * This buffer is reused constantly for small strings
  378. * If there are any excess pages, they won't be hit anyway.
  379. * This gains ~5% speedup. */
  380. strbuf_ensure_empty_length(json, len * 6 + 2);
  381. strbuf_append_char_unsafe(json, '\"');
  382. for (i = 0; i < len; i++) {
  383. escstr = char2escape[(unsigned char)str[i]];
  384. if (escstr)
  385. strbuf_append_string(json, escstr);
  386. else
  387. strbuf_append_char_unsafe(json, str[i]);
  388. }
  389. strbuf_append_char_unsafe(json, '\"');
  390. }
  391. /* Find the size of the array on the top of the Lua stack
  392. * -1 object (not a pure array)
  393. * >=0 elements in array
  394. */
  395. static int lua_array_length(lua_State *l, json_config_t *cfg)
  396. {
  397. double k;
  398. int max;
  399. int items;
  400. max = 0;
  401. items = 0;
  402. lua_pushnil(l);
  403. /* table, startkey */
  404. while (lua_next(l, -2) != 0) {
  405. /* table, key, value */
  406. if (lua_isnumber(l, -2) &&
  407. (k = lua_tonumber(l, -2))) {
  408. /* Integer >= 1 ? */
  409. if (floor(k) == k && k >= 1) {
  410. if (k > max)
  411. max = k;
  412. items++;
  413. lua_pop(l, 1);
  414. continue;
  415. }
  416. }
  417. /* Must not be an array (non integer key) */
  418. lua_pop(l, 2);
  419. return -1;
  420. }
  421. /* Encode very sparse arrays as objects (if enabled) */
  422. if (cfg->encode_sparse_ratio > 0 &&
  423. max > items * cfg->encode_sparse_ratio &&
  424. max > cfg->encode_sparse_safe) {
  425. if (!cfg->encode_sparse_convert)
  426. json_encode_exception(l, cfg, -1, "excessively sparse array");
  427. return -1;
  428. }
  429. return max;
  430. }
  431. static void json_encode_descend(lua_State *l, json_config_t *cfg)
  432. {
  433. cfg->current_depth++;
  434. if (cfg->current_depth > cfg->encode_max_depth) {
  435. if (!cfg->encode_keep_buffer)
  436. strbuf_free(&cfg->encode_buf);
  437. luaL_error(l, "Cannot serialise, excessive nesting (%d)",
  438. cfg->current_depth);
  439. }
  440. }
  441. static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json);
  442. /* json_append_array args:
  443. * - lua_State
  444. * - JSON strbuf
  445. * - Size of passwd Lua array (top of stack) */
  446. static void json_append_array(lua_State *l, json_config_t *cfg, strbuf_t *json,
  447. int array_length)
  448. {
  449. int comma, i;
  450. json_encode_descend(l, cfg);
  451. strbuf_append_char(json, '[');
  452. comma = 0;
  453. for (i = 1; i <= array_length; i++) {
  454. if (comma)
  455. strbuf_append_char(json, ',');
  456. else
  457. comma = 1;
  458. lua_rawgeti(l, -1, i);
  459. json_append_data(l, cfg, json);
  460. lua_pop(l, 1);
  461. }
  462. strbuf_append_char(json, ']');
  463. cfg->current_depth--;
  464. }
  465. static void json_append_number(lua_State *l, strbuf_t *json, int index,
  466. json_config_t *cfg)
  467. {
  468. double num = lua_tonumber(l, index);
  469. if (cfg->encode_refuse_badnum && (isinf(num) || isnan(num)))
  470. json_encode_exception(l, cfg, index, "must not be NaN or Inf");
  471. strbuf_append_number(json, num);
  472. }
  473. static void json_append_object(lua_State *l, json_config_t *cfg,
  474. strbuf_t *json)
  475. {
  476. int comma, keytype;
  477. json_encode_descend(l, cfg);
  478. /* Object */
  479. strbuf_append_char(json, '{');
  480. lua_pushnil(l);
  481. /* table, startkey */
  482. comma = 0;
  483. while (lua_next(l, -2) != 0) {
  484. if (comma)
  485. strbuf_append_char(json, ',');
  486. else
  487. comma = 1;
  488. /* table, key, value */
  489. keytype = lua_type(l, -2);
  490. if (keytype == LUA_TNUMBER) {
  491. strbuf_append_char(json, '"');
  492. json_append_number(l, json, -2, cfg);
  493. strbuf_append_mem(json, "\":", 2);
  494. } else if (keytype == LUA_TSTRING) {
  495. json_append_string(l, json, -2);
  496. strbuf_append_char(json, ':');
  497. } else {
  498. json_encode_exception(l, cfg, -2,
  499. "table key must be a number or string");
  500. /* never returns */
  501. }
  502. /* table, key, value */
  503. json_append_data(l, cfg, json);
  504. lua_pop(l, 1);
  505. /* table, key */
  506. }
  507. strbuf_append_char(json, '}');
  508. cfg->current_depth--;
  509. }
  510. /* Serialise Lua data into JSON string. */
  511. static void json_append_data(lua_State *l, json_config_t *cfg, strbuf_t *json)
  512. {
  513. int len;
  514. switch (lua_type(l, -1)) {
  515. case LUA_TSTRING:
  516. json_append_string(l, json, -1);
  517. break;
  518. case LUA_TNUMBER:
  519. json_append_number(l, json, -1, cfg);
  520. break;
  521. case LUA_TBOOLEAN:
  522. if (lua_toboolean(l, -1))
  523. strbuf_append_mem(json, "true", 4);
  524. else
  525. strbuf_append_mem(json, "false", 5);
  526. break;
  527. case LUA_TTABLE:
  528. len = lua_array_length(l, cfg);
  529. if (len > 0)
  530. json_append_array(l, cfg, json, len);
  531. else
  532. json_append_object(l, cfg, json);
  533. break;
  534. case LUA_TNIL:
  535. strbuf_append_mem(json, "null", 4);
  536. break;
  537. case LUA_TLIGHTUSERDATA:
  538. if (lua_touserdata(l, -1) == NULL) {
  539. strbuf_append_mem(json, "null", 4);
  540. break;
  541. }
  542. default:
  543. /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD,
  544. * and LUA_TLIGHTUSERDATA) cannot be serialised */
  545. json_encode_exception(l, cfg, -1, "type not supported");
  546. /* never returns */
  547. }
  548. }
  549. static int json_encode(lua_State *l)
  550. {
  551. json_config_t *cfg;
  552. char *json;
  553. int len;
  554. /* Can't use json_verify_arg_count() since we need to ensure
  555. * there is only 1 argument */
  556. luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument");
  557. cfg = json_fetch_config(l);
  558. cfg->current_depth = 0;
  559. /* Reset the persistent buffer if it exists.
  560. * Otherwise allocate a new buffer. */
  561. if (strbuf_allocated(&cfg->encode_buf))
  562. strbuf_reset(&cfg->encode_buf);
  563. else
  564. strbuf_init(&cfg->encode_buf, 0);
  565. json_append_data(l, cfg, &cfg->encode_buf);
  566. json = strbuf_string(&cfg->encode_buf, &len);
  567. lua_pushlstring(l, json, len);
  568. if (!cfg->encode_keep_buffer)
  569. strbuf_free(&cfg->encode_buf);
  570. return 1;
  571. }
  572. /* ===== DECODING ===== */
  573. static void json_process_value(lua_State *l, json_parse_t *json,
  574. json_token_t *token);
  /* Convert one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value.
   * Returns 0..15, or -1 when the character is not a hex digit. */
  static int hexdigit2int(char hex)
  {
      int folded;

      if (hex >= '0' && hex <= '9')
          return hex - '0';

      /* Setting bit 0x20 maps 'A'-'F' onto 'a'-'f' */
      folded = hex | 0x20;
      if (folded < 'a' || folded > 'f')
          return -1;

      return folded - 'a' + 10;
  }
  /* Decode exactly four hex digits starting at hex[0].
   * Returns the value 0x0000..0xFFFF, or -1 on the first character that is
   * not a hex digit. A NUL byte counts as invalid, so scanning never
   * continues past the end of a NUL terminated string. */
  static int decode_hex4(const char *hex)
  {
      int value = 0;
      int pos;

      for (pos = 0; pos < 4; pos++) {
          int nibble = hexdigit2int(hex[pos]);

          if (nibble < 0)
              return -1;

          /* Digits arrive most significant first */
          value = (value << 4) | nibble;
      }

      return value;
  }
  /* Converts a Unicode codepoint to UTF-8.
   * Returns UTF-8 string length, and up to 4 bytes in *utf8.
   * Returns 0 when the codepoint is above 0x1FFFFF. */
  static int codepoint_to_utf8(char *utf8, int codepoint)
  {
      /* Marker bits of the first byte, indexed by sequence length */
      static const unsigned char lead_bits[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };
      int len;
      int pos;

      /* 0xxxxxxx */
      if (codepoint <= 0x7F) {
          utf8[0] = codepoint;
          return 1;
      }

      if (codepoint <= 0x7FF)
          len = 2;        /* 110xxxxx 10xxxxxx */
      else if (codepoint <= 0xFFFF)
          len = 3;        /* 1110xxxx 10xxxxxx 10xxxxxx */
      else if (codepoint <= 0x1FFFFF)
          len = 4;        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      else
          return 0;

      /* Fill the continuation bytes from the tail, 6 bits at a time */
      for (pos = len - 1; pos > 0; pos--) {
          utf8[pos] = (codepoint & 0x3F) | 0x80;
          codepoint >>= 6;
      }
      /* The remaining high bits go into the lead byte */
      utf8[0] = codepoint | lead_bits[len];

      return len;
  }
  635. /* Called when index pointing to beginning of UTF-16 code escape: \uXXXX
  636. * \u is guaranteed to exist, but the remaining hex characters may be
  637. * missing.
  638. * Translate to UTF-8 and append to temporary token string.
  639. * Must advance index to the next character to be processed.
  640. * Returns: 0 success
  641. * -1 error
  642. */
  643. static int json_append_unicode_escape(json_parse_t *json)
  644. {
  645. char utf8[4]; /* Surrogate pairs require 4 UTF-8 bytes */
  646. int codepoint;
  647. int surrogate_low;
  648. int len;
  649. int escape_len = 6;
  650. /* Fetch UTF-16 code unit */
  651. codepoint = decode_hex4(&json->data[json->index + 2]);
  652. if (codepoint < 0)
  653. return -1;
  654. /* UTF-16 surrogate pairs take the following 2 byte form:
  655. * 11011 x yyyyyyyyyy
  656. * When x = 0: y is the high 10 bits of the codepoint
  657. * x = 1: y is the low 10 bits of the codepoint
  658. *
  659. * Check for a surrogate pair (high or low) */
  660. if ((codepoint & 0xF800) == 0xD800) {
  661. /* Error if the 1st surrogate is not high */
  662. if (codepoint & 0x400)
  663. return -1;
  664. /* Ensure the next code is a unicode escape */
  665. if (json->data[json->index + escape_len] != '\\' ||
  666. json->data[json->index + escape_len + 1] != 'u') {
  667. return -1;
  668. }
  669. /* Fetch the next codepoint */
  670. surrogate_low = decode_hex4(&json->data[json->index + 2 + escape_len]);
  671. if (surrogate_low < 0)
  672. return -1;
  673. /* Error if the 2nd code is not a low surrogate */
  674. if ((surrogate_low & 0xFC00) != 0xDC00)
  675. return -1;
  676. /* Calculate Unicode codepoint */
  677. codepoint = (codepoint & 0x3FF) << 10;
  678. surrogate_low &= 0x3FF;
  679. codepoint = (codepoint | surrogate_low) + 0x10000;
  680. escape_len = 12;
  681. }
  682. /* Convert codepoint to UTF-8 */
  683. len = codepoint_to_utf8(utf8, codepoint);
  684. if (!len)
  685. return -1;
  686. /* Append bytes and advance parse index */
  687. strbuf_append_mem_unsafe(json->tmp, utf8, len);
  688. json->index += escape_len;
  689. return 0;
  690. }
  691. static void json_set_token_error(json_token_t *token, json_parse_t *json,
  692. const char *errtype)
  693. {
  694. token->type = T_ERROR;
  695. token->index = json->index;
  696. token->value.string = errtype;
  697. }
  698. static void json_next_string_token(json_parse_t *json, json_token_t *token)
  699. {
  700. char *escape2char = json->cfg->escape2char;
  701. char ch;
  702. /* Caller must ensure a string is next */
  703. assert(json->data[json->index] == '"');
  704. /* Skip " */
  705. json->index++;
  706. /* json->tmp is the temporary strbuf used to accumulate the
  707. * decoded string value. */
  708. strbuf_reset(json->tmp);
  709. while ((ch = json->data[json->index]) != '"') {
  710. if (!ch) {
  711. /* Premature end of the string */
  712. json_set_token_error(token, json, "unexpected end of string");
  713. return;
  714. }
  715. /* Handle escapes */
  716. if (ch == '\\') {
  717. /* Fetch escape character */
  718. ch = json->data[json->index + 1];
  719. /* Translate escape code and append to tmp string */
  720. ch = escape2char[(unsigned char)ch];
  721. if (ch == 'u') {
  722. if (json_append_unicode_escape(json) == 0)
  723. continue;
  724. json_set_token_error(token, json,
  725. "invalid unicode escape code");
  726. return;
  727. }
  728. if (!ch) {
  729. json_set_token_error(token, json, "invalid escape code");
  730. return;
  731. }
  732. /* Skip '\' */
  733. json->index++;
  734. }
  735. /* Append normal character or translated single character
  736. * Unicode escapes are handled above */
  737. strbuf_append_char_unsafe(json->tmp, ch);
  738. json->index++;
  739. }
  740. json->index++; /* Eat final quote (") */
  741. strbuf_ensure_null(json->tmp);
  742. token->type = T_STRING;
  743. token->value.string = strbuf_string(json->tmp, &token->string_len);
  744. }
  745. /* JSON numbers should take the following form:
  746. * -?(0|[1-9]|[1-9][0-9]+)(.[0-9]+)?([eE][-+]?[0-9]+)?
  747. *
  748. * json_next_number_token() uses strtod() which allows other forms:
  749. * - numbers starting with '+'
  750. * - NaN, -NaN, infinity, -infinity
  751. * - hexidecimal numbers
  752. * - numbers with leading zeros
  753. *
  754. * json_is_invalid_number() detects "numbers" which may pass strtod()'s
  755. * error checking, but should not be allowed with strict JSON.
  756. *
  757. * json_is_invalid_number() may pass numbers which cause strtod()
  758. * to generate an error.
  759. */
  760. static int json_is_invalid_number(json_parse_t *json)
  761. {
  762. int i = json->index;
  763. /* Reject numbers starting with + */
  764. if (json->data[i] == '+')
  765. return 1;
  766. /* Skip minus sign if it exists */
  767. if (json->data[i] == '-')
  768. i++;
  769. /* Reject numbers starting with 0x, or leading zeros */
  770. if (json->data[i] == '0') {
  771. int ch2 = json->data[i + 1];
  772. if ((ch2 | 0x20) == 'x' || /* Hex */
  773. ('0' <= ch2 && ch2 <= '9')) /* Leading zero */
  774. return 1;
  775. return 0;
  776. } else if (json->data[i] <= '9') {
  777. return 0; /* Ordinary number */
  778. }
  779. /* Reject inf/nan */
  780. if (!strncasecmp(&json->data[i], "inf", 3))
  781. return 1;
  782. if (!strncasecmp(&json->data[i], "nan", 3))
  783. return 1;
  784. /* Pass all other numbers which may still be invalid, but
  785. * strtod() will catch them. */
  786. return 0;
  787. }
  788. static void json_next_number_token(json_parse_t *json, json_token_t *token)
  789. {
  790. const char *startptr;
  791. char *endptr;
  792. token->type = T_NUMBER;
  793. startptr = &json->data[json->index];
  794. token->value.number = strtod(&json->data[json->index], &endptr);
  795. if (startptr == endptr)
  796. json_set_token_error(token, json, "invalid number");
  797. else
  798. json->index += endptr - startptr; /* Skip the processed number */
  799. return;
  800. }
  801. /* Fills in the token struct.
  802. * T_STRING will return a pointer to the json_parse_t temporary string
  803. * T_ERROR will leave the json->index pointer at the error.
  804. */
  805. static void json_next_token(json_parse_t *json, json_token_t *token)
  806. {
  807. json_token_type_t *ch2token = json->cfg->ch2token;
  808. int ch;
  809. /* Eat whitespace. FIXME: UGLY */
  810. token->type = ch2token[(unsigned char)json->data[json->index]];
  811. while (token->type == T_WHITESPACE)
  812. token->type = ch2token[(unsigned char)json->data[++json->index]];
  813. token->index = json->index;
  814. /* Don't advance the pointer for an error or the end */
  815. if (token->type == T_ERROR) {
  816. json_set_token_error(token, json, "invalid token");
  817. return;
  818. }
  819. if (token->type == T_END) {
  820. return;
  821. }
  822. /* Found a known single character token, advance index and return */
  823. if (token->type != T_UNKNOWN) {
  824. json->index++;
  825. return;
  826. }
  827. /* Process characters which triggered T_UNKNOWN */
  828. ch = json->data[json->index];
  829. /* Must use strncmp() to match the front of the JSON string.
  830. * JSON identifier must be lowercase.
  831. * When strict_numbers if disabled, either case is allowed for
  832. * Infinity/NaN (since we are no longer following the spec..) */
  833. if (ch == '"') {
  834. json_next_string_token(json, token);
  835. return;
  836. } else if (ch == '-' || ('0' <= ch && ch <= '9')) {
  837. if (json->cfg->decode_refuse_badnum && json_is_invalid_number(json)) {
  838. json_set_token_error(token, json, "invalid number");
  839. return;
  840. }
  841. json_next_number_token(json, token);
  842. return;
  843. } else if (!strncmp(&json->data[json->index], "true", 4)) {
  844. token->type = T_BOOLEAN;
  845. token->value.boolean = 1;
  846. json->index += 4;
  847. return;
  848. } else if (!strncmp(&json->data[json->index], "false", 5)) {
  849. token->type = T_BOOLEAN;
  850. token->value.boolean = 0;
  851. json->index += 5;
  852. return;
  853. } else if (!strncmp(&json->data[json->index], "null", 4)) {
  854. token->type = T_NULL;
  855. json->index += 4;
  856. return;
  857. } else if (!json->cfg->decode_refuse_badnum &&
  858. json_is_invalid_number(json)) {
  859. /* When refuse_badnum is disabled, only attempt to process
  860. * numbers we know are invalid JSON (Inf, NaN, hex)
  861. * This is required to generate an appropriate token error,
  862. * otherwise all bad tokens will register as "invalid number"
  863. */
  864. json_next_number_token(json, token);
  865. return;
  866. }
  867. /* Token starts with t/f/n but isn't recognised above. */
  868. json_set_token_error(token, json, "invalid token");
  869. }
  870. /* This function does not return.
  871. * DO NOT CALL WITH DYNAMIC MEMORY ALLOCATED.
  872. * The only supported exception is the temporary parser string
  873. * json->tmp struct.
  874. * json and token should exist on the stack somewhere.
  875. * luaL_error() will long_jmp and release the stack */
  876. static void json_throw_parse_error(lua_State *l, json_parse_t *json,
  877. const char *exp, json_token_t *token)
  878. {
  879. const char *found;
  880. strbuf_free(json->tmp);
  881. if (token->type == T_ERROR)
  882. found = token->value.string;
  883. else
  884. found = json_token_type_name[token->type];
  885. /* Note: token->index is 0 based, display starting from 1 */
  886. luaL_error(l, "Expected %s but found %s at character %d",
  887. exp, found, token->index + 1);
  888. }
  889. static void json_decode_checkstack(lua_State *l, json_parse_t *json, int n)
  890. {
  891. if (lua_checkstack(l, n))
  892. return;
  893. strbuf_free(json->tmp);
  894. luaL_error(l, "Too many nested data structures");
  895. }
  896. static void json_parse_object_context(lua_State *l, json_parse_t *json)
  897. {
  898. json_token_t token;
  899. /* 3 slots required:
  900. * .., table, key, value */
  901. json_decode_checkstack(l, json, 3);
  902. lua_newtable(l);
  903. json_next_token(json, &token);
  904. /* Handle empty objects */
  905. if (token.type == T_OBJ_END) {
  906. return;
  907. }
  908. while (1) {
  909. if (token.type != T_STRING)
  910. json_throw_parse_error(l, json, "object key string", &token);
  911. /* Push key */
  912. lua_pushlstring(l, token.value.string, token.string_len);
  913. json_next_token(json, &token);
  914. if (token.type != T_COLON)
  915. json_throw_parse_error(l, json, "colon", &token);
  916. /* Fetch value */
  917. json_next_token(json, &token);
  918. json_process_value(l, json, &token);
  919. /* Set key = value */
  920. lua_rawset(l, -3);
  921. json_next_token(json, &token);
  922. if (token.type == T_OBJ_END)
  923. return;
  924. if (token.type != T_COMMA)
  925. json_throw_parse_error(l, json, "comma or object end", &token);
  926. json_next_token(json, &token);
  927. }
  928. }
  929. /* Handle the array context */
  930. static void json_parse_array_context(lua_State *l, json_parse_t *json)
  931. {
  932. json_token_t token;
  933. int i;
  934. /* 2 slots required:
  935. * .., table, value */
  936. json_decode_checkstack(l, json, 2);
  937. lua_newtable(l);
  938. json_next_token(json, &token);
  939. /* Handle empty arrays */
  940. if (token.type == T_ARR_END)
  941. return;
  942. for (i = 1; ; i++) {
  943. json_process_value(l, json, &token);
  944. lua_rawseti(l, -2, i); /* arr[i] = value */
  945. json_next_token(json, &token);
  946. if (token.type == T_ARR_END)
  947. return;
  948. if (token.type != T_COMMA)
  949. json_throw_parse_error(l, json, "comma or array end", &token);
  950. json_next_token(json, &token);
  951. }
  952. }
  953. /* Handle the "value" context */
  954. static void json_process_value(lua_State *l, json_parse_t *json,
  955. json_token_t *token)
  956. {
  957. switch (token->type) {
  958. case T_STRING:
  959. lua_pushlstring(l, token->value.string, token->string_len);
  960. break;;
  961. case T_NUMBER:
  962. lua_pushnumber(l, token->value.number);
  963. break;;
  964. case T_BOOLEAN:
  965. lua_pushboolean(l, token->value.boolean);
  966. break;;
  967. case T_OBJ_BEGIN:
  968. json_parse_object_context(l, json);
  969. break;;
  970. case T_ARR_BEGIN:
  971. json_parse_array_context(l, json);
  972. break;;
  973. case T_NULL:
  974. /* In Lua, setting "t[k] = nil" will delete k from the table.
  975. * Hence a NULL pointer lightuserdata object is used instead */
  976. lua_pushlightuserdata(l, NULL);
  977. break;;
  978. default:
  979. json_throw_parse_error(l, json, "value", token);
  980. }
  981. }
  982. /* json_text must be null terminated string */
  983. static void lua_json_decode(lua_State *l, const char *json_text, int json_len)
  984. {
  985. json_parse_t json;
  986. json_token_t token;
  987. json.cfg = json_fetch_config(l);
  988. json.data = json_text;
  989. json.index = 0;
  990. /* Ensure the temporary buffer can hold the entire string.
  991. * This means we no longer need to do length checks since the decoded
  992. * string must be smaller than the entire json string */
  993. json.tmp = strbuf_new(json_len);
  994. json_next_token(&json, &token);
  995. json_process_value(l, &json, &token);
  996. /* Ensure there is no more input left */
  997. json_next_token(&json, &token);
  998. if (token.type != T_END)
  999. json_throw_parse_error(l, &json, "the end", &token);
  1000. strbuf_free(json.tmp);
  1001. }
  1002. static int json_decode(lua_State *l)
  1003. {
  1004. const char *json;
  1005. size_t len;
  1006. json_verify_arg_count(l, 1);
  1007. json = luaL_checklstring(l, 1, &len);
  1008. /* Detect Unicode other than UTF-8 (see RFC 4627, Sec 3)
  1009. *
  1010. * CJSON can support any simple data type, hence only the first
  1011. * character is guaranteed to be ASCII (at worst: '"'). This is
  1012. * still enough to detect whether the wrong encoding is in use. */
  1013. if (len >= 2 && (!json[0] || !json[1]))
  1014. luaL_error(l, "JSON parser does not support UTF-16 or UTF-32");
  1015. lua_json_decode(l, json, len);
  1016. return 1;
  1017. }
  1018. /* ===== INITIALISATION ===== */
  1019. int luaopen_cjson(lua_State *l)
  1020. {
  1021. luaL_Reg reg[] = {
  1022. { "encode", json_encode },
  1023. { "decode", json_decode },
  1024. { "encode_sparse_array", json_cfg_encode_sparse_array },
  1025. { "encode_max_depth", json_cfg_encode_max_depth },
  1026. { "encode_keep_buffer", json_cfg_encode_keep_buffer },
  1027. { "refuse_invalid_numbers", json_cfg_refuse_invalid_numbers },
  1028. { NULL, NULL }
  1029. };
  1030. /* Use json_fetch_config as a pointer.
  1031. * It's faster than using a config string, and more unique */
  1032. lua_pushlightuserdata(l, &json_config_key);
  1033. json_create_config(l);
  1034. lua_settable(l, LUA_REGISTRYINDEX);
  1035. luaL_register(l, "cjson", reg);
  1036. /* Set cjson.null */
  1037. lua_pushlightuserdata(l, NULL);
  1038. lua_setfield(l, -2, "null");
  1039. /* Set cjson.version */
  1040. lua_pushliteral(l, VERSION);
  1041. lua_setfield(l, -2, "version");
  1042. /* Return cjson table */
  1043. return 1;
  1044. }
  1045. /* vi:ai et sw=4 ts=4:
  1046. */