unmarshal.odin 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606
  1. package json
  2. import "core:mem"
  3. import "core:math"
  4. import "core:reflect"
  5. import "core:strconv"
  6. import "core:strings"
  7. import "core:runtime"
  // Failures caused by the arguments or the raw input given to the unmarshal
  // entry points, as opposed to failures raised while walking the tokens.
  Unmarshal_Data_Error :: enum {
      // `unmarshal_any`: `is_valid` rejected the input bytes.
      Invalid_Data,
      // `unmarshal_any`: the `any` was nil, had a nil typeid, or had a nil data pointer.
      Invalid_Parameter,
      // `unmarshal_any`: the destination was not a typed pointer (`rawptr` is rejected too).
      Non_Pointer_Parameter,
      // `unmarshal_object`: a struct field was already marked as used.
      Multiple_Use_Field,
  }
  // Reported when a token cannot be stored in the destination type.
  // The unmarshal procedures build it from the destination's typeid and the
  // parser's current token at the moment they are entered.
  Unsupported_Type_Error :: struct {
      id:    typeid, // type of the destination that could not be filled
      token: Token,  // parser token captured when the procedure started
  }
  // Every error an unmarshal procedure can return; `nil` means success.
  Unmarshal_Error :: union {
      // Declared elsewhere in the package; values reach this union via
      // `or_return` on `parse_value`, `unquote_string` and `bytes_make`.
      Error,
      // Bad arguments or bad input data.
      Unmarshal_Data_Error,
      // The destination type cannot hold the token that was read.
      Unsupported_Type_Error,
  }
  // Parses `data` according to `spec` and stores the result in the value that
  // the pointer wrapped by `v` refers to.
  //
  // Inputs:
  // - data: the raw JSON text
  // - v: an `any` that wraps a typed pointer to the destination
  // - spec: the dialect passed to `is_valid` and `make_parser`
  // - allocator: passed to the parser; the parser's allocator also becomes
  //   `context.allocator` for the rest of the call
  //
  // Returns:
  // - .Invalid_Parameter when `v` is nil, has no typeid, has no backing data,
  //   or wraps a nil pointer
  // - .Non_Pointer_Parameter when `v` is not a typed pointer (`rawptr` is rejected)
  // - .Invalid_Data when `is_valid` rejects `data`
  // - otherwise the result of `unmarshal_object` / `unmarshal_value`
  unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      v := v
      if v == nil || v.id == nil {
          return .Invalid_Parameter
      }
      v = reflect.any_base(v)
      ti := type_info_of(v.id)
      if !reflect.is_pointer(ti) || ti.id == rawptr {
          return .Non_Pointer_Parameter
      }

      PARSE_INTEGERS :: true
      if !is_valid(data, spec, PARSE_INTEGERS) {
          return .Invalid_Data
      }
      p := make_parser(data, spec, PARSE_INTEGERS, allocator)

      // The guard has to run BEFORE `v.data` is dereferenced; checking it
      // afterwards (as was done previously) cannot prevent the bad read.
      if v.data == nil {
          return .Invalid_Parameter
      }
      // `v.data` points at the caller's pointer; load the pointer itself.
      dst_ptr := (^rawptr)(v.data)^
      if dst_ptr == nil {
          // A nil destination cannot be written to.
          return .Invalid_Parameter
      }
      dst := any{dst_ptr, ti.variant.(reflect.Type_Info_Pointer).elem.id}

      context.allocator = p.allocator

      // For MJSON, input that starts with an identifier or string is read as
      // the body of an object that runs until EOF (no opening brace expected).
      if p.spec == .MJSON {
          #partial switch p.curr_token.kind {
          case .Ident, .String:
              return unmarshal_object(&p, dst, .EOF)
          }
      }
      return unmarshal_value(&p, dst)
  }
  // Typed convenience wrapper: forwards `ptr` (a pointer to any type `T`) to
  // `unmarshal_any` together with the unchanged `data`, `spec` and `allocator`.
  unmarshal :: proc(data: []byte, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      return unmarshal_any(data = data, v = ptr, spec = spec, allocator = allocator)
  }
  // Same as `unmarshal`, but takes the JSON text as a `string`.
  // The string is reinterpreted as bytes with `transmute`; no copy is made.
  unmarshal_string :: proc(data: string, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      raw_bytes := transmute([]byte)data
      return unmarshal_any(raw_bytes, ptr, spec, allocator)
  }
  // Writes `b` into `val` when the core type of `val` (as resolved by
  // `reflect.any_core`) is one of the boolean types.
  // Returns false, without writing, for every other destination type.
  @(private)
  assign_bool :: proc(val: any, b: bool) -> bool {
      core := reflect.any_core(val)
      switch &slot in core {
      case bool:
          slot = bool(b)
      case b8:
          slot = b8(b)
      case b16:
          slot = b16(b)
      case b32:
          slot = b32(b)
      case b64:
          slot = b64(b)
      case:
          return false
      }
      return true
  }
  // Converts `i` to the destination's integer type and stores it in `val`.
  // The destination is resolved with `reflect.any_core` first.
  // Returns false, without writing, when the core type is not one of the
  // integer types listed below.
  @(private)
  assign_int :: proc(val: any, i: $T) -> bool {
      core := reflect.any_core(val)
      switch &slot in core {
      // signed
      case i8:      slot = i8(i)
      case i16:     slot = i16(i)
      case i16le:   slot = i16le(i)
      case i16be:   slot = i16be(i)
      case i32:     slot = i32(i)
      case i32le:   slot = i32le(i)
      case i32be:   slot = i32be(i)
      case i64:     slot = i64(i)
      case i64le:   slot = i64le(i)
      case i64be:   slot = i64be(i)
      case i128:    slot = i128(i)
      case i128le:  slot = i128le(i)
      case i128be:  slot = i128be(i)
      // unsigned
      case u8:      slot = u8(i)
      case u16:     slot = u16(i)
      case u16le:   slot = u16le(i)
      case u16be:   slot = u16be(i)
      case u32:     slot = u32(i)
      case u32le:   slot = u32le(i)
      case u32be:   slot = u32be(i)
      case u64:     slot = u64(i)
      case u64le:   slot = u64le(i)
      case u64be:   slot = u64be(i)
      case u128:    slot = u128(i)
      case u128le:  slot = u128le(i)
      case u128be:  slot = u128be(i)
      // platform sized
      case int:     slot = int(i)
      case uint:    slot = uint(i)
      case uintptr: slot = uintptr(i)
      case:
          return false
      }
      return true
  }
  // Converts `f` to the destination's floating-point type and stores it in `val`.
  // The destination is resolved with `reflect.any_core` first.
  // Complex and quaternion destinations receive `f` as their first component
  // with every other component set to 0.
  // Returns false, without writing, for any other destination type.
  @(private)
  assign_float :: proc(val: any, f: $T) -> bool {
      core := reflect.any_core(val)
      switch &slot in core {
      case f16:           slot = f16(f)
      case f16le:         slot = f16le(f)
      case f16be:         slot = f16be(f)
      case f32:           slot = f32(f)
      case f32le:         slot = f32le(f)
      case f32be:         slot = f32be(f)
      case f64:           slot = f64(f)
      case f64le:         slot = f64le(f)
      case f64be:         slot = f64be(f)
      case complex32:     slot = complex(f16(f), 0)
      case complex64:     slot = complex(f32(f), 0)
      case complex128:    slot = complex(f64(f), 0)
      case quaternion64:  slot = quaternion(f16(f), 0, 0, 0)
      case quaternion128: slot = quaternion(f32(f), 0, 0, 0)
      case quaternion256: slot = quaternion(f64(f), 0, 0, 0)
      case:
          return false
      }
      return true
  }
  // Stores the text `str` into `val`, interpreting it according to the
  // destination type described by `ti`:
  // - `string`:  `val` is set to `str` itself (no copy is made here)
  // - `cstring`: `val` points at `str`'s bytes, or at a fresh empty cstring
  //              allocated from `p.allocator` when `str` is empty
  // - enum:      the value whose name equals `str` is assigned; an unknown
  //              name leaves `val` untouched and still reports success
  // - integer / float: `str` is parsed with `strconv` and assigned
  // Returns false when the text cannot be parsed or the type is not handled.
  @(private)
  unmarshal_string_token :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Info) -> bool {
      val := val

      // Textual destinations are matched on the exact typeid held by `val`.
      switch &text_dst in val {
      case string:
          text_dst = str
          return true
      case cstring:
          if str != "" {
              // NOTE: This is valid because 'clone_string' appends a NUL terminator
              text_dst = cstring(raw_data(str))
          } else {
              text_dst = strings.clone_to_cstring("", p.allocator)
          }
          return true
      }

      #partial switch info in ti.variant {
      case reflect.Type_Info_Enum:
          for enum_name, idx in info.names {
              if enum_name != str {
                  continue
              }
              assign_int(val, info.values[idx])
              return true
          }
          // TODO(bill): should this be an error or not?
          return true

      case reflect.Type_Info_Integer:
          parsed_int := strconv.parse_i128(str) or_return
          if assign_int(val, parsed_int) || assign_float(val, parsed_int) {
              return true
          }

      case reflect.Type_Info_Float:
          parsed_float := strconv.parse_f64(str) or_return
          if assign_int(val, parsed_float) || assign_float(val, parsed_float) {
              return true
          }
      }
      return false
  }
  // Reads one JSON value starting at the parser's current token and stores it
  // in `v`.
  //
  // Dispatch, in order:
  // 1. Union destinations (unless the token is `null`): a single-variant union
  //    is treated as that variant; any other union except `Value` has each
  //    variant tried in turn on a copy of the parser, and the first success is
  //    committed together with the union tag.
  // 2. A `Value` destination receives the result of `parse_value`.
  // 3. Everything else is chosen by token kind: null zeroes the destination,
  //    booleans/numbers/strings go through the `assign_*` helpers, `{` goes to
  //    `unmarshal_object` and `[` to `unmarshal_array`. Infinity/NaN tokens are
  //    accepted only when the spec is not plain JSON.
  //
  // Returns `Unsupported_Type_Error` (captured on entry) when the destination
  // cannot hold the token, or whatever error a nested call reports.
  @(private)
  unmarshal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
      token := p.curr_token
      v := v
      ti := reflect.type_info_base(type_info_of(v.id))

      if u, ok := ti.variant.(reflect.Type_Info_Union); ok && token.kind != .Null {
          // NOTE: If it's a union with only one variant, then treat it as that variant
          if len(u.variants) == 1 {
              variant := u.variants[0]
              v.id = variant.id
              ti = reflect.type_info_base(variant)
              if !reflect.is_pointer_internally(variant) {
                  tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
                  assign_int(tag, 1)
              }
          } else if v.id != Value {
              for variant, i in u.variants {
                  variant_any := any{v.data, variant.id}
                  // Try on a copy so a failed attempt does not consume tokens.
                  variant_p := p^
                  if err = unmarshal_value(&variant_p, variant_any); err == nil {
                      p^ = variant_p
                      raw_tag := i
                      // Tag 0 is the nil state unless the union is #no_nil.
                      if !u.no_nil { raw_tag += 1 }
                      tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
                      assign_int(tag, raw_tag)
                      return
                  }
              }
              return UNSUPPORTED_TYPE
          }
      }

      switch &dst in v {
      // Handle json.Value as an unknown type
      case Value:
          dst = parse_value(p) or_return
          return
      }

      #partial switch token.kind {
      case .Null:
          mem.zero(v.data, ti.size)
          advance_token(p)
          return

      case .False, .True:
          advance_token(p)
          if assign_bool(v, token.kind == .True) {
              return
          }
          return UNSUPPORTED_TYPE

      case .Integer:
          advance_token(p)
          i, _ := strconv.parse_i128(token.text)
          if assign_int(v, i) {
              return
          }
          if assign_float(v, i) {
              return
          }
          return UNSUPPORTED_TYPE

      case .Float:
          advance_token(p)
          f, _ := strconv.parse_f64(token.text)
          if assign_float(v, f) {
              return
          }
          // A float without a fractional part may still fit an integer destination.
          if i, fract := math.modf(f); fract == 0 {
              if assign_int(v, i) {
                  return
              }
              if assign_float(v, i) {
                  return
              }
          }
          return UNSUPPORTED_TYPE

      case .Ident:
          advance_token(p)
          if p.spec == .MJSON {
              if unmarshal_string_token(p, any{v.data, ti.id}, token.text, ti) {
                  return nil
              }
          }
          return UNSUPPORTED_TYPE

      case .String:
          advance_token(p)
          str := unquote_string(token, p.spec, p.allocator) or_return
          dest := any{v.data, ti.id}
          if !unmarshal_string_token(p, dest, str, ti) {
              delete(str, p.allocator)
              return UNSUPPORTED_TYPE
          }
          // Only `string` and `cstring` destinations may end up referencing
          // `str`. Enum, integer and float destinations merely read it, so it
          // has to be released here or it leaks on every successful parse.
          if dest.id != string && dest.id != cstring {
              delete(str, p.allocator)
          }
          return nil

      case .Open_Brace:
          return unmarshal_object(p, v, .Close_Brace)

      case .Open_Bracket:
          return unmarshal_array(p, v)

      case:
          if p.spec != .JSON {
              #partial switch token.kind {
              case .Infinity:
                  advance_token(p)
                  f: f64 = 0h7ff0000000000000
                  if token.text[0] == '-' {
                      f = 0hfff0000000000000
                  }
                  if assign_float(v, f) {
                      return
                  }
                  return UNSUPPORTED_TYPE

              case .NaN:
                  advance_token(p)
                  f: f64 = 0h7ff7ffffffffffff
                  if token.text[0] == '-' {
                      f = 0hfff7ffffffffffff
                  }
                  if assign_float(v, f) {
                      return
                  }
                  return UNSUPPORTED_TYPE
              }
          }
      }

      advance_token(p)
      return UNSUPPORTED_TYPE
  }
  // Consumes the current token via `expect_token`, asserting that it has the
  // given `kind`, and returns the token that was current before the call.
  // `expect_token` is deliberately called outside the `assert` expression.
  @(private)
  unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token {
      token_before := p.curr_token
      expect_err := expect_token(p, kind)
      assert(expect_err == nil, "unmarshal_expect_token")
      return token_before
  }
  // Reads the members of a JSON object into `v`.
  //
  // `end_token` is the token that terminates the member list. When it is
  // `.Close_Brace` the surrounding `{` and `}` are consumed here as well; any
  // other value (the caller in this file passes `.EOF`) means the members are
  // not wrapped in braces.
  //
  // Supported destinations:
  // - struct (not raw_union): each key is matched against the fields' `json`
  //   tags first and the field names second; unmatched keys are parsed and
  //   skipped. A field matched a second time yields `.Multiple_Use_Field`.
  // - map with string keys: every member is inserted into the map.
  // - enumerated array: each key must name a value of the index enum.
  // Any other destination yields `Unsupported_Type_Error`.
  @(private)
  unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) {
      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}

      if end_token == .Close_Brace {
          unmarshal_expect_token(p, .Open_Brace)
      }

      v := v
      v = reflect.any_base(v)
      ti := type_info_of(v.id)

      #partial switch t in ti.variant {
      case reflect.Type_Info_Struct:
          if t.is_raw_union {
              return UNSUPPORTED_TYPE
          }

          // Loop-invariant, so computed once per object.
          fields := reflect.struct_fields_zipped(ti.id)

          // The usage table must outlive a single key: allocating it inside
          // the loop would reset it on every iteration and the
          // `.Multiple_Use_Field` check below could never fire.
          runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD(ignore = context.temp_allocator == context.allocator)
          field_used := make([]bool, len(fields), context.temp_allocator)

          struct_loop: for p.curr_token.kind != end_token {
              key, _ := parse_object_key(p, p.allocator)
              defer delete(key, p.allocator)

              unmarshal_expect_token(p, .Colon)

              // First preference: a field whose `json:"..."` tag equals the key.
              use_field_idx := -1
              for field, field_idx in fields {
                  tag_value := string(reflect.struct_tag_get(field.tag, "json"))
                  if key == tag_value {
                      use_field_idx = field_idx
                      break
                  }
              }
              // Second preference: a field whose name equals the key.
              if use_field_idx < 0 {
                  for field, field_idx in fields {
                      if key == field.name {
                          use_field_idx = field_idx
                          break
                      }
                  }
              }

              if use_field_idx >= 0 {
                  if field_used[use_field_idx] {
                      return .Multiple_Use_Field
                  }
                  field_used[use_field_idx] = true

                  offset := fields[use_field_idx].offset
                  field_type := fields[use_field_idx].type

                  field_ptr := rawptr(uintptr(v.data) + offset)
                  field := any{field_ptr, field_type.id}
                  unmarshal_value(p, field) or_return
              } else {
                  // allows skipping unused struct fields
                  parse_value(p) or_return
              }

              if parse_comma(p) {
                  break struct_loop
              }
          }

      case reflect.Type_Info_Map:
          if !reflect.is_string(t.key) {
              return UNSUPPORTED_TYPE
          }
          raw_map := (^mem.Raw_Map)(v.data)
          if raw_map.allocator.procedure == nil {
              raw_map.allocator = p.allocator
          }

          // Scratch storage for one value; it is zeroed and refilled for every
          // member and handed to the map-set call, then released on exit.
          elem_backing := bytes_make(t.value.size, t.value.align, p.allocator) or_return
          defer delete(elem_backing, p.allocator)

          map_backing_value := any{raw_data(elem_backing), t.value.id}

          map_loop: for p.curr_token.kind != end_token {
              key, _ := parse_object_key(p, p.allocator)
              unmarshal_expect_token(p, .Colon)

              mem.zero_slice(elem_backing)
              if err := unmarshal_value(p, map_backing_value); err != nil {
                  delete(key, p.allocator)
                  return err
              }

              key_ptr := rawptr(&key)
              key_cstr: cstring
              if reflect.is_cstring(t.key) {
                  key_cstr = cstring(raw_data(key))
                  key_ptr = &key_cstr
              }

              set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data)
              if set_ptr == nil {
                  // Insertion failed, so the map did not take the key.
                  delete(key, p.allocator)
              }

              if parse_comma(p) {
                  break map_loop
              }
          }

      case reflect.Type_Info_Enumerated_Array:
          index_type := reflect.type_info_base(t.index)
          enum_type := index_type.variant.(reflect.Type_Info_Enum)

          enumerated_array_loop: for p.curr_token.kind != end_token {
              key, _ := parse_object_key(p, p.allocator)
              unmarshal_expect_token(p, .Colon)
              defer delete(key, p.allocator)

              index := -1
              for name, i in enum_type.names {
                  if key == name {
                      index = int(enum_type.values[i] - t.min_value)
                      break
                  }
              }
              if index < 0 || index >= t.count {
                  return UNSUPPORTED_TYPE
              }

              index_ptr := rawptr(uintptr(v.data) + uintptr(index*t.elem_size))
              index_any := any{index_ptr, t.elem.id}
              unmarshal_value(p, index_any) or_return

              if parse_comma(p) {
                  break enumerated_array_loop
              }
          }
          // No early return here: the closing brace still has to be consumed
          // below, exactly as for structs and maps, otherwise the caller's
          // parser is left pointing at this object's `}`.

      case:
          return UNSUPPORTED_TYPE
      }

      if end_token == .Close_Brace {
          unmarshal_expect_token(p, .Close_Brace)
      }
      return
  }
  // Counts the elements of the JSON array that starts at the current token.
  // The parser state is saved on entry and restored on exit, so the caller
  // sees no tokens consumed. While counting, the parser's allocator is
  // replaced with `mem.nil_allocator()` and parse results are discarded.
  @(private)
  unmarshal_count_array :: proc(p: ^Parser) -> (length: uintptr) {
      saved_state := p^
      defer p^ = saved_state

      p.allocator = mem.nil_allocator()
      unmarshal_expect_token(p, .Open_Bracket)

      for p.curr_token.kind != .Close_Bracket {
          _, _ = parse_value(p)
          length += 1
          if parse_comma(p) {
              break
          }
      }
      return
  }
  // Reads a JSON array into `v`.
  //
  // The element count is obtained up front with `unmarshal_count_array`.
  // - slice / dynamic array: backing storage for exactly that many elements is
  //   allocated from `p.allocator` and the raw header is filled in
  // - fixed array / enumerated array: the count must not exceed the
  //   destination's `count`; shorter input is accepted
  // - complex: at most two elements, written as the matching float type
  // Any other destination yields `Unsupported_Type_Error`.
  @(private)
  unmarshal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
      // Consumes `[ ... ]`, unmarshalling element `idx` to `base + idx*elem.size`.
      assign_array :: proc(p: ^Parser, base: rawptr, elem: ^reflect.Type_Info, length: uintptr) -> Unmarshal_Error {
          unmarshal_expect_token(p, .Open_Bracket)

          idx: uintptr = 0
          for p.curr_token.kind != .Close_Bracket {
              assert(idx < length)

              slot_ptr := rawptr(uintptr(base) + idx*uintptr(elem.size))
              slot := any{slot_ptr, elem.id}
              unmarshal_value(p, slot) or_return

              if parse_comma(p) {
                  break
              }
              idx += 1
          }

          unmarshal_expect_token(p, .Close_Bracket)
          return nil
      }

      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}

      ti := reflect.type_info_base(type_info_of(v.id))
      length := unmarshal_count_array(p)
      count := int(length)

      #partial switch t in ti.variant {
      case reflect.Type_Info_Slice:
          header := (^mem.Raw_Slice)(v.data)
          storage := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
          header.data = raw_data(storage)
          header.len = count
          return assign_array(p, header.data, t.elem, length)

      case reflect.Type_Info_Dynamic_Array:
          header := (^mem.Raw_Dynamic_Array)(v.data)
          storage := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
          header.data = raw_data(storage)
          header.len = count
          header.cap = count
          header.allocator = p.allocator
          return assign_array(p, header.data, t.elem, length)

      case reflect.Type_Info_Array:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > t.count {
              return UNSUPPORTED_TYPE
          }
          return assign_array(p, v.data, t.elem, length)

      case reflect.Type_Info_Enumerated_Array:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > t.count {
              return UNSUPPORTED_TYPE
          }
          return assign_array(p, v.data, t.elem, length)

      case reflect.Type_Info_Complex:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > 2 {
              return UNSUPPORTED_TYPE
          }
          switch ti.id {
          case complex32:
              return assign_array(p, v.data, type_info_of(f16), 2)
          case complex64:
              return assign_array(p, v.data, type_info_of(f32), 2)
          case complex128:
              return assign_array(p, v.data, type_info_of(f64), 2)
          }
          return UNSUPPORTED_TYPE
      }

      return UNSUPPORTED_TYPE
  }