unmarshal.odin 18 KB


  1. package encoding_json
  2. import "core:mem"
  3. import "core:math"
  4. import "core:reflect"
  5. import "core:strconv"
  6. import "core:strings"
  7. import "base:runtime"
  8. import "base:intrinsics"
  // Unmarshal_Data_Error lists the argument and input failures reported by the
  // unmarshalling procedures in this file.
  Unmarshal_Data_Error :: enum {
      // `unmarshal_any`: the input was rejected by `is_valid`.
      Invalid_Data,
      // `unmarshal_any`: the destination `any` (or the pointer it wraps) was nil.
      Invalid_Parameter,
      // `unmarshal_any`: the destination was not a typed pointer (a `rawptr` counts as untyped).
      Non_Pointer_Parameter,
      // `unmarshal_object`: the same struct field was targeted more than once.
      Multiple_Use_Field,
  }
  // Unsupported_Type_Error is returned when the current JSON token cannot be
  // stored into the destination type.
  Unsupported_Type_Error :: struct {
      id:    typeid, // type of the destination value that could not be filled
      token: Token,  // parser token that was current when the error value was built
  }
  // Unmarshal_Error is the error type returned by every unmarshalling procedure
  // in this file. A nil value means success.
  Unmarshal_Error :: union {
      Error,                  // errors produced by the JSON parser procedures (e.g. `parse_value`)
      Unmarshal_Data_Error,   // bad arguments or bad input data
      Unsupported_Type_Error, // the destination type cannot hold the JSON value
  }
  // unmarshal_any validates `data` as JSON (per `spec`) and decodes it into the
  // value that `v` points to.
  //
  // Inputs:
  // - data:      the encoded input
  // - v:         an `any` wrapping a typed, non-nil pointer to the destination
  // - spec:      the JSON dialect used for validation and parsing
  // - allocator: given to the parser and installed as `context.allocator` while decoding
  //
  // Returns:
  // - .Invalid_Parameter     when `v` is nil, has no type, or wraps a nil pointer
  // - .Non_Pointer_Parameter when `v` is not a typed pointer (`rawptr` is rejected)
  // - .Invalid_Data          when `is_valid` rejects the input
  // - otherwise whatever `unmarshal_object` / `unmarshal_value` return
  unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      v := v
      if v == nil || v.id == nil {
          return .Invalid_Parameter
      }
      v = reflect.any_base(v)
      ti := type_info_of(v.id)
      if !reflect.is_pointer(ti) || ti.id == rawptr {
          return .Non_Pointer_Parameter
      }

      PARSE_INTEGERS :: true
      if !is_valid(data, spec, PARSE_INTEGERS) {
          return .Invalid_Data
      }
      p := make_parser(data, spec, PARSE_INTEGERS, allocator)

      // `v.data` is the address of the caller's pointer. It has to be checked
      // BEFORE it is dereferenced, otherwise a nil `v.data` is read through.
      if v.data == nil {
          return .Invalid_Parameter
      }
      dst := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id}
      // A nil destination pointer cannot be written through either.
      if dst.data == nil {
          return .Invalid_Parameter
      }

      context.allocator = p.allocator

      // In MJSON a document may start directly with a key, i.e. an object body
      // without the surrounding braces; such a body runs until end of input.
      if p.spec == .MJSON {
          #partial switch p.curr_token.kind {
          case .Ident, .String:
              return unmarshal_object(&p, dst, .EOF)
          }
      }
      return unmarshal_value(&p, dst)
  }
  // unmarshal is the typed-pointer convenience wrapper around `unmarshal_any`:
  // it decodes `data` into the value `ptr` points to.
  unmarshal :: proc(data: []byte, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      return unmarshal_any(data = data, v = ptr, spec = spec, allocator = allocator)
  }
  // unmarshal_string is `unmarshal` for input held in a `string`. The string's
  // bytes are reinterpreted as a `[]byte` (no copy is made here) and handed to
  // `unmarshal_any`.
  unmarshal_string :: proc(data: string, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
      raw_bytes := transmute([]byte)data
      return unmarshal_any(raw_bytes, ptr, spec, allocator)
  }
  // assign_bool stores `b` into the value referenced by `val` when its core type
  // (as given by `reflect.any_core`) is one of the boolean types.
  // Returns false, leaving the destination untouched, for any other type.
  @(private)
  assign_bool :: proc(val: any, b: bool) -> bool {
      target := reflect.any_core(val)
      stored := true
      switch &slot in target {
      case bool: slot = b
      case b8:   slot = b8(b)
      case b16:  slot = b16(b)
      case b32:  slot = b32(b)
      case b64:  slot = b64(b)
      case:
          stored = false
      }
      return stored
  }
  // assign_int converts `i` to the destination's type and stores it into the
  // value referenced by `val`.
  //
  // Supported destinations (after `reflect.any_core`): every integer type
  // listed below, and `bit_set`s, whose backing storage is written as a raw
  // integer of the same size (0, 8, 16, 32, 64 or 128 bits).
  // Returns false, leaving the destination untouched, for any other type.
  @(private)
  assign_int :: proc(val: any, i: $T) -> bool {
      // Reports whether a bit_set's underlying integer type has an explicit
      // endianness that differs from the target platform's, in which case the
      // stored value has to be byte swapped.
      is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool {
          if ti == nil {
              return false
          }
          t := runtime.type_info_base(ti)
          #partial switch info in t.variant {
          case runtime.Type_Info_Integer:
              switch info.endianness {
              case .Platform: return false
              case .Little:   return ODIN_ENDIAN != .Little
              case .Big:      return ODIN_ENDIAN != .Big
              }
          }
          return false
      }

      v := reflect.any_core(val)
      switch &dst in v {
      case i8:      dst = i8     (i)
      case i16:     dst = i16    (i)
      case i16le:   dst = i16le  (i)
      case i16be:   dst = i16be  (i)
      case i32:     dst = i32    (i)
      case i32le:   dst = i32le  (i)
      case i32be:   dst = i32be  (i)
      case i64:     dst = i64    (i)
      case i64le:   dst = i64le  (i)
      case i64be:   dst = i64be  (i)
      case i128:    dst = i128   (i)
      case i128le:  dst = i128le (i)
      case i128be:  dst = i128be (i)
      case u8:      dst = u8     (i)
      case u16:     dst = u16    (i)
      case u16le:   dst = u16le  (i)
      case u16be:   dst = u16be  (i)
      case u32:     dst = u32    (i)
      case u32le:   dst = u32le  (i)
      case u32be:   dst = u32be  (i)
      case u64:     dst = u64    (i)
      case u64le:   dst = u64le  (i)
      case u64be:   dst = u64be  (i)
      case u128:    dst = u128   (i)
      case u128le:  dst = u128le (i)
      case u128be:  dst = u128be (i)
      case int:     dst = int    (i)
      case uint:    dst = uint   (i)
      case uintptr: dst = uintptr(i)
      case:
          // Not a plain integer: the only other accepted destination is a bit_set.
          ti := type_info_of(v.id)
          info, is_bit_set := ti.variant.(runtime.Type_Info_Bit_Set)
          if !is_bit_set {
              return false
          }

          do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying)
          switch ti.size * 8 {
          case 0: // no-op.
          case 8:
              x := (^u8)(v.data)
              x^ = u8(i)
          case 16:
              x := (^u16)(v.data)
              x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i)
          case 32:
              x := (^u32)(v.data)
              x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i)
          case 64:
              x := (^u64)(v.data)
              x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i)
          case 128:
              // 128-bit backed bit_sets used to fall into the panic below.
              x := (^u128)(v.data)
              x^ = do_byte_swap ? intrinsics.byte_swap(u128(i)) : u128(i)
          case:
              panic("unknown bit_size size")
          }
      }
      return true
  }
  // assign_float converts `f` to the destination's type and stores it into the
  // value referenced by `val`.
  //
  // Accepted destinations (after `reflect.any_core`) are the floating-point
  // types, the complex types (`f` becomes the real part, imaginary part 0) and
  // the quaternion types (`f` becomes `w`; `x`, `y`, `z` are 0).
  // Returns false, leaving the destination untouched, for any other type.
  @(private)
  assign_float :: proc(val: any, f: $T) -> bool {
      target := reflect.any_core(val)
      stored := true
      switch &slot in target {
      // Real floating-point destinations.
      case f16:   slot = f16  (f)
      case f16le: slot = f16le(f)
      case f16be: slot = f16be(f)
      case f32:   slot = f32  (f)
      case f32le: slot = f32le(f)
      case f32be: slot = f32be(f)
      case f64:   slot = f64  (f)
      case f64le: slot = f64le(f)
      case f64be: slot = f64be(f)

      // Complex destinations: real part only.
      case complex32:  slot = complex(f16(f), 0)
      case complex64:  slot = complex(f32(f), 0)
      case complex128: slot = complex(f64(f), 0)

      // Quaternion destinations: scalar part only.
      case quaternion64:  slot = quaternion(w=f16(f), x=0, y=0, z=0)
      case quaternion128: slot = quaternion(w=f32(f), x=0, y=0, z=0)
      case quaternion256: slot = quaternion(w=f64(f), x=0, y=0, z=0)

      case:
          stored = false
      }
      return stored
  }
  // unmarshal_string_token stores the (already unquoted) text `str` into `val`.
  //
  // - `string` destinations receive `str` itself (no copy is made here).
  // - `cstring` destinations alias `str`'s bytes; an empty `str` is replaced by
  //   a freshly cloned empty cstring from `p.allocator`.
  // - Enum destinations (per `ti`) are set by matching `str` against the enum's
  //   value names; an unknown name leaves the value untouched and still
  //   reports success.
  // - Integer / float destinations (per `ti`) are parsed from the text.
  //
  // `ok` is false when the destination cannot take the text; `err` is only set
  // when cloning the empty cstring fails.
  @(private)
  unmarshal_string_token :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Info) -> (ok: bool, err: Error) {
      target := val
      switch &out in target {
      case string:
          out = str
          return true, nil

      case cstring:
          if str != "" {
              // NOTE: This is valid because 'clone_string' appends a NUL terminator
              out = cstring(raw_data(str))
              return true, nil
          }

          alloc_err: runtime.Allocator_Error
          out, alloc_err = strings.clone_to_cstring("", p.allocator)
          #partial switch alloc_err {
          case nil:
              ok = true
          case .Out_Of_Memory:
              err = .Out_Of_Memory
          case:
              err = .Invalid_Allocator
          }
          return
      }

      #partial switch info in ti.variant {
      case reflect.Type_Info_Enum:
          for name, idx in info.names {
              if name != str {
                  continue
              }
              assign_int(val, info.values[idx])
              return true, nil
          }
          // TODO(bill): should this be an error or not?
          return true, nil

      case reflect.Type_Info_Integer:
          parsed, parsed_ok := strconv.parse_i128(str)
          if !parsed_ok {
              return false, nil
          }
          if assign_int(val, parsed) || assign_float(val, parsed) {
              return true, nil
          }

      case reflect.Type_Info_Float:
          parsed, parsed_ok := strconv.parse_f64(str)
          if !parsed_ok {
              return false, nil
          }
          if assign_int(val, parsed) || assign_float(val, parsed) {
              return true, nil
          }
      }

      return false, nil
  }
  // unmarshal_value decodes the JSON value at the parser's current token into
  // the destination `v`, consuming the tokens that make up that value.
  //
  // Unions: a single-variant union is treated as its variant; any other union
  // (except `Value`) is decoded by trying each variant in declaration order on
  // a copy of the parser and keeping the first one that succeeds.
  // A `Value` destination receives the generic result of `parse_value`.
  //
  // Returns an `Unsupported_Type_Error` (built from the destination type and
  // the token current on entry) when the token cannot be stored in `v`.
  @(private)
  unmarshal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
      token := p.curr_token

      v := v
      ti := reflect.type_info_base(type_info_of(v.id))

      if u, is_union := ti.variant.(reflect.Type_Info_Union); is_union && token.kind != .Null {
          if len(u.variants) == 1 {
              // NOTE: If it's a union with only one variant, then treat it as that variant
              only := u.variants[0]
              v.id = only.id
              ti = reflect.type_info_base(only)
              if !reflect.is_pointer_internally(only) {
                  tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
                  assign_int(tag, 1)
              }
          } else if v.id != Value {
              for variant, i in u.variants {
                  // Work on a copy so a failed attempt does not move the real parser.
                  trial := p^
                  if err = unmarshal_value(&trial, any{v.data, variant.id}); err != nil {
                      continue
                  }
                  p^ = trial

                  raw_tag := i
                  if !u.no_nil {
                      raw_tag += 1
                  }
                  tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
                  assign_int(tag, raw_tag)
                  return
              }
              return UNSUPPORTED_TYPE
          }
      }

      // Handle json.Value as an unknown type
      if v.id == Value {
          (^Value)(v.data)^ = parse_value(p) or_return
          return
      }

      #partial switch token.kind {
      case .Null:
          mem.zero(v.data, ti.size)
          advance_token(p)
          return

      case .False, .True:
          advance_token(p)
          if assign_bool(v, token.kind == .True) {
              return
          }
          return UNSUPPORTED_TYPE

      case .Integer:
          advance_token(p)
          i, _ := strconv.parse_i128(token.text)
          if assign_int(v, i) || assign_float(v, i) {
              return
          }
          return UNSUPPORTED_TYPE

      case .Float:
          advance_token(p)
          f, _ := strconv.parse_f64(token.text)
          if assign_float(v, f) {
              return
          }
          // A float literal without a fractional part may still go into an integer.
          if whole, fract := math.modf(f); fract == 0 {
              if assign_int(v, whole) || assign_float(v, whole) {
                  return
              }
          }
          return UNSUPPORTED_TYPE

      case .Ident:
          advance_token(p)
          if p.spec == .MJSON {
              if unmarshal_string_token(p, any{v.data, ti.id}, token.text, ti) or_return {
                  return nil
              }
          }
          return UNSUPPORTED_TYPE

      case .String:
          advance_token(p)
          str := unquote_string(token, p.spec, p.allocator) or_return
          dest := any{v.data, ti.id}
          if !(unmarshal_string_token(p, dest, str, ti) or_return) {
              delete(str, p.allocator)
              return UNSUPPORTED_TYPE
          }

          // Only string/cstring destinations keep referring to `str`; every
          // other destination has consumed it, so it is released here.
          if dest.id != typeid_of(string) && dest.id != typeid_of(cstring) {
              delete(str, p.allocator)
          }
          return nil

      case .Open_Brace:
          return unmarshal_object(p, v, .Close_Brace)

      case .Open_Bracket:
          return unmarshal_array(p, v)

      case:
          // Infinity / NaN literals are only honoured outside strict JSON.
          if p.spec != .JSON {
              #partial switch token.kind {
              case .Infinity:
                  advance_token(p)
                  inf: f64 = 0h7ff0000000000000
                  if token.text[0] == '-' {
                      inf = 0hfff0000000000000
                  }
                  if assign_float(v, inf) {
                      return
                  }
                  return UNSUPPORTED_TYPE

              case .NaN:
                  advance_token(p)
                  nan: f64 = 0h7ff7ffffffffffff
                  if token.text[0] == '-' {
                      nan = 0hfff7ffffffffffff
                  }
                  if assign_float(v, nan) {
                      return
                  }
                  return UNSUPPORTED_TYPE
              }
          }
      }

      advance_token(p)
      return UNSUPPORTED_TYPE
  }
  // unmarshal_expect_token calls `expect_token` for `kind` and returns the token
  // that was current before the call.
  //
  // A mismatch is treated as a programming error rather than a data error: the
  // procedure asserts that `expect_token` succeeded. `loc` is forwarded to the
  // assertion so that a failure is reported at the caller's source location
  // instead of inside this helper.
  @(private)
  unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token {
      prev := p.curr_token
      err := expect_token(p, kind)
      assert(err == nil, "unmarshal_expect_token", loc)
      return prev
  }
  // A `json` struct tag may hold more than the JSON key name: further options
  // such as `omitempty` can follow it, separated by commas.
  // Example: `json:"key_name,omitempty"`
  // The text before the first comma is returned as `json_name`; everything
  // after that comma is returned as `extra`. Without a comma the whole value
  // is the name and `extra` is empty.
  @(private)
  json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) {
      comma := strings.index_byte(value, ',')
      if comma < 0 {
          return value, ""
      }
      return value[:comma], value[comma + 1:]
  }
  // unmarshal_object decodes a JSON object into `v`, which must be a struct
  // (not `#raw_union`), a map with string or integer keys, or an enumerated
  // array. Any other destination yields an `Unsupported_Type_Error`.
  //
  // `end_token` is the token that terminates the object body. With
  // `.Close_Brace` the surrounding braces are consumed here as well; any other
  // value (e.g. `.EOF`, used by `unmarshal_any` for brace-less MJSON documents)
  // means the body is not wrapped in braces.
  //
  // Struct destinations:
  // - a key is matched against the `json` tag names first, then against the
  //   names of untagged fields, then against fields reachable through
  //   `using _: T` members;
  // - a key that targets a field already filled by an earlier key yields
  //   `.Multiple_Use_Field`;
  // - unknown keys are parsed and discarded.
  @(private)
  unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) {
      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}

      if end_token == .Close_Brace {
          unmarshal_expect_token(p, .Open_Brace)
      }

      v := v
      ti := reflect.type_info_base(type_info_of(v.id))

      #partial switch t in ti.variant {
      case reflect.Type_Info_Struct:
          if .raw_union in t.flags {
              return UNSUPPORTED_TYPE
          }

          // Searches `parent`'s fields (descending into `using _: T` members)
          // for one whose JSON name is `key`. On success `offset` is relative
          // to the start of `parent`.
          check_children_using_fields :: proc(key: string, parent: typeid) -> (
              offset: uintptr,
              type:   ^reflect.Type_Info,
              found:  bool,
          ) {
              for field in reflect.struct_fields_zipped(parent) {
                  if field.is_using && field.name == "_" {
                      offset, type, found = check_children_using_fields(key, field.type.id)
                      if found {
                          offset += field.offset
                          return
                      }
                  }

                  tag_value := reflect.struct_tag_get(field.tag, "json")
                  json_name, _ := json_name_from_tag_value(tag_value)
                  if (json_name == "" && field.name == key) || json_name == key {
                      offset = field.offset
                      type   = field.type
                      found  = true
                      return
                  }
              }
              return
          }

          // Marks the byte offset `offset` as used and reports whether it had
          // been marked before. One bit per byte of the struct.
          field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool {
              mask := byte(1) << (offset & 7)
              prev_set := field_used[offset/8] & mask != 0
              field_used[offset/8] |= mask
              return prev_set
          }

          fields := reflect.struct_fields_zipped(ti.id)

          // The bitmap has to outlive the whole key loop: allocating and
          // clearing it per key (as was done before) forgets every earlier
          // key, so duplicates are never seen, and grows the stack per key.
          field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8 + 1 // + 1 to not overflow on size_of 0 types.
          field_used := intrinsics.alloca(field_used_bytes, 1)
          intrinsics.mem_zero(field_used, field_used_bytes)

          struct_loop: for p.curr_token.kind != end_token {
              key := parse_object_key(p, p.allocator) or_return
              defer delete(key, p.allocator)

              unmarshal_expect_token(p, .Colon)

              // Pass 1: explicit `json:"name"` tags.
              use_field_idx := -1
              for field, field_idx in fields {
                  tag_value := reflect.struct_tag_get(field.tag, "json")
                  json_name, _ := json_name_from_tag_value(tag_value)
                  if key == json_name {
                      use_field_idx = field_idx
                      break
                  }
              }

              // Pass 2: plain field names, for fields without a tag name.
              if use_field_idx < 0 {
                  for field, field_idx in fields {
                      tag_value := reflect.struct_tag_get(field.tag, "json")
                      json_name, _ := json_name_from_tag_value(tag_value)
                      if json_name == "" && key == field.name {
                          use_field_idx = field_idx
                          break
                      }
                  }
              }

              offset:      uintptr
              field_type:  ^reflect.Type_Info
              field_found: bool = use_field_idx >= 0

              if field_found {
                  offset     = fields[use_field_idx].offset
                  field_type = fields[use_field_idx].type
              } else {
                  // Pass 3: fields reachable through `using _: T` members.
                  offset, field_type, field_found = check_children_using_fields(key, ti.id)
              }

              if field_found {
                  // Zero-sized fields can share an offset with a neighbouring
                  // field, so they are left out of the duplicate tracking.
                  if field_type.size > 0 && field_test(field_used, offset) {
                      return .Multiple_Use_Field
                  }

                  field_ptr := rawptr(uintptr(v.data) + offset)
                  field := any{field_ptr, field_type.id}
                  unmarshal_value(p, field) or_return
              } else {
                  // allows skipping unused struct fields

                  // NOTE(bill): prevent possible memory leak if a string is unquoted
                  allocator := p.allocator
                  defer p.allocator = allocator
                  p.allocator = mem.nil_allocator()

                  parse_value(p) or_return
              }

              if parse_comma(p) {
                  break struct_loop
              }
          }

      case reflect.Type_Info_Map:
          if !reflect.is_string(t.key) && !reflect.is_integer(t.key) {
              return UNSUPPORTED_TYPE
          }
          raw_map := (^mem.Raw_Map)(v.data)
          if raw_map.allocator.procedure == nil {
              raw_map.allocator = p.allocator
          }

          // Scratch storage every value is decoded into before it is copied
          // into the map.
          elem_backing := bytes_make(t.value.size, t.value.align, p.allocator) or_return
          defer delete(elem_backing, p.allocator)

          map_backing_value := any{raw_data(elem_backing), t.value.id}

          // Key storage is declared outside the per-key switch so that the
          // pointer handed to the map insert below never refers to a variable
          // whose scope has already ended.
          key_cstr: cstring
          key_int:  i128 // large enough to hold any integer key type

          map_loop: for p.curr_token.kind != end_token {
              key := parse_object_key(p, p.allocator) or_return
              unmarshal_expect_token(p, .Colon)

              mem.zero_slice(elem_backing)
              if uerr := unmarshal_value(p, map_backing_value); uerr != nil {
                  delete(key, p.allocator)
                  return uerr
              }

              key_ptr: rawptr

              #partial switch tk in t.key.variant {
              case runtime.Type_Info_String:
                  assert(tk.encoding == .UTF_8)

                  key_ptr = rawptr(&key)
                  if reflect.is_cstring(t.key) {
                      key_cstr = cstring(raw_data(key))
                      key_ptr = &key_cstr
                  }
              case runtime.Type_Info_Integer:
                  parsed, parsed_ok := strconv.parse_i128(key)
                  if !parsed_ok {
                      delete(key, p.allocator)
                      return UNSUPPORTED_TYPE
                  }
                  // Store the key in the map's own key type (width and
                  // endianness) rather than always handing over an i128.
                  if !assign_int(any{rawptr(&key_int), t.key.id}, parsed) {
                      delete(key, p.allocator)
                      return UNSUPPORTED_TYPE
                  }
                  key_ptr = rawptr(&key_int)
              case:
                  delete(key, p.allocator)
                  return UNSUPPORTED_TYPE
              }

              set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data)

              // The key text is released exactly once: when the insert failed,
              // or when the key is an integer (only the parsed number was
              // handed to the map, so the text is no longer needed).
              if set_ptr == nil || reflect.is_integer(t.key) {
                  delete(key, p.allocator)
              }

              if parse_comma(p) {
                  break map_loop
              }
          }

      case reflect.Type_Info_Enumerated_Array:
          index_type := reflect.type_info_base(t.index)
          enum_type := index_type.variant.(reflect.Type_Info_Enum)

          enumerated_array_loop: for p.curr_token.kind != end_token {
              key := parse_object_key(p, p.allocator) or_return
              defer delete(key, p.allocator)
              unmarshal_expect_token(p, .Colon)

              // Translate the enum value name into a zero-based element index.
              index := -1
              for name, i in enum_type.names {
                  if key == name {
                      index = int(enum_type.values[i] - t.min_value)
                      break
                  }
              }
              if index < 0 || index >= t.count {
                  return UNSUPPORTED_TYPE
              }

              index_ptr := rawptr(uintptr(v.data) + uintptr(index*t.elem_size))
              index_any := any{index_ptr, t.elem.id}

              unmarshal_value(p, index_any) or_return

              if parse_comma(p) {
                  break enumerated_array_loop
              }
          }

      case:
          return UNSUPPORTED_TYPE
      }

      if end_token == .Close_Brace {
          unmarshal_expect_token(p, .Close_Brace)
      }
      return
  }
  // unmarshal_count_array counts the elements of the JSON array at the parser's
  // current position without consuming it: the parser state is saved on entry
  // and restored before returning. While counting, the parser's allocator is
  // replaced by `mem.nil_allocator()` and the parsed values (and any parse
  // errors) are discarded.
  @(private)
  unmarshal_count_array :: proc(p: ^Parser) -> (length: uintptr) {
      saved := p^
      defer p^ = saved

      p.allocator = mem.nil_allocator()
      unmarshal_expect_token(p, .Open_Bracket)
      for p.curr_token.kind != .Close_Bracket {
          _, _ = parse_value(p)
          length += 1

          if parse_comma(p) {
              break
          }
      }
      return
  }
  // unmarshal_array decodes the JSON array at the parser's current position
  // into `v`.
  //
  // - Slices and dynamic arrays get freshly made backing storage from
  //   `p.allocator`, sized to the element count reported by
  //   `unmarshal_count_array`.
  // - Fixed and enumerated arrays are filled in place; a JSON array longer than
  //   the destination is rejected, a shorter one is accepted.
  // - Complex numbers accept up to two elements, written as consecutive
  //   components of the matching float type.
  //
  // Every other destination yields an `Unsupported_Type_Error`.
  @(private)
  unmarshal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
      // Decodes the array elements one after another into consecutive slots of
      // `elem.size` bytes starting at `base`, consuming both brackets.
      assign_array :: proc(p: ^Parser, base: rawptr, elem: ^reflect.Type_Info, length: uintptr) -> Unmarshal_Error {
          unmarshal_expect_token(p, .Open_Bracket)

          stride := uintptr(elem.size)
          idx: uintptr
          for p.curr_token.kind != .Close_Bracket {
              assert(idx < length)

              slot := any{rawptr(uintptr(base) + idx*stride), elem.id}
              unmarshal_value(p, slot) or_return

              if parse_comma(p) {
                  break
              }
              idx += 1
          }

          unmarshal_expect_token(p, .Close_Bracket)
          return nil
      }

      UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}

      ti := reflect.type_info_base(type_info_of(v.id))

      length := unmarshal_count_array(p)
      count := int(length)

      #partial switch t in ti.variant {
      case reflect.Type_Info_Slice:
          storage := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
          raw := (^mem.Raw_Slice)(v.data)
          raw.data = raw_data(storage)
          raw.len  = count
          return assign_array(p, raw.data, t.elem, length)

      case reflect.Type_Info_Dynamic_Array:
          storage := bytes_make(t.elem.size * count, t.elem.align, p.allocator) or_return
          raw := (^mem.Raw_Dynamic_Array)(v.data)
          raw.data      = raw_data(storage)
          raw.len       = count
          raw.cap       = count
          raw.allocator = p.allocator
          return assign_array(p, raw.data, t.elem, length)

      case reflect.Type_Info_Array:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > t.count {
              return UNSUPPORTED_TYPE
          }
          return assign_array(p, v.data, t.elem, length)

      case reflect.Type_Info_Enumerated_Array:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > t.count {
              return UNSUPPORTED_TYPE
          }
          return assign_array(p, v.data, t.elem, length)

      case reflect.Type_Info_Complex:
          // NOTE(bill): Allow lengths which are less than the dst array
          if count > 2 {
              return UNSUPPORTED_TYPE
          }

          switch ti.id {
          case complex32:  return assign_array(p, v.data, type_info_of(f16), 2)
          case complex64:  return assign_array(p, v.data, type_info_of(f32), 2)
          case complex128: return assign_array(p, v.data, type_info_of(f64), 2)
          }
          return UNSUPPORTED_TYPE
      }

      return UNSUPPORTED_TYPE
  }
  633. }
  634. return UNSUPPORTED_TYPE
  635. }