unmarshal.odin 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. package encoding_json
  2. import "core:mem"
  3. import "core:math"
  4. import "core:reflect"
  5. import "core:strconv"
  6. import "core:strings"
  7. import "base:runtime"
  8. import "base:intrinsics"
// Data-level failures reported by the unmarshalling procedures in this file.
Unmarshal_Data_Error :: enum {
	// Returned by `unmarshal_any` when `is_valid` rejects the input bytes.
	Invalid_Data,
	// Returned by `unmarshal_any` when the destination `any` is nil, has a nil typeid, or has nil data.
	Invalid_Parameter,
	// Returned by `unmarshal_any` when the destination is not a pointer type, or is a `rawptr`.
	Non_Pointer_Parameter,
	// Returned by `unmarshal_object` when its struct field tracking reports that a field was already written.
	Multiple_Use_Field,
}
// Reported when a JSON value cannot be stored into the destination type.
// The procedures in this file build it positionally as `{v.id, p.curr_token}` on entry,
// so the field order below must not change.
Unsupported_Type_Error :: struct {
	id: typeid,   // typeid of the destination value
	token: Token, // parser's current token when the procedure was entered
}
// Error type returned by every unmarshalling procedure in this file; `nil` means success.
Unmarshal_Error :: union {
	// Declared elsewhere in the package; propagated here from `parse_value`, `unquote_string`
	// and `bytes_make` via `or_return`.
	Error,
	Unmarshal_Data_Error,
	Unsupported_Type_Error,
}
  24. unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
  25. v := v
  26. if v == nil || v.id == nil {
  27. return .Invalid_Parameter
  28. }
  29. v = reflect.any_base(v)
  30. ti := type_info_of(v.id)
  31. if !reflect.is_pointer(ti) || ti.id == rawptr {
  32. return .Non_Pointer_Parameter
  33. }
  34. PARSE_INTEGERS :: true
  35. if !is_valid(data, spec, PARSE_INTEGERS) {
  36. return .Invalid_Data
  37. }
  38. p := make_parser(data, spec, PARSE_INTEGERS, allocator)
  39. data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id}
  40. if v.data == nil {
  41. return .Invalid_Parameter
  42. }
  43. context.allocator = p.allocator
  44. if p.spec == .MJSON {
  45. #partial switch p.curr_token.kind {
  46. case .Ident, .String:
  47. return unmarshal_object(&p, data, .EOF)
  48. }
  49. }
  50. return unmarshal_value(&p, data)
  51. }
// Typed convenience wrapper: unmarshals `data` into the value `ptr` points to by
// forwarding all arguments unchanged to `unmarshal_any`.
unmarshal :: proc(data: []byte, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
	return unmarshal_any(data, ptr, spec, allocator)
}
// Same as `unmarshal`, but takes the encoded text as a `string`.
// The string is reinterpreted as `[]byte` with `transmute`; no copy is made here.
unmarshal_string :: proc(data: string, ptr: ^$T, spec := DEFAULT_SPECIFICATION, allocator := context.allocator) -> Unmarshal_Error {
	return unmarshal_any(transmute([]byte)data, ptr, spec, allocator)
}
  58. @(private)
  59. assign_bool :: proc(val: any, b: bool) -> bool {
  60. v := reflect.any_core(val)
  61. switch &dst in v {
  62. case bool: dst = bool(b)
  63. case b8: dst = b8 (b)
  64. case b16: dst = b16 (b)
  65. case b32: dst = b32 (b)
  66. case b64: dst = b64 (b)
  67. case: return false
  68. }
  69. return true
  70. }
  71. @(private)
  72. assign_int :: proc(val: any, i: $T) -> bool {
  73. v := reflect.any_core(val)
  74. switch &dst in v {
  75. case i8: dst = i8 (i)
  76. case i16: dst = i16 (i)
  77. case i16le: dst = i16le (i)
  78. case i16be: dst = i16be (i)
  79. case i32: dst = i32 (i)
  80. case i32le: dst = i32le (i)
  81. case i32be: dst = i32be (i)
  82. case i64: dst = i64 (i)
  83. case i64le: dst = i64le (i)
  84. case i64be: dst = i64be (i)
  85. case i128: dst = i128 (i)
  86. case i128le: dst = i128le (i)
  87. case i128be: dst = i128be (i)
  88. case u8: dst = u8 (i)
  89. case u16: dst = u16 (i)
  90. case u16le: dst = u16le (i)
  91. case u16be: dst = u16be (i)
  92. case u32: dst = u32 (i)
  93. case u32le: dst = u32le (i)
  94. case u32be: dst = u32be (i)
  95. case u64: dst = u64 (i)
  96. case u64le: dst = u64le (i)
  97. case u64be: dst = u64be (i)
  98. case u128: dst = u128 (i)
  99. case u128le: dst = u128le (i)
  100. case u128be: dst = u128be (i)
  101. case int: dst = int (i)
  102. case uint: dst = uint (i)
  103. case uintptr: dst = uintptr(i)
  104. case:
  105. ti := type_info_of(v.id)
  106. if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
  107. do_byte_swap := !reflect.bit_set_is_big_endian(v)
  108. switch ti.size * 8 {
  109. case 0: // no-op.
  110. case 8:
  111. x := (^u8)(v.data)
  112. x^ = u8(i)
  113. case 16:
  114. x := (^u16)(v.data)
  115. x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i)
  116. case 32:
  117. x := (^u32)(v.data)
  118. x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i)
  119. case 64:
  120. x := (^u64)(v.data)
  121. x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i)
  122. case:
  123. panic("unknown bit_size size")
  124. }
  125. return true
  126. }
  127. return false
  128. }
  129. return true
  130. }
  131. @(private)
  132. assign_float :: proc(val: any, f: $T) -> bool {
  133. v := reflect.any_core(val)
  134. switch &dst in v {
  135. case f16: dst = f16 (f)
  136. case f16le: dst = f16le(f)
  137. case f16be: dst = f16be(f)
  138. case f32: dst = f32 (f)
  139. case f32le: dst = f32le(f)
  140. case f32be: dst = f32be(f)
  141. case f64: dst = f64 (f)
  142. case f64le: dst = f64le(f)
  143. case f64be: dst = f64be(f)
  144. case complex32: dst = complex(f16(f), 0)
  145. case complex64: dst = complex(f32(f), 0)
  146. case complex128: dst = complex(f64(f), 0)
  147. case quaternion64: dst = quaternion(w=f16(f), x=0, y=0, z=0)
  148. case quaternion128: dst = quaternion(w=f32(f), x=0, y=0, z=0)
  149. case quaternion256: dst = quaternion(w=f64(f), x=0, y=0, z=0)
  150. case: return false
  151. }
  152. return true
  153. }
  154. @(private)
  155. unmarshal_string_token :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Info) -> bool {
  156. val := val
  157. switch &dst in val {
  158. case string:
  159. dst = str
  160. return true
  161. case cstring:
  162. if str == "" {
  163. dst = strings.clone_to_cstring("", p.allocator)
  164. } else {
  165. // NOTE: This is valid because 'clone_string' appends a NUL terminator
  166. dst = cstring(raw_data(str))
  167. }
  168. return true
  169. }
  170. #partial switch variant in ti.variant {
  171. case reflect.Type_Info_Enum:
  172. for name, i in variant.names {
  173. if name == str {
  174. assign_int(val, variant.values[i])
  175. return true
  176. }
  177. }
  178. // TODO(bill): should this be an error or not?
  179. return true
  180. case reflect.Type_Info_Integer:
  181. i := strconv.parse_i128(str) or_return
  182. if assign_int(val, i) {
  183. return true
  184. }
  185. if assign_float(val, i) {
  186. return true
  187. }
  188. case reflect.Type_Info_Float:
  189. f := strconv.parse_f64(str) or_return
  190. if assign_int(val, f) {
  191. return true
  192. }
  193. if assign_float(val, f) {
  194. return true
  195. }
  196. }
  197. return false
  198. }
// Unmarshals the value starting at the parser's current token into the destination `v`.
//
// Dispatches on the kind of the current token: null, booleans, numbers, identifiers
// (MJSON only), strings, objects and arrays, plus Infinity/NaN for non-JSON specs.
// Union destinations are resolved to a concrete variant first.
//
// Returns nil on success. Returns an `Unsupported_Type_Error` (built from `v.id` and
// the token that was current on entry) when the destination cannot take the value,
// or the error propagated from `parse_value` / `unquote_string`.
@(private)
unmarshal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
	// Captured up front so the error reports the original destination type and token,
	// even though `v.id` and the parser position may change below.
	UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
	token := p.curr_token

	v := v
	ti := reflect.type_info_base(type_info_of(v.id))
	// A `null` token skips variant resolution: the .Null case below zeroes the whole
	// destination using `ti.size`.
	if u, ok := ti.variant.(reflect.Type_Info_Union); ok && token.kind != .Null {
		// NOTE: If it's a union with only one variant, then treat it as that variant
		if len(u.variants) == 1 {
			variant := u.variants[0]
			v.id = variant.id
			ti = reflect.type_info_base(variant)
			// The tag is written only when `is_pointer_internally` is false for the variant.
			// It is written before the payload is unmarshalled.
			if !reflect.is_pointer_internally(variant) {
				tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
				assign_int(tag, 1)
			}
		} else if v.id != Value {
			// Try each variant in declaration order on a copy of the parser; the first
			// one that unmarshals without error wins and its parser state is committed.
			for variant, i in u.variants {
				variant_any := any{v.data, variant.id}
				variant_p := p^
				if err = unmarshal_value(&variant_p, variant_any); err == nil {
					p^ = variant_p
					raw_tag := i
					// Unless the union is `no_nil`, tag 0 is skipped, so variant tags start at 1.
					if !u.no_nil { raw_tag += 1 }
					tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
					assign_int(tag, raw_tag)
					return
				}
			}
			return UNSUPPORTED_TYPE
		}
	}

	switch &dst in v {
	// Handle json.Value as an unknown type
	case Value:
		dst = parse_value(p) or_return
		return
	}

	#partial switch token.kind {
	case .Null:
		// `null` zeroes the destination, whatever its type.
		mem.zero(v.data, ti.size)
		advance_token(p)
		return
	case .False, .True:
		advance_token(p)
		if assign_bool(v, token.kind == .True) {
			return
		}
		return UNSUPPORTED_TYPE
	case .Integer:
		advance_token(p)
		// The parse result flag is discarded here.
		i, _ := strconv.parse_i128(token.text)
		// Integer destinations are tried first, then floating-point ones.
		if assign_int(v, i) {
			return
		}
		if assign_float(v, i) {
			return
		}
		return UNSUPPORTED_TYPE
	case .Float:
		advance_token(p)
		// The parse result flag is discarded here.
		f, _ := strconv.parse_f64(token.text)
		if assign_float(v, f) {
			return
		}
		// A float literal with no fractional part may still be stored into an integer destination.
		if i, fract := math.modf(f); fract == 0 {
			if assign_int(v, i) {
				return
			}
			if assign_float(v, i) {
				return
			}
		}
		return UNSUPPORTED_TYPE
	case .Ident:
		advance_token(p)
		// Bare identifiers are only accepted as values under the MJSON spec.
		// The raw token text is passed through as-is (it is not unquoted or copied here).
		if p.spec == .MJSON {
			if unmarshal_string_token(p, any{v.data, ti.id}, token.text, ti) {
				return nil
			}
		}
		return UNSUPPORTED_TYPE
	case .String:
		advance_token(p)
		str := unquote_string(token, p.spec, p.allocator) or_return
		dest := any{v.data, ti.id}
		if !unmarshal_string_token(p, dest, str, ti) {
			delete(str, p.allocator)
			return UNSUPPORTED_TYPE
		}
		// string/cstring destinations now reference `str`, so it must stay alive;
		// for every other destination the unquoted copy is no longer needed.
		switch destv in dest {
		case string, cstring:
		case: delete(str, p.allocator)
		}
		return nil
	case .Open_Brace:
		return unmarshal_object(p, v, .Close_Brace)
	case .Open_Bracket:
		return unmarshal_array(p, v)
	case:
		if p.spec != .JSON {
			#partial switch token.kind {
			case .Infinity:
				advance_token(p)
				// f64 bit patterns with exponent all ones and zero mantissa;
				// the sign bit is set when the token text starts with '-'.
				f: f64 = 0h7ff0000000000000
				if token.text[0] == '-' {
					f = 0hfff0000000000000
				}
				if assign_float(v, f) {
					return
				}
				return UNSUPPORTED_TYPE
			case .NaN:
				advance_token(p)
				// f64 bit patterns with exponent all ones and non-zero mantissa;
				// the sign bit is set when the token text starts with '-'.
				f: f64 = 0h7ff7ffffffffffff
				if token.text[0] == '-' {
					f = 0hfff7ffffffffffff
				}
				if assign_float(v, f) {
					return
				}
				return UNSUPPORTED_TYPE
			}
		}
	}

	// Any token kind not handled above is consumed and reported as unsupported.
	advance_token(p)
	return UNSUPPORTED_TYPE
}
  327. @(private)
  328. unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token {
  329. prev := p.curr_token
  330. err := expect_token(p, kind)
  331. assert(err == nil, "unmarshal_expect_token")
  332. return prev
  333. }
  334. @(private)
  335. json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) {
  336. json_name = value
  337. if comma_index := strings.index_byte(json_name, ','); comma_index >= 0 {
  338. json_name = json_name[:comma_index]
  339. extra = value[1 + comma_index:]
  340. }
  341. return
  342. }
  343. @(private)
  344. unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) {
  345. UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
  346. if end_token == .Close_Brace {
  347. unmarshal_expect_token(p, .Open_Brace)
  348. }
  349. v := v
  350. ti := reflect.type_info_base(type_info_of(v.id))
  351. #partial switch t in ti.variant {
  352. case reflect.Type_Info_Struct:
  353. if .raw_union in t.flags {
  354. return UNSUPPORTED_TYPE
  355. }
  356. struct_loop: for p.curr_token.kind != end_token {
  357. key, _ := parse_object_key(p, p.allocator)
  358. defer delete(key, p.allocator)
  359. unmarshal_expect_token(p, .Colon)
  360. fields := reflect.struct_fields_zipped(ti.id)
  361. field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool {
  362. prev_set := field_used[offset/8] & byte(offset&7) != 0
  363. field_used[offset/8] |= byte(offset&7)
  364. return prev_set
  365. }
  366. field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8
  367. field_used := intrinsics.alloca(field_used_bytes, 1)
  368. intrinsics.mem_zero(field_used, field_used_bytes)
  369. use_field_idx := -1
  370. for field, field_idx in fields {
  371. tag_value := string(reflect.struct_tag_get(field.tag, "json"))
  372. json_name, _ := json_name_from_tag_value(tag_value)
  373. if key == json_name {
  374. use_field_idx = field_idx
  375. break
  376. }
  377. }
  378. if use_field_idx < 0 {
  379. for field, field_idx in fields {
  380. if key == field.name {
  381. use_field_idx = field_idx
  382. break
  383. }
  384. }
  385. }
  386. check_children_using_fields :: proc(key: string, parent: typeid) -> (
  387. offset: uintptr,
  388. type: ^reflect.Type_Info,
  389. found: bool,
  390. ) {
  391. for field in reflect.struct_fields_zipped(parent) {
  392. if field.is_using && field.name == "_" {
  393. offset, type, found = check_children_using_fields(key, field.type.id)
  394. if found {
  395. offset += field.offset
  396. return
  397. }
  398. }
  399. if field.name == key {
  400. offset = field.offset
  401. type = field.type
  402. found = true
  403. return
  404. }
  405. }
  406. return
  407. }
  408. offset: uintptr
  409. type: ^reflect.Type_Info
  410. field_found: bool = use_field_idx >= 0
  411. if field_found {
  412. offset = fields[use_field_idx].offset
  413. type = fields[use_field_idx].type
  414. } else {
  415. offset, type, field_found = check_children_using_fields(key, ti.id)
  416. }
  417. if field_found {
  418. if field_test(field_used, offset) {
  419. return .Multiple_Use_Field
  420. }
  421. field_ptr := rawptr(uintptr(v.data) + offset)
  422. field := any{field_ptr, type.id}
  423. unmarshal_value(p, field) or_return
  424. if parse_comma(p) {
  425. break struct_loop
  426. }
  427. continue struct_loop
  428. } else {
  429. // allows skipping unused struct fields
  430. // NOTE(bill): prevent possible memory leak if a string is unquoted
  431. allocator := p.allocator
  432. defer p.allocator = allocator
  433. p.allocator = mem.nil_allocator()
  434. parse_value(p) or_return
  435. if parse_comma(p) {
  436. break struct_loop
  437. }
  438. continue struct_loop
  439. }
  440. }
  441. case reflect.Type_Info_Map:
  442. if !reflect.is_string(t.key) && !reflect.is_integer(t.key) {
  443. return UNSUPPORTED_TYPE
  444. }
  445. raw_map := (^mem.Raw_Map)(v.data)
  446. if raw_map.allocator.procedure == nil {
  447. raw_map.allocator = p.allocator
  448. }
  449. elem_backing := bytes_make(t.value.size, t.value.align, p.allocator) or_return
  450. defer delete(elem_backing, p.allocator)
  451. map_backing_value := any{raw_data(elem_backing), t.value.id}
  452. map_loop: for p.curr_token.kind != end_token {
  453. key, _ := parse_object_key(p, p.allocator)
  454. unmarshal_expect_token(p, .Colon)
  455. mem.zero_slice(elem_backing)
  456. if uerr := unmarshal_value(p, map_backing_value); uerr != nil {
  457. delete(key, p.allocator)
  458. return uerr
  459. }
  460. key_ptr: rawptr
  461. #partial switch tk in t.key.variant {
  462. case runtime.Type_Info_String:
  463. key_ptr = rawptr(&key)
  464. key_cstr: cstring
  465. if reflect.is_cstring(t.key) {
  466. key_cstr = cstring(raw_data(key))
  467. key_ptr = &key_cstr
  468. }
  469. case runtime.Type_Info_Integer:
  470. i, ok := strconv.parse_i128(key)
  471. if !ok { return UNSUPPORTED_TYPE }
  472. key_ptr = rawptr(&i)
  473. case: return UNSUPPORTED_TYPE
  474. }
  475. set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data)
  476. if set_ptr == nil {
  477. delete(key, p.allocator)
  478. }
  479. // there's no need to keep string value on the heap, since it was copied into map
  480. if reflect.is_integer(t.key) {
  481. delete(key, p.allocator)
  482. }
  483. if parse_comma(p) {
  484. break map_loop
  485. }
  486. }
  487. case reflect.Type_Info_Enumerated_Array:
  488. index_type := reflect.type_info_base(t.index)
  489. enum_type := index_type.variant.(reflect.Type_Info_Enum)
  490. enumerated_array_loop: for p.curr_token.kind != end_token {
  491. key, _ := parse_object_key(p, p.allocator)
  492. unmarshal_expect_token(p, .Colon)
  493. defer delete(key, p.allocator)
  494. index := -1
  495. for name, i in enum_type.names {
  496. if key == name {
  497. index = int(enum_type.values[i] - t.min_value)
  498. break
  499. }
  500. }
  501. if index < 0 || index >= t.count {
  502. return UNSUPPORTED_TYPE
  503. }
  504. index_ptr := rawptr(uintptr(v.data) + uintptr(index*t.elem_size))
  505. index_any := any{index_ptr, t.elem.id}
  506. unmarshal_value(p, index_any) or_return
  507. if parse_comma(p) {
  508. break enumerated_array_loop
  509. }
  510. }
  511. case:
  512. return UNSUPPORTED_TYPE
  513. }
  514. if end_token == .Close_Brace {
  515. unmarshal_expect_token(p, .Close_Brace)
  516. }
  517. return
  518. }
  519. @(private)
  520. unmarshal_count_array :: proc(p: ^Parser) -> (length: uintptr) {
  521. p_backup := p^
  522. p.allocator = mem.nil_allocator()
  523. unmarshal_expect_token(p, .Open_Bracket)
  524. array_length_loop: for p.curr_token.kind != .Close_Bracket {
  525. _, _ = parse_value(p)
  526. length += 1
  527. if parse_comma(p) {
  528. break
  529. }
  530. }
  531. p^ = p_backup
  532. return
  533. }
  534. @(private)
  535. unmarshal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
  536. assign_array :: proc(p: ^Parser, base: rawptr, elem: ^reflect.Type_Info, length: uintptr) -> Unmarshal_Error {
  537. unmarshal_expect_token(p, .Open_Bracket)
  538. for idx: uintptr = 0; p.curr_token.kind != .Close_Bracket; idx += 1 {
  539. assert(idx < length)
  540. elem_ptr := rawptr(uintptr(base) + idx*uintptr(elem.size))
  541. elem := any{elem_ptr, elem.id}
  542. unmarshal_value(p, elem) or_return
  543. if parse_comma(p) {
  544. break
  545. }
  546. }
  547. unmarshal_expect_token(p, .Close_Bracket)
  548. return nil
  549. }
  550. UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
  551. ti := reflect.type_info_base(type_info_of(v.id))
  552. length := unmarshal_count_array(p)
  553. #partial switch t in ti.variant {
  554. case reflect.Type_Info_Slice:
  555. raw := (^mem.Raw_Slice)(v.data)
  556. data := bytes_make(t.elem.size * int(length), t.elem.align, p.allocator) or_return
  557. raw.data = raw_data(data)
  558. raw.len = int(length)
  559. return assign_array(p, raw.data, t.elem, length)
  560. case reflect.Type_Info_Dynamic_Array:
  561. raw := (^mem.Raw_Dynamic_Array)(v.data)
  562. data := bytes_make(t.elem.size * int(length), t.elem.align, p.allocator) or_return
  563. raw.data = raw_data(data)
  564. raw.len = int(length)
  565. raw.cap = int(length)
  566. raw.allocator = p.allocator
  567. return assign_array(p, raw.data, t.elem, length)
  568. case reflect.Type_Info_Array:
  569. // NOTE(bill): Allow lengths which are less than the dst array
  570. if int(length) > t.count {
  571. return UNSUPPORTED_TYPE
  572. }
  573. return assign_array(p, v.data, t.elem, length)
  574. case reflect.Type_Info_Enumerated_Array:
  575. // NOTE(bill): Allow lengths which are less than the dst array
  576. if int(length) > t.count {
  577. return UNSUPPORTED_TYPE
  578. }
  579. return assign_array(p, v.data, t.elem, length)
  580. case reflect.Type_Info_Complex:
  581. // NOTE(bill): Allow lengths which are less than the dst array
  582. if int(length) > 2 {
  583. return UNSUPPORTED_TYPE
  584. }
  585. switch ti.id {
  586. case complex32: return assign_array(p, v.data, type_info_of(f16), 2)
  587. case complex64: return assign_array(p, v.data, type_info_of(f32), 2)
  588. case complex128: return assign_array(p, v.data, type_info_of(f64), 2)
  589. }
  590. return UNSUPPORTED_TYPE
  591. }
  592. return UNSUPPORTED_TYPE
  593. }