doc.odin 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. Package cbor encodes, decodes, marshals and unmarshals types from/into RFC 8949 compatible CBOR binary.
  3. Also provided are conversion to and from JSON and the CBOR diagnostic format.
  4. **Allocations:**
  5. In general, when the following list says allocations are done on the `temp_allocator`, the implementation
  6. still attempts to deallocate them.
  7. This allows you to use an allocator with freeing implemented as the `temp_allocator` which is handy with big CBOR.
  8. - *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on the given `temp_allocator`
  9. some space for the keys of maps in order to sort them and then write them.
  10. Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`).
  11. - *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding.
  12. *No* temporary allocations are done.
  13. - *Marshal*: Same allocation strategy as encoding.
  14. - *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling.
  15. Some temporary allocations are done on the given `temp_allocator`.
  16. **Determinism:**
  17. CBOR defines a deterministic en/decoder, which among other things uses the smallest type possible for integers and floats,
  18. and sorts map keys by their (encoded) lexical bytewise order.
  19. You can enable this behaviour using a combination of flags, also available as the `cbor.ENCODE_FULLY_DETERMINISTIC` constant.
  20. If you just want the small size that comes with this, but not the map sorting (which has a performance cost) you can use the
  21. `cbor.ENCODE_SMALL` constant for the flags.
  22. A deterministic float is a float in the smallest type (f16, f32, f64) that hasn't changed after conversion.
  23. A deterministic integer is an integer in the smallest representation (u8, u16, u32, u64) it fits in.
  24. **Untrusted Input:**
  25. By default input is treated as untrusted; this means the sizes that are encoded in the CBOR are not blindly trusted.
  26. If you were to trust these sizes and allocate space for them, an attacker would be able to cause massive allocations with small payloads.
  27. The decoder has a `max_pre_alloc` field that specifies the maximum amount of bytes (roughly) to pre allocate, a KiB by default.
  28. This does mean reallocations are more common, though. If you know the input is trusted, you can add the `.Trusted_Input` flag to the decoder.
  29. **Tags:**
  30. CBOR describes tags that you can wrap values with to assign a number to describe what type of data will follow.
  31. More information and a list of default tags can be found here: [[RFC 8949 Section 3.4;https://www.rfc-editor.org/rfc/rfc8949.html#name-tagging-of-items]].
  32. A list of registered extension types can be found here: [[IANA CBOR assignments;https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml]].
  33. Tags can either be assigned to a distinct Odin type (used by default),
  34. or be used with struct tags (`cbor_tag:"base64"`, or `cbor_tag:"1"` for example).
  35. By default, the following tags are supported/provided by this implementation:
  36. - *1/epoch*: Assign this tag to `time.Time` or integer fields to use the defined seconds since epoch format.
  37. - *24/cbor*: Assign this tag to string or byte fields to store encoded CBOR (not decoding it).
  38. - *34/base64*: Assign this tag to string or byte fields to store and decode the contents in base64.
  39. - *2 & 3*: Used automatically by the implementation to encode and decode big numbers into/from `core:math/big`.
  40. - *55799*: Self described CBOR, used when `.Self_Described_CBOR` flag is used to wrap the entire binary.
  41. This shows other implementations that we are dealing with CBOR by just looking at the first byte of input.
  42. - *1010*: An extension tag that defines a string type followed by its value. This implementation uses it to support Odin's unions.
  43. Users can provide their own tag implementations using `cbor.tag_register_type(...)` to register a tag for a distinct Odin type,
  44. which is then used automatically when that type is encountered during marshal and unmarshal.
  45. Or with `cbor.tag_register_number(...)` to register a tag number along with an identifier for convenience that can be used with struct tags,
  46. e.g. `cbor_tag:"69"` or `cbor_tag:"my_tag"`.
  47. You can look at the default tags provided for pointers on how these implementations work.
  48. Example:
  49. package main
  50. import "base:intrinsics"
  51. import "core:encoding/cbor"
  52. import "core:fmt"
  53. import "core:reflect"
  54. import "core:time"
  55. Possibilities :: union {
  56. string,
  57. int,
  58. }
  59. Data :: struct {
  60. str: string,
  61. neg: cbor.Negative_U16, // Store a CBOR value directly.
  62. now: time.Time `cbor_tag:"epoch"`, // Wrapped in the epoch tag.
  63. ignore_this: ^Data `cbor:"-"`, // Ignored by implementation.
  64. renamed: f32 `cbor:"renamed :)"`, // Renamed when encoded.
  65. my_union: Possibilities, // Union support.
  66. my_raw: [8]u32 `cbor_tag:"raw"`, // Custom tag that just writes the value as bytes.
  67. }
  68. main :: proc() {
  69. // Example custom tag implementation that instead of breaking down all parts,
  70. // just writes the value as a big byte blob. This is an advanced feature but very powerful.
  71. RAW_TAG_NR :: 200
  72. cbor.tag_register_number({
  73. marshal = proc(_: ^cbor.Tag_Implementation, e: cbor.Encoder, v: any) -> cbor.Marshal_Error {
  74. cbor._encode_u8(e.writer, RAW_TAG_NR, .Tag) or_return
  75. return cbor.err_conv(cbor._encode_bytes(e, reflect.as_bytes(v)))
  76. },
  77. unmarshal = proc(_: ^cbor.Tag_Implementation, d: cbor.Decoder, _: cbor.Tag_Number, v: any) -> (cbor.Unmarshal_Error) {
  78. hdr := cbor._decode_header(d.reader) or_return
  79. maj, add := cbor._header_split(hdr)
  80. if maj != .Bytes {
  81. return .Bad_Tag_Value
  82. }
  83. bytes := cbor.err_conv(cbor._decode_bytes(d, add, maj)) or_return
  84. intrinsics.mem_copy_non_overlapping(v.data, raw_data(bytes), len(bytes))
  85. return nil
  86. },
  87. }, RAW_TAG_NR, "raw")
  88. now := time.Time{_nsec = 1701117968 * 1e9}
  89. data := Data{
  90. str = "Hello, World!",
  91. neg = 300,
  92. now = now,
  93. ignore_this = &Data{},
  94. renamed = 123123.125,
  95. my_union = 3,
  96. my_raw = {1=1, 2=2, 3=3},
  97. }
  98. // Marshal the struct into binary CBOR.
  99. binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC)
  100. fmt.assertf(err == nil, "marshal error: %v", err)
  101. defer delete(binary)
  102. // Decode the binary data into a `cbor.Value`.
  103. decoded, derr := cbor.decode(string(binary))
  104. fmt.assertf(derr == nil, "decode error: %v", derr)
  105. defer cbor.destroy(decoded)
  106. // Turn the CBOR into a human readable representation defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]].
  107. diagnosis, eerr := cbor.to_diagnostic_format(decoded)
  108. fmt.assertf(eerr == nil, "to diagnostic error: %v", eerr)
  109. defer delete(diagnosis)
  110. fmt.println(diagnosis)
  111. }
  112. Output:
  113. {
  114. "my_raw": 200(h'00001000200030000000000000000000'),
  115. "my_union": 1010([
  116. "int",
  117. 3
  118. ]),
  119. "neg": -301,
  120. "now": 1(1701117968),
  121. "renamed :)": 123123.12500000,
  122. "str": "Hello, World!"
  123. }
  124. */
  125. package encoding_cbor