Browse Source

encoding/cbor: add general docs and example

Laytan Laats 1 year ago
parent
commit
c1cf6c1a95

+ 0 - 5
core/encoding/cbor/cbor.odin

@@ -1,14 +1,9 @@
-// Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary.
-// Also provided are conversion to and from JSON and the CBOR diagnostic format.
-//
-// You can additionally provide custom CBOR tag implementations for your use cases.
 package cbor
 package cbor
 
 
 import "core:encoding/json"
 import "core:encoding/json"
 import "core:intrinsics"
 import "core:intrinsics"
 import "core:io"
 import "core:io"
 import "core:mem"
 import "core:mem"
-import "core:runtime"
 import "core:strconv"
 import "core:strconv"
 import "core:strings"
 import "core:strings"
 
 

+ 4 - 4
core/encoding/cbor/coding.odin

@@ -121,7 +121,7 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V
 	
 	
 	d := d
 	d := d
 
 
-	DECODE_PROGRESS_GUARD(&d)
+	_DECODE_PROGRESS_GUARD(&d)
 
 
 	v, err = _decode_from_decoder(d)
 	v, err = _decode_from_decoder(d)
 	// Normal EOF does not exist here, we try to read the exact amount that is said to be provided.
 	// Normal EOF does not exist here, we try to read the exact amount that is said to be provided.
@@ -228,7 +228,7 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc
 encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error {
 encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error {
 	e := e
 	e := e
 
 
-	ENCODE_PROGRESS_GUARD(&e) or_return
+	_ENCODE_PROGRESS_GUARD(&e) or_return
 	
 	
 	switch v_spec in v {
 	switch v_spec in v {
 	case u8:           return _encode_u8(e.writer, v_spec, .Unsigned)
 	case u8:           return _encode_u8(e.writer, v_spec, .Unsigned)
@@ -256,7 +256,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error {
 }
 }
 
 
 @(deferred_in_out=_decode_progress_end)
 @(deferred_in_out=_decode_progress_end)
-DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) {
+_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) {
 	if ._In_Progress in d.flags {
 	if ._In_Progress in d.flags {
 		return
 		return
 	}
 	}
@@ -286,7 +286,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem
 }
 }
 
 
 @(deferred_in_out=_encode_progress_end)
 @(deferred_in_out=_encode_progress_end)
-ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) {
+_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) {
 	if ._In_Progress in e.flags {
 	if ._In_Progress in e.flags {
 		return
 		return
 	}
 	}

+ 143 - 0
core/encoding/cbor/doc.odin

@@ -0,0 +1,143 @@
+/*
+Package cbor encodes, decodes, marshals and unmarshals types from/into RFC 8949 compatible CBOR binary.
+Also provided are conversion to and from JSON and the CBOR diagnostic format.
+
+**Allocations:**
+
+In general, when the following list says that allocations are done on the `context.temp_allocator`,
+the package still attempts to deallocate those allocations.
+This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR.
+
+If you use the default `context.temp_allocator` it will be restored to the state it was in when the process (en/decoding, (un)marshal) started.
+
+- *Encoding*:  If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on `context.temp_allocator`
+               some space for the keys of maps in order to sort them and then write them.
+               Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`).
+
+- *Decoding*:  Allocates everything on the given allocator and input given can be deleted after decoding.
+               *No* allocations are done on the `context.temp_allocator`.
+
+- *Marshal*:   Same allocation strategy as encoding.
+
+- *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling.
+               Some temporary allocations are done on the `context.temp_allocator`.
+
+**Determinism:**
+
+CBOR defines a deterministic en/decoder, which among other things uses the smallest type possible for integers and floats,
+and sorts map keys by their (encoded) lexical bytewise order.
+
+You can enable this behaviour using a combination of flags, also available as the `cbor.ENCODE_FULLY_DETERMINISTIC` constant.
+If you just want the small size that comes with this, but not the map sorting (which has a performance cost) you can use the
+`cbor.ENCODE_SMALL` constant for the flags.
+
+A deterministic float is a float in the smallest type (f16, f32, f64) that hasn't changed after conversion.
+A deterministic integer is an integer in the smallest representation (u8, u16, u32, u64) it fits in.
+
+**Untrusted Input:**
+
+By default, input is treated as untrusted; this means the sizes that are encoded in the CBOR are not blindly trusted.
+If you were to trust these sizes and allocate space for them, an attacker would be able to cause massive allocations with small payloads.
+
+The decoder has a `max_pre_alloc` field that specifies the maximum number of bytes (roughly) to preallocate, one KiB by default.
+
+This does mean reallocations are more common, though. If you know the input is trusted, you can add the `.Trusted_Input` flag to the decoder.
+
+**Tags:**
+
+CBOR describes tags that you can wrap values with to assign a number to describe what type of data will follow.
+
+More information and a list of default tags can be found here: [[RFC 8949 Section 3.4;https://www.rfc-editor.org/rfc/rfc8949.html#name-tagging-of-items]].
+
+A list of registered extension types can be found here: [[IANA CBOR assignments;https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml]].
+
+Tags can either be assigned to a distinct Odin type (used by default),
+or be used with struct tags (`cbor_tag:"base64"`, or `cbor_tag:"1"` for example).
+
+By default, the following tags are supported/provided by this implementation:
+
+- *1/epoch*:   Assign this tag to `time.Time` or integer fields to use the defined seconds since epoch format.
+
+- *24/cbor*:   Assign this tag to string or byte fields to store encoded CBOR (not decoding it).
+
+- *34/base64*: Assign this tag to string or byte fields to store and decode the contents in base64.
+
+- *2 & 3*:     Used automatically by the implementation to encode and decode big numbers into/from `core:math/big`.
+
+- *55799*:     Self described CBOR, used when `.Self_Described_CBOR` flag is used to wrap the entire binary.
+               This shows other implementations that we are dealing with CBOR by just looking at the first byte of input.
+
+- *1010*:      An extension tag that defines a string type followed by its value, this is used by this implementation to support Odin's unions.
+
+Users can provide their own tag implementations using `cbor.tag_register_type(...)` to register a tag for a distinct Odin type,
+which is then used automatically when that type is encountered during marshal and unmarshal,
+or using `cbor.tag_register_number(...)` to register a tag number along with an identifier for convenience that can be used with struct tags,
+e.g. `cbor_tag:"69"` or `cbor_tag:"my_tag"`.
+
+You can look at the default tags provided for guidance on how these implementations work.
+
+Example:
+	package main
+
+	import "core:encoding/cbor"
+	import "core:fmt"
+	import "core:time"
+
+	Possibilities :: union {
+		string,
+		int,
+	}
+
+	Data :: struct {
+		str: string,
+		neg: cbor.Negative_U16,            // Store a CBOR value directly.
+		now: time.Time `cbor_tag:"epoch"`, // Wrapped in the epoch tag.
+		ignore_this: ^Data `cbor:"-"`,     // Ignored by implementation.
+		renamed: f32 `cbor:"renamed :)"`,  // Renamed when encoded.
+		my_union: Possibilities,           // Union support.
+	}
+
+	main :: proc() {
+		now := time.Time{_nsec = 1701117968 * 1e9}
+
+		data := Data{
+			str         = "Hello, World!",
+			neg         = 300,
+			now         = now,
+			ignore_this = &Data{},
+			renamed     = 123123.125,
+			my_union    = 3,
+		}
+		
+		// Marshal the struct into binary CBOR.
+		binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC)
+		assert(err == nil)
+		defer delete(binary)
+		
+		// Decode the binary data into a `cbor.Value`.
+		decoded, derr := cbor.decode(string(binary))
+		assert(derr == nil)
+		defer cbor.destroy(decoded)
+
+		// Turn the CBOR into a human readable representation.
+		diagnosis, eerr := cbor.diagnose(decoded)
+		assert(eerr == nil)
+		defer delete(diagnosis)
+
+		fmt.println(diagnosis)
+	}
+
+Output:
+	{
+		"my_union": 1010([
+			"int",
+			3
+		]),
+		"neg": -301,
+		"now": 1(1701117968),
+		"renamed :)": 123123.12500000,
+		"str": "Hello, World!"
+	}
+*/
+package cbor
+

+ 1 - 1
core/encoding/cbor/marshal.odin

@@ -78,7 +78,7 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars
 marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) {
 marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) {
 	e := e
 	e := e
 
 
-	err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return
+	err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return
 
 
 	if v == nil {
 	if v == nil {
 		return _encode_nil(e.writer)
 		return _encode_nil(e.writer)

+ 1 - 1
core/encoding/cbor/unmarshal.odin

@@ -53,7 +53,7 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all
 unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) {
 unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) {
 	d := d
 	d := d
 
 
-	DECODE_PROGRESS_GUARD(&d)
+	_DECODE_PROGRESS_GUARD(&d)
 
 
 	err = _unmarshal_any_ptr(d, ptr, allocator=allocator)
 	err = _unmarshal_any_ptr(d, ptr, allocator=allocator)