// package csv reads and writes comma-separated values (CSV) files.
// This package supports the format described in RFC 4180 <https://tools.ietf.org/html/rfc4180.html>
package encoding_csv

import "core:bufio"
import "core:bytes"
import "core:io"
import "core:strings"
import "core:unicode/utf8"

// Reader is a data structure used for reading records from a CSV-encoded file
//
// The associated procedures for Reader expect their input to conform to RFC 4180.
Reader :: struct {
	// comma is the field delimiter
	// reader_init will set it to be ',' if it is 0, \r, \n, or the Unicode replacement character (0xfffd)
	// A "comma" must be a valid rune, and it cannot be ", \r, \n, or the Unicode replacement character (0xfffd)
	comma: rune,

	// comment, if not 0, is the comment character
	// Lines beginning with the comment character without a preceding whitespace are ignored
	comment: rune,

	// fields_per_record is the number of expected fields per record
	//         if fields_per_record is >0, 'read' requires each record to have that field count
	//         if fields_per_record is  0, 'read' sets it to the field count in the first record
	//         if fields_per_record is <0, no check is made and records may have a variable field count
	fields_per_record: int,

	// If trim_leading_space is true, leading whitespace in a field is ignored
	// This is done even if the field delimiter (comma), is whitespace
	trim_leading_space: bool,

	// If lazy_quotes is true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field
	lazy_quotes: bool,

	// multiline_fields, when set to true, will treat a field starting with a " as a multiline string
	// therefore, instead of reading until the next \n, it'll read until the next "
	multiline_fields: bool,

	// reuse_record controls whether calls to 'read' may return a slice using the backing buffer
	// for performance
	// By default, each call to 'read' returns a newly allocated slice
	reuse_record: bool,

	// reuse_record_buffer controls whether calls to 'read' clone the strings of each field or uses
	// the data stored in record buffer for performance
	// By default, each call to 'read' clones the strings of each field
	reuse_record_buffer: bool,

	// internal buffers
	r:             bufio.Reader,
	line_count:    int, // current line being read in the CSV file
	raw_buffer:    [dynamic]byte,
	record_buffer: [dynamic]byte,
	field_indices: [dynamic]int,
	last_record:   [dynamic]string,

	sr: strings.Reader, // used by reader_init_with_string

	// Set and used by the iterator. Query using `iterator_last_error`
	last_iterator_error: Error,
}

// Reader_Error_Kind enumerates the CSV-specific failures reported by the reader.
Reader_Error_Kind :: enum {
	Bare_Quote,
	Quote,
	Field_Count,
	Invalid_Delim,
}

// reader_error_kind_string maps each Reader_Error_Kind to a human-readable message.
reader_error_kind_string := [Reader_Error_Kind]string{
	.Bare_Quote     = "bare \" in non-quoted field",
	.Quote          = "extra or missing \" in quoted field",
	.Field_Count    = "wrong field count",
	.Invalid_Delim  = "invalid delimiter",
}

// Reader_Error describes a CSV parse error and where it was detected.
Reader_Error :: struct {
	kind:          Reader_Error_Kind,
	start_line:    int, // value of line_count when the record started
	line:          int, // value of line_count when the error was detected
	column:        int, // rune offset into the line being parsed (not set for every kind)
	expected, got: int, // used by .Field_Count
}

// Error is either a CSV-specific Reader_Error or an io.Error from the underlying reader.
Error :: union {
	Reader_Error,
	io.Error,
}

DEFAULT_RECORD_BUFFER_CAPACITY :: 256

// reader_init initializes a new Reader from r
//
// If reader.comma is 0, \n, \r, or 0xfffd, it is replaced by ','.
// The internal buffers are allocated with buffer_allocator.
reader_init :: proc(reader: ^Reader, r: io.Reader, buffer_allocator := context.allocator) {
	switch reader.comma {
	case '\x00', '\n', '\r', 0xfffd:
		reader.comma = ','
	}

	context.allocator = buffer_allocator
	reserve(&reader.record_buffer, DEFAULT_RECORD_BUFFER_CAPACITY)
	reserve(&reader.raw_buffer,    0)
	reserve(&reader.field_indices, 0)
	reserve(&reader.last_record,   0)
	bufio.reader_init(&reader.r, r)
}

// reader_init_with_string initializes a new Reader from s
//
// The string reader is stored inside `reader` (field `sr`), so `reader` must not be
// moved or copied while it is in use.
reader_init_with_string :: proc(reader: ^Reader, s: string, buffer_allocator := context.allocator) {
	strings.reader_init(&reader.sr, s)
	r, _ := io.to_reader(strings.reader_to_stream(&reader.sr))
	reader_init(reader, r, buffer_allocator)
}

// reader_destroy destroys a Reader
//
// It releases the internal buffers and the buffered reader; it does not free
// records previously returned by 'read'.
reader_destroy :: proc(r: ^Reader) {
	delete(r.raw_buffer)
	delete(r.record_buffer)
	delete(r.field_indices)
	delete(r.last_record)
	bufio.reader_destroy(&r.r)
}

/*
	Returns a record at a time.

	for record, row_idx in csv.iterator_next(&r) { ... }

	TIP: If you process the results within the loop and don't need to own the results,
	you can set the Reader's `reuse_record` and `reuse_record_buffer` to true;
	you won't need to delete the record or its fields.
*/
iterator_next :: proc(r: ^Reader) -> (record: []string, idx: int, err: Error, more: bool) {
	record, r.last_iterator_error = read(r)
	return record, r.line_count - 1, r.last_iterator_error, r.last_iterator_error == nil
}

// Get the last error recorded by `iterator_next`
iterator_last_error :: proc(r: Reader) -> (err: Error) {
	return r.last_iterator_error
}

// read reads a single record (a slice of fields) from r
//
// All \r\n sequences are normalized to \n, including multi-line field
@(require_results)
read :: proc(r: ^Reader, allocator := context.allocator) -> (record: []string, err: Error) {
	if r.reuse_record {
		record, err = _read_record(r, &r.last_record, allocator)
		// Keep last_record's length in step with the returned slice
		// (it becomes 0 when _read_record returns early with a nil record).
		resize(&r.last_record, len(record))
		copy(r.last_record[:], record)
	} else {
		record, err = _read_record(r, nil, allocator)
	}
	return
}

// is_io_error checks whether an Error is a specific io.Error kind
@(require_results)
is_io_error :: proc(err: Error, io_err: io.Error) -> bool {
	if v, ok := err.(io.Error); ok {
		return v == io_err
	}
	return false
}

// read_all reads all the remaining records from r.
// Each record is a slice of fields.
// read_all is defined to read until an EOF, and does not treat EOF as an error
//
// On any other error the records read so far are returned together with the error.
@(require_results)
read_all :: proc(r: ^Reader, allocator := context.allocator) -> ([][]string, Error) {
	context.allocator = allocator
	records: [dynamic][]string
	for {
		record, rerr := _read_record(r, nil, allocator)
		if is_io_error(rerr, .EOF) {
			return records[:], nil
		}
		if rerr != nil {
			// allow for a partial read
			if record != nil {
				append(&records, record)
			}
			return records[:], rerr
		}
		append(&records, record)
	}
}

// read_from_string reads a single record (a slice of fields) from the provided input.
//
// n is taken from the buffered reader's internal read index (`r.r.r`).
// NOTE(review): presumably the number of bytes consumed from `input`; confirm for
// inputs larger than the bufio buffer.
@(require_results)
read_from_string :: proc(input: string, record_allocator := context.allocator, buffer_allocator := context.allocator) -> (record: []string, n: int, err: Error) {
	ir: strings.Reader
	strings.reader_init(&ir, input)
	input_reader, _ := io.to_reader(strings.reader_to_stream(&ir))

	r: Reader
	reader_init(&r, input_reader, buffer_allocator)
	defer reader_destroy(&r)
	record, err = read(&r, record_allocator)
	n = int(r.r.r)
	return
}

// read_all_from_string reads all the remaining records from the provided input.
@(require_results)
read_all_from_string :: proc(input: string, records_allocator := context.allocator, buffer_allocator := context.allocator) -> ([][]string, Error) {
	ir: strings.Reader
	strings.reader_init(&ir, input)
	input_reader, _ := io.to_reader(strings.reader_to_stream(&ir))

	r: Reader
	reader_init(&r, input_reader, buffer_allocator)
	defer reader_destroy(&r)
	return read_all(&r, records_allocator)
}

// is_valid_delim reports whether r may be used as a delimiter or comment rune:
// it must be a valid rune other than 0, ", \r, \n, and utf8.RUNE_ERROR.
@(private, require_results)
is_valid_delim :: proc(r: rune) -> bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
		return false
	}
	return utf8.valid_rune(r)
}

// _read_record reads and parses the next record from r.
//
// Inputs:
// - r:         the Reader to pull lines from
// - dst:       optional destination array to reuse; when nil a fresh array is allocated
// - allocator: used for the destination array (when dst is nil) and for cloned fields
//
// Returns the fields of the record and an error. The error is:
// - a Reader_Error with .Invalid_Delim when comma/comment are invalid or equal,
// - io .EOF when no more input is available,
// - a Reader_Error with .Bare_Quote / .Quote / .Field_Count on malformed input
//   (the fields parsed so far are still returned),
// - or any other io.Error reported by the underlying reader.
@(private, require_results)
_read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) {
	// read_line returns the next raw line. The returned slice refers to either the
	// bufio buffer or r.raw_buffer and is only valid until the next call.
	@(require_results)
	read_line :: proc(r: ^Reader) -> ([]byte, io.Error) {
		if !r.multiline_fields {
			line, err := bufio.reader_read_slice(&r.r, '\n')
			if err == .Buffer_Full {
				// The line did not fit the bufio buffer: accumulate it in raw_buffer.
				clear(&r.raw_buffer)
				append(&r.raw_buffer, ..line)
				for err == .Buffer_Full {
					line, err = bufio.reader_read_slice(&r.r, '\n')
					append(&r.raw_buffer, ..line)
				}
				line = r.raw_buffer[:]
			}
			if len(line) > 0 && err == .EOF {
				// Final line without a trailing newline: treat it as a normal line.
				err = nil
				if line[len(line)-1] == '\r' {
					line = line[:len(line)-1]
				}
			}
			r.line_count += 1

			// normalize \r\n to \n
			n := len(line)
			if n >= 2 && string(line[n-2:]) == "\r\n" {
				line[n-2] = '\n'
				line = line[:n-1]
			}
			return line, err

		} else {
			// Reading a "line" that can possibly contain multiline fields.
			// Unfortunately, this means we need to read a character at a time.
			//
			// NOTE: r.line_count is not advanced on this path.

			err:       io.Error
			cur:       rune
			is_quoted: bool

			field_length := 0

			clear(&r.raw_buffer)

			read_loop: for err == .None {
				cur, _, err = bufio.reader_read_rune(&r.r)

				if err != .None { break read_loop }

				switch cur {
				case '"':
					// A quote opens a quoted field only at the start of a field;
					// any later quote in the field ends the quoted state.
					is_quoted = field_length == 0
					field_length += 1

				case '\n', '\r':
					// A line break outside quotes ends the line and is not stored.
					is_quoted or_break read_loop

				case r.comma:
					field_length = 0

				case:
					field_length += 1
				}

				rune_buf, rune_len := utf8.encode_rune(cur)
				append(&r.raw_buffer, ..rune_buf[:rune_len])
			}

			// Mirror the single-line path: data followed directly by EOF is still a
			// complete line. Without this the final record of an input lacking a
			// trailing newline would be discarded by the caller's EOF check.
			// The next call finds an empty buffer and reports EOF then.
			if err == .EOF && len(r.raw_buffer) > 0 {
				err = nil
			}

			return r.raw_buffer[:], err
		}
		unreachable()
	}

	// length_newline returns 1 if b ends with '\n', otherwise 0.
	@(require_results)
	length_newline :: proc(b: []byte) -> int {
		if len(b) > 0 && b[len(b)-1] == '\n' {
			return 1
		}
		return 0
	}

	// next_rune decodes the first rune of b.
	@(require_results)
	next_rune :: proc(b: []byte) -> rune {
		r, _ := utf8.decode_rune(b)
		return r
	}

	if r.comma == r.comment ||
	   !is_valid_delim(r.comma) ||
	   (r.comment != 0 && !is_valid_delim(r.comment)) {
		err := Reader_Error{
			kind = .Invalid_Delim,
			line = r.line_count,
		}
		return nil, err
	}

	// Fetch the next line that is neither a comment nor empty.
	line, full_line: []byte
	err_read: io.Error
	for err_read == nil {
		line, err_read = read_line(r)
		if r.comment != 0 && next_rune(line) == r.comment {
			line = nil
			continue
		}
		if err_read == nil && len(line) == length_newline(line) {
			line = nil
			continue
		}
		full_line = line
		break
	}

	if is_io_error(err_read, .EOF) {
		return nil, err_read
	}

	err: Error
	quote_len :: len(`"`)
	comma_len := utf8.rune_size(r.comma)
	record_line := r.line_count
	clear(&r.record_buffer)
	clear(&r.field_indices)

	// Fields are appended back to back into record_buffer; field_indices stores the
	// end offset of each field.
	parse_field: for {
		if r.trim_leading_space {
			line = bytes.trim_left_space(line)
		}
		if len(line) == 0 || line[0] != '"' {
			// Unquoted field: runs up to the next comma or the end of the line.
			i := bytes.index_rune(line, r.comma)
			field := line
			if i >= 0 {
				field = field[:i]
			} else {
				field = field[:len(field) - length_newline(field)]
			}

			if !r.lazy_quotes {
				if j := bytes.index_byte(field, '"'); j >= 0 {
					column := utf8.rune_count(full_line[:len(full_line) - len(line[j:])])
					err = Reader_Error{
						kind = .Bare_Quote,
						start_line = record_line,
						line = r.line_count,
						column = column,
					}
					break parse_field
				}
			}
			append(&r.record_buffer, ..field)
			append(&r.field_indices, len(r.record_buffer))
			if i >= 0 {
				line = line[i+comma_len:]
				continue parse_field
			}
			break parse_field

		} else {
			// Quoted field: skip the opening quote, then scan for the closing one.
			line = line[quote_len:]
			for {
				i := bytes.index_byte(line, '"')
				switch {
				case i >= 0:
					append(&r.record_buffer, ..line[:i])
					line = line[i+quote_len:]
					switch ch := next_rune(line); {
					case ch == '"': // append quote
						append(&r.record_buffer, '"')
						line = line[quote_len:]
					case ch == r.comma: // end of field
						line = line[comma_len:]
						append(&r.field_indices, len(r.record_buffer))
						continue parse_field
					case length_newline(line) == len(line): // end of line
						append(&r.field_indices, len(r.record_buffer))
						break parse_field
					case r.lazy_quotes: // bare quote
						append(&r.record_buffer, '"')
					case: // invalid non-escaped quote
						column := utf8.rune_count(full_line[:len(full_line) - len(line) - quote_len])
						err = Reader_Error{
							kind = .Quote,
							start_line = record_line,
							line = r.line_count,
							column = column,
						}
						break parse_field
					}

				case len(line) > 0:
					// No closing quote on this line: the field continues on the next one.
					append(&r.record_buffer, ..line)
					if err_read != nil {
						break parse_field
					}
					line, err_read = read_line(r)
					if is_io_error(err_read, .EOF) {
						err_read = nil
					}
					full_line = line

				case:
					// Ran out of input inside a quoted field.
					if !r.lazy_quotes && err_read == nil {
						column := utf8.rune_count(full_line)
						err = Reader_Error{
							kind = .Quote,
							start_line = record_line,
							line = r.line_count,
							column = column,
						}
						break parse_field
					}
					append(&r.field_indices, len(r.record_buffer))
					break parse_field
				}
			}
		}
	}

	if err == nil && err_read != nil {
		err = err_read
	}

	context.allocator = allocator
	dst := dst
	str := string(r.record_buffer[:])
	if dst == nil {
		// use local variable
		dst = &([dynamic]string){}
	}
	clear(dst)
	resize(dst, len(r.field_indices))
	pre_idx: int
	for idx, i in r.field_indices {
		field := str[pre_idx:idx]
		if !r.reuse_record_buffer {
			field = strings.clone(field)
		}
		dst[i] = field
		pre_idx = idx
	}

	if r.fields_per_record > 0 {
		if len(dst) != r.fields_per_record && err == nil {
			err = Reader_Error{
				kind = .Field_Count,
				start_line = record_line,
				line = r.line_count,
				expected = r.fields_per_record,
				got = len(dst),
			}
		}
	} else if r.fields_per_record == 0 {
		r.fields_per_record = len(dst)
	}
	return dst[:], err
}