reader.odin 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. package bufio
  2. import "core:io"
  3. import "core:mem"
  4. import "core:unicode/utf8"
  5. import "core:bytes"
  6. // Reader is a buffered wrapper for an io.Reader
  7. Reader :: struct {
  8. buf: []byte,
  9. buf_allocator: mem.Allocator,
  10. rd: io.Reader, // reader
  11. r, w: int, // read and write positions for buf
  12. err: io.Error,
  13. last_byte: int, // last byte read, invalid is -1
  14. last_rune_size: int, // size of last rune read, invalid is -1
  15. max_consecutive_empty_reads: int,
  16. }
  17. DEFAULT_BUF_SIZE :: 4096
  18. @(private)
  19. MIN_READ_BUFFER_SIZE :: 16
  20. @(private)
  21. DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128
  22. reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) {
  23. size := size
  24. size = max(size, MIN_READ_BUFFER_SIZE)
  25. reader_reset(b, rd)
  26. b.buf_allocator = allocator
  27. b.buf = make([]byte, size, allocator)
  28. }
  29. reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) {
  30. reader_reset(b, rd)
  31. b.buf_allocator = {}
  32. b.buf = buf
  33. }
  34. // reader_destroy destroys the underlying buffer with its associated allocator IFF that allocator has been set
  35. reader_destroy :: proc(b: ^Reader) {
  36. delete(b.buf, b.buf_allocator)
  37. b^ = {}
  38. }
  39. reader_size :: proc(b: ^Reader) -> int {
  40. return len(b.buf)
  41. }
  42. reader_reset :: proc(b: ^Reader, r: io.Reader) {
  43. b.rd = r
  44. b.r, b.w = 0, 0
  45. b.err = nil
  46. b.last_byte = -1
  47. b.last_rune_size = -1
  48. }
  49. @(private)
  50. _reader_read_new_chunk :: proc(b: ^Reader) -> io.Error {
  51. if b.r > 0 {
  52. copy(b.buf, b.buf[b.r:b.w])
  53. b.w -= b.r
  54. b.r = 0
  55. }
  56. if b.w >= len(b.buf) {
  57. return .Buffer_Full
  58. }
  59. if b.max_consecutive_empty_reads <= 0 {
  60. b.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS
  61. }
  62. // read new data, and try a limited number of times
  63. for i := b.max_consecutive_empty_reads; i > 0; i -= 1 {
  64. n, err := io.read(b.rd, b.buf[b.w:])
  65. if n < 0 {
  66. return .Negative_Read
  67. }
  68. b.w += n
  69. if err != nil {
  70. b.err = err
  71. return nil
  72. }
  73. if n > 0 {
  74. return nil
  75. }
  76. }
  77. b.err = .No_Progress
  78. return nil
  79. }
  80. @(private)
  81. _reader_consume_err :: proc(b: ^Reader) -> io.Error {
  82. err := b.err
  83. b.err = nil
  84. return err
  85. }
  86. // reader_peek returns the next n bytes without advancing the reader
  87. // The bytes stop being valid on the next read call
  88. // If reader_peek returns fewer than n bytes, it also return an error
  89. // explaining why the read is short
  90. // The error will be .Buffer_Full if n is larger than the internal buffer size
  91. reader_peek :: proc(b: ^Reader, n: int) -> (data: []byte, err: io.Error) {
  92. n := n
  93. if n < 0 {
  94. return nil, .Negative_Count
  95. }
  96. b.last_byte = -1
  97. b.last_rune_size = -1
  98. for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil {
  99. _reader_read_new_chunk(b) or_return
  100. }
  101. if n > len(b.buf) {
  102. return b.buf[b.r : b.w], .Buffer_Full
  103. }
  104. if available := b.w - b.r; available < n {
  105. n = available
  106. err = _reader_consume_err(b)
  107. if err == nil {
  108. err = .Buffer_Full
  109. }
  110. }
  111. return b.buf[b.r : b.r+n], err
  112. }
  113. // reader_buffered returns the number of bytes that can be read from the current buffer
  114. reader_buffered :: proc(b: ^Reader) -> int {
  115. return b.w - b.r
  116. }
  117. // reader_discard skips the next n bytes, and returns the number of bytes that were discarded
  118. reader_discard :: proc(b: ^Reader, n: int) -> (discarded: int, err: io.Error) {
  119. if n < 0 {
  120. return 0, .Negative_Count
  121. }
  122. if n == 0 {
  123. return
  124. }
  125. remaining := n
  126. for {
  127. skip := reader_buffered(b)
  128. if skip == 0 {
  129. _reader_read_new_chunk(b) or_return
  130. skip = reader_buffered(b)
  131. }
  132. skip = min(skip, remaining)
  133. b.r += skip
  134. remaining -= skip
  135. if remaining == 0 {
  136. return n, nil
  137. }
  138. if b.err != nil {
  139. return n - remaining, _reader_consume_err(b)
  140. }
  141. }
  142. return
  143. }
  144. // reader_read reads data into p
  145. // The bytes are taken from at most one read on the underlying Reader, which means n may be less than len(p)
  146. reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
  147. n = len(p)
  148. if n == 0 {
  149. if reader_buffered(b) > 0 {
  150. return 0, nil
  151. }
  152. return 0, _reader_consume_err(b)
  153. }
  154. if b.r == b.w {
  155. if b.err != nil {
  156. return 0, _reader_consume_err(b)
  157. }
  158. if len(p) >= len(b.buf) {
  159. n, b.err = io.read(b.rd, p)
  160. if n < 0 {
  161. return 0, .Negative_Read
  162. }
  163. if n > 0 {
  164. b.last_byte = int(p[n-1])
  165. b.last_rune_size = -1
  166. }
  167. return n, _reader_consume_err(b)
  168. }
  169. b.r, b.w = 0, 0
  170. n, b.err = io.read(b.rd, b.buf)
  171. if n < 0 {
  172. return 0, .Negative_Read
  173. }
  174. if n == 0 {
  175. return 0, _reader_consume_err(b)
  176. }
  177. b.w += n
  178. }
  179. n = copy(p, b.buf[b.r:b.w])
  180. b.r += n
  181. b.last_byte = int(b.buf[b.r-1])
  182. b.last_rune_size = -1
  183. return n, nil
  184. }
  185. // reader_read_byte reads and returns a single byte
  186. // If no byte is available, it return an error
  187. reader_read_byte :: proc(b: ^Reader) -> (c: byte, err: io.Error) {
  188. b.last_rune_size = -1
  189. for b.r == b.w {
  190. if b.err != nil {
  191. return 0, _reader_consume_err(b)
  192. }
  193. _reader_read_new_chunk(b) or_return
  194. }
  195. c = b.buf[b.r]
  196. b.r += 1
  197. b.last_byte = int(c)
  198. return
  199. }
  200. // reader_unread_byte unreads the last byte. Only the most recently read byte can be unread
  201. reader_unread_byte :: proc(b: ^Reader) -> io.Error {
  202. if b.last_byte < 0 || b.r == 0 && b.w > 0 {
  203. return .Invalid_Unread
  204. }
  205. if b.r > 0 {
  206. b.r -= 1
  207. } else {
  208. // b.r == 0 && b.w == 0
  209. b.w = 1
  210. }
  211. b.buf[b.r] = byte(b.last_byte)
  212. b.last_byte = -1
  213. b.last_rune_size = -1
  214. return nil
  215. }
  216. // reader_read_rune reads a single UTF-8 encoded unicode character
  217. // and returns the rune and its size in bytes
  218. // If the encoded rune is invalid, it consumes one byte and returns utf8.RUNE_ERROR (U+FFFD) with a size of 1
  219. reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
  220. for b.r+utf8.UTF_MAX > b.w &&
  221. !utf8.full_rune(b.buf[b.r:b.w]) &&
  222. b.err == nil &&
  223. b.w-b.w < len(b.buf) {
  224. _reader_read_new_chunk(b) or_return
  225. }
  226. b.last_rune_size = -1
  227. if b.r == b.w {
  228. return 0, 0, _reader_consume_err(b)
  229. }
  230. r, size = rune(b.buf[b.r]), 1
  231. if r >= utf8.RUNE_SELF {
  232. r, size = utf8.decode_rune(b.buf[b.r : b.w])
  233. }
  234. b.r += size
  235. b.last_byte = int(b.buf[b.r-1])
  236. b.last_rune_size = size
  237. return
  238. }
  239. // reader_unread_rune unreads the last rune. Only the most recently read rune can be unread
  240. reader_unread_rune :: proc(b: ^Reader) -> io.Error {
  241. if b.last_rune_size < 0 || b.r < b.last_rune_size {
  242. return .Invalid_Unread
  243. }
  244. b.r -= b.last_rune_size
  245. b.last_byte = -1
  246. b.last_rune_size = -1
  247. return nil
  248. }
  249. reader_write_to :: proc(b: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
  250. write_buf :: proc(b: ^Reader, w: io.Writer) -> (i64, io.Error) {
  251. n, err := io.write(w, b.buf[b.r:b.w])
  252. if n < 0 {
  253. return 0, .Negative_Write
  254. }
  255. b.r += n
  256. return i64(n), err
  257. }
  258. n = write_buf(b, w) or_return
  259. m: i64
  260. if b.w-b.r < len(b.buf) {
  261. _reader_read_new_chunk(b) or_return
  262. }
  263. for b.r < b.w {
  264. m, err = write_buf(b, w)
  265. n += m // this needs to be done before returning
  266. if err != nil {
  267. return
  268. }
  269. _reader_read_new_chunk(b) or_return
  270. }
  271. if b.err == .EOF {
  272. b.err = nil
  273. }
  274. err = _reader_consume_err(b)
  275. return
  276. }
  277. // reader_to_stream converts a Reader into an io.Stream
  278. reader_to_stream :: proc(b: ^Reader) -> (s: io.Stream) {
  279. s.data = b
  280. s.procedure = _reader_proc
  281. return
  282. }
  283. @(private)
  284. _reader_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offset: i64, whence: io.Seek_From) -> (n: i64, err: io.Error) {
  285. b := (^Reader)(stream_data)
  286. #partial switch mode {
  287. case .Read:
  288. return io._i64_err(reader_read(b, p))
  289. case .Destroy:
  290. reader_destroy(b)
  291. return
  292. case .Query:
  293. return io.query_utility({.Read, .Destroy, .Query})
  294. }
  295. return 0, .Empty
  296. }
  297. //
  298. // Utility procedures
  299. //
  300. // reader_read_slice reads until the first occurrence of delim from the reader
  301. // It returns a slice pointing at the bytes in the buffer
  302. // The bytes stop being valid at the next read
  303. // If reader_read_slice encounters an error before finding a delimiter
  304. // reader_read_slice fails with error .Buffer_Full if the buffer fills without a delim
  305. // Because the data returned from reader_read_slice will be overwritten on the
  306. // next IO operation, reader_read_bytes or reader_read_string is usually preferred
  307. //
  308. // reader_read_slice returns err != nil if and only if line does not end in delim
  309. //
  310. reader_read_slice :: proc(b: ^Reader, delim: byte) -> (line: []byte, err: io.Error) {
  311. s := 0
  312. for {
  313. if i := bytes.index_byte(b.buf[b.r+s : b.w], delim); i >= 0 {
  314. i += s
  315. line = b.buf[b.r:][:i+1]
  316. b.r += i + 1
  317. break
  318. }
  319. if b.err != nil {
  320. line = b.buf[b.r : b.w]
  321. b.r = b.w
  322. err = _reader_consume_err(b)
  323. break
  324. }
  325. if reader_buffered(b) >= len(b.buf) {
  326. b.r = b.w
  327. line = b.buf
  328. err = .Buffer_Full
  329. break
  330. }
  331. s = b.w - b.r
  332. _reader_read_new_chunk(b) or_break
  333. }
  334. if i := len(line)-1; i >= 0 {
  335. b.last_byte = int(line[i])
  336. b.last_rune_size = -1
  337. }
  338. return
  339. }
  340. // reader_read_bytes reads until the first occurrence of delim from the Reader
  341. // It returns an allocated slice containing the data up to and including the delimiter
  342. reader_read_bytes :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (buf: []byte, err: io.Error) {
  343. full: [dynamic]byte
  344. full.allocator = allocator
  345. frag: []byte
  346. for {
  347. e: io.Error
  348. frag, e = reader_read_slice(b, delim)
  349. if e == nil {
  350. break
  351. }
  352. if e != .Buffer_Full {
  353. err = e
  354. break
  355. }
  356. append(&full, ..frag)
  357. }
  358. append(&full, ..frag)
  359. return full[:], err
  360. }
  361. // reader_read_string reads until the first occurrence of delim from the Reader
  362. // It returns an allocated string containing the data up to and including the delimiter
  363. reader_read_string :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (string, io.Error) {
  364. buf, err := reader_read_bytes(b, delim, allocator)
  365. return string(buf), err
  366. }