reader.odin 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. package bufio
  2. import "core:io"
  3. import "core:mem"
  4. import "core:unicode/utf8"
  5. import "core:bytes"
  // Reader layers an in-memory byte buffer on top of an io.Reader
  Reader :: struct {
      // storage that holds data fetched from `rd`
      buf:           []byte,
      // allocator passed to `delete` by reader_destroy;
      // reader_init_with_buf stores the zero value here
      buf_allocator: mem.Allocator,

      // the wrapped source
      rd: io.Reader,
      // index into `buf` of the next unread byte
      r:  int,
      // index into `buf` one past the last byte fetched from `rd`
      w:  int,

      // error remembered from the wrapped source until it is handed to the caller
      err: io.Error,

      // value of the most recently read byte, or -1 when there is none to unread
      last_byte:      int,
      // byte width of the most recently read rune, or -1 when there is none to unread
      last_rune_size: int,

      // how many reads in a row may return no data and no error during a refill;
      // values <= 0 are replaced by DEFAULT_MAX_CONSECUTIVE_EMPTY_READS on refill
      max_consecutive_empty_reads: int,
  }
  // Buffer size reader_init uses when the caller does not supply one
  DEFAULT_BUF_SIZE :: 4096

  // Smallest buffer size reader_init will allocate; smaller requests are raised to this
  @(private) MIN_READ_BUFFER_SIZE :: 16

  // Fallback for Reader.max_consecutive_empty_reads when that field is <= 0
  @(private) DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128
  // reader_init resets `b` to read from `rd` and allocates its buffer with `allocator`
  // The buffer holds `size` bytes, raised to MIN_READ_BUFFER_SIZE when `size` is smaller
  // The allocator is stored so that reader_destroy can release the buffer
  reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) {
      reader_reset(b, rd)

      capacity := max(size, MIN_READ_BUFFER_SIZE)
      b.buf = make([]byte, capacity, allocator)
      b.buf_allocator = allocator
  }
  // reader_init_with_buf resets `b` to read from `rd` using the caller-supplied `buf` as storage
  // No allocator is recorded: the stored allocator is the zero value
  reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) {
      b.buf = buf
      b.buf_allocator = mem.Allocator{}
      reader_reset(b, rd)
  }
  // reader_destroy passes the buffer and the stored allocator to `delete`, then zeroes the Reader
  // NOTE(review): after reader_init_with_buf the stored allocator is the zero value; this
  // presumably relies on `delete` doing nothing for a zero allocator - confirm
  reader_destroy :: proc(b: ^Reader) {
      storage := b.buf
      owner := b.buf_allocator
      delete(storage, owner)
      b^ = Reader{}
  }
  // reader_size reports the length in bytes of the Reader's buffer
  reader_size :: proc(b: ^Reader) -> int {
      capacity := len(b.buf)
      return capacity
  }
  // reader_reset points `b` at the source `r` and clears its read state:
  // both buffer positions return to 0, the remembered error is dropped and
  // the unread-byte / unread-rune records are invalidated
  // The buffer, its allocator and max_consecutive_empty_reads are left as they are
  reader_reset :: proc(b: ^Reader, r: io.Reader) {
      b.rd = r
      b.r = 0
      b.w = 0
      b.err = nil
      b.last_byte, b.last_rune_size = -1, -1
  }
  // _reader_read_new_chunk fetches more data from the wrapped source into the buffer
  // Unread bytes are first moved to the front of the buffer
  // Returns .Buffer_Full when there is no free space and .Negative_Read when the source
  // reports a negative count; an error reported by the source itself is stored in b.err
  // (not returned), as is .No_Progress once the retry budget is exhausted
  @(private)
  _reader_read_new_chunk :: proc(b: ^Reader) -> io.Error {
      // compact: slide the unread region down to index 0
      if b.r > 0 {
          pending := b.buf[b.r:b.w]
          copy(b.buf, pending)
          b.w -= b.r
          b.r = 0
      }

      if b.w >= len(b.buf) {
          return .Buffer_Full
      }

      if b.max_consecutive_empty_reads <= 0 {
          b.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS
      }

      // keep asking the source until it yields data or an error, within the retry budget
      attempts_left := b.max_consecutive_empty_reads
      for attempts_left > 0 {
          got, read_err := io.read(b.rd, b.buf[b.w:])
          if got < 0 {
              return .Negative_Read
          }
          b.w += got
          if read_err != nil {
              b.err = read_err
              return nil
          }
          if got > 0 {
              return nil
          }
          attempts_left -= 1
      }

      b.err = .No_Progress
      return nil
  }
  // _reader_consume_err returns the error remembered in b.err and clears it
  @(private)
  _reader_consume_err :: proc(b: ^Reader) -> io.Error {
      pending: io.Error
      pending, b.err = b.err, nil
      return pending
  }
  // reader_peek returns the next n bytes without advancing the reader
  // The returned slice points into the internal buffer, so it stops being valid on the next read call
  // If fewer than n bytes are returned, an error explaining the short read is returned as well
  // When n exceeds the internal buffer size, all buffered bytes are returned with .Buffer_Full
  // A negative n yields .Negative_Count
  // Peeking invalidates the unread-byte and unread-rune records
  reader_peek :: proc(b: ^Reader, n: int) -> (data: []byte, err: io.Error) {
      if n < 0 {
          return nil, .Negative_Count
      }

      b.last_byte, b.last_rune_size = -1, -1

      // refill until enough bytes are buffered, the buffer is full, or the source reported an error
      for {
          have := b.w - b.r
          if have >= n || have >= len(b.buf) || b.err != nil {
              break
          }
          if fill_err := _reader_read_new_chunk(b); fill_err != nil {
              return nil, fill_err
          }
      }

      if n > len(b.buf) {
          return b.buf[b.r:b.w], .Buffer_Full
      }

      count := n
      if have := b.w - b.r; have < count {
          // short peek: report the remembered error, or .Buffer_Full when there is none
          count = have
          err = _reader_consume_err(b)
          if err == nil {
              err = .Buffer_Full
          }
      }
      return b.buf[b.r : b.r+count], err
  }
  // reader_buffered reports how many bytes are buffered and not yet read
  reader_buffered :: proc(b: ^Reader) -> int {
      unread := b.w - b.r
      return unread
  }
  // reader_discard skips the next n bytes and returns the number of bytes actually discarded
  // A negative n yields .Negative_Count; n == 0 does nothing
  // If fewer than n bytes could be skipped, `discarded` holds the count skipped so far and
  // `err` explains why
  // Discarding invalidates the unread-byte and unread-rune records, so a following
  // reader_unread_byte / reader_unread_rune fails with .Invalid_Unread
  reader_discard :: proc(b: ^Reader, n: int) -> (discarded: int, err: io.Error) {
      if n < 0 {
          return 0, .Negative_Count
      }
      if n == 0 {
          return
      }

      // The last byte/rune read is no longer adjacent to the read position; leaving these
      // set would let an unread step back into (and overwrite) discarded data
      b.last_byte = -1
      b.last_rune_size = -1

      remaining := n
      for {
          skip := reader_buffered(b)
          if skip == 0 {
              if fill_err := _reader_read_new_chunk(b); fill_err != nil {
                  // report what was skipped before the refill failed
                  return n - remaining, fill_err
              }
              skip = reader_buffered(b)
          }
          skip = min(skip, remaining)
          b.r += skip
          remaining -= skip
          if remaining == 0 {
              return n, nil
          }
          if b.err != nil {
              return n - remaining, _reader_consume_err(b)
          }
      }
      return
  }
  // reader_read copies data into p and returns the number of bytes copied
  // At most one read is issued to the wrapped source, so the count may be less than len(p)
  // When the buffer is empty and p is at least as large as the buffer, the source is read
  // straight into p without going through the buffer
  // An empty p returns 0 with no error while bytes are buffered, otherwise 0 with the
  // remembered error (which is cleared)
  reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
      if len(p) == 0 {
          if reader_buffered(b) > 0 {
              return 0, nil
          }
          return 0, _reader_consume_err(b)
      }

      if b.r == b.w {
          // nothing buffered: surface a remembered error before touching the source
          if b.err != nil {
              return 0, _reader_consume_err(b)
          }

          if len(p) >= len(b.buf) {
              // large destination: read directly into it
              n, b.err = io.read(b.rd, p)
              if n < 0 {
                  return 0, .Negative_Read
              }
              if n > 0 {
                  b.last_byte = int(p[n-1])
                  b.last_rune_size = -1
              }
              return n, _reader_consume_err(b)
          }

          // single refill of the whole buffer
          b.r, b.w = 0, 0
          n, b.err = io.read(b.rd, b.buf)
          switch {
          case n < 0:
              return 0, .Negative_Read
          case n == 0:
              return 0, _reader_consume_err(b)
          }
          b.w += n
      }

      // hand out as much buffered data as fits in p
      copied := copy(p, b.buf[b.r:b.w])
      b.r += copied
      b.last_byte = int(b.buf[b.r-1])
      b.last_rune_size = -1
      return copied, nil
  }
  // reader_read_byte reads and returns a single byte
  // If no byte is available, it returns 0 together with an error
  // The byte is recorded so that reader_unread_byte can restore it
  reader_read_byte :: proc(b: ^Reader) -> (byte, io.Error) {
      b.last_rune_size = -1

      // refill until at least one byte is buffered
      for b.r == b.w {
          if b.err != nil {
              return 0, _reader_consume_err(b)
          }
          fill_err := _reader_read_new_chunk(b)
          if fill_err != nil {
              return 0, fill_err
          }
      }

      value := b.buf[b.r]
      b.last_byte = int(value)
      b.r += 1
      return value, nil
  }
  // reader_unread_byte puts the most recently read byte back into the buffer
  // Only one byte can be unread; it fails with .Invalid_Unread when no byte is recorded,
  // or when the read position is 0 while the buffer holds data
  reader_unread_byte :: proc(b: ^Reader) -> io.Error {
      if b.last_byte < 0 {
          return .Invalid_Unread
      }
      if b.r == 0 && b.w > 0 {
          return .Invalid_Unread
      }

      if b.r <= 0 {
          // empty buffer at position 0: make room for exactly one byte at index 0
          b.w = 1
      } else {
          b.r -= 1
      }

      b.buf[b.r] = byte(b.last_byte)
      b.last_byte, b.last_rune_size = -1, -1
      return nil
  }
  // reader_read_rune reads a single UTF-8 encoded unicode character
  // and returns the rune and its size in bytes
  // If the encoded rune is invalid, it consumes one byte and returns utf8.RUNE_ERROR (U+FFFD) with a size of 1
  // If no byte is available, it returns the remembered error (which is cleared)
  // The rune size is recorded so that reader_unread_rune can step back over it
  reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
      // Refill while the buffered bytes cannot hold a complete rune, the source has not
      // reported an error, and the buffer still has free space (unread bytes < capacity).
      // The last clause must compare the unread span b.w-b.r: without it a full buffer
      // holding an incomplete rune would be refilled and fail with .Buffer_Full instead
      // of being decoded as an invalid rune
      for b.r+utf8.UTF_MAX > b.w &&
          !utf8.full_rune(b.buf[b.r:b.w]) &&
          b.err == nil &&
          b.w-b.r < len(b.buf) {
          if err = _reader_read_new_chunk(b); err != nil {
              return
          }
      }

      b.last_rune_size = -1
      if b.r == b.w {
          err = _reader_consume_err(b)
          return
      }

      // single-byte fast path; anything >= RUNE_SELF needs a real decode
      r, size = rune(b.buf[b.r]), 1
      if r >= utf8.RUNE_SELF {
          r, size = utf8.decode_rune(b.buf[b.r : b.w])
      }

      b.r += size
      b.last_byte = int(b.buf[b.r-1])
      b.last_rune_size = size
      return
  }
  // reader_unread_rune steps the read position back over the most recently read rune
  // It fails with .Invalid_Unread when no rune size is recorded or when the read
  // position is smaller than that size
  reader_unread_rune :: proc(b: ^Reader) -> io.Error {
      width := b.last_rune_size
      if width < 0 || b.r < width {
          return .Invalid_Unread
      }

      b.r -= width
      b.last_byte, b.last_rune_size = -1, -1
      return nil
  }
  // reader_write_to drains the Reader into `w` and returns the number of bytes written
  // Buffered bytes are written first. After that:
  //   - if the wrapped source converts via io.to_writer_to, the rest is delegated to io.write_to
  //   - else if `w` converts via io.to_reader_from, the rest is delegated to io.read_from
  //   - otherwise the buffer is refilled and written repeatedly until a refill leaves it empty
  // A remembered .EOF is cleared rather than reported; any other remembered error is returned
  // Writing invalidates the unread-byte and unread-rune records
  reader_write_to :: proc(b: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
      // write_buf writes the unread part of the buffer to `w` and advances the read position
      write_buf :: proc(b: ^Reader, w: io.Writer) -> (i64, io.Error) {
          n, err := io.write(w, b.buf[b.r:b.w])
          if n < 0 {
              return 0, .Negative_Write
          }
          b.r += n
          return i64(n), err
      }

      // The bytes handed to `w` are gone from the reader's point of view; leaving these set
      // would let a later unread step back into data that was already written out
      b.last_byte = -1
      b.last_rune_size = -1

      n, err = write_buf(b, w)
      if err != nil {
          return
      }

      m: i64
      if nr, ok := io.to_writer_to(b.rd); ok {
          m, err = io.write_to(nr, w)
          n += m
          return n, err
      }

      if nw, ok := io.to_reader_from(w); ok {
          m, err = io.read_from(nw, b.rd)
          n += m
          return n, err
      }

      // prime the buffer if it has free space
      if b.w-b.r < len(b.buf) {
          if err = _reader_read_new_chunk(b); err != nil {
              return
          }
      }

      // alternate write / refill until a refill produces nothing
      for b.r < b.w {
          m, err = write_buf(b, w)
          n += m
          if err != nil {
              return
          }
          if err = _reader_read_new_chunk(b); err != nil {
              return
          }
      }

      // reaching the end of the source is the expected way to finish
      if b.err == .EOF {
          b.err = nil
      }

      err = _reader_consume_err(b)
      return
  }
  // reader_to_stream wraps `b` in an io.Stream whose vtable is _reader_vtable
  // The stream stores the pointer `b` itself as its data
  reader_to_stream :: proc(b: ^Reader) -> (s: io.Stream) {
      s.stream_vtable = _reader_vtable
      s.stream_data = b
      return s
  }
  // _reader_vtable maps io.Stream operations onto the matching reader_* procedures
  // Every entry casts the stream's data pointer back to ^Reader and forwards the call
  @(private)
  _reader_vtable := &io.Stream_VTable{
      impl_destroy = proc(s: io.Stream) -> io.Error {
          reader_destroy((^Reader)(s.stream_data))
          return nil
      },
      impl_read = proc(s: io.Stream, p: []byte) -> (n: int, err: io.Error) {
          return reader_read((^Reader)(s.stream_data), p)
      },
      impl_read_byte = proc(s: io.Stream) -> (c: byte, err: io.Error) {
          return reader_read_byte((^Reader)(s.stream_data))
      },
      impl_unread_byte = proc(s: io.Stream) -> io.Error {
          return reader_unread_byte((^Reader)(s.stream_data))
      },
      impl_read_rune = proc(s: io.Stream) -> (r: rune, size: int, err: io.Error) {
          return reader_read_rune((^Reader)(s.stream_data))
      },
      impl_unread_rune = proc(s: io.Stream) -> io.Error {
          return reader_unread_rune((^Reader)(s.stream_data))
      },
      impl_write_to = proc(s: io.Stream, w: io.Writer) -> (n: i64, err: io.Error) {
          return reader_write_to((^Reader)(s.stream_data), w)
      },
  }
  341. //
  342. // Utility procedures
  343. //
  // reader_read_slice reads until the first occurrence of delim from the reader
  // It returns a slice pointing at the bytes in the internal buffer, delimiter included
  // The bytes stop being valid at the next read
  // If the wrapped source reports an error before a delimiter is found, the bytes
  // buffered so far are returned together with that error
  // reader_read_slice fails with error .Buffer_Full if the buffer fills without a delim;
  // the whole buffer is returned and consumed in that case
  // Because the returned data will be overwritten by the next IO operation,
  // reader_read_bytes or reader_read_string is usually preferred
  //
  // reader_read_slice returns err != nil if and only if line does not end in delim
  //
  reader_read_slice :: proc(b: ^Reader, delim: byte) -> (line: []byte, err: io.Error) {
      // number of unread bytes already searched for delim on earlier iterations
      scanned := 0

      for {
          hit := bytes.index_byte(b.buf[b.r+scanned : b.w], delim)
          if hit >= 0 {
              // delimiter found: return everything up to and including it
              end := scanned + hit + 1
              line = b.buf[b.r:][:end]
              b.r += end
              break
          }

          if b.err != nil {
              // source failed: hand back what is buffered along with the error
              line = b.buf[b.r:b.w]
              b.r = b.w
              err = _reader_consume_err(b)
              break
          }

          if reader_buffered(b) >= len(b.buf) {
              // buffer is full and holds no delimiter
              b.r = b.w
              line = b.buf
              err = .Buffer_Full
              break
          }

          // do not rescan bytes that were already searched
          scanned = b.w - b.r

          err = _reader_read_new_chunk(b)
          if err != nil {
              break
          }
      }

      // remember the final byte so it can be unread
      if len(line) > 0 {
          b.last_byte = int(line[len(line)-1])
          b.last_rune_size = -1
      }
      return
  }
  // reader_read_bytes reads until the first occurrence of delim from the Reader
  // It returns a slice allocated with `allocator` containing the data up to and including the delimiter
  // Lines longer than the internal buffer are assembled from several reader_read_slice calls
  // When reading stops on an error other than .Buffer_Full, the data gathered so far is
  // returned together with that error
  reader_read_bytes :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (buf: []byte, err: io.Error) {
      collected: [dynamic]byte
      collected.allocator = allocator

      for {
          piece, piece_err := reader_read_slice(b, delim)
          append(&collected, ..piece)

          // .Buffer_Full only means the line continues past this fragment
          if piece_err != .Buffer_Full {
              err = piece_err
              break
          }
      }

      return collected[:], err
  }
  // reader_read_string reads until the first occurrence of delim from the Reader
  // It returns the bytes produced by reader_read_bytes (allocated with `allocator`,
  // delimiter included) converted to a string, along with that call's error
  reader_read_string :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (string, io.Error) {
      data, read_err := reader_read_bytes(b, delim, allocator)
      return string(data), read_err
  }