reader.odin 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. package bufio
  2. import "core:io"
  3. import "core:mem"
  4. import "core:unicode/utf8"
  5. import "core:bytes"
  6. // Reader is a buffered wrapper for an io.Reader
  7. Reader :: struct {
  8. buf: []byte,
  9. buf_allocator: mem.Allocator,
  10. rd: io.Reader, // reader
  11. r, w: int, // read and write positions for buf
  12. err: io.Error,
  13. last_byte: int, // last byte read, invalid is -1
  14. last_rune_size: int, // size of last rune read, invalid is -1
  15. max_consecutive_empty_reads: int,
  16. }
  17. DEFAULT_BUF_SIZE :: 4096;
  18. @(private)
  19. MIN_READ_BUFFER_SIZE :: 16;
  20. @(private)
  21. DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128;
  22. reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) {
  23. size := size;
  24. size = max(size, MIN_READ_BUFFER_SIZE);
  25. reader_reset(b, rd);
  26. b.buf_allocator = allocator;
  27. b.buf = make([]byte, size, allocator);
  28. }
  29. reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) {
  30. reader_reset(b, rd);
  31. b.buf_allocator = {};
  32. b.buf = buf;
  33. }
  34. // reader_destroy destroys the underlying buffer with its associated allocator IFF that allocator has been set
  35. reader_destroy :: proc(b: ^Reader) {
  36. delete(b.buf, b.buf_allocator);
  37. b^ = {};
  38. }
  39. reader_size :: proc(b: ^Reader) -> int {
  40. return len(b.buf);
  41. }
  42. reader_reset :: proc(b: ^Reader, r: io.Reader) {
  43. b.rd = r;
  44. b.r, b.w = 0, 0;
  45. b.err = nil;
  46. b.last_byte = -1;
  47. b.last_rune_size = -1;
  48. }
  49. @(private)
  50. _reader_read_new_chunk :: proc(b: ^Reader) -> io.Error {
  51. if b.r > 0 {
  52. copy(b.buf, b.buf[b.r:b.w]);
  53. b.w -= b.r;
  54. b.r = 0;
  55. }
  56. if b.w >= len(b.buf) {
  57. return .Buffer_Full;
  58. }
  59. if b.max_consecutive_empty_reads <= 0 {
  60. b.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS;
  61. }
  62. // read new data, and try a limited number of times
  63. for i := b.max_consecutive_empty_reads; i > 0; i -= 1 {
  64. n, err := io.read(b.rd, b.buf[b.w:]);
  65. if n < 0 {
  66. return .Negative_Read;
  67. }
  68. b.w += n;
  69. if err != nil {
  70. b.err = err;
  71. return nil;
  72. }
  73. if n > 0 {
  74. return nil;
  75. }
  76. }
  77. b.err = .No_Progress;
  78. return nil;
  79. }
  80. @(private)
  81. _reader_consume_err :: proc(b: ^Reader) -> io.Error {
  82. err := b.err;
  83. b.err = nil;
  84. return err;
  85. }
  86. // reader_peek returns the next n bytes without advancing the reader
  87. // The bytes stop being valid on the next read call
  88. // If reader_peek returns fewer than n bytes, it also return an error
  89. // explaining why the read is short
  90. // The error will be .Buffer_Full if n is larger than the internal buffer size
  91. reader_peek :: proc(b: ^Reader, n: int) -> (data: []byte, err: io.Error) {
  92. n := n;
  93. if n < 0 {
  94. return nil, .Negative_Count;
  95. }
  96. b.last_byte = -1;
  97. b.last_rune_size = -1;
  98. for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil {
  99. if fill_err := _reader_read_new_chunk(b); fill_err != nil {
  100. return nil, fill_err;
  101. }
  102. }
  103. if n > len(b.buf) {
  104. return b.buf[b.r : b.w], .Buffer_Full;
  105. }
  106. if available := b.w - b.r; available < n {
  107. n = available;
  108. err = _reader_consume_err(b);
  109. if err == nil {
  110. err = .Buffer_Full;
  111. }
  112. }
  113. return b.buf[b.r : b.r+n], err;
  114. }
  115. // reader_buffered returns the number of bytes that can be read from the current buffer
  116. reader_buffered :: proc(b: ^Reader) -> int {
  117. return b.w - b.r;
  118. }
  119. // reader_discard skips the next n bytes, and returns the number of bytes that were discarded
  120. reader_discard :: proc(b: ^Reader, n: int) -> (discarded: int, err: io.Error) {
  121. if n < 0 {
  122. return 0, .Negative_Count;
  123. }
  124. if n == 0 {
  125. return;
  126. }
  127. remaining := n;
  128. for {
  129. skip := reader_buffered(b);
  130. if skip == 0 {
  131. if fill_err := _reader_read_new_chunk(b); fill_err != nil {
  132. return 0, fill_err;
  133. }
  134. skip = reader_buffered(b);
  135. }
  136. skip = min(skip, remaining);
  137. b.r += skip;
  138. remaining -= skip;
  139. if remaining == 0 {
  140. return n, nil;
  141. }
  142. if b.err != nil {
  143. return n - remaining, _reader_consume_err(b);
  144. }
  145. }
  146. return;
  147. }
  148. // reader_read reads data into p
  149. // The bytes are taken from at most one read on the underlying Reader, which means n may be less than len(p)
  150. reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
  151. n = len(p);
  152. if n == 0 {
  153. if reader_buffered(b) > 0 {
  154. return 0, nil;
  155. }
  156. return 0, _reader_consume_err(b);
  157. }
  158. if b.r == b.w {
  159. if b.err != nil {
  160. return 0, _reader_consume_err(b);
  161. }
  162. if len(p) >= len(b.buf) {
  163. n, b.err = io.read(b.rd, p);
  164. if n < 0 {
  165. return 0, .Negative_Read;
  166. }
  167. if n > 0 {
  168. b.last_byte = int(p[n-1]);
  169. b.last_rune_size = -1;
  170. }
  171. return n, _reader_consume_err(b);
  172. }
  173. b.r, b.w = 0, 0;
  174. n, b.err = io.read(b.rd, b.buf);
  175. if n < 0 {
  176. return 0, .Negative_Read;
  177. }
  178. if n == 0 {
  179. return 0, _reader_consume_err(b);
  180. }
  181. b.w += n;
  182. }
  183. n = copy(p, b.buf[b.r:b.w]);
  184. b.r += n;
  185. b.last_byte = int(b.buf[b.r-1]);
  186. b.last_rune_size = -1;
  187. return n, nil;
  188. }
  189. // reader_read_byte reads and returns a single byte
  190. // If no byte is available, it return an error
  191. reader_read_byte :: proc(b: ^Reader) -> (byte, io.Error) {
  192. b.last_rune_size = -1;
  193. for b.r == b.w {
  194. if b.err != nil {
  195. return 0, _reader_consume_err(b);
  196. }
  197. if err := _reader_read_new_chunk(b); err != nil {
  198. return 0, err;
  199. }
  200. }
  201. c := b.buf[b.r];
  202. b.r += 1;
  203. b.last_byte = int(c);
  204. return c, nil;
  205. }
  206. // reader_unread_byte unreads the last byte. Only the most recently read byte can be unread
  207. reader_unread_byte :: proc(b: ^Reader) -> io.Error {
  208. if b.last_byte < 0 || b.r == 0 && b.w > 0 {
  209. return .Invalid_Unread;
  210. }
  211. if b.r > 0 {
  212. b.r -= 1;
  213. } else {
  214. // b.r == 0 && b.w == 0
  215. b.w = 1;
  216. }
  217. b.buf[b.r] = byte(b.last_byte);
  218. b.last_byte = -1;
  219. b.last_rune_size = -1;
  220. return nil;
  221. }
  222. // reader_read_rune reads a single UTF-8 encoded unicode character
  223. // and returns the rune and its size in bytes
  224. // If the encoded rune is invalid, it consumes one byte and returns utf8.RUNE_ERROR (U+FFFD) with a size of 1
  225. reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
  226. for b.r+utf8.UTF_MAX > b.w &&
  227. !utf8.full_rune(b.buf[b.r:b.w]) &&
  228. b.err == nil &&
  229. b.w-b.w < len(b.buf) {
  230. if err = _reader_read_new_chunk(b); err != nil {
  231. return;
  232. }
  233. }
  234. b.last_rune_size = -1;
  235. if b.r == b.w {
  236. err = _reader_consume_err(b);
  237. return;
  238. }
  239. r, size = rune(b.buf[b.r]), 1;
  240. if r >= utf8.RUNE_SELF {
  241. r, size = utf8.decode_rune(b.buf[b.r : b.w]);
  242. }
  243. b.r += size;
  244. b.last_byte = int(b.buf[b.r-1]);
  245. b.last_rune_size = size;
  246. return;
  247. }
  248. // reader_unread_rune unreads the last rune. Only the most recently read rune can be unread
  249. reader_unread_rune :: proc(b: ^Reader) -> io.Error {
  250. if b.last_rune_size < 0 || b.r < b.last_rune_size {
  251. return .Invalid_Unread;
  252. }
  253. b.r -= b.last_rune_size;
  254. b.last_byte = -1;
  255. b.last_rune_size = -1;
  256. return nil;
  257. }
  258. reader_write_to :: proc(b: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
  259. write_buf :: proc(b: ^Reader, w: io.Writer) -> (i64, io.Error) {
  260. n, err := io.write(w, b.buf[b.r:b.w]);
  261. if n < 0 {
  262. return 0, .Negative_Write;
  263. }
  264. b.r += n;
  265. return i64(n), err;
  266. }
  267. n, err = write_buf(b, w);
  268. if err != nil {
  269. return;
  270. }
  271. m: i64;
  272. if nr, ok := io.to_writer_to(b.rd); ok {
  273. m, err = io.write_to(nr, w);
  274. n += m;
  275. return n, err;
  276. }
  277. if nw, ok := io.to_reader_from(w); ok {
  278. m, err = io.read_from(nw, b.rd);
  279. n += m;
  280. return n, err;
  281. }
  282. if b.w-b.r < len(b.buf) {
  283. if err = _reader_read_new_chunk(b); err != nil {
  284. return;
  285. }
  286. }
  287. for b.r < b.w {
  288. m, err = write_buf(b, w);
  289. n += m;
  290. if err != nil {
  291. return;
  292. }
  293. if err = _reader_read_new_chunk(b); err != nil {
  294. return;
  295. }
  296. }
  297. if b.err == .EOF {
  298. b.err = nil;
  299. }
  300. err = _reader_consume_err(b);
  301. return;
  302. }
  303. // reader_to_stream converts a Reader into an io.Stream
  304. reader_to_stream :: proc(b: ^Reader) -> (s: io.Stream) {
  305. s.stream_data = b;
  306. s.stream_vtable = _reader_vtable;
  307. return;
  308. }
  309. @(private)
  310. _reader_vtable := &io.Stream_VTable{
  311. impl_destroy = proc(s: io.Stream) -> io.Error {
  312. b := (^Reader)(s.stream_data);
  313. reader_destroy(b);
  314. return nil;
  315. },
  316. impl_read = proc(s: io.Stream, p: []byte) -> (n: int, err: io.Error) {
  317. b := (^Reader)(s.stream_data);
  318. return reader_read(b, p);
  319. },
  320. impl_read_byte = proc(s: io.Stream) -> (c: byte, err: io.Error) {
  321. b := (^Reader)(s.stream_data);
  322. return reader_read_byte(b);
  323. },
  324. impl_unread_byte = proc(s: io.Stream) -> io.Error {
  325. b := (^Reader)(s.stream_data);
  326. return reader_unread_byte(b);
  327. },
  328. impl_read_rune = proc(s: io.Stream) -> (r: rune, size: int, err: io.Error) {
  329. b := (^Reader)(s.stream_data);
  330. return reader_read_rune(b);
  331. },
  332. impl_unread_rune = proc(s: io.Stream) -> io.Error {
  333. b := (^Reader)(s.stream_data);
  334. return reader_unread_rune(b);
  335. },
  336. impl_write_to = proc(s: io.Stream, w: io.Writer) -> (n: i64, err: io.Error) {
  337. b := (^Reader)(s.stream_data);
  338. return reader_write_to(b, w);
  339. },
  340. };
  341. //
  342. // Utility procedures
  343. //
  344. // reader_read_slice reads until the first occurrence of delim from the reader
  345. // It returns a slice pointing at the bytes in the buffer
  346. // The bytes stop being valid at the next read
  347. // If reader_read_slice encounters an error before finding a delimiter
  348. // reader_read_slice fails with error .Buffer_Full if the buffer fills without a delim
  349. // Because the data returned from reader_read_slice will be overwritten on the
  350. // next IO operation, reader_read_bytes or reader_read_string is usually preferred
  351. //
  352. // reader_read_slice returns err != nil if and only if line does not end in delim
  353. //
  354. reader_read_slice :: proc(b: ^Reader, delim: byte) -> (line: []byte, err: io.Error) {
  355. s := 0;
  356. for {
  357. if i := bytes.index_byte(b.buf[b.r+s : b.w], delim); i >= 0 {
  358. i += s;
  359. line = b.buf[b.r:][:i+1];
  360. b.r += i + 1;
  361. break;
  362. }
  363. if b.err != nil {
  364. line = b.buf[b.r : b.w];
  365. b.r = b.w;
  366. err = _reader_consume_err(b);
  367. break;
  368. }
  369. if reader_buffered(b) >= len(b.buf) {
  370. b.r = b.w;
  371. line = b.buf;
  372. err = .Buffer_Full;
  373. break;
  374. }
  375. s = b.w - b.r;
  376. if err = _reader_read_new_chunk(b); err != nil {
  377. break;
  378. }
  379. }
  380. if i := len(line)-1; i >= 0 {
  381. b.last_byte = int(line[i]);
  382. b.last_rune_size = -1;
  383. }
  384. return;
  385. }
  386. // reader_read_bytes reads until the first occurrence of delim from the Reader
  387. // It returns an allocated slice containing the data up to and including the delimiter
  388. reader_read_bytes :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (buf: []byte, err: io.Error) {
  389. full: [dynamic]byte;
  390. full.allocator = allocator;
  391. frag: []byte;
  392. for {
  393. e: io.Error;
  394. frag, e = reader_read_slice(b, delim);
  395. if e == nil {
  396. break;
  397. }
  398. if e != .Buffer_Full {
  399. err = e;
  400. break;
  401. }
  402. append(&full, ..frag);
  403. }
  404. append(&full, ..frag);
  405. return full[:], err;
  406. }
  407. // reader_read_string reads until the first occurrence of delim from the Reader
  408. // It returns an allocated string containing the data up to and including the delimiter
  409. reader_read_string :: proc(b: ^Reader, delim: byte, allocator := context.allocator) -> (string, io.Error) {
  410. buf, err := reader_read_bytes(b, delim, allocator);
  411. return string(buf), err;
  412. }