line_reader.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. /*-
  2. * Copyright (c) 2008 Tim Kientzle
  3. * Copyright (c) 2010 Joerg Sonnenberger
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer
  11. * in this position and unchanged.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  17. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "lafe_platform.h"
  28. __FBSDID("$FreeBSD$");
  29. #include <errno.h>
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include "err.h"
  34. #include "line_reader.h"
  35. #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
  36. #define strdup _strdup
  37. #endif
  /*
   * Read lines from a file, returning one line per call.  If nullSeparator
   * is set, lines are terminated with zero bytes; otherwise, they're
   * terminated with CR and/or LF characters.
   *
   * This uses a self-sizing buffer to handle arbitrarily long lines.
   */
  /*
   * Bookkeeping for a buffered, line-at-a-time reader.
   *
   * All four cursor pointers refer into the single heap buffer `buff`.
   */
  struct lafe_line_reader {
      /* Input stream; set to NULL once end-of-file has been reached. */
      FILE *f;
      /* Start of the heap-allocated line buffer (NULL until first read). */
      char *buff;
      /* One past the last byte of data currently held in the buffer. */
      char *buff_end;
      /* Start of the next line to be handed to the caller. */
      char *line_start;
      /* End of that line, or buff_end when no terminator was found yet. */
      char *line_end;
      /* Private copy of the path, used in error messages. */
      char *pathname;
      /* Buffer capacity in bytes, not counting the extra terminator byte. */
      size_t buff_length;
      /* Lines separated by null, not CR/CRLF/etc. */
      int nullSeparator;
  };
  52. struct lafe_line_reader *
  53. lafe_line_reader(const char *pathname, int nullSeparator)
  54. {
  55. struct lafe_line_reader *lr;
  56. lr = calloc(1, sizeof(*lr));
  57. if (lr == NULL)
  58. lafe_errc(1, ENOMEM, "Can't open %s", pathname);
  59. lr->nullSeparator = nullSeparator;
  60. lr->pathname = strdup(pathname);
  61. if (strcmp(pathname, "-") == 0)
  62. lr->f = stdin;
  63. else
  64. lr->f = fopen(pathname, "r");
  65. if (lr->f == NULL)
  66. lafe_errc(1, errno, "Couldn't open %s", pathname);
  67. lr->buff_length = 8192;
  68. lr->line_start = lr->line_end = lr->buff_end = lr->buff = NULL;
  69. return (lr);
  70. }
  71. static void
  72. lafe_line_reader_find_eol(struct lafe_line_reader *lr)
  73. {
  74. lr->line_end += strcspn(lr->line_end,
  75. lr->nullSeparator ? "" : "\x0d\x0a");
  76. *lr->line_end = '\0'; /* Noop if line_end == buff_end */
  77. }
  78. const char *
  79. lafe_line_reader_next(struct lafe_line_reader *lr)
  80. {
  81. size_t bytes_wanted, bytes_read, new_buff_size;
  82. char *line_start, *p;
  83. for (;;) {
  84. /* If there's a line in the buffer, return it immediately. */
  85. while (lr->line_end < lr->buff_end) {
  86. line_start = lr->line_start;
  87. lr->line_start = ++lr->line_end;
  88. lafe_line_reader_find_eol(lr);
  89. if (lr->nullSeparator || line_start[0] != '\0')
  90. return (line_start);
  91. }
  92. /* If we're at end-of-file, process the final data. */
  93. if (lr->f == NULL) {
  94. if (lr->line_start == lr->buff_end)
  95. return (NULL); /* No more text */
  96. line_start = lr->line_start;
  97. lr->line_start = lr->buff_end;
  98. return (line_start);
  99. }
  100. /* Buffer only has part of a line. */
  101. if (lr->line_start > lr->buff) {
  102. /* Move a leftover fractional line to the beginning. */
  103. memmove(lr->buff, lr->line_start,
  104. lr->buff_end - lr->line_start);
  105. lr->buff_end -= lr->line_start - lr->buff;
  106. lr->line_end -= lr->line_start - lr->buff;
  107. lr->line_start = lr->buff;
  108. } else {
  109. /* Line is too big; enlarge the buffer. */
  110. new_buff_size = lr->buff_length * 2;
  111. if (new_buff_size <= lr->buff_length)
  112. lafe_errc(1, ENOMEM,
  113. "Line too long in %s", lr->pathname);
  114. lr->buff_length = new_buff_size;
  115. /*
  116. * Allocate one extra byte to allow terminating
  117. * the buffer.
  118. */
  119. p = realloc(lr->buff, new_buff_size + 1);
  120. if (p == NULL)
  121. lafe_errc(1, ENOMEM,
  122. "Line too long in %s", lr->pathname);
  123. lr->buff_end = p + (lr->buff_end - lr->buff);
  124. lr->line_end = p + (lr->line_end - lr->buff);
  125. lr->line_start = lr->buff = p;
  126. }
  127. /* Get some more data into the buffer. */
  128. bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
  129. bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
  130. lr->buff_end += bytes_read;
  131. *lr->buff_end = '\0'; /* Always terminate buffer */
  132. lafe_line_reader_find_eol(lr);
  133. if (ferror(lr->f))
  134. lafe_errc(1, errno, "Can't read %s", lr->pathname);
  135. if (feof(lr->f)) {
  136. if (lr->f != stdin)
  137. fclose(lr->f);
  138. lr->f = NULL;
  139. }
  140. }
  141. }
  142. void
  143. lafe_line_reader_free(struct lafe_line_reader *lr)
  144. {
  145. free(lr->buff);
  146. free(lr->pathname);
  147. free(lr);
  148. }