man2html.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. /*
  2. * man2html.c -- convert nroff output to html
  3. *
  4. * Convert backspace/overstrikes to bold.
  5. * Convert backspace/underbar (either order) to italic.
  6. * Convert bar/backspace/dash to `+' (also handle bold rendering)
  7. * Convert bar/backspace/equals to `*' (also handle bold rendering)
  8. * Convert plus/backspace/o to `o'
  9. * Convert `<' to `&lt'; `>' to `&gt'; and `&' to `&amp'
  10. * If -u specified, compress duplicate blank lines.
  11. *
  12. * $Id$
  13. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  15. #define BUFSIZE 4096
  16. main(argc, argv)
  17. int argc;
  18. char *argv[];
  19. {
  20. char buf[BUFSIZE];
  21. static int blank = 1, uopt = 0, lineno = 0, bksp = 0;
  22. /*
  23. * Options.
  24. */
  25. if (argc > 2)
  26. usage();
  27. if (argc == 2) {
  28. if (strcmp(argv[1], "-u") == 0)
  29. uopt = 1;
  30. else
  31. usage();
  32. }
  33. /*
  34. * Enclose in html envelope
  35. */
  36. puts("<BODY><PRE>");
  37. /*
  38. * Process each line. Count unresolved backspaces and issue
  39. * a warning message at the end.
  40. */
  41. while (fgets(buf, BUFSIZE, stdin)) {
  42. lineno++;
  43. /*
  44. * if -u specified, compress duplicate blank lines
  45. */
  46. if (uopt) {
  47. if (blankline(buf)) {
  48. if (blank)
  49. continue;
  50. else
  51. blank = 1;
  52. } else
  53. blank = 0;
  54. }
  55. bksp += html(buf, lineno);
  56. }
  57. /*
  58. * Close html envelope.
  59. */
  60. puts("</PRE></BODY>");
  61. /*
  62. * Warn about any unresolved backspaces.
  63. */
  64. if (bksp > 0)
  65. fprintf(stderr,
  66. "man2html: warning: %d unresolved backspaces\n", bksp);
  67. exit(0);
  68. }
  69. usage()
  70. {
  71. fprintf(stderr, "man2html: usage: man2html [-u]\n");
  72. exit(1);
  73. }
  74. /*
  75. * Given a line, print it out as html.
  76. * Return the number of unprocessed backspaces in this line.
  77. */
  78. int
  79. html(s, lineno)
  80. char *s;
  81. int lineno;
  82. {
  83. char *p;
  84. int bold, italic, bksp;
  85. unsigned char buf2[BUFSIZE];
  86. /*
  87. * two bits in each element of buf2 indicate attributes of
  88. * corresponding character in `s'.
  89. * bit 1, when set, indicates bold
  90. * bit 2, when set, indicates underscore
  91. */
  92. /*
  93. * pass 1: set character attributes (and delete overstrikes)
  94. */
  95. bzero(buf2, BUFSIZE);
  96. for (p = s; *p; p++) {
  97. if (p == s || *p != '\b')
  98. continue;
  99. /* detect a backspace/overstrike (bold <B>) */
  100. if (*(p - 1) == *(p + 1)) {
  101. /* get rid of backspace and overstrike */
  102. strcpy(p - 1, p + 1);
  103. /* flag character as bold */
  104. buf2[p - 1 - s] |= 1;
  105. p--;
  106. /* detect an underbar/backspace (italic <I>) */
  107. } else if (*(p - 1) == '_') {
  108. /* get rid of backspace and underbar */
  109. strcpy(p - 1, p + 1);
  110. /* flag character as underscored */
  111. buf2[p - 1 - s] |= 2;
  112. p--;
  113. /* detect a backspace/underbar (reverse of above) */
  114. } else if (*(p + 1) == '_') {
  115. /* get rid of backspace and underbar */
  116. strcpy(p, p + 2);
  117. /* flag character as underscored */
  118. buf2[p - 1 - s] |= 2;
  119. p--;
  120. /* convert bar/backspace/dash to `+' */
  121. } else if (strncmp(p - 1, "|\b-", 3) == 0
  122. || strncmp(p - 1, "+\b-", 3) == 0
  123. || strncmp(p - 1, "+\b|", 3) == 0) {
  124. /* get rid of backspace and bar, change dash to `+' */
  125. strcpy(p - 1, p + 1);
  126. *(p - 1) = '+';
  127. p--;
  128. /* convert bar/backspace/equals to `*' */
  129. } else if (strncmp(p - 1, "|\b=", 3) == 0
  130. || strncmp(p - 1, "*\b=", 3) == 0
  131. || strncmp(p - 1, "*\b|", 3) == 0) {
  132. strcpy(p - 1, p + 1);
  133. *(p - 1) = '*';
  134. p--;
  135. /* convert plus/backspace/o to bold `o' */
  136. } else if (strncmp(p - 1, "+\bo", 3) == 0
  137. || strncmp(p - 1, "o\b+", 3) == 0) {
  138. strcpy(p - 1, p + 1);
  139. *(p - 1) = 'o';
  140. /* flag character as bold */
  141. buf2[p - 1 - s] |= 1;
  142. p--;
  143. }
  144. }
  145. /*
  146. * pass 2: print out line as html
  147. */
  148. bold = italic = bksp = 0;
  149. for (p = s; *p; p++) {
  150. /* bold */
  151. if (buf2[p - s] & 1) {
  152. if (!bold) {
  153. /*
  154. * an overstrike/underbar is ambiguous.
  155. * change to italic if we are in an italic
  156. * context right now
  157. */
  158. if (italic && *p == '_')
  159. buf2[p - s] |= 2;
  160. else {
  161. fputs("<B>", stdout);
  162. bold = 1;
  163. }
  164. }
  165. } else {
  166. if (bold && *p != ' ') {
  167. fputs("</B>", stdout);
  168. bold = 0;
  169. }
  170. }
  171. /* italic */
  172. if (buf2[p - s] & 2) {
  173. if (!italic) {
  174. fputs("<I>", stdout);
  175. italic = 1;
  176. }
  177. } else {
  178. if (italic && *p != ' ') {
  179. fputs("</I>", stdout);
  180. italic = 0;
  181. }
  182. }
  183. /* print the char, escaping the three html special chars */
  184. switch (*p) {
  185. case '<':
  186. fputs("&lt", stdout);
  187. break;
  188. case '>':
  189. fputs("&gt", stdout);
  190. break;
  191. case '&':
  192. fputs("&amp", stdout);
  193. break;
  194. case '\b':
  195. #ifdef notdef
  196. fprintf(stderr,
  197. "man2html: warning, \\b on line %d\n", lineno);
  198. #endif
  199. bksp++;
  200. default:
  201. putchar(*p);
  202. }
  203. }
  204. if (bold)
  205. fputs("</B>", stdout);
  206. if (italic)
  207. fputs("</I>", stdout);
  208. return(bksp);
  209. }
  210. int
  211. blankline(s)
  212. char *s;
  213. {
  214. while (*s) {
  215. if (!isspace(*s))
  216. return(0);
  217. s++;
  218. }
  219. return(1);
  220. }