md2html.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*
  2. * MD4C: Markdown parser for C
  3. * (http://github.com/mity/md4c)
  4. *
  5. * Copyright (c) 2016-2020 Martin Mitas
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a
  8. * copy of this software and associated documentation files (the "Software"),
  9. * to deal in the Software without restriction, including without limitation
  10. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11. * and/or sell copies of the Software, and to permit persons to whom the
  12. * Software is furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included in
  15. * all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  23. * IN THE SOFTWARE.
  24. */
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <time.h>
  29. #include "md4c-html.h"
  30. #include "cmdline.h"
  31. /* Global options. */
  32. static unsigned parser_flags = 0;
  33. #ifndef MD4C_USE_ASCII
  34. static unsigned renderer_flags = MD_HTML_FLAG_DEBUG | MD_HTML_FLAG_SKIP_UTF8_BOM;
  35. #else
  36. static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
  37. #endif
  38. static int want_fullhtml = 0;
  39. static int want_xhtml = 0;
  40. static int want_toc = 0;
  41. static int want_stat = 0;
  42. MD_TOC_OPTIONS toc_options = { 0, NULL};
  43. /*********************************
  44. *** Simple grow-able buffer ***
  45. *********************************/
  46. /* We render to a memory buffer instead of directly outputting the rendered
  47. * documents, as this allows using this utility for evaluating performance
  48. * of MD4C (--stat option). This allows us to measure just time of the parser,
  49. * without the I/O.
  50. */
  51. struct membuffer {
  52. char* data;
  53. size_t asize;
  54. size_t size;
  55. };
  56. static void
  57. membuf_init(struct membuffer* buf, MD_SIZE new_asize)
  58. {
  59. buf->size = 0;
  60. buf->asize = new_asize;
  61. buf->data = malloc(buf->asize);
  62. if(buf->data == NULL) {
  63. fprintf(stderr, "membuf_init: malloc() failed.\n");
  64. exit(1);
  65. }
  66. }
  67. static void
  68. membuf_fini(struct membuffer* buf)
  69. {
  70. if(buf->data)
  71. free(buf->data);
  72. }
  73. static void
  74. membuf_grow(struct membuffer* buf, size_t new_asize)
  75. {
  76. buf->data = realloc(buf->data, new_asize);
  77. if(buf->data == NULL) {
  78. fprintf(stderr, "membuf_grow: realloc() failed.\n");
  79. exit(1);
  80. }
  81. buf->asize = new_asize;
  82. }
  83. static void
  84. membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
  85. {
  86. if(buf->asize < buf->size + size)
  87. membuf_grow(buf, buf->size + buf->size / 2 + size);
  88. memcpy(buf->data + buf->size, data, size);
  89. buf->size += size;
  90. }
  91. /**********************
  92. *** Main program ***
  93. **********************/
  94. static void
  95. process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
  96. {
  97. membuf_append((struct membuffer*) userdata, text, size);
  98. }
  99. static int
  100. process_file(FILE* in, FILE* out)
  101. {
  102. size_t n;
  103. struct membuffer buf_in = {0};
  104. struct membuffer buf_out = {0};
  105. int ret = -1;
  106. clock_t t0, t1;
  107. membuf_init(&buf_in, 32 * 1024);
  108. /* Read the input file into a buffer. */
  109. while(1) {
  110. if(buf_in.size >= buf_in.asize)
  111. membuf_grow(&buf_in, buf_in.asize + buf_in.asize / 2);
  112. n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
  113. if(n == 0)
  114. break;
  115. buf_in.size += n;
  116. }
  117. /* Input size is good estimation of output size. Add some more reserve to
  118. * deal with the HTML header/footer and tags. */
  119. membuf_init(&buf_out, (MD_SIZE)(buf_in.size + buf_in.size/8 + 64));
  120. /* Parse the document. This shall call our callbacks provided via the
  121. * md_renderer_t structure. */
  122. t0 = clock();
  123. ret = md_html(buf_in.data, (MD_SIZE)buf_in.size, process_output, (void*) &buf_out,
  124. parser_flags, renderer_flags, &toc_options);
  125. t1 = clock();
  126. if(ret != 0) {
  127. fprintf(stderr, "Parsing failed.\n");
  128. goto out;
  129. }
  130. /* Write down the document in the HTML format. */
  131. if(want_fullhtml) {
  132. if(want_xhtml) {
  133. fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  134. fprintf(out, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" "
  135. "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n");
  136. fprintf(out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
  137. } else {
  138. fprintf(out, "<!DOCTYPE html>\n");
  139. fprintf(out, "<html>\n");
  140. }
  141. fprintf(out, "<head>\n");
  142. fprintf(out, "<title></title>\n");
  143. fprintf(out, "<meta name=\"generator\" content=\"md2html\"%s>\n", want_xhtml ? " /" : "");
  144. fprintf(out, "</head>\n");
  145. fprintf(out, "<body>\n");
  146. }
  147. fwrite(buf_out.data, 1, buf_out.size, out);
  148. if(want_fullhtml) {
  149. fprintf(out, "</body>\n");
  150. fprintf(out, "</html>\n");
  151. }
  152. if(want_stat) {
  153. if(t0 != (clock_t)-1 && t1 != (clock_t)-1) {
  154. double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
  155. if (elapsed < 1)
  156. fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
  157. else
  158. fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
  159. }
  160. }
  161. /* Success if we have reached here. */
  162. ret = 0;
  163. out:
  164. membuf_fini(&buf_in);
  165. membuf_fini(&buf_out);
  166. return ret;
  167. }
  168. static const CMDLINE_OPTION cmdline_options[] = {
  169. { 'o', "output", 'o', CMDLINE_OPTFLAG_REQUIREDARG },
  170. { 'f', "full-html", 'f', 0 },
  171. { 'x', "xhtml", 'x', 0 },
  172. { 't', "table-of-content", 't', CMDLINE_OPTFLAG_OPTIONALARG },
  173. { 0, "toc", 't', CMDLINE_OPTFLAG_OPTIONALARG },
  174. { 0, "toc-depth", 'd', CMDLINE_OPTFLAG_REQUIREDARG },
  175. { 's', "stat", 's', 0 },
  176. { 'h', "help", 'h', 0 },
  177. { 'v', "version", 'v', 0 },
  178. { 0, "commonmark", 'c', 0 },
  179. { 0, "github", 'g', 0 },
  180. { 0, "fcollapse-whitespace", 'W', 0 },
  181. { 0, "flatex-math", 'L', 0 },
  182. { 0, "fpermissive-atx-headers", 'A', 0 },
  183. { 0, "fpermissive-autolinks", 'V', 0 },
  184. { 0, "fpermissive-email-autolinks", '@', 0 },
  185. { 0, "fpermissive-url-autolinks", 'U', 0 },
  186. { 0, "fpermissive-www-autolinks", '.', 0 },
  187. { 0, "fstrikethrough", 'S', 0 },
  188. { 0, "ftables", 'T', 0 },
  189. { 0, "ftasklists", 'X', 0 },
  190. { 0, "funderline", '_', 0 },
  191. { 0, "fverbatim-entities", 'E', 0 },
  192. { 0, "fwiki-links", 'K', 0 },
  193. { 0, "fheading-auto-id", '#', 0 },
  194. { 0, "fno-html-blocks", 'F', 0 },
  195. { 0, "fno-html-spans", 'G', 0 },
  196. { 0, "fno-html", 'H', 0 },
  197. { 0, "fno-indented-code", 'I', 0 },
  198. { 0, NULL, 0, 0 }
  199. };
  200. static void
  201. usage(void)
  202. {
  203. printf(
  204. "Usage: md2html [OPTION]... [FILE]\n"
  205. "Convert input FILE (or standard input) in Markdown format to HTML.\n"
  206. "\n"
  207. "General options:\n"
  208. " -o --output=FILE Output file (default is standard output)\n"
  209. " -f, --full-html Generate full HTML document, including header\n"
  210. " -x, --xhtml Generate XHTML instead of HTML\n"
  211. " -t, --table-of-content=MARK, --toc=MARK\n"
  212. " Generate a table of content in place of MARK line\n"
  213. " If no MARK is given, the toc is generated at start\n"
  214. " --toc-depth=D Set the maximum level of heading in the table\n"
  215. " of content. 1 to 6. Default is 3\n"
  216. " -s, --stat Measure time of input parsing\n"
  217. " -h, --help Display this help and exit\n"
  218. " -v, --version Display version and exit\n"
  219. "\n"
  220. "Markdown dialect options:\n"
  221. "(note these are equivalent to some combinations of the flags below)\n"
  222. " --commonmark CommonMark (this is default)\n"
  223. " --github Github Flavored Markdown\n"
  224. "\n"
  225. "Markdown extension options:\n"
  226. " --fcollapse-whitespace\n"
  227. " Collapse non-trivial whitespace\n"
  228. " --flatex-math Enable LaTeX style mathematics spans\n"
  229. " --fpermissive-atx-headers\n"
  230. " Allow ATX headers without delimiting space\n"
  231. " --fpermissive-url-autolinks\n"
  232. " Allow URL autolinks without '<', '>'\n"
  233. " --fpermissive-www-autolinks\n"
  234. " Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n"
  235. " --fpermissive-email-autolinks \n"
  236. " Allow e-mail autolinks without '<', '>' and 'mailto:'\n"
  237. " --fpermissive-autolinks\n"
  238. " Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n"
  239. " --fpermissive-email-autolinks\n"
  240. " --fstrikethrough Enable strike-through spans\n"
  241. " --ftables Enable tables\n"
  242. " --ftasklists Enable task lists\n"
  243. " --funderline Enable underline spans\n"
  244. " --fwiki-links Enable wiki links\n"
  245. " --fheading-auto-id\n"
  246. " Enable heading auto identifier\n"
  247. "\n"
  248. "Markdown suppression options:\n"
  249. " --fno-html-blocks\n"
  250. " Disable raw HTML blocks\n"
  251. " --fno-html-spans\n"
  252. " Disable raw HTML spans\n"
  253. " --fno-html Same as --fno-html-blocks --fno-html-spans\n"
  254. " --fno-indented-code\n"
  255. " Disable indented code blocks\n"
  256. "\n"
  257. "HTML generator options:\n"
  258. " --fverbatim-entities\n"
  259. " Do not translate entities\n"
  260. "\n"
  261. );
  262. }
  263. static void
  264. version(void)
  265. {
  266. printf("%d.%d.%d\n", MD_VERSION_MAJOR, MD_VERSION_MINOR, MD_VERSION_RELEASE);
  267. }
  268. static const char* input_path = NULL;
  269. static const char* output_path = NULL;
  270. static int parse_toc_depth(char const* value){
  271. toc_options.depth = -1;
  272. toc_options.depth = *value - '0';
  273. return (toc_options.depth>0 && toc_options.depth <= 6);
  274. }
  275. static int
  276. cmdline_callback(int opt, char const* value, void* data)
  277. {
  278. switch(opt) {
  279. case 0:
  280. if(input_path) {
  281. fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
  282. fprintf(stderr, "Use --help for more info.\n");
  283. exit(1);
  284. }
  285. input_path = value;
  286. break;
  287. case 'o': output_path = value; break;
  288. case 'f': want_fullhtml = 1; break;
  289. case 'x': want_xhtml = 1; renderer_flags |= MD_HTML_FLAG_XHTML; break;
  290. case 't':
  291. want_toc = 1;
  292. parser_flags |= MD_FLAG_HEADINGAUTOID;
  293. toc_options.toc_placeholder = value;
  294. if(toc_options.depth == 0)
  295. toc_options.depth = 3;
  296. break;
  297. case 'd':
  298. if(!parse_toc_depth(value)){
  299. fprintf(stderr, "Invalid toc-depth: %s\n", value);
  300. fprintf(stderr, "Must be a number in the range 1-6\n");
  301. exit(1);
  302. }
  303. break;
  304. case 's': want_stat = 1; break;
  305. case 'h': usage(); exit(0); break;
  306. case 'v': version(); exit(0); break;
  307. case 'c': parser_flags |= MD_DIALECT_COMMONMARK; break;
  308. case 'g': parser_flags |= MD_DIALECT_GITHUB; break;
  309. case 'E': renderer_flags |= MD_HTML_FLAG_VERBATIM_ENTITIES; break;
  310. case 'A': parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
  311. case 'I': parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break;
  312. case 'F': parser_flags |= MD_FLAG_NOHTMLBLOCKS; break;
  313. case 'G': parser_flags |= MD_FLAG_NOHTMLSPANS; break;
  314. case 'H': parser_flags |= MD_FLAG_NOHTML; break;
  315. case 'W': parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break;
  316. case 'U': parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break;
  317. case '.': parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break;
  318. case '@': parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
  319. case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
  320. case 'T': parser_flags |= MD_FLAG_TABLES; break;
  321. case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break;
  322. case 'L': parser_flags |= MD_FLAG_LATEXMATHSPANS; break;
  323. case 'K': parser_flags |= MD_FLAG_WIKILINKS; break;
  324. case 'X': parser_flags |= MD_FLAG_TASKLISTS; break;
  325. case '_': parser_flags |= MD_FLAG_UNDERLINE; break;
  326. case '#': parser_flags |= MD_FLAG_HEADINGAUTOID; break;
  327. default:
  328. fprintf(stderr, "Illegal option: %s\n", value);
  329. fprintf(stderr, "Use --help for more info.\n");
  330. exit(1);
  331. break;
  332. }
  333. return 0;
  334. }
  335. int
  336. main(int argc, char** argv)
  337. {
  338. FILE* in = stdin;
  339. FILE* out = stdout;
  340. int ret = 0;
  341. if(cmdline_read(cmdline_options, argc, argv, cmdline_callback, NULL) != 0) {
  342. usage();
  343. exit(1);
  344. }
  345. if(input_path != NULL && strcmp(input_path, "-") != 0) {
  346. in = fopen(input_path, "rb");
  347. if(in == NULL) {
  348. fprintf(stderr, "Cannot open %s.\n", input_path);
  349. exit(1);
  350. }
  351. }
  352. if(output_path != NULL && strcmp(output_path, "-") != 0) {
  353. out = fopen(output_path, "wt");
  354. if(out == NULL) {
  355. fprintf(stderr, "Cannot open %s.\n", output_path);
  356. exit(1);
  357. }
  358. }
  359. ret = process_file(in, out);
  360. if(in != stdin)
  361. fclose(in);
  362. if(out != stdout)
  363. fclose(out);
  364. return ret;
  365. }