mkdio.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. /*
  2. * mkdio -- markdown front end input functions
  3. *
  4. * Copyright (C) 2007 David L Parsons.
  5. * The redistribution terms are provided in the COPYRIGHT file that must
  6. * be distributed with this source code.
  7. */
  8. #include "config.h"
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <ctype.h>
  12. #include "cstring.h"
  13. #include "markdown.h"
  14. #include "amalloc.h"
/* shorthand for the ANCHOR(Line) type.  ANCHOR is defined outside this
 * file (presumably in cstring.h -- confirm there what it expands to).
 */
typedef ANCHOR(Line) LineAnchor;
  16. /* create a new blank Document
  17. */
  18. Document*
  19. __mkd_new_Document()
  20. {
  21. Document *ret = calloc(sizeof(Document), 1);
  22. if ( ret ) {
  23. if ( ret->ctx = calloc(sizeof(MMIOT), 1) ) {
  24. ret->magic = VALID_DOCUMENT;
  25. return ret;
  26. }
  27. free(ret);
  28. }
  29. return 0;
  30. }
  31. /* add a line to the markdown input chain, expanding tabs and
  32. * noting the presence of special characters as we go.
  33. */
  34. void
  35. __mkd_enqueue(Document* a, Cstring *line)
  36. {
  37. Line *p = calloc(sizeof *p, 1);
  38. unsigned char c;
  39. int xp = 0;
  40. int size = S(*line);
  41. unsigned char *str = (unsigned char*)T(*line);
  42. CREATE(p->text);
  43. ATTACH(a->content, p);
  44. while ( size-- ) {
  45. if ( (c = *str++) == '\t' ) {
  46. /* expand tabs into ->tabstop spaces. We use ->tabstop
  47. * because the ENTIRE FREAKING COMPUTER WORLD uses editors
  48. * that don't do ^T/^D, but instead use tabs for indentation,
  49. * and, of course, set their tabs down to 4 spaces
  50. */
  51. do {
  52. EXPAND(p->text) = ' ';
  53. } while ( ++xp % a->tabstop );
  54. }
  55. else if ( c >= ' ' ) {
  56. if ( c == '|' )
  57. p->flags |= PIPECHAR;
  58. EXPAND(p->text) = c;
  59. ++xp;
  60. }
  61. }
  62. EXPAND(p->text) = 0;
  63. S(p->text)--;
  64. p->dle = mkd_firstnonblank(p);
  65. }
  66. /* trim leading blanks from a header line
  67. */
  68. void
  69. __mkd_header_dle(Line *p)
  70. {
  71. CLIP(p->text, 0, 1);
  72. p->dle = mkd_firstnonblank(p);
  73. }
  74. /* build a Document from any old input.
  75. */
  76. typedef int (*getc_func)(void*);
  77. Document *
  78. populate(getc_func getc, void* ctx, int flags)
  79. {
  80. Cstring line;
  81. Document *a = __mkd_new_Document();
  82. int c;
  83. int pandoc = 0;
  84. if ( !a ) return 0;
  85. a->tabstop = (flags & MKD_TABSTOP) ? 4 : TABSTOP;
  86. CREATE(line);
  87. while ( (c = (*getc)(ctx)) != EOF ) {
  88. if ( c == '\n' ) {
  89. if ( pandoc != EOF && pandoc < 3 ) {
  90. if ( S(line) && (T(line)[0] == '%') )
  91. pandoc++;
  92. else
  93. pandoc = EOF;
  94. }
  95. __mkd_enqueue(a, &line);
  96. S(line) = 0;
  97. }
  98. else if ( isprint(c) || isspace(c) || (c & 0x80) )
  99. EXPAND(line) = c;
  100. }
  101. if ( S(line) )
  102. __mkd_enqueue(a, &line);
  103. DELETE(line);
  104. if ( (pandoc == 3) && !(flags & (MKD_NOHEADER|MKD_STRICT)) ) {
  105. /* the first three lines started with %, so we have a header.
  106. * clip the first three lines out of content and hang them
  107. * off header.
  108. */
  109. Line *headers = T(a->content);
  110. a->title = headers; __mkd_header_dle(a->title);
  111. a->author= headers->next; __mkd_header_dle(a->author);
  112. a->date = headers->next->next; __mkd_header_dle(a->date);
  113. T(a->content) = headers->next->next->next;
  114. }
  115. return a;
  116. }
  117. /* convert a file into a linked list
  118. */
  119. Document *
  120. mkd_in(FILE *f, DWORD flags)
  121. {
  122. return populate((getc_func)fgetc, f, flags & INPUT_MASK);
  123. }
  124. /* return a single character out of a buffer
  125. */
  126. int
  127. __mkd_io_strget(struct string_stream *in)
  128. {
  129. if ( !in->size ) return EOF;
  130. --(in->size);
  131. return *(in->data)++;
  132. }
  133. /* convert a block of text into a linked list
  134. */
  135. Document *
  136. mkd_string(const char *buf, int len, DWORD flags)
  137. {
  138. struct string_stream about;
  139. about.data = buf;
  140. about.size = len;
  141. return populate((getc_func)__mkd_io_strget, &about, flags & INPUT_MASK);
  142. }
  143. /* write the html to a file (xmlified if necessary)
  144. */
  145. int
  146. mkd_generatehtml(Document *p, FILE *output)
  147. {
  148. char *doc;
  149. int szdoc;
  150. DO_OR_DIE( szdoc = mkd_document(p,&doc) );
  151. if ( p->ctx->flags & MKD_CDATA )
  152. DO_OR_DIE( mkd_generatexml(doc, szdoc, output) );
  153. else if ( fwrite(doc, szdoc, 1, output) != 1 )
  154. return EOF;
  155. DO_OR_DIE( putc('\n', output) );
  156. return 0;
  157. }
  158. /* convert some markdown text to html
  159. */
  160. int
  161. markdown(Document *document, FILE *out, int flags)
  162. {
  163. if ( mkd_compile(document, flags) ) {
  164. mkd_generatehtml(document, out);
  165. mkd_cleanup(document);
  166. return 0;
  167. }
  168. return -1;
  169. }
  170. /* write out a Cstring, mangled into a form suitable for `<a href=` or `<a id=`
  171. */
  172. void
  173. mkd_string_to_anchor(char *s, int len, mkd_sta_function_t outchar,
  174. void *out, int labelformat,
  175. DWORD flags)
  176. {
  177. static const unsigned char hexchars[] = "0123456789abcdef";
  178. unsigned char c;
  179. int i, size;
  180. char *line;
  181. size = mkd_line(s, len, &line, IS_LABEL);
  182. if ( !(flags & MKD_URLENCODEDANCHOR)
  183. && labelformat
  184. && (size>0) && !isalpha(line[0]) )
  185. (*outchar)('L',out);
  186. for ( i=0; i < size ; i++ ) {
  187. c = line[i];
  188. if ( labelformat ) {
  189. if ( isalnum(c) || (c == '_') || (c == ':') || (c == '-') || (c == '.' ) )
  190. (*outchar)(c, out);
  191. else if ( flags & MKD_URLENCODEDANCHOR ) {
  192. (*outchar)('%', out);
  193. (*outchar)(hexchars[c >> 4 & 0xf], out);
  194. (*outchar)(hexchars[c & 0xf], out);
  195. }
  196. else
  197. (*outchar)('.', out);
  198. }
  199. else
  200. (*outchar)(c,out);
  201. }
  202. if (line)
  203. free(line);
  204. }
  205. /* ___mkd_reparse() a line
  206. */
  207. static void
  208. mkd_parse_line(char *bfr, int size, MMIOT *f, int flags)
  209. {
  210. ___mkd_initmmiot(f, 0);
  211. f->flags = flags & USER_FLAGS;
  212. ___mkd_reparse(bfr, size, 0, f, 0);
  213. ___mkd_emblock(f);
  214. }
  215. /* ___mkd_reparse() a line, returning it in malloc()ed memory
  216. */
  217. int
  218. mkd_line(char *bfr, int size, char **res, DWORD flags)
  219. {
  220. MMIOT f;
  221. int len;
  222. mkd_parse_line(bfr, size, &f, flags);
  223. if ( len = S(f.out) ) {
  224. /* kludge alert; we know that T(f.out) is malloced memory,
  225. * so we can just steal it away. This is awful -- there
  226. * should be an opaque method that transparently moves
  227. * the pointer out of the embedded Cstring.
  228. */
  229. EXPAND(f.out) = 0;
  230. *res = T(f.out);
  231. T(f.out) = 0;
  232. S(f.out) = ALLOCATED(f.out) = 0;
  233. }
  234. else {
  235. *res = 0;
  236. len = EOF;
  237. }
  238. ___mkd_freemmiot(&f, 0);
  239. return len;
  240. }
  241. /* ___mkd_reparse() a line, writing it to a FILE
  242. */
  243. int
  244. mkd_generateline(char *bfr, int size, FILE *output, DWORD flags)
  245. {
  246. MMIOT f;
  247. int status;
  248. mkd_parse_line(bfr, size, &f, flags);
  249. if ( flags & MKD_CDATA )
  250. status = mkd_generatexml(T(f.out), S(f.out), output) != EOF;
  251. else
  252. status = fwrite(T(f.out), S(f.out), 1, output) == S(f.out);
  253. ___mkd_freemmiot(&f, 0);
  254. return status ? 0 : EOF;
  255. }
  256. /* set the url display callback
  257. */
  258. void
  259. mkd_e_url(Document *f, mkd_callback_t edit)
  260. {
  261. if ( f )
  262. f->cb.e_url = edit;
  263. }
  264. /* set the url options callback
  265. */
  266. void
  267. mkd_e_flags(Document *f, mkd_callback_t edit)
  268. {
  269. if ( f )
  270. f->cb.e_flags = edit;
  271. }
  272. /* set the url display/options deallocator
  273. */
  274. void
  275. mkd_e_free(Document *f, mkd_free_t dealloc)
  276. {
  277. if ( f )
  278. f->cb.e_free = dealloc;
  279. }
  280. /* set the url display/options context data field
  281. */
  282. void
  283. mkd_e_data(Document *f, void *data)
  284. {
  285. if ( f )
  286. f->cb.e_data = data;
  287. }
  288. /* set the href prefix for markdown extra style footnotes
  289. */
  290. void
  291. mkd_ref_prefix(Document *f, char *data)
  292. {
  293. if ( f )
  294. f->ref_prefix = data;
  295. }
  296. int
  297. mkd_generatehtml_str(Document *p,void *out,void (*mywrite)(char const *,int size,void*))
  298. {
  299. char *doc;
  300. int szdoc;
  301. if ( (szdoc = mkd_document(p, &doc)) != EOF ) {
  302. if ( p->ctx->flags & MKD_CDATA ) {
  303. char c;
  304. while ( szdoc-- > 0 ) {
  305. if ( !isascii(c = *doc++) )
  306. continue;
  307. switch (c) {
  308. case '<': mywrite("&lt;",4,out); break;
  309. case '>': mywrite("&gt;",4,out); break;
  310. case '&': mywrite("&amp;",5,out); break;
  311. case '"': mywrite("&quot;",6,out); break;
  312. case '\'':mywrite("&apos;",6,out); break;
  313. default: mywrite(&c,1,out); break;
  314. }
  315. }
  316. }
  317. else{
  318. mywrite(doc,szdoc,out);
  319. }
  320. mywrite("\n",1, out);
  321. return 0;
  322. }
  323. return -1;
  324. }
  325. int
  326. markdown_str(Document *document, void *out,void (*mywrite)(char const *,int size,void*),int flags)
  327. {
  328. if ( mkd_compile(document, flags) ) {
  329. mkd_generatehtml_str(document,out,mywrite);
  330. mkd_cleanup(document);
  331. return 0;
  332. }
  333. return -1;
  334. }