markdown.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361
  1. /* markdown: a C implementation of John Gruber's Markdown markup language.
  2. *
  3. * Copyright (C) 2007 David L Parsons.
  4. * The redistribution terms are provided in the COPYRIGHT file that must
  5. * be distributed with this source code.
  6. */
  7. #include "config.h"
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include <ctype.h>
  14. #include "cstring.h"
  15. #include "markdown.h"
  16. #include "amalloc.h"
  17. #include "tags.h"
/* comparator-shaped function pointer type (two const void* in, int out).
 * NOTE(review): presumably used to cast comparators for qsort()/bsearch();
 * no use is visible in this part of the file.
 */
typedef int (*stfu)(const void*,const void*);

/* anchor for a list of Paragraphs; code below appends to it with ATTACH()
 * and reads the head back with T()
 */
typedef ANCHOR(Paragraph) ParagraphRoot;

/* forward declarations: both are called before their definitions */
static Paragraph *Pp(ParagraphRoot *, Line *, int);
static Paragraph *compile(Line *, int, MMIOT *);
  22. /* case insensitive string sort for Footnote tags.
  23. */
  24. int
  25. __mkd_footsort(Footnote *a, Footnote *b)
  26. {
  27. int i;
  28. char ac, bc;
  29. if ( S(a->tag) != S(b->tag) )
  30. return S(a->tag) - S(b->tag);
  31. for ( i=0; i < S(a->tag); i++) {
  32. ac = tolower(T(a->tag)[i]);
  33. bc = tolower(T(b->tag)[i]);
  34. if ( isspace(ac) && isspace(bc) )
  35. continue;
  36. if ( ac != bc )
  37. return ac - bc;
  38. }
  39. return 0;
  40. }
  41. /* find the first blank character after position <i>
  42. */
  43. static int
  44. nextblank(Line *t, int i)
  45. {
  46. while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
  47. ++i;
  48. return i;
  49. }
  50. /* find the next nonblank character after position <i>
  51. */
  52. static int
  53. nextnonblank(Line *t, int i)
  54. {
  55. while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
  56. ++i;
  57. return i;
  58. }
  59. /* find the first nonblank character on the Line.
  60. */
  61. int
  62. mkd_firstnonblank(Line *p)
  63. {
  64. return nextnonblank(p,0);
  65. }
  66. static inline int
  67. blankline(Line *p)
  68. {
  69. return ! (p && (S(p->text) > p->dle) );
  70. }
  71. static Line *
  72. skipempty(Line *p)
  73. {
  74. while ( p && (p->dle == S(p->text)) )
  75. p = p->next;
  76. return p;
  77. }
  78. void
  79. ___mkd_tidy(Cstring *t)
  80. {
  81. while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
  82. --S(*t);
  83. }
  84. static struct kw comment = { "!--", 3, 0 };
  85. static struct kw *
  86. isopentag(Line *p)
  87. {
  88. int i=0, len;
  89. char *line;
  90. if ( !p ) return 0;
  91. line = T(p->text);
  92. len = S(p->text);
  93. if ( len < 3 || line[0] != '<' )
  94. return 0;
  95. if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
  96. /* comments need special case handling, because
  97. * the !-- doesn't need to end in a whitespace
  98. */
  99. return &comment;
  100. /* find how long the tag is so we can check to see if
  101. * it's a block-level tag
  102. */
  103. for ( i=1; i < len && T(p->text)[i] != '>'
  104. && T(p->text)[i] != '/'
  105. && !isspace(T(p->text)[i]); ++i )
  106. ;
  107. return mkd_search_tags(T(p->text)+1, i-1);
  108. }
  109. typedef struct _flo {
  110. Line *t;
  111. int i;
  112. } FLO;
  113. #define floindex(x) (x.i)
  114. static int
  115. flogetc(FLO *f)
  116. {
  117. if ( f && f->t ) {
  118. if ( f->i < S(f->t->text) )
  119. return T(f->t->text)[f->i++];
  120. f->t = f->t->next;
  121. f->i = 0;
  122. return flogetc(f);
  123. }
  124. return EOF;
  125. }
  126. static void
  127. splitline(Line *t, int cutpoint)
  128. {
  129. if ( t && (cutpoint < S(t->text)) ) {
  130. Line *tmp = calloc(1, sizeof *tmp);
  131. tmp->next = t->next;
  132. t->next = tmp;
  133. tmp->dle = t->dle;
  134. SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
  135. S(t->text) = cutpoint;
  136. }
  137. }
  138. #define UNCHECK(l) ((l)->flags &= ~CHECKED)
  139. #define UNLESS_FENCED(t) if (fenced) { \
  140. other = 1; l->count += (c == ' ' ? 0 : -1); \
  141. } else { t; }
  142. /*
  143. * walk a line, seeing if it's any of half a dozen interesting regular
  144. * types.
  145. */
  146. static void
  147. checkline(Line *l, DWORD flags)
  148. {
  149. int eol, i;
  150. int dashes = 0, spaces = 0,
  151. equals = 0, underscores = 0,
  152. stars = 0, tildes = 0, other = 0,
  153. backticks = 0, fenced = 0;
  154. l->flags |= CHECKED;
  155. l->kind = chk_text;
  156. l->count = 0;
  157. if (l->dle >= 4) { l->kind=chk_code; return; }
  158. for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
  159. ;
  160. for (i=l->dle; i<eol; i++) {
  161. register int c = T(l->text)[i];
  162. int is_fence_char = 0;
  163. if ( c != ' ' ) l->count++;
  164. switch (c) {
  165. case '-': UNLESS_FENCED(dashes = 1); break;
  166. case ' ': UNLESS_FENCED(spaces = 1); break;
  167. case '=': equals = 1; break;
  168. case '_': UNLESS_FENCED(underscores = 1); break;
  169. case '*': stars = 1; break;
  170. default:
  171. if (flags & MKD_FENCEDCODE) {
  172. switch (c) {
  173. case '~': if (other) return; is_fence_char = 1; tildes = 1; break;
  174. case '`': if (other) return; is_fence_char = 1; backticks = 1; break;
  175. }
  176. if (is_fence_char) {
  177. fenced = 1;
  178. break;
  179. }
  180. }
  181. other = 1;
  182. l->count--;
  183. if (!fenced) return;
  184. }
  185. }
  186. if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
  187. return;
  188. if ( spaces ) {
  189. if ( (underscores || stars || dashes) )
  190. l->kind = chk_hr;
  191. return;
  192. }
  193. if ( stars || underscores ) { l->kind = chk_hr; }
  194. else if ( dashes ) { l->kind = chk_dash; }
  195. else if ( equals ) { l->kind = chk_equal; }
  196. else if ( tildes ) { l->kind = chk_tilde; }
  197. else if ( backticks ) { l->kind = chk_backtick; }
  198. }
  199. /* markdown only does special handling of comments if the comment end
  200. * is at the end of a line
  201. */
  202. static Line *
  203. commentblock(Paragraph *p, int *unclosed)
  204. {
  205. Line *t, *ret;
  206. char *end;
  207. for ( t = p->text; t ; t = t->next) {
  208. if ( end = strstr(T(t->text), "-->") ) {
  209. if ( nextnonblank(t, 3 + (end - T(t->text))) < S(t->text) )
  210. continue;
  211. /*splitline(t, 3 + (end - T(t->text)) );*/
  212. ret = t->next;
  213. t->next = 0;
  214. return ret;
  215. }
  216. }
  217. *unclosed = 1;
  218. return t;
  219. }
/* carve one html block off the front of p->text.
 *
 * <tag> is the tag that opened the block.  Comments are handed to
 * commentblock(); a tag with selfclose set takes exactly one line.
 * Otherwise the text is scanned for open/close occurrences of <tag>
 * (html comments are skipped over) until the nesting depth gets back to
 * zero; the line holding the final close tag is split after its '>' and
 * the chain is cut there.
 *
 * Returns the lines following the block.  *unclosed is cleared on entry
 * and set to 1 (with a 0 return) when the input ends before the block is
 * closed.
 */
static Line *
htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
{
    Line *ret;
    FLO f = { p->text, 0 };
    int c;
    int i, closing, depth=0;

    *unclosed = 0;

    if ( tag == &comment )
        return commentblock(p, unclosed);

    if ( tag->selfclose ) {
        ret = f.t->next;
        f.t->next = 0;
        return ret;
    }

    while ( (c = flogetc(&f)) != EOF ) {
        if ( c == '<' ) {
            /* tag? */
            c = flogetc(&f);
            if ( c == '!' ) { /* comment? */
                if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
                    /* yes */
                    while ( (c = flogetc(&f)) != EOF ) {
                        if ( c == '-' && flogetc(&f) == '-'
                                      && flogetc(&f) == '>')
                            /* consumed whole comment */
                            break;
                    }
                }
            }
            else {
                /* a leading '/' marks a closing tag */
                if ( closing = (c == '/') ) c = flogetc(&f);

                /* match the tag name against tag->id; the input is
                 * uppercased before comparing
                 */
                for ( i=0; i < tag->size; i++, c=flogetc(&f) ) {
                    if ( tag->id[i] != toupper(c) )
                        break;
                }

                /* whole name matched and it isn't just the prefix of
                 * a longer name
                 */
                if ( (i == tag->size) && !isalnum(c) ) {
                    depth = depth + (closing ? -1 : 1);
                    if ( depth == 0 ) {
                        while ( c != EOF && c != '>' ) {
                            /* consume trailing gunk in close tag */
                            c = flogetc(&f);
                        }
                        if ( c == EOF )
                            break;  /* leaves the outer loop: unclosed */
                        if ( !f.t )
                            return 0;
                        /* anything after the '>' goes onto a line
                         * of its own
                         */
                        splitline(f.t, floindex(f));
                        ret = f.t->next;
                        f.t->next = 0;
                        return ret;
                    }
                }
            }
        }
    }
    *unclosed = 1;
    return 0;
}
  279. /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
  280. */
  281. static int
  282. isfootnote(Line *t)
  283. {
  284. int i;
  285. if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
  286. return 0;
  287. for ( ++i; i < S(t->text) ; ++i ) {
  288. if ( T(t->text)[i] == '[' )
  289. return 0;
  290. else if ( T(t->text)[i] == ']' )
  291. return ( T(t->text)[i+1] == ':' ) ;
  292. }
  293. return 0;
  294. }
  295. static inline int
  296. isquote(Line *t)
  297. {
  298. return (t->dle < 4 && T(t->text)[t->dle] == '>');
  299. }
  300. static inline int
  301. iscode(Line *t)
  302. {
  303. return (t->dle >= 4);
  304. }
  305. static inline int
  306. ishr(Line *t, DWORD flags)
  307. {
  308. if ( ! (t->flags & CHECKED) )
  309. checkline(t, flags);
  310. if ( t->count > 2 )
  311. return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
  312. return 0;
  313. }
  314. static int
  315. issetext(Line *t, int *htyp, DWORD flags)
  316. {
  317. Line *n;
  318. /* check for setext-style HEADER
  319. * ======
  320. */
  321. if ( (n = t->next) ) {
  322. if ( !(n->flags & CHECKED) )
  323. checkline(n, flags);
  324. if ( n->kind == chk_dash || n->kind == chk_equal ) {
  325. *htyp = SETEXT;
  326. return 1;
  327. }
  328. }
  329. return 0;
  330. }
  331. static int
  332. ishdr(Line *t, int *htyp, DWORD flags)
  333. {
  334. /* ANY leading `#`'s make this into an ETX header
  335. */
  336. if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
  337. *htyp = ETX;
  338. return 1;
  339. }
  340. /* And if not, maybe it's a SETEXT header instead
  341. */
  342. return issetext(t, htyp, flags);
  343. }
  344. static inline int
  345. end_of_block(Line *t, DWORD flags)
  346. {
  347. int dummy;
  348. if ( !t )
  349. return 0;
  350. return ( (S(t->text) <= t->dle) || ishr(t, flags) || ishdr(t, &dummy, flags) );
  351. }
  352. static Line*
  353. is_discount_dt(Line *t, int *clip, DWORD flags)
  354. {
  355. if ( !(flags & MKD_NODLDISCOUNT)
  356. && t
  357. && t->next
  358. && (S(t->text) > 2)
  359. && (t->dle == 0)
  360. && (T(t->text)[0] == '=')
  361. && (T(t->text)[S(t->text)-1] == '=') ) {
  362. if ( t->next->dle >= 4 ) {
  363. *clip = 4;
  364. return t;
  365. }
  366. else
  367. return is_discount_dt(t->next, clip, flags);
  368. }
  369. return 0;
  370. }
  371. static int
  372. is_extra_dd(Line *t)
  373. {
  374. return (t->dle < 4) && (T(t->text)[t->dle] == ':')
  375. && isspace(T(t->text)[t->dle+1]);
  376. }
  377. static Line*
  378. is_extra_dt(Line *t, int *clip, DWORD flags)
  379. {
  380. if ( flags & MKD_DLEXTRA
  381. && t
  382. && t->next && S(t->text) && T(t->text)[0] != '='
  383. && T(t->text)[S(t->text)-1] != '=') {
  384. Line *x;
  385. if ( iscode(t) || end_of_block(t, flags) )
  386. return 0;
  387. if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
  388. *clip = x->dle+2;
  389. return t;
  390. }
  391. if ( x=is_extra_dt(t->next, clip, flags) )
  392. return x;
  393. }
  394. return 0;
  395. }
  396. static Line*
  397. isdefinition(Line *t, int *clip, int *kind, DWORD flags)
  398. {
  399. Line *ret;
  400. *kind = 1;
  401. if ( ret = is_discount_dt(t,clip,flags) )
  402. return ret;
  403. *kind=2;
  404. return is_extra_dt(t,clip,flags);
  405. }
  406. static int
  407. islist(Line *t, int *clip, DWORD flags, int *list_type)
  408. {
  409. int i, j;
  410. char *q;
  411. if ( end_of_block(t, flags) )
  412. return 0;
  413. if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type,flags) )
  414. return DL;
  415. if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
  416. i = nextnonblank(t, t->dle+1);
  417. *clip = (i > 4) ? 4 : i;
  418. *list_type = UL;
  419. return AL;
  420. }
  421. if ( (j = nextblank(t,t->dle)) > t->dle ) {
  422. if ( T(t->text)[j-1] == '.' ) {
  423. if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
  424. && (j == t->dle + 2)
  425. && isalpha(T(t->text)[t->dle]) ) {
  426. j = nextnonblank(t,j);
  427. *clip = (j > 4) ? 4 : j;
  428. *list_type = AL;
  429. return AL;
  430. }
  431. strtoul(T(t->text)+t->dle, &q, 10);
  432. if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
  433. j = nextnonblank(t,j);
  434. *clip = j;
  435. *list_type = OL;
  436. return AL;
  437. }
  438. }
  439. }
  440. return 0;
  441. }
  442. static Line *
  443. headerblock(Paragraph *pp, int htyp)
  444. {
  445. Line *ret = 0;
  446. Line *p = pp->text;
  447. int i, j;
  448. switch (htyp) {
  449. case SETEXT:
  450. /* p->text is header, p->next->text is -'s or ='s
  451. */
  452. pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
  453. ret = p->next->next;
  454. ___mkd_freeLine(p->next);
  455. p->next = 0;
  456. break;
  457. case ETX:
  458. /* p->text is ###header###, so we need to trim off
  459. * the leading and trailing `#`'s
  460. */
  461. for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
  462. && (i < 6); i++)
  463. ;
  464. pp->hnumber = i;
  465. while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
  466. ++i;
  467. CLIP(p->text, 0, i);
  468. UNCHECK(p);
  469. for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
  470. ;
  471. while ( j && isspace(T(p->text)[j-1]) )
  472. --j;
  473. S(p->text) = j;
  474. ret = p->next;
  475. p->next = 0;
  476. break;
  477. }
  478. return ret;
  479. }
  480. static Line *
  481. codeblock(Paragraph *p)
  482. {
  483. Line *t = p->text, *r;
  484. for ( ; t; t = r ) {
  485. CLIP(t->text,0,4);
  486. t->dle = mkd_firstnonblank(t);
  487. if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
  488. ___mkd_freeLineRange(t,r);
  489. t->next = 0;
  490. return r;
  491. }
  492. }
  493. return t;
  494. }
  495. static int
  496. iscodefence(Line *r, int size, line_type kind, DWORD flags)
  497. {
  498. if ( !(flags & MKD_FENCEDCODE) )
  499. return 0;
  500. if ( !(r->flags & CHECKED) )
  501. checkline(r, flags);
  502. if ( kind )
  503. return (r->kind == kind) && (r->count >= size);
  504. else
  505. return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
  506. }
  507. static Paragraph *
  508. fencedcodeblock(ParagraphRoot *d, Line **ptr, DWORD flags)
  509. {
  510. Line *first, *r;
  511. Paragraph *ret;
  512. first = (*ptr);
  513. /* don't allow zero-length code fences
  514. */
  515. if ( (first->next == 0) || iscodefence(first->next, first->count, 0, flags) )
  516. return 0;
  517. /* find the closing fence, discard the fences,
  518. * return a Paragraph with the contents
  519. */
  520. for ( r = first; r && r->next; r = r->next )
  521. if ( iscodefence(r->next, first->count, first->kind, flags) ) {
  522. (*ptr) = r->next->next;
  523. ret = Pp(d, first->next, CODE);
  524. if (S(first->text) - first->count > 0) {
  525. char *lang_attr = T(first->text) + first->count;
  526. while ( *lang_attr != 0 && *lang_attr == ' ' ) lang_attr++;
  527. ret->lang = strdup(lang_attr);
  528. }
  529. else {
  530. ret->lang = 0;
  531. }
  532. ___mkd_freeLine(first);
  533. ___mkd_freeLine(r->next);
  534. r->next = 0;
  535. return ret;
  536. }
  537. return 0;
  538. }
  539. static int
  540. centered(Line *first, Line *last)
  541. {
  542. if ( first&&last ) {
  543. int len = S(last->text);
  544. if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
  545. && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
  546. CLIP(first->text, 0, 2);
  547. S(last->text) -= 2;
  548. return CENTER;
  549. }
  550. }
  551. return 0;
  552. }
  553. static int
  554. endoftextblock(Line *t, int toplevelblock, DWORD flags)
  555. {
  556. int z;
  557. if ( end_of_block(t, flags) || isquote(t) )
  558. return 1;
  559. /* HORRIBLE STANDARDS KLUDGES:
  560. * 1. non-toplevel paragraphs absorb adjacent code blocks
  561. * 2. Toplevel paragraphs eat absorb adjacent list items,
  562. * but sublevel blocks behave properly.
  563. * (What this means is that we only need to check for code
  564. * blocks at toplevel, and only check for list items at
  565. * nested levels.)
  566. */
  567. return toplevelblock ? 0 : islist(t,&z,flags,&z);
  568. }
  569. static Line *
  570. textblock(Paragraph *p, int toplevel, DWORD flags)
  571. {
  572. Line *t, *next;
  573. for ( t = p->text; t ; t = next ) {
  574. if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
  575. p->align = centered(p->text, t);
  576. t->next = 0;
  577. return next;
  578. }
  579. }
  580. return t;
  581. }
  582. /* length of the id: or class: kind in a special div-not-quote block
  583. */
  584. static int
  585. szmarkerclass(char *p)
  586. {
  587. if ( strncasecmp(p, "id:", 3) == 0 )
  588. return 3;
  589. if ( strncasecmp(p, "class:", 6) == 0 )
  590. return 6;
  591. return 0;
  592. }
  593. /*
  594. * check if the first line of a quoted block is the special div-not-quote
  595. * marker %[kind:]name%
  596. */
  597. #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
  598. static int
  599. isdivmarker(Line *p, int start, DWORD flags)
  600. {
  601. char *s;
  602. int last, i;
  603. if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
  604. return 0;
  605. start = nextnonblank(p, start);
  606. last= S(p->text) - (1 + start);
  607. s = T(p->text) + start;
  608. if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
  609. return 0;
  610. i = szmarkerclass(s+1);
  611. if ( !iscsschar(s[i+1]) )
  612. return 0;
  613. while ( ++i < last )
  614. if ( !(isdigit(s[i]) || iscsschar(s[i])) )
  615. return 0;
  616. return 1;
  617. }
/*
 * accumulate a blockquote.
 *
 * one sick horrible thing about blockquotes is that even though
 * it just takes ^> to start a quote, following lines, if quoted,
 * assume that the prefix is ``> ''. This means that code needs
 * to be indented *5* spaces from the leading '>', but *4* spaces
 * from the start of the line. This does not appear to be
 * documented in the reference implementation, but it's the
 * way the markdown sample web form at Daring Fireball works.
 *
 * Strips the quote prefix from every quoted line of <p>, ends the block
 * at a blank gap that is followed by a non-quote line or by a div
 * marker, and returns the line after the block.  If the first line of
 * the block is a %[kind:]name% div marker, that line is removed and
 * p->ident is set to class="name" or id="name".
 */
static Line *
quoteblock(Paragraph *p, DWORD flags)
{
    Line *t, *q;
    int qp;

    for ( t = p->text; t ; t = q ) {
        if ( isquote(t) ) {
            /* clip leading spaces */
            for (qp = 0; T(t->text)[qp] != '>'; qp ++)
                /* assert: the first nonblank character on this line
                 * will be a >
                 */;
            /* clip '>' */
            qp++;
            /* clip next space, if any */
            if ( T(t->text)[qp] == ' ' )
                qp++;
            CLIP(t->text, 0, qp);
            /* the text changed, so the cached line type is stale */
            UNCHECK(t);
            t->dle = mkd_firstnonblank(t);
        }

        q = skipempty(t->next);

        /* stop at end of input, or after a blank gap unless the gap is
         * followed by a quoted line that isn't a div marker
         */
        if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
            ___mkd_freeLineRange(t, q);
            t = q;
            break;
        }
    }
    if ( isdivmarker(p->text,0,flags) ) {
        char *prefix = "class";
        int i;

        /* unlink the marker line from the block */
        q = p->text;
        p->text = p->text->next;

        if ( (i = szmarkerclass(1+T(q->text))) == 3 )
            /* and this would be an "%id:" prefix */
            prefix="id";

        /* builds prefix="name"; if malloc() fails p->ident is left 0 */
        if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
            sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
                                                      T(q->text)+(i+1) );

        ___mkd_freeLine(q);
    }
    return t;
}
/* extra "does this line start a new item?" test that listitem() callers
 * may supply
 */
typedef int (*linefn)(Line *);

/*
 * pull in a list block. A list block starts with a list marker and
 * runs until the next list marker, the next non-indented paragraph,
 * or EOF. You do not have to indent nonblank lines after the list
 * marker, but multiple paragraphs need to start with a 4-space indent.
 *
 * <indent> is how much to strip from the first line.  <check>, if not
 * null, is an additional test that ends the item when it accepts an
 * under-indented line.  Returns the first line after the item (0 at the
 * end of input); the item's own lines stay on p->text.
 */
static Line *
listitem(Paragraph *p, int indent, DWORD flags, linefn check)
{
    Line *t, *q;
    int clip = indent;
    int z;

    for ( t = p->text; t ; t = q) {
        CLIP(t->text, 0, clip);
        /* the text changed, so the cached line type is stale */
        UNCHECK(t);
        t->dle = mkd_firstnonblank(t);

        /* even though we had to trim a long leader off this item,
         * the indent for trailing paragraphs is still 4...
         */
        if (indent > 4) {
            indent = 4;
        }
        if ( (q = skipempty(t->next)) == 0 ) {
            ___mkd_freeLineRange(t,q);
            return 0;
        }

        /* after a blank line, the next block needs to start with a line
         * that's indented 4(? -- reference implementation allows a 1
         * character indent, but that has unfortunate side effects here)
         * spaces, but after that the line doesn't need any indentation
         */
        if ( q != t->next ) {
            if (q->dle < indent) {
                /* the item ends here; the blank lines are handed
                 * back to the caller along with the rest
                 */
                q = t->next;
                t->next = 0;
                return q;
            }
            /* indent at least 2, and at most as
             * as far as the initial line was indented. */
            indent = clip ? clip : 2;
        }

        /* an under-indented rule, list marker, or line accepted by
         * <check> starts something new -- unless that line is the text
         * of a setext header
         */
        if ( (q->dle < indent) && (ishr(q,flags) || islist(q,&z,flags,&z)
                                                 || (check && (*check)(q)))
                               && !issetext(q,&z,flags) ) {
            q = t->next;
            t->next = 0;
            return q;
        }

        /* strip at most <indent> columns from the next line */
        clip = (q->dle > indent) ? indent : q->dle;
    }
    return t;
}
/* compile a definition list.
 *
 * Starting at top->text, repeatedly picks up the title line(s) found by
 * isdefinition() and the definition body that follows, and builds one
 * LISTITEM paragraph per definition: ->text holds the titles (only for
 * the first definition of a title group) and ->down the compiled body.
 * For markdown-extra lists (kind 2) additional ": definition" bodies
 * are attached to the same titles via the dd_block loop.
 *
 * On return top->text is 0, top->down is the list of items, and the
 * return value is the first line after the list.
 */
static Line *
definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
{
    ParagraphRoot d = { 0, 0 };
    Paragraph *p;
    Line *q = top->text, *text = 0, *labels;
    int z, para;

    while (( labels = q )) {

        if ( (q = isdefinition(labels, &z, &kind, f->flags)) == 0 )
            break;

        if ( (text = skipempty(q->next)) == 0 )
            break;

        /* para: were there blank lines between the title and the body? */
        if ( para = (text != q->next) )
            ___mkd_freeLineRange(q, text);

        /* cut the title lines off from the body */
        q->next = 0;
        if ( kind == 1 /* discount dl */ )
            /* strip the leading and trailing '=' from each title */
            for ( q = labels; q; q = q->next ) {
                CLIP(q->text, 0, 1);
                UNCHECK(q);
                S(q->text)--;
            }

    dd_block:
        p = Pp(&d, text, LISTITEM);

        text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
        p->down = compile(p->text, 0, f);
        /* the titles go on the item; labels is zeroed so that a further
         * dd_block pass for the same titles gets none
         */
        p->text = labels; labels = 0;

        if ( para && p->down ) p->down->align = PARA;

        if ( (q = skipempty(text)) == 0 )
            break;

        /* blank lines after the body: hand them to ___mkd_freeLineRange()
         * through a stack anchor whose ->next is the first blank line
         */
        if ( para = (q != text) ) {
            Line anchor;

            anchor.next = text;
            ___mkd_freeLineRange(&anchor,q);
            text = q;
        }

        /* another definition for the same title(s)? */
        if ( kind == 2 && is_extra_dd(q) )
            goto dd_block;
    }
    top->text = 0;
    top->down = T(d);
    return text;
}
/* compile a bulleted/numbered list.
 *
 * Starting at top->text, collects items with listitem() for as long as
 * the next nonempty line is a list item for which islist() returns
 * <list_class>.  Each item becomes a LISTITEM paragraph whose ->down is
 * the compiled item body.  Items that are separated from a neighbour by
 * blank lines get their first sub-paragraph's align set to PARA.
 *
 * On return top->text is 0, top->down is the list of items, and the
 * return value is the first line after the list.
 */
static Line *
enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
{
    ParagraphRoot d = { 0, 0 };
    Paragraph *p;
    Line *q = top->text, *text;
    int para = 0, z;

    while (( text = q )) {

        p = Pp(&d, text, LISTITEM);
        text = listitem(p, clip, f->flags, 0);

        p->down = compile(p->text, 0, f);
        p->text = 0;

        /* para here still refers to the gap BEFORE this item */
        if ( para && p->down ) p->down->align = PARA;

        /* note that islist() updates clip for the next item */
        if ( (q = skipempty(text)) == 0
                             || islist(q, &clip, f->flags, &z) != list_class )
            break;

        /* blank lines between this item and the next: hand them to
         * ___mkd_freeLineRange() through a stack anchor whose ->next is
         * the first blank line
         */
        if ( para = (q != text) ) {
            Line anchor;

            anchor.next = text;
            ___mkd_freeLineRange(&anchor, q);

            if ( p->down ) p->down->align = PARA;
        }
    }
    top->text = 0;
    top->down = T(d);
    return text;
}
  794. static int
  795. tgood(char c)
  796. {
  797. switch (c) {
  798. case '\'':
  799. case '"': return c;
  800. case '(': return ')';
  801. }
  802. return 0;
  803. }
  804. /*
  805. * add a new (image or link) footnote to the footnote table
  806. */
  807. static Line*
  808. addfootnote(Line *p, MMIOT* f)
  809. {
  810. int j, i;
  811. int c;
  812. Line *np = p->next;
  813. Footnote *foot = &EXPAND(f->footnotes->note);
  814. CREATE(foot->tag);
  815. CREATE(foot->link);
  816. CREATE(foot->title);
  817. foot->flags = foot->height = foot->width = 0;
  818. for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
  819. EXPAND(foot->tag) = T(p->text)[j];
  820. EXPAND(foot->tag) = 0;
  821. S(foot->tag)--;
  822. j = nextnonblank(p, j+2);
  823. if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
  824. /* need to consume all lines until non-indented block? */
  825. while ( j < S(p->text) )
  826. EXPAND(foot->title) = T(p->text)[j++];
  827. goto skip_to_end;
  828. }
  829. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  830. EXPAND(foot->link) = T(p->text)[j++];
  831. EXPAND(foot->link) = 0;
  832. S(foot->link)--;
  833. j = nextnonblank(p,j);
  834. if ( T(p->text)[j] == '=' ) {
  835. sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
  836. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  837. ++j;
  838. j = nextnonblank(p,j);
  839. }
  840. if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
  841. ___mkd_freeLine(p);
  842. p = np;
  843. np = p->next;
  844. j = p->dle;
  845. }
  846. if ( (c = tgood(T(p->text)[j])) ) {
  847. /* Try to take the rest of the line as a comment; read to
  848. * EOL, then shrink the string back to before the final
  849. * quote.
  850. */
  851. ++j; /* skip leading quote */
  852. while ( j < S(p->text) )
  853. EXPAND(foot->title) = T(p->text)[j++];
  854. while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
  855. --S(foot->title);
  856. if ( S(foot->title) ) /* skip trailing quote */
  857. --S(foot->title);
  858. EXPAND(foot->title) = 0;
  859. --S(foot->title);
  860. }
  861. skip_to_end:
  862. ___mkd_freeLine(p);
  863. return np;
  864. }
  865. /*
  866. * allocate a paragraph header, link it to the
  867. * tail of the current document
  868. */
  869. static Paragraph *
  870. Pp(ParagraphRoot *d, Line *ptr, int typ)
  871. {
  872. Paragraph *ret = calloc(sizeof *ret, 1);
  873. ret->text = ptr;
  874. ret->typ = typ;
  875. return ATTACH(*d, ret);
  876. }
  877. static Line*
  878. consume(Line *ptr, int *eaten)
  879. {
  880. Line *next;
  881. int blanks=0;
  882. for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
  883. next = ptr->next;
  884. ___mkd_freeLine(ptr);
  885. }
  886. if ( ptr ) *eaten = blanks;
  887. return ptr;
  888. }
/*
 * top-level compilation; break the document into
 * style, html, and source blocks with footnote links
 * weeded out.
 *
 * Returns the head of the resulting paragraph list.  Ordinary lines are
 * accumulated in <source> and compiled as one SOURCE paragraph each time
 * an html block, or the end of the input, is reached.
 */
static Paragraph *
compile_document(Line *ptr, MMIOT *f)
{
    ParagraphRoot d = { 0, 0 };
    ANCHOR(Line) source = { 0, 0 };
    Paragraph *p = 0;
    struct kw *tag;
    int eaten, unclosed;

    while ( ptr ) {
        if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
            int blocktype;
            /* If we encounter a html/style block, compile and save all
             * of the cached source BEFORE processing the html/style.
             */
            if ( T(source) ) {
                /* the cached chain still runs on into ptr; cut it */
                E(source)->next = 0;
                p = Pp(&d, 0, SOURCE);
                p->down = compile(T(source), 1, f);
                T(source) = E(source) = 0;
            }

            if ( f->flags & MKD_NOSTYLE )
                blocktype = HTML;
            else
                blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
            p = Pp(&d, ptr, blocktype);
            ptr = htmlblock(p, tag, &unclosed);
            if ( unclosed ) {
                /* the block was never closed: treat it as ordinary
                 * markdown source instead
                 */
                p->typ = SOURCE;
                p->down = compile(p->text, 1, f);
                p->text = 0;
            }
        }
        else if ( isfootnote(ptr) ) {
            /* footnotes, like cats, sleep anywhere; pull them
             * out of the input stream and file them away for
             * later processing
             */
            ptr = consume(addfootnote(ptr, f), &eaten);
        }
        else {
            /* source; cache it up to wait for eof or the
             * next html/style block
             */
            ATTACH(source,ptr);
            ptr = ptr->next;
        }
    }
    if ( T(source) ) {
        /* if there's any cached source at EOF, compile
         * it now.
         */
        E(source)->next = 0;
        p = Pp(&d, 0, SOURCE);
        p->down = compile(T(source), 1, f);
    }
    return T(d);
}
  951. static int
  952. first_nonblank_before(Line *j, int dle)
  953. {
  954. return (j->dle < dle) ? j->dle : dle;
  955. }
  956. static int
  957. actually_a_table(MMIOT *f, Line *pp)
  958. {
  959. Line *r;
  960. int j;
  961. int c;
  962. /* tables need to be turned on */
  963. if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
  964. return 0;
  965. /* tables need three lines */
  966. if ( !(pp && pp->next && pp->next->next) ) {
  967. return 0;
  968. }
  969. /* all lines must contain |'s */
  970. for (r = pp; r; r = r->next )
  971. if ( !(r->flags & PIPECHAR) ) {
  972. return 0;
  973. }
  974. /* if the header has a leading |, all lines must have leading |'s */
  975. if ( T(pp->text)[pp->dle] == '|' ) {
  976. for ( r = pp; r; r = r->next )
  977. if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
  978. return 0;
  979. }
  980. }
  981. /* second line must be only whitespace, -, |, or : */
  982. r = pp->next;
  983. for ( j=r->dle; j < S(r->text); ++j ) {
  984. c = T(r->text)[j];
  985. if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
  986. return 0;
  987. }
  988. }
  989. return 1;
  990. }
  991. /*
  992. * break a collection of markdown input into
  993. * blocks of lists, code, html, and text to
  994. * be marked up.
  995. */
  996. static Paragraph *
  997. compile(Line *ptr, int toplevel, MMIOT *f)
  998. {
  999. ParagraphRoot d = { 0, 0 };
  1000. Paragraph *p = 0;
  1001. Line *r;
  1002. int para = toplevel;
  1003. int blocks = 0;
  1004. int hdr_type, list_type, list_class, indent;
  1005. ptr = consume(ptr, &para);
  1006. while ( ptr ) {
  1007. if ( iscode(ptr) ) {
  1008. p = Pp(&d, ptr, CODE);
  1009. if ( f->flags & MKD_1_COMPAT) {
  1010. /* HORRIBLE STANDARDS KLUDGE: the first line of every block
  1011. * has trailing whitespace trimmed off.
  1012. */
  1013. ___mkd_tidy(&p->text->text);
  1014. }
  1015. ptr = codeblock(p);
  1016. }
  1017. else if ( iscodefence(ptr,3,0,f->flags) && (p=fencedcodeblock(&d, &ptr, f->flags)) )
  1018. /* yay, it's already done */ ;
  1019. else if ( ishr(ptr, f->flags) ) {
  1020. p = Pp(&d, 0, HR);
  1021. r = ptr;
  1022. ptr = ptr->next;
  1023. ___mkd_freeLine(r);
  1024. }
  1025. else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
  1026. if ( list_class == DL ) {
  1027. p = Pp(&d, ptr, DL);
  1028. ptr = definition_block(p, indent, f, list_type);
  1029. }
  1030. else {
  1031. p = Pp(&d, ptr, list_type);
  1032. ptr = enumerated_block(p, indent, f, list_class);
  1033. }
  1034. }
  1035. else if ( isquote(ptr) ) {
  1036. p = Pp(&d, ptr, QUOTE);
  1037. ptr = quoteblock(p, f->flags);
  1038. p->down = compile(p->text, 1, f);
  1039. p->text = 0;
  1040. }
  1041. else if ( ishdr(ptr, &hdr_type, f->flags) ) {
  1042. p = Pp(&d, ptr, HDR);
  1043. ptr = headerblock(p, hdr_type);
  1044. }
  1045. else {
  1046. p = Pp(&d, ptr, MARKUP);
  1047. ptr = textblock(p, toplevel, f->flags);
  1048. /* tables are a special kind of paragraph */
  1049. if ( actually_a_table(f, p->text) )
  1050. p->typ = TABLE;
  1051. }
  1052. if ( (para||toplevel) && !p->align )
  1053. p->align = PARA;
  1054. blocks++;
  1055. para = toplevel || (blocks > 1);
  1056. ptr = consume(ptr, &para);
  1057. if ( para && !p->align )
  1058. p->align = PARA;
  1059. }
  1060. return T(d);
  1061. }
  1062. /*
  1063. * the guts of the markdown() function, ripped out so I can do
  1064. * debugging.
  1065. */
  1066. /*
  1067. * prepare and compile `text`, returning a Paragraph tree.
  1068. */
  1069. int
  1070. mkd_compile(Document *doc, DWORD flags)
  1071. {
  1072. if ( !doc )
  1073. return 0;
  1074. flags &= USER_FLAGS;
  1075. if ( doc->compiled ) {
  1076. if ( doc->ctx->flags == flags )
  1077. return 1;
  1078. else {
  1079. if ( doc->code)
  1080. ___mkd_freeParagraph(doc->code);
  1081. if ( doc->ctx->footnotes )
  1082. ___mkd_freefootnotes(doc->ctx);
  1083. }
  1084. }
  1085. doc->compiled = 1;
  1086. memset(doc->ctx, 0, sizeof(MMIOT) );
  1087. doc->ctx->ref_prefix= doc->ref_prefix;
  1088. doc->ctx->cb = &(doc->cb);
  1089. doc->ctx->flags = flags;
  1090. CREATE(doc->ctx->in);
  1091. doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
  1092. doc->ctx->footnotes->reference = 0;
  1093. CREATE(doc->ctx->footnotes->note);
  1094. mkd_initialize();
  1095. doc->code = compile_document(T(doc->content), doc->ctx);
  1096. qsort(T(doc->ctx->footnotes->note), S(doc->ctx->footnotes->note),
  1097. sizeof T(doc->ctx->footnotes->note)[0],
  1098. (stfu)__mkd_footsort);
  1099. memset(&doc->content, 0, sizeof doc->content);
  1100. return 1;
  1101. }