markdown.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. <?php
  2. /*
  3. Copyright (c) 2009-2014 F3::Factory/Bong Cosca, All rights reserved.
  4. This file is part of the Fat-Free Framework (http://fatfree.sf.net).
  5. THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF
  6. ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  7. IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR
  8. PURPOSE.
  9. Please see the license.txt file for more information.
  10. */
  11. //! Markdown-to-HTML converter
  12. class Markdown extends Prefab {
  //! Block-level parsing rules (block name => regex); filled in by build()
  protected $blocks;
  //! Special character sequences mapped to HTML entities; filled in by esc()
  protected $special;
  /**
   * Render a blockquote block
   *
   * The leading ">" marker is removed from every line and the remaining
   * text is fed back through build(), so quoted text may itself contain
   * block-level markdown.
   * @return string Blockquote markup, or '' when nothing is left
   * @param $str string Raw blockquote block, markers included
   **/
  protected function _blockquote($str) {
    // Strip the optional space, the ">" and one following space per line
    $inner=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
    if (!strlen($inner))
      return '';
    return '<blockquote>'.$this->build($inner).'</blockquote>'."\n\n";
  }
  /**
   * Render a whitespace-prefixed (indented) code block
   * @return string <pre><code> markup, or '' when nothing is left
   * @param $str string Raw block; each line starts with 4 spaces or a tab
   **/
  protected function _pre($str) {
    // Escape first, then remove one level of indentation from every line
    $code=preg_replace(
      '/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/',
      '\1',
      $this->esc($str)
    );
    if (!strlen($code))
      return '';
    $body=$this->esc($this->snip($code));
    return '<pre><code>'.$body.'</code></pre>'."\n\n";
  }
  /**
   * Render a fenced code block
   *
   * When the framework's HIGHLIGHT variable is truthy, the language hint
   * selects a highlighter (php, apache, html, ini); any other hint, or
   * highlighting being off, yields plainly escaped code.
   * @return string <pre> markup followed by a blank line
   * @param $hint string Language hint that followed the opening fence
   * @param $str string Code between the fences
   **/
  protected function _fence($hint,$str) {
    $str=$this->snip($str);
    $fw=Base::instance();
    // An empty language always lands in the default (escape-only) branch
    $lang=$fw->get('HIGHLIGHT')?strtolower($hint):'';
    switch ($lang) {
      case 'php':
        // Delegated to the framework; output is used as returned
        $str=$fw->highlight($str);
        break;
      case 'apache':
        $str='<code>'.$this->fenceApache($str).'</code>';
        break;
      case 'html':
        $str='<code>'.$this->fenceHtml($str).'</code>';
        break;
      case 'ini':
        $str='<code>'.$this->fenceIni($str).'</code>';
        break;
      default:
        $str='<code>'.$this->esc($str).'</code>';
        break;
    }
    return '<pre>'.$str.'</pre>'."\n\n";
  }
  /**
   * Highlight Apache configuration: <Section args> lines and
   * "Directive value" lines; only lines matching one of these two
   * shapes are emitted
   * @return string
   * @param $str string
   **/
  protected function fenceApache($str) {
    preg_match_all('/(?<=^|\n)(\h*)'.
      '(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
      '(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
      $str,$matches,PREG_SET_ORDER);
    $out='';
    foreach ($matches as $match) {
      // Leading indentation is kept verbatim
      $out.=$match[1];
      if ($match[3]) {
        // Section tag: bracket+name, optional arguments, closing bracket
        $out.='<span class="section">'.
          $this->esc($match[2]).$match[3].'</span>';
        if ($match[4])
          $out.='<span class="data">'.$this->esc($match[4]).'</span>';
        $out.='<span class="section">'.$this->esc($match[5]).'</span>';
      }
      else
        // Directive name (word characters only) and its value
        $out.='<span class="directive">'.$match[6].'</span>'.
          '<span class="data">'.$this->esc($match[7]).'</span>';
      // Trailing whitespace/line breaks
      $out.=$match[8];
    }
    return $out;
  }
  /**
   * Highlight HTML/XML: tag names, attribute names and attribute values
   * get their own spans; everything between tags is escaped
   * @return string
   * @param $str string
   **/
  protected function fenceHtml($str) {
    preg_match_all(
      '/(?:(?:<(\/?)(\w+)'.
      '((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
      '\h+.+?)(\h*\/?)>)|(.+?))/s',
      $str,$matches,PREG_SET_ORDER
    );
    $out='';
    foreach ($matches as $match) {
      // Group 5 is present only when the text alternative matched
      if (isset($match[5])) {
        $out.=$this->esc($match[5]);
        continue;
      }
      $out.='<span class="xml_tag">&lt;'.$match[1].$match[2].'</span>';
      if ($match[3]) {
        preg_match_all(
          '/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
          '(".+?")|(.+)))/',
          $match[3],$parts,PREG_SET_ORDER
        );
        foreach ($parts as $part) {
          $out.=' ';
          if (isset($part[3]))
            // Unquoted remainder; escaped because it is arbitrary text
            $out.='<span class="xml_tag">'.
              $this->esc($part[3]).'</span>';
          else {
            if (!empty($part[1]))
              $out.='<span class="xml_attr">'.$part[1].'</span>=';
            // Quoted values may contain markup characters, so escape
            $out.='<span class="xml_data">'.
              $this->esc($part[2]).'</span>';
          }
        }
      }
      $out.='<span class="xml_tag">'.$match[4].'&gt;</span>';
    }
    return $out;
  }
  /**
   * Highlight INI text: comments, [sections] and key=value pairs;
   * any other matched line (such as a PHP tag line) is escaped only
   * @return string
   * @param $str string
   **/
  protected function fenceIni($str) {
    preg_match_all(
      '/(?<=^|\n)(?:'.
      '(;[^\n]*)|(?:<\?php.+?\?>?)|'.
      '(?:\[(.+?)\])|'.
      '(.+?)\h*=\h*'.
      '((?:\\\\\h*\r?\n|.+?)*)'.
      ')((?:\r?\n)+|$)/',
      $str,$matches,PREG_SET_ORDER
    );
    $out='';
    foreach ($matches as $match) {
      // Group 5 (line break run) always takes part in the match
      $eol=$match[5];
      // strlen() instead of truthiness so that "0" is not discarded
      if (strlen($match[1]))
        $out.='<span class="comment">'.$this->esc($match[1]).'</span>';
      elseif (strlen($match[2]))
        $out.='<span class="ini_section">['.$this->esc($match[2]).']'.
          '</span>';
      elseif (strlen($match[3])) {
        $out.='<span class="ini_key">'.$this->esc($match[3]).'</span>=';
        if (strlen($match[4]))
          $out.='<span class="ini_value">'.
            $this->esc($match[4]).'</span>';
      }
      else
        // Whole match minus its line break, which is appended below
        $out.=$this->esc(
          substr($match[0],0,strlen($match[0])-strlen($eol)));
      $out.=$eol;
    }
    return $out;
  }
  /**
   * Render a horizontal rule
   * @return string Fixed <hr /> markup followed by a blank line
   **/
  protected function _hr() {
    $rule='<hr />';
    return $rule."\n\n";
  }
  /**
   * Render an atx-style heading ("# Title")
   *
   * The heading level equals the length of $type; the id attribute is
   * the slug of the raw heading text.
   * @return string
   * @param $type string Run of "#" characters that introduced the heading
   * @param $str string Heading text
   **/
  protected function _atx($type,$str) {
    $tag='h'.strlen($type);
    $id=Web::instance()->slug($str);
    $text=$this->scan($str);
    return '<'.$tag.' id="'.$id.'">'.$text.'</'.$tag.'>'."\n\n";
  }
  /**
   * Render a setext-style heading (text underlined with "=" or "-")
   *
   * "=" produces a level-1 heading, "-" a level-2 heading; the id
   * attribute is the slug of the raw heading text.
   * @return string
   * @param $str string Heading text
   * @param $type string Underline character
   **/
  protected function _setext($str,$type) {
    // Position within "=-" (0 or 1) plus one gives the level
    $tag='h'.(strpos('=-',$type)+1);
    $id=Web::instance()->slug($str);
    $text=$this->scan($str);
    return '<'.$tag.' id="'.$id.'">'.$text.'</'.$tag.'>'."\n\n";
  }
  /**
   * Render an ordered/unordered list
   *
   * The list type and tight/loose layout are decided by the first item
   * only. A rule-like line ("* * *", "- - -") inside the block ends the
   * list: the items so far are emitted, then <hr />, and the remainder
   * is handed back to build().
   * @return string List markup, or '' when no item was found
   * @param $str string Raw list block as captured by build()
   **/
  protected function _li($str) {
    // Initialize list parser
    $len=strlen($str);
    $ptr=0;
    $dst='';
    $first=TRUE;
    $tight=TRUE;
    $type='ul';
    // Main loop
    while ($ptr<$len) {
      // Remaining text, computed once per iteration
      $rest=substr($str,$ptr);
      if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
        $rest,$match)) {
        $ptr+=strlen($match[0]);
        // Embedded horizontal rule
        return (strlen($dst)?
          ('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').
          '<hr />'."\n\n".$this->build(substr($str,$ptr));
      }
      elseif (preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
        '(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
        $rest,$match)) {
        // Remove one indentation level from the continuation lines
        $match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
        // Does the continuation start with a block other than a paragraph?
        $found=FALSE;
        foreach (array_slice($this->blocks,0,-1) as $regex)
          if (preg_match($regex,$match[3])) {
            $found=TRUE;
            break;
          }
        if ($first) {
          // First pass; "1." style markers are numeric strings
          if (is_numeric($match[1]))
            $type='ol';
          if (preg_match('/\n{2,}$/',$match[2].
            ($found?'':$match[3])))
            // Loose structure; Use paragraphs
            $tight=FALSE;
          $first=FALSE;
        }
        $ptr+=strlen($match[0]);
        $tmp=$this->snip($match[2].$match[3]);
        if ($tight) {
          if ($found)
            // Only the nested block goes through the block parser
            $tmp=$match[2].$this->build($this->snip($match[3]));
        }
        else
          $tmp=$this->build($tmp);
        $dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
      }
      else
        // Nothing consumed: stop rather than loop forever
        break;
    }
    return strlen($dst)?
      ('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'';
  }
  /**
   * Pass a raw HTML block through untouched
   * @return string The argument, unmodified
   * @param $str string Raw HTML block
   **/
  protected function _raw($str) {
    // Raw markup is deliberately neither escaped nor scanned
    return $str;
  }
  /**
   * Render a paragraph
   *
   * If a later line of the paragraph starts with ">" or "#", the text
   * before it becomes a paragraph of its own and the rest is returned to
   * build(). Plain text is escaped, while inline tags and
   * [text](target) constructs are left as they are for scan().
   * @return string <p> markup, or '' for blank input
   * @param $str string Raw paragraph block
   **/
  protected function _p($str) {
    $str=trim($str);
    if (!strlen($str))
      return '';
    if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
      // Blockquote/heading glued to the paragraph without a blank line
      return $this->_p($parts[1]).$this->build($parts[2]);
    $self=$this;
    $str=preg_replace_callback(
      '/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
      '(.+)/s',
      function($expr) use($self) {
        // Groups 1 and 2 are always present (empty when the plain-text
        // alternative matched); group 2 holds markup and stays raw
        $out=$self->esc($expr[1]).$expr[2];
        // Group 3 exists only for the plain-text alternative
        if (isset($expr[3]))
          $out.=$self->esc($expr[3]);
        return $out;
      },
      $str
    );
    return '<p>'.$this->scan($str).'</p>'."\n\n";
  }
  /**
   * Process strong/em/strikethrough spans
   *
   * ~~text~~ becomes <del>; text wrapped in one, two or three "*" or "_"
   * becomes <em>, <strong> or <strong><em>. Substitution is repeated
   * until the string stops changing, which resolves nested spans.
   * @return string
   * @param $str string
   **/
  protected function _text($str) {
    do {
      $prev=$str;
      $next=preg_replace(
        '/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
        '<del>\1</del>',
        $prev
      );
      if ($next!==NULL)
        $next=preg_replace_callback(
          '/(?<!\\\\)([*_]{1,3})(.*?)(?!\\\\)\1(?=[\s[:punct:]]|$)/',
          function($expr) {
            switch (strlen($expr[1])) {
              case 1:
                return '<em>'.$expr[2].'</em>';
              case 2:
                return '<strong>'.$expr[2].'</strong>';
              case 3:
                return '<strong><em>'.$expr[2].'</em></strong>';
            }
            // Not reachable with a 1-3 character marker; keep text as is
            return $expr[0];
          },
          $next
        );
      if ($next===NULL)
        // PCRE failure: keep the last good text instead of losing it
        return $prev;
      $str=$next;
    // Strict comparison: plain string identity, no numeric juggling
    } while ($str!==$prev);
    return $str;
  }
  /**
   * Process image spans: ![alt](url "title")
   *
   * The alt and title attributes are emitted only when non-empty. The
   * URL is escaped the same way _a() escapes href, so a quote in the
   * URL cannot terminate the src attribute.
   * @return string
   * @param $str string
   **/
  protected function _img($str) {
    $self=$this;
    return preg_replace_callback(
      '/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
      function($expr) use($self) {
        return '<img src="'.$self->esc($expr[2]).'"'.
          (empty($expr[1])?
            '':
            (' alt="'.$self->esc($expr[1]).'"')).
          (empty($expr[3])?
            '':
            (' title="'.$self->esc($expr[3]).'"')).' />';
      },
      $str
    );
  }
  /**
   * Process anchor spans: [text](url "title")
   *
   * The link text is passed through scan() again, so it may contain
   * other inline spans; href and title are escaped.
   * @return string
   * @param $str string
   **/
  protected function _a($str) {
    $self=$this;
    $render=function($expr) use($self) {
      $href=$self->esc($expr[2]);
      // Title is optional
      $title='';
      if (!empty($expr[3]))
        $title=' title="'.$self->esc($expr[3]).'"';
      $label=$self->scan($expr[1]);
      return '<a href="'.$href.'"'.$title.'>'.$label.'</a>';
    };
    return preg_replace_callback(
      '/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
      $render,
      $str
    );
  }
  /**
   * Auto-convert <url> into a link
   *
   * Angle-bracketed text is linked only when parse_url() reports a
   * scheme for it. Angle brackets inside a backtick code span are
   * matched by the first alternative and returned unchanged.
   * @return string
   * @param $str string
   **/
  protected function _auto($str) {
    $self=$this;
    return preg_replace_callback(
      '/`.*?<(.+?)>.*?`|<(.+?)>/',
      function($expr) use($self) {
        // Group 2 is present only for a bare <...> outside a code span;
        // testing it directly avoids misreading a captured "0"
        if (isset($expr[2]) && parse_url($expr[2],PHP_URL_SCHEME)) {
          $url=$self->esc($expr[2]);
          return '<a href="'.$url.'">'.$url.'</a>';
        }
        return $expr[0];
      },
      $str
    );
  }
  /**
   * Process code spans: `code` or `` code ``
   * @return string
   * @param $str string
   **/
  protected function _code($str) {
    $self=$this;
    return preg_replace_callback(
      '/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
      function($expr) use($self) {
        // Group 2 exists only when the single-backtick form matched;
        // isset() rather than empty() so that a span containing just
        // "0" is not treated as missing
        $code=isset($expr[2])?$expr[2]:$expr[1];
        return '<code>'.$self->esc($code).'</code>';
      },
      $str
    );
  }
  /**
   * Convert characters to HTML entities
   *
   * First replaces the sequences listed in $special (case-insensitive),
   * then applies htmlspecialchars() with ENT_COMPAT, the framework's
   * ENCODING value and double-encoding switched off. Public because the
   * closures in this class call it through a captured instance.
   * @return string
   * @param $str string
   **/
  public function esc($str) {
    if (!$this->special) {
      // Default table, set on first use
      $this->special=array(
        '...'=>'&hellip;',
        '(tm)'=>'&trade;',
        '(r)'=>'&reg;',
        '(c)'=>'&copy;'
      );
    }
    $text=$str;
    foreach ($this->special as $needle=>$entity) {
      $pattern='/'.preg_quote($needle,'/').'/i';
      $text=preg_replace($pattern,$entity,$text);
    }
    $charset=Base::instance()->get('ENCODING');
    return htmlspecialchars($text,ENT_COMPAT,$charset,FALSE);
  }
  /**
   * Reduce multiple line feeds
   *
   * Line feeds that follow another line feed are collapsed into one
   * (leaving at most one empty line), and the line feeds at the end of
   * the string are collapsed into a single one.
   * @return string
   * @param $str string
   **/
  protected function snip($str) {
    $runs='/(?:(?<=\n)\n+)|\n+$/';
    return preg_replace($runs,"\n",$str);
  }
  /**
   * Scan text for convertible inline spans
   *
   * Applies the span handlers in a fixed order: images, anchors,
   * emphasis/strikethrough, automatic links, code spans. Images come
   * before anchors because the image syntax contains the anchor syntax.
   * Public because the closures in this class call it through a
   * captured instance.
   * @return string
   * @param $str string
   **/
  public function scan($str) {
    $str=$this->_img($str);
    $str=$this->_a($str);
    $str=$this->_text($str);
    $str=$this->_auto($str);
    $str=$this->_code($str);
    return $str;
  }
  /**
   * Assemble blocks
   *
   * Walks the text from the start. At each position it either consumes
   * a reference definition ("[label]: url "title"") and resolves the
   * matching [text][label] / ![alt][label] references in the output
   * produced so far, or it tries the block regexes in order and
   * dispatches the first match to the handler named "_" plus the block
   * key. The paragraph rule comes last and accepts any non-empty text.
   * @return string
   * @param $str string
   **/
  protected function build($str) {
    if (!$this->blocks) {
      // Regexes for capturing entire blocks
      $this->blocks=array(
        'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
        'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
        'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
          '(?:\n+|$)/s',
        'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
        'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
        'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
        'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
          '(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
        'raw'=>'/^((?:<!--.+?-->|'.
          '<(address|article|aside|audio|blockquote|canvas|dd|'.
          'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
          'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
          'section|table|tfoot|ul|video).*?'.
          '(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
          '\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
        'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
      );
    }
    $self=$this;
    // Treat lines with nothing but whitespaces as empty lines
    $str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
    // Initialize block parser
    $len=strlen($str);
    $ptr=0;
    $dst='';
    // Main loop
    while ($ptr<$len) {
      $start=$ptr;
      // Remaining text, computed once per iteration
      $rest=substr($str,$ptr);
      if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
        '(?:"([^\n]*)")?(?:\n+|$)/s',$rest,$match)) {
        // Reference-style link; Backtrack
        $ptr+=strlen($match[0]);
        // Let horizontal whitespace in the label match any whitespace,
        // line breaks included
        $ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
        // Substitute until the output no longer changes
        do {
          $tmp=$dst;
          $dst=preg_replace_callback(
            '/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
            '(!?)(?:\[([^\[\]]+)\]\s*)?'.
            '(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
            function($expr) use($match,$self) {
              $title=empty($match[3])?
                '':
                (' title="'.$self->esc($match[3]).'"');
              if (empty($expr[2])) {
                // Anchor; label is the explicit text if given,
                // otherwise the reference name itself
                if (!empty($expr[3]))
                  $label=$expr[3];
                elseif (isset($expr[4]))
                  $label=$expr[4];
                else
                  // "[ref][]" form: only group 1 is present
                  $label=$expr[1];
                return '<a href="'.$self->esc($match[2]).'"'.
                  $title.'>'.$self->scan($label).'</a>';
              }
              // Image ("!" prefix); src escaped like href above
              return '<img src="'.$self->esc($match[2]).'"'.
                ' alt="'.$self->esc($expr[3]).'"'.
                $title.' />';
            },
            $tmp
          );
          if ($dst===NULL) {
            // PCRE failure: keep what was built instead of losing it
            $dst=$tmp;
            break;
          }
        } while ($dst!==$tmp);
      }
      else
        foreach ($this->blocks as $func=>$regex)
          if (preg_match($regex,$rest,$match)) {
            $ptr+=strlen($match[0]);
            // Handlers receive the capture groups, or the whole match
            // when the rule has none
            $dst.=call_user_func_array(
              array($this,'_'.$func),
              count($match)>1?array_slice($match,1):$match
            );
            break;
          }
      if ($ptr===$start)
        // Nothing consumed: stop rather than loop forever
        break;
    }
    return $dst;
  }
  /**
   * Render HTML equivalent of markdown
   *
   * Line endings are normalised to "\n", the text is run through
   * build(), backslash escapes are resolved and surplus line feeds are
   * trimmed with snip().
   * @return string
   * @param $txt string Markdown source
   **/
  public function convert($txt) {
    $source=preg_replace('/\r\n|\r/',"\n",$txt);
    $html=$this->build($source);
    $html=preg_replace_callback(
      '/(<code.*?>.+?<\/code>|'.
      '<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
      '\\\\(.)/s',
      function($expr) {
        // Code elements, tags, parenthesised and double-quoted text are
        // returned as matched; elsewhere a backslash-escaped character
        // is replaced by the character itself
        if (empty($expr[1]))
          return $expr[2];
        return $expr[1];
      },
      $html
    );
    return $this->snip($html);
  }
  544. }