PrepareHTML.js 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  /** Replaces each of the characters & < > " in the given value with its
      HTML escape sequence (&amp; &lt; &gt; &quot;). The argument is
      coerced with String() first, so non-string values are accepted. */
  function escapeHTMLEntities(str) {
    var escapeSequences = {
      '&': '&amp;',
      '<': '&lt;',
      '>': '&gt;',
      '"': '&quot;'
    };
    // One pass over the input: every special character is rewritten exactly
    // once, so the '&' that starts a freshly produced escape sequence is
    // never escaped a second time.
    return String(str).replace(/[&<>"]/g, function (character) {
      return escapeSequences[character];
    });
  }
  /** Turns the escape sequences &lt; &gt; &quot; &#39; &ndash; &mdash; and
      &amp; found in str back into plain characters ('--' for an en dash
      and '---' for an em dash), restoring the text as it was typed in the
      document before the browser processed it as HTML. An entity that the
      author really did type as an entity cannot be told apart from one the
      browser produced, so both are decoded. */
  function unescapeHTMLEntities(str) {
    // Order matters: &amp; is decoded last so that an escaped escape
    // sequence such as "&amp;lt;" becomes "&lt;" and is not decoded twice.
    var substitutions = [
      [/&lt;/g, '<'],
      [/&gt;/g, '>'],
      [/&quot;/g, '"'],
      [/&#39;/g, "'"],
      [/&ndash;/g, '--'],
      [/&mdash;/g, '---'],
      [/&amp;/g, '&']
    ];
    var result = str;
    for (var i = 0; i < substitutions.length; ++i) {
      result = result.replace(substitutions[i][0], substitutions[i][1]);
    }
    return result;
  }
  /**
   Rebuilds Markdeep source text from the innerHTML of a DOM node, undoing
   the changes the browser's HTML parser makes to Markdown autolinks.

   \param node A node from an HTML DOM (only its innerHTML is read)
   \param leaveEscapes Not used by this implementation; kept so existing
   callers that pass it keep working.
   \return A String that is a very good reconstruction of what the
   original source looked like before the browser tried to correct
   it to legal HTML.
   */
  function nodeToMarkdeepSource(node, leaveEscapes) {
    var source = node.innerHTML;

    // Markdown uses <user@example.com> e-mail syntax, which HTML parsing
    // treats as an unknown element and will try to close by inserting the
    // matching close tag at the end of the document. Remove such close tags,
    // but only *after* the first fallback style, and remove nothing else:
    // the text between the style and the close tags (for example the script
    // tags) is kept. The tag pattern excludes '<' and '>' so that one match
    // can never run across a neighbouring tag.
    var fallbackStyle = /<style class=["']fallback["']>[\s\S]*?<\/style>/i.exec(source);
    if (fallbackStyle) {
      var tailStart = fallbackStyle.index + fallbackStyle[0].length;
      source = source.substring(0, tailStart) +
        source.substring(tailStart).replace(/<\/[^\s<>]+@[^\s<>]+\.[^\s<>]+>/g, '');
    }

    // Remove artificially inserted close tags for <http://...> autolinks.
    // [^>]* stops at the end of that one tag, so markup that follows it on
    // the same line (such as a real </div>) is preserved.
    source = source.replace(/<\/h?ttps?:[^>]*>/gi, '');

    // Now try to fix the URLs themselves, which will be
    // transformed like this: <http: casual-effects.com="" markdeep="">
    source = source.replace(/<(https?): (.*?)>/gi, function (match, protocol, list) {
      // Each path segment became an empty attribute; turn the ="" plus
      // whitespace separators back into '/'
      var url = '<' + protocol + '://' + list.replace(/=""\s/g, '/');

      // The last segment has a trailing ="" with nothing after it
      if (url.substring(url.length - 3) === '=""') {
        url = url.substring(0, url.length - 3);
      }

      // Remove any lingering quotes (since they
      // wouldn't have been legal in the URL)
      url = url.replace(/"/g, '');

      return url + '>';
    });

    // Remove the "fallback" style tags, including ones whose body spans
    // several lines
    source = source.replace(/<style class=["']fallback["']>[\s\S]*?<\/style>/gi, '');

    source = unescapeHTMLEntities(source);

    return source;
  }
  // $ (FULL_DOCUMENT_HEAD) is replaced by the contents of the <head> found in
  // PreviewBlogPage.htm. This document head will overwrite whatever Markdeep does to
  // the head at the very end.
  //
  // NOTE: in all three string literals below, the lines after the opening
  // quote must start at column 0. They are backslash line continuations, so
  // any leading whitespace would become part of the string. The variables
  // are declared with var so the script also runs in strict mode; at the top
  // level of a classic script they are still globals.
  var FullDocumentHead='\
$(FULL_DOCUMENT_HEAD)\
';

  // This code is placed at the beginning of the body before the Markdeep code.
  // $ (DOCUMENT_BODY_PREFIX) is everything in the body of PreviewBlogPage.htm up to
  // $ (ARTICLE_HTML_CODE).
  var DocumentBodyPrefix='\
$(DOCUMENT_BODY_PREFIX)\
<!-- MARKDEEP_BEGIN -->\
<pre class="markdeep">\
';

  // This code is placed at the end of the body after the Markdeep code.
  // $ (DOCUMENT_BODY_SUFFIX) is everything in the body of PreviewBlogPage.htm after
  // $ (ARTICLE_HTML_CODE).
  var DocumentBodySuffix='\
</pre>\
<!-- MARKDEEP_END -->\
<div>Document &lt;body&gt; code:<br/>\
<textarea cols="40" rows="10" id="BodyDisplayBox"></textarea></div>\
$(DOCUMENT_BODY_SUFFIX)\
';

  // Get the full Markdeep code from the .md.html file without the script invocation
  var MarkdeepCode=nodeToMarkdeepSource(document.body);
  // Cut at the last "<script" only when one is present. lastIndexOf returns
  // -1 when there is none, and slice(0, -1) would then silently drop the
  // final character of the document.
  var MarkdeepScriptIndex=MarkdeepCode.lastIndexOf("<script");
  if (MarkdeepScriptIndex !== -1) {
    MarkdeepCode=MarkdeepCode.slice(0,MarkdeepScriptIndex);
  }

  // Bring it into a form where it can be pasted into an HTML document
  var SanitizedMarkdeepCode=escapeHTMLEntities(MarkdeepCode);

  // Surround it by the prefix and suffix code and set that as body code
  document.body.innerHTML=DocumentBodyPrefix+SanitizedMarkdeepCode+DocumentBodySuffix;