浏览代码

Text.Markdown. Initial Import.

Brucey 2 年之前
父节点
当前提交
50a4c52bc5
共有 61 个文件被更改,包括 32272 次插入0 次删除
  1. 109 0
      markdown.mod/common.bmx
  2. 37 0
      markdown.mod/examples/example_01.bmx
  3. 15 0
      markdown.mod/examples/example_02.bmx
  4. 185 0
      markdown.mod/glue.c
  5. 742 0
      markdown.mod/markdown.bmx
  6. 79 0
      markdown.mod/md4c/.gitignore
  7. 36 0
      markdown.mod/md4c/.travis.yml
  8. 442 0
      markdown.mod/md4c/CHANGELOG.md
  9. 59 0
      markdown.mod/md4c/CMakeLists.txt
  10. 22 0
      markdown.mod/md4c/LICENSE.md
  11. 301 0
      markdown.mod/md4c/README.md
  12. 29 0
      markdown.mod/md4c/appveyor.yml
  13. 4 0
      markdown.mod/md4c/codecov.yml
  14. 22 0
      markdown.mod/md4c/md2html/CMakeLists.txt
  15. 205 0
      markdown.mod/md4c/md2html/cmdline.c
  16. 153 0
      markdown.mod/md4c/md2html/cmdline.h
  17. 113 0
      markdown.mod/md4c/md2html/md2html.1
  18. 417 0
      markdown.mod/md4c/md2html/md2html.c
  19. 120 0
      markdown.mod/md4c/scripts/build_folding_map.py
  20. 66 0
      markdown.mod/md4c/scripts/build_punct_map.py
  21. 66 0
      markdown.mod/md4c/scripts/build_symbol_map.py
  22. 66 0
      markdown.mod/md4c/scripts/build_whitespace_map.py
  23. 70 0
      markdown.mod/md4c/scripts/coverity.sh
  24. 91 0
      markdown.mod/md4c/scripts/run-tests.sh
  25. 1584 0
      markdown.mod/md4c/scripts/unicode/CaseFolding.txt
  26. 4100 0
      markdown.mod/md4c/scripts/unicode/DerivedGeneralCategory.txt
  27. 56 0
      markdown.mod/md4c/src/CMakeLists.txt
  28. 2190 0
      markdown.mod/md4c/src/entity.c
  29. 42 0
      markdown.mod/md4c/src/entity.h
  30. 590 0
      markdown.mod/md4c/src/md4c-html.c
  31. 71 0
      markdown.mod/md4c/src/md4c-html.h
  32. 13 0
      markdown.mod/md4c/src/md4c-html.pc.in
  33. 7240 0
      markdown.mod/md4c/src/md4c.c
  34. 430 0
      markdown.mod/md4c/src/md4c.h
  35. 13 0
      markdown.mod/md4c/src/md4c.pc.in
  36. 64 0
      markdown.mod/md4c/test/LICENSE
  37. 40 0
      markdown.mod/md4c/test/cmark.py
  38. 522 0
      markdown.mod/md4c/test/coverage.txt
  39. 40 0
      markdown.mod/md4c/test/fuzz-input/commonmark.md
  40. 10 0
      markdown.mod/md4c/test/fuzz-input/gfm.md
  41. 1 0
      markdown.mod/md4c/test/fuzz-input/latex-math.md
  42. 1 0
      markdown.mod/md4c/test/fuzz-input/wiki.md
  43. 35 0
      markdown.mod/md4c/test/fuzzers/fuzz-mdhtml.c
  44. 163 0
      markdown.mod/md4c/test/heading-auto-identifier.txt
  45. 39 0
      markdown.mod/md4c/test/latex-math.txt
  46. 194 0
      markdown.mod/md4c/test/normalize.py
  47. 63 0
      markdown.mod/md4c/test/pathological_auto_ident_tests.py
  48. 128 0
      markdown.mod/md4c/test/pathological_tests.py
  49. 50 0
      markdown.mod/md4c/test/permissive-email-autolinks.txt
  50. 99 0
      markdown.mod/md4c/test/permissive-url-autolinks.txt
  51. 107 0
      markdown.mod/md4c/test/permissive-www-autolinks.txt
  52. 9756 0
      markdown.mod/md4c/test/spec.txt
  53. 144 0
      markdown.mod/md4c/test/spec_tests.py
  54. 75 0
      markdown.mod/md4c/test/strikethrough.txt
  55. 357 0
      markdown.mod/md4c/test/tables.txt
  56. 117 0
      markdown.mod/md4c/test/tasklists.txt
  57. 85 0
      markdown.mod/md4c/test/toc-mark.txt
  58. 104 0
      markdown.mod/md4c/test/toc.txt
  59. 39 0
      markdown.mod/md4c/test/underline.txt
  60. 232 0
      markdown.mod/md4c/test/wiki-links.txt
  61. 29 0
      markdown.mod/source.bmx

+ 109 - 0
markdown.mod/common.bmx

@@ -0,0 +1,109 @@
+' Copyright (c) 2023 Bruce A Henderson
+' 
+' Permission is hereby granted, free of charge, to any person obtaining a copy
+' of this software and associated documentation files (the "Software"), to deal
+' in the Software without restriction, including without limitation the rights
+' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+' copies of the Software, and to permit persons to whom the Software is
+' furnished to do so, subject to the following conditions:
+' 
+' The above copyright notice and this permission notice shall be included in
+' all copies or substantial portions of the Software.
+' 
+' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+' THE SOFTWARE.
+' 
+SuperStrict
+
+Import BRL.StringBuilder
+Import "source.bmx"
+
+Extern
+
+End Extern
+
+' Kinds of markdown block.
+' Members are auto-numbered in declaration order. glue.c passes the C MD_BLOCKTYPE value
+' straight into TMarkdown._EnterBlock/_LeaveBlock as this enum, so the order is significant.
+Enum EMDBlockType
+	BLOCK_DOC
+	BLOCK_QUOTE
+	BLOCK_UL
+	BLOCK_OL
+	BLOCK_LI
+	BLOCK_HR
+	BLOCK_H
+	BLOCK_CODE
+	BLOCK_HTML
+	BLOCK_P
+	BLOCK_TABLE
+	BLOCK_THEAD
+	BLOCK_TBODY
+	BLOCK_TR
+	BLOCK_TH
+	BLOCK_TD
+End Enum
+
+' Kinds of inline span.
+' Members are auto-numbered in declaration order. glue.c passes the C MD_SPANTYPE value
+' straight into TMarkdown._EnterSpan/_LeaveSpan as this enum, so the order is significant.
+Enum EMDSpanType
+	SPAN_EM
+	SPAN_STRONG
+	SPAN_A
+	SPAN_IMG
+	SPAN_CODE
+	SPAN_DEL
+	SPAN_LATEXMATH
+	SPAN_LATEXMATH_DISPLAY
+	SPAN_WIKILINK
+	SPAN_U
+End Enum
+
+' Kinds of text run delivered to the renderer's Text callback.
+' Members are auto-numbered in declaration order. glue.c passes the C MD_TEXTTYPE value
+' straight into TMarkdown._Text as this enum, so the order is significant.
+Enum EMDTextType
+	TEXT_NORMAL
+	TEXT_NULLCHAR
+	TEXT_BR
+	TEXT_SOFTBR
+	TEXT_ENTITY
+	TEXT_CODE
+	TEXT_HTML
+	TEXT_LATEXMATH
+End Enum
+
+' Table cell alignment.
+' Members are auto-numbered in declaration order; bmx_md_blocktd_align in glue.c returns
+' the C MD_ALIGN value and it is received as this enum, so the order is significant.
+Enum EMDAlign
+	ALIGN_DEFAULT
+	ALIGN_LEFT
+	ALIGN_CENTER
+	ALIGN_RIGHT
+End Enum
+
+' Parser option bits. The combined value is handed to bmx_md_parse / bmx_md_html as the
+' parser flags argument.
+Enum EMDFlags Flags
+	' Individual option bits.
+	COLLAPSEWHITESPACE = $0001
+	PERMISSIVEATXHEADERS = $0002
+	PERMISSIVEURLAUTOLINKS = $0004
+	PERMISSIVEEMAILAUTOLINKS = $0008
+	NOINDENTEDCODEBLOCKS = $0010
+	NOHTMLBLOCKS = $0020
+	NOHTMLSPANS = $0040
+	TABLES = $0100
+	STRIKETHROUGH = $0200
+	PERMISSIVEWWWAUTOLINKS = $0400
+	TASKLISTS = $0800
+	LATEXMATHSPANS = $1000
+	WIKILINKS = $2000
+	UNDERLINE = $4000
+
+	' Convenience combinations of the bits above.
+	PERMISSIVEAUTOLINKS = PERMISSIVEEMAILAUTOLINKS | PERMISSIVEURLAUTOLINKS | PERMISSIVEWWWAUTOLINKS
+	NOHTML = NOHTMLBLOCKS | NOHTMLSPANS
+
+	' Dialect presets: no option bits at all, or the GitHub-flavoured set.
+	DIALECT_COMMONMARK = 0
+	DIALECT_GITHUB = PERMISSIVEAUTOLINKS | TABLES | STRIKETHROUGH | TASKLISTS
+End Enum
+
+' HTML renderer option bits, handed to bmx_md_html as the rendererFlags argument.
+Enum EMDHtmlFlags Flags
+	' "No options". The value is pinned explicitly: an unvalued member of a Flags enum is
+	' auto-numbered as a bit value rather than zero, which would make NONE collide with
+	' DEBUG. This mirrors the explicit DIALECT_COMMONMARK = 0 in EMDFlags.
+	NONE = 0
+	DEBUG = $0001
+	VERBATIM_ENTITIES = $0002
+	SKIP_UTF8_BOM = $0004
+	XHTML = $0008
+End Enum

+ 37 - 0
markdown.mod/examples/example_01.bmx

@@ -0,0 +1,37 @@
+SuperStrict
+
+Framework brl.standardio
+Import text.markdown
+
+' The renderer that receives the parser events (TParser is declared below).
+Local parser:TParser = New TParser
+
+' Parse a small document; the flags argument is omitted, so Parse uses its default.
+TMarkdown.Parse(parser, """
+Hello *World*!
+* First
+* Second
+""")
+
+
+' Minimal renderer that prints every parser event to standard output.
+'
+' It implements IMDRenderer, the interface TMarkdown.Parse expects for its first argument.
+' Every callback returns 0: the value is passed back to md4c by the glue callbacks, and
+' md4c treats a non-zero result as a request to abort parsing.
+Type TParser Implements IMDRenderer
+
+	' Called when a block opens; prints the block kind.
+	Method EnterBlock:Int(block:TMDBlock)
+		Print "EnterBlock : " + block.GetType().ToString()
+		Return 0
+	End Method
+
+	' Called when a block closes; prints the block kind.
+	Method LeaveBlock:Int(block:TMDBlock)
+		Print "LeaveBlock : " + block.GetType().ToString()
+		Return 0
+	End Method
+
+	' Called when an inline span opens; prints the span kind.
+	Method EnterSpan:Int(span:TMDSpan)
+		Print "EnterSpan : " + span.GetType().ToString()
+		Return 0
+	End Method
+
+	' Called when an inline span closes; prints the span kind.
+	Method LeaveSpan:Int(span:TMDSpan)
+		Print "LeaveSpan : " + span.GetType().ToString()
+		Return 0
+	End Method
+
+	' Called for each run of text; prints the text and ignores its kind.
+	Method Text:Int(text:String, textType:EMDTextType)
+		Print "Text : " + text
+		Return 0
+	End Method
+
+End Type

+ 15 - 0
markdown.mod/examples/example_02.bmx

@@ -0,0 +1,15 @@
+SuperStrict
+
+Framework brl.standardio
+Import text.markdown
+
+' Buffer that ParseToHtml appends the generated HTML to.
+Local sb:TStringBuilder = New TStringBuilder
+
+' Convert a small document to HTML using the default parser and renderer flags.
+' The Int result of ParseToHtml is not checked here.
+TMarkdown.ParseToHtml("""
+Hello *World*!
+* First
+* Second
+""", sb)
+
+' Show the accumulated HTML.
+Print sb.ToString()
+

+ 185 - 0
markdown.mod/glue.c

@@ -0,0 +1,185 @@
+/*
+  Copyright (c) 2023 Bruce A Henderson
+ 
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+  
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+  
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+  THE SOFTWARE.
+*/ 
+#include "md4c.h"
+#include "brl.mod/blitz.mod/blitz.h"
+
+
+int text_markdown_TMarkdown__EnterBlock(BBObject * obj, MD_BLOCKTYPE type, void * detail);
+int text_markdown_TMarkdown__LeaveBlock(BBObject * obj, MD_BLOCKTYPE type, void * detail);
+int text_markdown_TMarkdown__EnterSpan(BBObject * obj, MD_SPANTYPE type, void * detail);
+int text_markdown_TMarkdown__LeaveSpan(BBObject * obj, MD_SPANTYPE type, void * detail);
+int text_markdown_TMarkdown__Text(BBObject * obj, MD_TEXTTYPE type, BBString * text);
+void text_markdown_TMarkdown__HtmlOutput(const MD_CHAR * txt, MD_SIZE size, BBObject * ob );
+
+/* Parser callback for an opening block: userdata is the object given to md_parse()
+   by bmx_md_parse, and the event is forwarded to TMarkdown._EnterBlock. */
+int bmx_md_cb_enter_block(MD_BLOCKTYPE type, void* detail, void* userdata) {
+    BBObject * renderer = (BBObject *)userdata;
+    return text_markdown_TMarkdown__EnterBlock(renderer, type, detail);
+}
+
+/* Parser callback for a closing block: userdata is the object given to md_parse()
+   by bmx_md_parse, and the event is forwarded to TMarkdown._LeaveBlock. */
+int bmx_md_cb_leave_block(MD_BLOCKTYPE type, void* detail, void* userdata) {
+    BBObject * renderer = (BBObject *)userdata;
+    return text_markdown_TMarkdown__LeaveBlock(renderer, type, detail);
+}
+
+/* Parser callback for an opening span: userdata is the object given to md_parse()
+   by bmx_md_parse, and the event is forwarded to TMarkdown._EnterSpan. */
+int bmx_md_cb_enter_span(MD_SPANTYPE type, void* detail, void* userdata) {
+    BBObject * renderer = (BBObject *)userdata;
+    return text_markdown_TMarkdown__EnterSpan(renderer, type, detail);
+}
+
+/* Parser callback for a closing span: userdata is the object given to md_parse()
+   by bmx_md_parse, and the event is forwarded to TMarkdown._LeaveSpan. */
+int bmx_md_cb_leave_span(MD_SPANTYPE type, void* detail, void* userdata) {
+    BBObject * renderer = (BBObject *)userdata;
+    return text_markdown_TMarkdown__LeaveSpan(renderer, type, detail);
+}
+
+/* Parser callback for a run of text. The `size` bytes at `text` are turned into a
+   BBString with bbStringFromUTF8Bytes and forwarded to TMarkdown._Text. */
+int bmx_md_cb_text(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
+    BBObject * renderer = (BBObject *)userdata;
+    BBString * converted = bbStringFromUTF8Bytes((unsigned char*)text, size);
+
+    return text_markdown_TMarkdown__Text(renderer, type, converted);
+}
+
+/* Output callback given to md_html(): passes each chunk of generated HTML, together
+   with the userdata object supplied by bmx_md_html, on to TMarkdown._HtmlOutput. */
+void bmx_md_cb_html_output(const MD_CHAR * txt, MD_SIZE size, void * userdata) {
+    BBObject * sink = (BBObject*)userdata;
+    text_markdown_TMarkdown__HtmlOutput(txt, size, sink);
+}
+
+/*
+  Runs md_parse() over a BlitzMax string, routing parser events to the bmx_md_cb_*
+  callbacks above.
+
+  obj   - handed to md_parse() as userdata; the callbacks cast it back to BBObject*.
+  txt   - markdown source, converted to a temporary UTF-8 C string for the parse.
+  flags - stored in the second slot of the MD_PARSER initialiser.
+
+  Returns the result of md_parse() unchanged.
+*/
+int bmx_md_parse(BBObject * obj, BBString * txt, int flags) {
+
+    MD_PARSER callbacks = {
+        1,
+        flags,
+        bmx_md_cb_enter_block,
+        bmx_md_cb_leave_block,
+        bmx_md_cb_enter_span,
+        bmx_md_cb_leave_span,
+        bmx_md_cb_text,
+        0,
+        0
+    };
+
+    char * utf8 = bbStringToUTF8String(txt);
+
+    /* The length comes from strlen(), so the input ends at the first NUL byte. */
+    MD_SIZE length = strlen(utf8);
+    int result = md_parse(utf8, length, &callbacks, obj);
+
+    /* The UTF-8 copy is only needed while parsing. */
+    bbMemFree(utf8);
+
+    return result;
+}
+
+/*
+  Renders a BlitzMax markdown string to HTML through md_html().
+
+  text          - markdown source, converted to a temporary UTF-8 C string.
+  output        - handed to md_html() as userdata; bmx_md_cb_html_output forwards it,
+                  with each chunk, to TMarkdown._HtmlOutput.
+  parserFlags   - passed through to md_html().
+  rendererFlags - passed through to md_html().
+  depth, ph     - the two members of the MD_TOC_OPTIONS value passed to md_html().
+
+  Returns the result of md_html() unchanged.
+*/
+int bmx_md_html(BBString * text, BBObject * output, int parserFlags, int rendererFlags, int depth, char * ph) {
+
+    MD_TOC_OPTIONS toc = {
+        depth,
+        ph
+    };
+
+    char * utf8 = bbStringToUTF8String(text);
+
+    /* The length comes from strlen(), so the input ends at the first NUL byte. */
+    MD_SIZE length = strlen(utf8);
+    int result = md_html(utf8, length, bmx_md_cb_html_output, output, parserFlags, rendererFlags, &toc);
+
+    /* The UTF-8 copy is only needed while rendering. */
+    bbMemFree(utf8);
+
+    return result;
+}
+
+/*
+  Field accessors for the md4c detail structures.
+
+  The BlitzMax side holds each detail only as an opaque Byte Ptr, so every member it
+  needs is exposed through one small function. Each accessor dereferences `detail`
+  without a NULL check.
+*/
+
+/* --- MD_BLOCK_UL_DETAIL --- */
+
+/* Returns detail->is_tight. */
+int bmx_md_blockul_istight(MD_BLOCK_UL_DETAIL * detail) {
+    return detail->is_tight;
+}
+
+/* Returns detail->mark, widened to int. */
+int bmx_md_blockul_mark(MD_BLOCK_UL_DETAIL * detail) {
+    return detail->mark;
+}
+
+/* --- MD_BLOCK_OL_DETAIL --- */
+
+/* Returns detail->start. */
+unsigned int bmx_md_blockol_start(MD_BLOCK_OL_DETAIL * detail) {
+    return detail->start;
+}
+
+/* Returns detail->is_tight. */
+int bmx_md_blockol_istight(MD_BLOCK_OL_DETAIL * detail) {
+    return detail->is_tight;
+}
+
+/* Returns detail->mark_delimiter, widened to int. */
+int bmx_md_blockol_markdelimiter(MD_BLOCK_OL_DETAIL * detail) {
+    return detail->mark_delimiter;
+}
+
+/* --- MD_BLOCK_LI_DETAIL --- */
+
+/* Returns detail->is_task. */
+int bmx_md_blockli_istask(MD_BLOCK_LI_DETAIL * detail) {
+    return detail->is_task;
+}
+
+/* Returns detail->task_mark, widened to int. */
+int bmx_md_blockli_taskmark(MD_BLOCK_LI_DETAIL * detail) {
+    return detail->task_mark;
+}
+
+/* Returns detail->task_mark_offset. */
+unsigned int bmx_md_blockli_taskmarkoffset(MD_BLOCK_LI_DETAIL * detail) {
+    return detail->task_mark_offset;
+}
+
+/* --- MD_BLOCK_H_DETAIL --- */
+
+/* Returns detail->level. */
+unsigned int bmx_md_blockh_level(MD_BLOCK_H_DETAIL * detail) {
+    return detail->level;
+}
+
+/* Returns detail->identifier by value (received as SMDAttribute in BlitzMax). */
+MD_ATTRIBUTE bmx_md_blockh_identifier(MD_BLOCK_H_DETAIL * detail) {
+    return detail->identifier;
+}
+
+/* --- MD_BLOCK_CODE_DETAIL --- */
+
+/* Returns detail->info by value. */
+MD_ATTRIBUTE bmx_md_blockcode_info(MD_BLOCK_CODE_DETAIL * detail) {
+    return detail->info;
+}
+
+/* Returns detail->lang by value. */
+MD_ATTRIBUTE bmx_md_blockcode_lang(MD_BLOCK_CODE_DETAIL * detail) {
+    return detail->lang;
+}
+
+/* Returns detail->fence_char, widened to int. */
+int bmx_md_blockcode_fencechar(MD_BLOCK_CODE_DETAIL * detail) {
+    return detail->fence_char;
+}
+
+/* --- MD_BLOCK_TABLE_DETAIL --- */
+
+/* Returns detail->col_count. */
+unsigned int bmx_md_blocktable_colcount(MD_BLOCK_TABLE_DETAIL * detail) {
+    return detail->col_count;
+}
+
+/* Returns detail->head_row_count. */
+unsigned int bmx_md_blocktable_headrowcount(MD_BLOCK_TABLE_DETAIL * detail) {
+    return detail->head_row_count;
+}
+
+/* Returns detail->body_row_count. */
+unsigned int bmx_md_blocktable_bodyrowcount(MD_BLOCK_TABLE_DETAIL * detail) {
+    return detail->body_row_count;
+}
+
+/* --- MD_BLOCK_TD_DETAIL --- */
+
+/* Returns detail->align. The BlitzMax wrapper calls this for both TH and TD blocks. */
+MD_ALIGN bmx_md_blocktd_align(MD_BLOCK_TD_DETAIL * detail) {
+    return detail->align;
+}
+
+/* --- MD_SPAN_A_DETAIL --- */
+
+/* Returns detail->href by value. */
+MD_ATTRIBUTE bmx_md_spana_href(MD_SPAN_A_DETAIL * detail) {
+    return detail->href;
+}
+
+/* Returns detail->title by value. */
+MD_ATTRIBUTE bmx_md_spana_title(MD_SPAN_A_DETAIL * detail) {
+    return detail->title;
+}
+
+/* --- MD_SPAN_IMG_DETAIL --- */
+
+/* Returns detail->src by value. */
+MD_ATTRIBUTE bmx_md_spanimg_src(MD_SPAN_IMG_DETAIL * detail) {
+    return detail->src;
+}
+
+/* Returns detail->title by value. */
+MD_ATTRIBUTE bmx_md_spanimg_title(MD_SPAN_IMG_DETAIL * detail) {
+    return detail->title;
+}
+
+/* --- MD_SPAN_WIKILINK_DETAIL --- */
+
+/* Returns detail->target by value. */
+MD_ATTRIBUTE bmx_md_spanwikilink_target(MD_SPAN_WIKILINK_DETAIL * detail) {
+    return detail->target;
+}

+ 742 - 0
markdown.mod/markdown.bmx

@@ -0,0 +1,742 @@
+' Copyright (c) 2023 Bruce A Henderson
+' 
+' Permission is hereby granted, free of charge, to any person obtaining a copy
+' of this software and associated documentation files (the "Software"), to deal
+' in the Software without restriction, including without limitation the rights
+' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+' copies of the Software, and to permit persons to whom the Software is
+' furnished to do so, subject to the following conditions:
+' 
+' The above copyright notice and this permission notice shall be included in
+' all copies or substantial portions of the Software.
+' 
+' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+' THE SOFTWARE.
+' 
+SuperStrict
+
+ModuleInfo "Version: 1.00"
+ModuleInfo "Author: Bruce A Henderson"
+ModuleInfo "License: MIT"
+ModuleInfo "md4c - Copyright (c) Martin Mitas - https://github.com/tim-gromeyer/MarkdownEdit_md4c"
+ModuleInfo "Copyright: 2023 Bruce A Henderson"
+
+ModuleInfo "History: 1.00"
+ModuleInfo "History: Initial Release"
+
+Rem
+bbdoc: A markdown processor.
+about: Can either use a custom renderer, or generate HTML directly.
+End Rem
+Module Text.Markdown
+
+Import "common.bmx"
+
+Rem
+bbdoc: A renderer for markdown parser events.
+about: Implement this interface and pass an instance to #TMarkdown.Parse. The #Int result of
+each method is returned to the native parser through the glue callbacks.
+End Rem
+Interface IMDRenderer
+	' Called with the block that is being entered.
+	Method EnterBlock:Int(block:TMDBlock)
+	' Called with the block that is being left.
+	Method LeaveBlock:Int(block:TMDBlock)
+	' Called with the span that is being entered.
+	Method EnterSpan:Int(span:TMDSpan)
+	' Called with the span that is being left.
+	Method LeaveSpan:Int(span:TMDSpan)
+	' Called with a run of text and its kind.
+	Method Text:Int(text:String, textType:EMDTextType)
+End Interface
+
+Rem
+bbdoc: Html table of contents options.
+about: Optional argument of #TMarkdown.ParseToHtml, which forwards both fields to the native
+HTML renderer.
+End Rem
+Type TMDHtmlTocOptions
+	' Forwarded as the depth argument of bmx_md_html.
+	Field depth:Int
+	' Converted to a UTF-8 C string and forwarded as the placeholder argument of bmx_md_html.
+	Field placeHolder:String
+End Type
+
+Rem
+bbdoc: A Markdown text processor.
+about: #Parse reports parser events to an #IMDRenderer, while #ParseToHtml appends generated HTML
+to a #TStringBuilder. The private functions are the entry points called back from glue.c.
+End Rem
+Type TMarkdown
+
+	Rem
+	bbdoc: Parses markdown @text, feeding parser events into the supplied @renderer.
+	returns: The result of the native parse call.
+	End Rem
+	Function Parse:Int(renderer:IMDRenderer, text:String, flags:EMDFlags = EMDFlags.DIALECT_COMMONMARK)
+		Return bmx_md_parse(renderer, text, flags)
+	End Function
+
+	Rem
+	bbdoc: Parses markdown @text, appending HTML into @output.
+	returns: The result of the native HTML rendering call.
+	about: When @tocOptions is supplied, its depth and placeholder are forwarded to the renderer.
+	End Rem
+	Function ParseToHtml:Int(text:String, output:TStringBuilder, parserFlags:EMDFlags = EMDFlags.DIALECT_COMMONMARK, rendererFlags:EMDHtmlFlags = EMDHtmlFlags.NONE, tocOptions:TMDHtmlTocOptions = Null)
+		' Without tocOptions, depth stays 0 and ph keeps its default value.
+		Local depth:Int = 0
+		Local ph:Byte Ptr
+		If tocOptions Then
+			depth = tocOptions.depth
+			' Temporary UTF-8 copy of the placeholder; released after the call below.
+			ph = tocOptions.placeHolder.ToUTF8String()
+		End If
+		Local res:Int = bmx_md_html(text, output, parserFlags, rendererFlags, depth, ph)
+		MemFree(ph)
+		Return res
+	End Function
+
+	Private
+	' Called from glue.c (bmx_md_cb_html_output) with each chunk of generated HTML.
+	Function _HtmlOutput(text:Byte Ptr, size:UInt, output:TStringBuilder) { nomangle }
+		output.AppendUTF8Bytes(text, Int(size))
+	End Function
+
+	' Called from glue.c (bmx_md_cb_enter_block): wraps the detail pointer and notifies the renderer.
+	Function _EnterBlock:Int(parser:IMDRenderer, blockType:EMDBlockType, detail:Byte Ptr) { nomangle }
+		Local block:TMDBlock = BlockAs(blockType, detail)
+		Return parser.EnterBlock(block)
+	End Function
+
+	' Called from glue.c (bmx_md_cb_leave_block): wraps the detail pointer and notifies the renderer.
+	Function _LeaveBlock:Int(parser:IMDRenderer, blockType:EMDBlockType, detail:Byte Ptr) { nomangle }
+		Local block:TMDBlock = BlockAs(blockType, detail)
+		Return parser.LeaveBlock(block)
+	End Function
+
+	' Called from glue.c (bmx_md_cb_enter_span): wraps the detail pointer and notifies the renderer.
+	Function _EnterSpan:Int(parser:IMDRenderer, spanType:EMDSpanType, detail:Byte Ptr) { nomangle }
+		Local span:TMDSpan = SpanAs(spanType, detail)
+		Return parser.EnterSpan(span)
+	End Function
+
+	' Called from glue.c (bmx_md_cb_leave_span): wraps the detail pointer and notifies the renderer.
+	Function _LeaveSpan:Int(parser:IMDRenderer, spanType:EMDSpanType, detail:Byte Ptr) { nomangle }
+		Local span:TMDSpan = SpanAs(spanType, detail)
+		Return parser.LeaveSpan(span)
+	End Function
+
+	' Called from glue.c (bmx_md_cb_text) with the already-converted text.
+	' Note the argument order differs from IMDRenderer.Text.
+	Function _Text:Int(parser:IMDRenderer, textType:EMDTextType, text:String) { nomangle }
+		Return parser.Text(text, textType)
+	End Function
+
+	' Creates the TMDBlock subclass matching blockType around the native detail pointer.
+	' There is no Default branch, so a blockType outside the listed cases yields no new object.
+	Function BlockAs:TMDBlock(blockType:EMDBlockType, detail:Byte Ptr)
+		Select blockType
+			Case EMDBlockType.BLOCK_DOC
+				Return New TMDBlockDoc(detail)
+			Case EMDBlockType.BLOCK_QUOTE
+				Return New TMDBlockQuote(detail)
+			Case EMDBlockType.BLOCK_UL
+				Return New TMDBlockUL(detail)
+			Case EMDBlockType.BLOCK_OL
+				Return New TMDBlockOL(detail)
+			Case EMDBlockType.BLOCK_LI
+				Return New TMDBlockLI(detail)
+			Case EMDBlockType.BLOCK_HR
+				Return New TMDBlockHR(detail)
+			Case EMDBlockType.BLOCK_H
+				Return New TMDBlockH(detail)
+			Case EMDBlockType.BLOCK_CODE
+				Return New TMDBlockCode(detail)
+			Case EMDBlockType.BLOCK_HTML
+				Return New TMDBlockHtml(detail)
+			Case EMDBlockType.BLOCK_P
+				Return New TMDBlockP(detail)
+			Case EMDBlockType.BLOCK_TABLE
+				Return New TMDBlockTable(detail)
+			Case EMDBlockType.BLOCK_THEAD
+				Return New TMDBlockTHead(detail)
+			Case EMDBlockType.BLOCK_TBODY
+				Return New TMDBlockTBody(detail)
+			Case EMDBlockType.BLOCK_TR
+				Return New TMDBlockTR(detail)
+			Case EMDBlockType.BLOCK_TH
+				Return New TMDBlockTH(detail)
+			Case EMDBlockType.BLOCK_TD
+				Return New TMDBlockTD(detail)
+		End Select
+	End Function
+
+	' Creates the TMDSpan subclass matching spanType around the native detail pointer.
+	' There is no Default branch, so a spanType outside the listed cases yields no new object.
+	Function SpanAs:TMDSpan(spanType:EMDSpanType, detail:Byte Ptr)
+		Select spanType
+			Case EMDSpanType.SPAN_EM
+				Return New TMDSpanEM(detail)
+			Case EMDSpanType.SPAN_STRONG
+				Return New TMDSpanStrong(detail)
+			Case EMDSpanType.SPAN_A
+				Return New TMDSpanA(detail)
+			Case EMDSpanType.SPAN_IMG
+				Return New TMDSpanImg(detail)
+			Case EMDSpanType.SPAN_CODE
+				Return New TMDSpanCode(detail)
+			Case EMDSpanType.SPAN_DEL
+				Return New TMDSpanDel(detail)
+			Case EMDSpanType.SPAN_LATEXMATH
+				Return New TMDSpanLatexMath(detail)
+			Case EMDSpanType.SPAN_LATEXMATH_DISPLAY
+				Return New TMDSpanLatexMathDisplay(detail)
+			Case EMDSpanType.SPAN_WIKILINK
+				Return New TMDSpanWikiLink(detail)
+			Case EMDSpanType.SPAN_U
+				Return New TMDSpanU(detail)
+		End Select
+	End Function
+End Type
+
+Rem
+bbdoc: A markdown block.
+about: A block represents a part of document hierarchy structure like a paragraph or list item.
+End Rem
+Type TMDBlock Abstract
+	Field detail:Byte Ptr
+
+	Method GetType:EMDBlockType() Abstract
+End Type
+
+Rem
+bbdoc: A markdown block document body
+End Rem
+Type TMDBlockDoc Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_DOC
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown block block quote
+End Rem
+Type TMDBlockQuote Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_QUOTE
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown block unordered list
+End Rem
+Type TMDBlockUL Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_UL
+	End Method
+
+	Rem
+	bbdoc: Non-zero if tight list, zero if loose.
+	End Rem
+	Method IsTight:Int()
+		Return bmx_md_blockul_istight(detail)
+	End Method
+
+	Rem
+	bbdoc: Item bullet character in Markdown source of the list.
+	about: e.g. `-`, `+`, `*`.
+	End Rem
+	Method Mark:Int()
+		Return bmx_md_blockul_mark(detail)
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown block ordered list
+End Rem
+Type TMDBlockOL Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_OL
+	End Method
+
+	Rem
+	bbdoc: Start index of the ordered list.
+	End Rem
+	Method Start:UInt()
+		Return bmx_md_blockol_start(detail)
+	End Method
+
+	Rem
+	bbdoc: Non-zero if tight list, zero if loose.
+	End Rem
+	Method IsTight:Int()
+		Return bmx_md_blockol_istight(detail)
+	End Method
+
+	Rem
+	bbdoc: Character delimiting the item marks in Markdown source.
+	about: e.g. `.` or `)`
+	End Rem
+	Method MarkDelimiter:Int()
+		Return bmx_md_blockol_markdelimiter(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown block list item
+End Rem
+Type TMDBlockLI Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_LI
+	End Method
+
+	Rem
+	bbdoc: Can be non-zero only with MD_FLAG_TASKLISTS
+	End Rem
+	Method IsTask:Int()
+		Return bmx_md_blockli_istask(detail)
+	End Method
+
+	Rem
+	bbdoc: If IsTask, then one of `x`, `X` or ` `, otherwise undefined.
+	End Rem
+	Method TaskMark:Int()
+		Return bmx_md_blockli_taskmark(detail)
+	End Method
+
+	Rem
+	bbdoc: If IsTask, then offset in the input of the char between `[` and `]`.
+	End Rem
+	Method TaskMarkOffset:UInt()
+		Return bmx_md_blockli_taskmarkoffset(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown block thematic break.
+End Rem
+Type TMDBlockHR Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_HR
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown header block.
+End Rem
+Type TMDBlockH Extends TMDBlock
+
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_H
+	End Method
+
+	Rem
+	bbdoc: Header level (1 - 6)
+	End Rem
+	Method Level:UInt()
+		Return bmx_md_blockh_level(detail)
+	End Method
+
+	Rem
+	bbdoc: An identifier, eg `{#some-id}` or autogenerated from the heading text
+	End Rem
+	Method Identifier:SMDAttribute()
+		Return bmx_md_blockh_identifier(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown code block.
+End Rem
+Type TMDBlockCode Extends TMDBlock
+	' Stores the native detail pointer that the accessors below read from.
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_CODE
+	End Method
+
+	Rem
+	bbdoc: The `info` attribute of the native code block detail.
+	End Rem
+	' NOTE(review): presumably the info string following the opening code fence - confirm against md4c.h.
+	Method Info:SMDAttribute()
+		Return bmx_md_blockcode_info(detail)
+	End Method
+
+	Rem
+	bbdoc: The `lang` attribute of the native code block detail.
+	End Rem
+	' NOTE(review): presumably the language part of the info string - confirm against md4c.h.
+	Method Lang:SMDAttribute()
+		Return bmx_md_blockcode_lang(detail)
+	End Method
+
+	Rem
+	bbdoc: The `fence_char` value of the native code block detail, as a character code.
+	End Rem
+	' NOTE(review): presumably the character used for the fence - confirm against md4c.h.
+	Method FenceChar:Int()
+		Return bmx_md_blockcode_fencechar(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown HTML block.
+End Rem
+Type TMDBlockHtml Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_HTML
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown paragraph block. 
+End Rem
+Type TMDBlockP Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_P
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown table block.
+End Rem
+Type TMDBlockTable Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_TABLE
+	End Method
+
+	Rem
+	bbdoc: The number of columns in the table.
+	End Rem
+	Method ColCount:UInt()
+		Return bmx_md_blocktable_colcount(detail)
+	End Method
+
+	Rem
+	bbdoc: The number of header rows.
+	about: Always returns 1.
+	End Rem
+	Method HeadRowCount:UInt()
+		Return bmx_md_blocktable_headrowcount(detail)
+	End Method
+
+	Rem
+	bbdoc: The number of body rows.
+	End Rem
+	Method BodyRowCount:UInt()
+		Return bmx_md_blocktable_bodyrowcount(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown table head block.
+End Rem
+Type TMDBlockTHead Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_THEAD
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown table body block.
+End Rem
+Type TMDBlockTBody Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_TBODY
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown table row block.
+End Rem
+Type TMDBlockTR Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_TR
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown table header cell block 
+End Rem
+Type TMDBlockTH Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_TH
+	End Method
+
+	Rem
+	bbdoc: Alignment
+	End Rem
+	Method Align:EMDAlign()
+		Return bmx_md_blocktd_align(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown table cell block 
+End Rem
+Type TMDBlockTD Extends TMDBlock
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDBlockType() Override
+		Return EMDBlockType.BLOCK_TD
+	End Method
+
+	Rem
+	bbdoc: Alignment
+	End Rem
+	Method Align:EMDAlign()
+		Return bmx_md_blocktd_align(detail)
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown span.
+End Rem
+Type TMDSpan Abstract
+	Field detail:Byte Ptr
+
+	Method GetType:EMDSpanType() Abstract
+End Type
+
+Rem
+bbdoc: A markdown emphasis span.
+End Rem
+Type TMDSpanEM Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_EM
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown strong span.
+End Rem
+Type TMDSpanStrong Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_STRONG
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown hyperlink span.
+End Rem
+Type TMDSpanA Extends TMDSpan
+	' Stores the native detail pointer that the accessors below read from.
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_A
+	End Method
+
+	Rem
+	bbdoc: The `href` attribute of the native link detail.
+	End Rem
+	Method HRef:SMDAttribute()
+		Return bmx_md_spana_href(detail)
+	End Method
+
+	Rem
+	bbdoc: The `title` attribute of the native link detail.
+	End Rem
+	Method Title:SMDAttribute()
+		Return bmx_md_spana_title(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown image span.
+End Rem
+Type TMDSpanImg Extends TMDSpan
+	' Stores the native detail pointer that the accessors below read from.
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_IMG
+	End Method
+
+	Rem
+	bbdoc: The `src` attribute of the native image detail.
+	End Rem
+	Method Src:SMDAttribute()
+		Return bmx_md_spanimg_src(detail)
+	End Method
+
+	Rem
+	bbdoc: The `title` attribute of the native image detail.
+	End Rem
+	Method Title:SMDAttribute()
+		Return bmx_md_spanimg_title(detail)
+	End Method
+
+End Type
+
+Rem
+bbdoc: A markdown code span.
+End Rem
+Type TMDSpanCode Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_CODE
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown strikethrough span.
+End Rem
+Type TMDSpanDel Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_DEL
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown latex math span.
+End Rem
+Type TMDSpanLatexMath Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_LATEXMATH
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown latex math display span.
+End Rem
+Type TMDSpanLatexMathDisplay Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_LATEXMATH_DISPLAY
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown wikilink span.
+End Rem
+Type TMDSpanWikiLink Extends TMDSpan
+	' Stores the native detail pointer that the accessor below reads from.
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_WIKILINK
+	End Method
+
+	Rem
+	bbdoc: The `target` attribute of the native wikilink detail.
+	End Rem
+	Method Target:SMDAttribute()
+		Return bmx_md_spanwikilink_target(detail)
+	End Method
+	
+End Type
+
+Rem
+bbdoc: A markdown underline span.
+End Rem
+Type TMDSpanU Extends TMDSpan
+	Method New(detail:Byte Ptr)
+		Self.detail = detail
+	End Method
+
+	Method GetType:EMDSpanType() Override
+		Return EMDSpanType.SPAN_U
+	End Method
+End Type
+
+Rem
+bbdoc: A markdown attribute.
+about: Returned by value from the native accessors that return an MD_ATTRIBUTE in glue.c, such as
+the heading identifier, link href/title and image src/title.
+End Rem
+Struct SMDAttribute
+	' NOTE(review): the field order presumably has to match the C MD_ATTRIBUTE layout, since the
+	' struct crosses the Extern boundary by value - confirm against md4c.h before reordering.
+	' NOTE(review): text is presumably size bytes long and not NUL-terminated - confirm.
+	Field text:Byte Ptr
+	Field size:UInt
+	Field substrTypes:EMDTextType Ptr
+	Field substrOffsets:UInt Ptr
+End Struct
+
+Private
+
+Extern
+	Function bmx_md_parse:Int(parser:IMDRenderer, text:String, flags:EMDFlags)
+	Function bmx_md_html:Int(text:String, output:TStringBuilder, parserFlags:EMDFlags, rendererFlags:EMDHtmlFlags, depth:Int, ph:Byte Ptr)
+
+	Function bmx_md_blockul_istight:Int(detail:Byte Ptr)
+	Function bmx_md_blockul_mark:Int(detail:Byte Ptr)
+
+	Function bmx_md_blockol_start:UInt(detail:Byte Ptr)
+	Function bmx_md_blockol_istight:Int(detail:Byte Ptr)
+	Function bmx_md_blockol_markdelimiter:Int(detail:Byte Ptr)
+
+	Function bmx_md_blockli_istask:Int(detail:Byte Ptr)
+	Function bmx_md_blockli_taskmark:Int(detail:Byte Ptr)
+	Function bmx_md_blockli_taskmarkoffset:UInt(detail:Byte Ptr)
+
+	Function bmx_md_blockh_level:UInt(detail:Byte Ptr)
+	Function bmx_md_blockh_identifier:SMDAttribute(detail:Byte Ptr)
+
+	Function bmx_md_blockcode_info:SMDAttribute(detail:Byte Ptr)
+	Function bmx_md_blockcode_lang:SMDAttribute(detail:Byte Ptr)
+	Function bmx_md_blockcode_fencechar:Int(detail:Byte Ptr)
+
+	Function bmx_md_blocktable_colcount:UInt(detail:Byte Ptr)
+	Function bmx_md_blocktable_headrowcount:UInt(detail:Byte Ptr)
+	Function bmx_md_blocktable_bodyrowcount:UInt(detail:Byte Ptr)
+
+	Function bmx_md_blocktd_align:EMDAlign(detail:Byte Ptr)
+	
+	Function bmx_md_spana_href:SMDAttribute(detail:Byte Ptr)
+	Function bmx_md_spana_title:SMDAttribute(detail:Byte Ptr)
+	
+	Function bmx_md_spanimg_src:SMDAttribute(detail:Byte Ptr)
+	Function bmx_md_spanimg_title:SMDAttribute(detail:Byte Ptr)
+
+	Function bmx_md_spanwikilink_target:SMDAttribute(detail:Byte Ptr)
+
+End Extern

+ 79 - 0
markdown.mod/md4c/.gitignore

@@ -0,0 +1,79 @@
+# Prerequisites
+*.d
+
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Linker output
+*.ilk
+*.map
+*.exp
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+pfft
+
+# Debug files
+*.dSYM/
+*.su
+*.idb
+*.pdb
+
+# Kernel Module Compile Results
+*.mod*
+*.cmd
+.tmp_versions/
+modules.order
+Module.symvers
+Mkfile.old
+dkms.conf
+
+# Temp files
+*.swp
+*.tmp
+*~
+~*
+*.e
+
+# autotools
+/aclocal.m4
+/autom4te.cache/
+/build/
+/build-aux/
+conf*.dir/
+/config.h
+/config.log
+/config.status
+/configure
+.deps/
+.dirstamp
+Makefile
+Makefile.in
+stamp-*
+*.stamp
+*.stamp-*
+.Tpo

+ 36 - 0
markdown.mod/md4c/.travis.yml

@@ -0,0 +1,36 @@
+# YAML definition for travis-ci.com continuous integration.
+# See https://docs.travis-ci.com/user/languages/c
+arch:
+    - amd64
+    - ppc64le
+language: c
+dist: bionic
+
+compiler:
+    - gcc
+
+addons:
+    apt:
+        packages:
+            - python3   # for running tests
+            - lcov      # for generating code coverage report
+
+before_script:
+    - mkdir build
+    - cd build
+    # We enforce -Wdeclaration-after-statement because Qt project needs to
+    # build MD4C with Integrity compiler which chokes whenever a declaration
+    # is not at the beginning of a block.
+    - CFLAGS='--coverage -g -O0 -Wall -Wdeclaration-after-statement -Werror' cmake -DCMAKE_BUILD_TYPE=Debug -G 'Unix Makefiles' ..
+
+script:
+    - make VERBOSE=1
+
+after_success:
+    - ../scripts/run-tests.sh
+    # Creating report
+    - lcov --directory . --capture --output-file coverage.info # capture coverage info
+    - lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter out system
+    - lcov --list coverage.info # debug info
+    # Uploading report to CodeCov
+    - bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports"

+ 442 - 0
markdown.mod/md4c/CHANGELOG.md

@@ -0,0 +1,442 @@
+
+# MD4C Change Log
+
+
+## Next Version (Work in Progress)
+
+Changes:
+
+ * Changes mandated by CommonMark specification 0.30.
+
+   Actually there are only very minor changes to recognition of HTML blocks:
+
+   - The tag `<textarea>` now triggers HTML block (of type 1 as per the
+     specification).
+
+   - HTML declaration (HTML block type 4) is not required to begin with an
+     upper-case ASCII character after the `<!`. Any ASCII character is now
+     allowed.
+
+   Other than that, the newest specification mainly improves test coverage and
+   clarifies its wording in some cases, without affecting the implementation.
+
+   Refer to [CommonMark
+   0.30 notes](https://github.com/commonmark/commonmark-spec/releases/tag/0.30)
+   for more info.
+
+Fixes:
+
+ * [#163](https://github.com/mity/md4c/issues/163):
+   Make HTML renderer to emit `'\n'` after the root tag when in the XHTML mode.
+
+ * [#165](https://github.com/mity/md4c/issues/165):
+   Make HTML renderer not to percent-encode `'~'` in URLs. Although it does
+   work, it's not needed, and it can actually be confusing with URLs such as
+   `http://www.example.com/~johndoe/`.
+
+ * [#167](https://github.com/mity/md4c/issues/167),
+   [#168](https://github.com/mity/md4c/issues/168):
+   Fix multiple instances of various buffer overflow bugs, found mostly using
+   fuzz testing. Contributed by [dtldarek](https://github.com/dtldarek) and
+   [Thierry Coppey](https://github.com/TCKnet).
+
+ * [#169](https://github.com/mity/md4c/issues/169):
+   Table underline now does not require 3 characters per table column anymore.
+   One dash (optionally with a leading or trailing `:` prepended or appended)
+   is now sufficient. This improves compatibility with the GFM.
+
+ * [#172](https://github.com/mity/md4c/issues/172):
+   Fix quadratic time behavior caused by unnecessary lookup for link reference
+   definition even if the potential label contains nested brackets.
+
+ * [#173](https://github.com/mity/md4c/issues/173),
+   [#174](https://github.com/mity/md4c/issues/174):
+   Multiple bugs identified with [OSS-Fuzz](https://github.com/google/oss-fuzz)
+   were fixed.
+
+
+## Version 0.4.8
+
+Fixes:
+
+ * [#149](https://github.com/mity/md4c/issues/149):
+   A HTML block started in a container block (and not explicitly finished in
+   the block) could eat 1 line of actual contents.
+
+ * [#150](https://github.com/mity/md4c/issues/150):
+   Fix md2html utility to output proper DOCTYPE and HTML tags when `--full-html`
+   command line option is used, according to the expected output format
+   (HTML or XHTML).
+
+ * [#152](https://github.com/mity/md4c/issues/152):
+   Suppress recognition of a permissive autolink if it would otherwise form a
+   complete body of an outer inline link.
+
+ * [#153](https://github.com/mity/md4c/issues/153),
+   [#154](https://github.com/mity/md4c/issues/154):
+   Set `MD_BLOCK_UL_DETAIL::mark` and `MD_BLOCK_OL_DETAIL::mark_delimiter`
+   correctly, even when the blocks are nested on the same line in complicated
+   ways.
+
+ * [#155](https://github.com/mity/md4c/issues/155):
+   Avoid reading 1 character beyond the input size in some complex cases.
+
+
+## Version 0.4.7
+
+Changes:
+
+ * Add `MD_BLOCK_TABLE_DETAIL` structure into the API. The structure describes column
+   count and row count of the table, and pointer to it is passed into the
+   application-provided block callback with the `MD_BLOCK_TABLE` block type.
+
+Fixes:
+
+ * [#131](https://github.com/mity/md4c/issues/131):
+   Fix handling of a reference image nested in a reference link.
+
+ * [#135](https://github.com/mity/md4c/issues/135):
+   Handle unmatched parenthesis pairs inside a permissive URL and WWW auto-links
+   in a way more compatible with the GFM.
+
+ * [#138](https://github.com/mity/md4c/issues/138):
+   The tag `<tbody></tbody>` is now suppressed whenever the table has zero body
+   rows.
+
+ * [#139](https://github.com/mity/md4c/issues/139):
+   Recognize a list item mark even when EOF follows it.
+
+ * [#142](https://github.com/mity/md4c/issues/142):
+   Fix reference link definition label matching in a case when the label ends
+   with a Unicode character with non-trivial case folding mapping.
+
+
+## Version 0.4.6
+
+Fixes:
+
+ * [#130](https://github.com/mity/md4c/issues/130):
+   Fix `ISANYOF` macro, which could provide unexpected results when encountering
+   zero byte in the input text; in some cases leading to broken internal state
+   of the parser.
+
+   The bug could result in denial of service and possibly also to other security
+   implications. Applications are advised to update to 0.4.6.
+
+
+## Version 0.4.5
+
+Fixes:
+
+ * [#118](https://github.com/mity/md4c/issues/118):
+   Fix HTML renderer's `MD_HTML_FLAG_VERBATIM_ENTITIES` flag, exposed in the
+   `md2html` utility via `--fverbatim-entities`.
+
+ * [#124](https://github.com/mity/md4c/issues/124):
+   Fix handling of indentation of 16 or more spaces in the fenced code blocks.
+
+
+## Version 0.4.4
+
+Changes:
+
+ * Make Unicode-specific code compliant to Unicode 13.0.
+
+New features:
+
+ * The HTML renderer, developed originally as the heart of the `md2html`
+   utility, is now built as a standalone library, in order to simplify its
+   reuse in applications.
+
+ * With `MD_HTML_FLAG_SKIP_UTF8_BOM`, the HTML renderer now skips UTF-8 byte
+   order mark (BOM) if the input begins with it, before passing to the Markdown
+   parser.
+
+   `md2html` utility automatically enables the flag (unless it is custom-built
+   with `-DMD4C_USE_ASCII`).
+
+ * With `MD_HTML_FLAG_XHTML`, The HTML renderer generates XHTML instead of
+   HTML.
+
+   This effectively means `<br />` instead of `<br>`, `<hr />` instead of
+   `<hr>`, and `<img ... />` instead of `<img ...>`.
+
+   `md2html` utility now understands the command line option `-x` or `--xhtml`
+   enabling the XHTML mode.
+
+Fixes:
+
+ * [#113](https://github.com/mity/md4c/issues/113):
+   Add missing folding info data for the following Unicode characters:
+   `U+0184`, `U+018a`, `U+01b2`, `U+01b5`, `U+01f4`, `U+0372`, `U+038f`,
+   `U+1c84`, `U+1fb9`, `U+1fbb`, `U+1fd9`, `U+1fdb`, `U+1fe9`, `U+1feb`,
+   `U+1ff9`, `U+1ffb`, `U+2c7f`, `U+2ced`, `U+a77b`, `U+a792`, `U+a7c9`.
+
+   Due to the bug, the link definition label matching did not work in the case
+   insensitive way for these characters.
+
+
+## Version 0.4.3
+
+New features:
+
+ * With `MD_FLAG_UNDERLINE`, spans enclosed in underscore (`_foo_`) are seen
+   as underline (`MD_SPAN_UNDERLINE`) rather than an ordinary emphasis or
+   strong emphasis.
+
+Changes:
+
+ * The implementation of wiki-links extension (with `MD_FLAG_WIKILINKS`) has
+   been simplified.
+
+    - A noticeable increase of MD4C's memory footprint introduced by the
+      extension implementation in 0.4.0 has been removed.
+    - The priority handling towards other inline elements has been unified.
+      (This affects an obscure case where the syntax of an image in place of
+      the wiki-link destination made the wiki-link invalid. Now *all* inline
+      spans in the wiki-link destination, including images, are suppressed.)
+    - The length limitation of 100 characters now always applies to wiki-link
+      destination.
+
+ * Recognition of strike-through spans (with the flag `MD_FLAG_STRIKETHROUGH`)
+   has become much stricter and, arguably, reasonable.
+
+    - Only single tildes (`~`) and double tildes (`~~`) are recognized as
+      strike-through marks. Longer ones are not anymore.
+    - The length of the opener and closer marks has to be the same.
+    - The tildes cannot open a strike-through span if a whitespace follows.
+    - The tildes cannot close a strike-through span if a whitespace precedes.
+
+   This change follows the changes of behavior in cmark-gfm some time ago, so
+   it is also beneficial from compatibility point of view.
+
+ * When building MD4C by hand instead of using its CMake-based build, the UTF-8
+   support was by default disabled, unless explicitly asked for by defining
+   a preprocessor macro `MD4C_USE_UTF8`.
+
+   This has been changed and the UTF-8 mode now becomes the default, no matter
+   how `md4c.c` is compiled. If you need to disable it and use the ASCII-only
+   mode, you have to explicitly define the macro `MD4C_USE_ASCII` when compiling it.
+
+   (The CMake-based build as provided in our repository explicitly asked for
+   the UTF-8 support with `-DMD4C_USE_UTF8`. I.e. if you are using MD4C library
+   built with our vanilla `CMakeLists.txt` files, this change should not affect
+   you.)
+
+Fixes:
+
+ * Fixed some string length handling in the special `MD4C_USE_UTF16` build.
+
+   (This does not affect you unless you are on Windows and explicitly define
+   the macro when building MD4C.)
+
+ * [#100](https://github.com/mity/md4c/issues/100):
+   Fixed an off-by-one error in the maximal length limit of some segments
+   of e-mail addresses used in autolinks.
+
+ * [#107](https://github.com/mity/md4c/issues/107):
+   Fix mis-detection of asterisk-encoded emphasis in some corner cases when
+   length of the opener and closer differs, as in `***foo *bar baz***`.
+
+
+## Version 0.4.2
+
+Fixes:
+
+ * [#98](https://github.com/mity/md4c/issues/98):
+   Fix mis-detection of asterisk-encoded emphasis in some corner cases when
+   length of the opener and closer differs, as in `**a *b c** d*`.
+
+
+## Version 0.4.1
+
+Unfortunately, 0.4.0 has been released with badly updated ChangeLog. Fixing
+this is the only change on 0.4.1.
+
+
+## Version 0.4.0
+
+New features:
+
+ * With `MD_FLAG_LATEXMATHSPANS`, LaTeX math spans (`$...$`) and LaTeX display
+   math spans (`$$...$$`) are now recognized. (Note though that the HTML
+   renderer outputs them verbatim in a custom `<x-equation>` tag.)
+
+   Contributed by [Tilman Roeder](https://github.com/dyedgreen).
+
+ * With `MD_FLAG_WIKILINKS`, Wiki-style links (`[[...]]`) are now recognized.
+   (Note though that the HTML renderer renders them as a custom `<x-wikilink>`
+   tag.)
+
+   Contributed by [Nils Blomqvist](https://github.com/niblo).
+
+Changes:
+
+ * Parsing of tables (with `MD_FLAG_TABLES`) is now closer to the way how
+   cmark-gfm parses tables as we do not require every row of the table to
+   contain a pipe `|` anymore.
+
+   As a consequence, paragraphs now cannot interrupt tables. A paragraph which
+   follows the table has to be delimited with a blank line.
+
+Fixes:
+
+ * [#94](https://github.com/mity/md4c/issues/94):
+   `md_build_ref_def_hashtable()`: Do not allocate more memory than strictly
+   needed.
+
+ * [#95](https://github.com/mity/md4c/issues/95):
+   `md_is_container_mark()`: Ordered list mark requires at least one digit.
+
+ * [#96](https://github.com/mity/md4c/issues/96):
+   Some fixes for link label comparison.
+
+
+## Version 0.3.4
+
+Changes:
+
+ * Make Unicode-specific code compliant to Unicode 12.1.
+
+ * Structure `MD_BLOCK_CODE_DETAIL` got new member `fence_char`. Application
+   can use it to detect character used to form the block fences (`` ` `` or
+   `~`). In the case of indented code block, it is set to zero.
+
+Fixes:
+
+ * [#77](https://github.com/mity/md4c/issues/77):
+   Fix maximal count of digits for numerical character references, as requested
+   by CommonMark specification 0.29.
+
+ * [#78](https://github.com/mity/md4c/issues/78):
+   Fix link reference definition label matching for Unicode characters where
+   the folding mapping leads to multiple codepoints, as e.g. in `ẞ` -> `SS`.
+
+ * [#83](https://github.com/mity/md4c/issues/83):
+   Fix recognition of an empty blockquote which interrupts a paragraph.
+
+
+## Version 0.3.3
+
+Changes:
+
+ * Make permissive URL autolink and permissive WWW autolink extensions stricter.
+
+   This brings the behavior closer to GFM and mitigates risk of false positives.
+   In particular, the domain has to contain at least one dot and parenthesis
+   can be part of the link destination only if `(` and `)` are balanced.
+
+Fixes:
+
+ * [#73](https://github.com/mity/md4c/issues/73):
+   Some raw HTML inputs could lead to quadratic parsing times.
+
+ * [#74](https://github.com/mity/md4c/issues/74):
+   Fix input leading to a crash. Found by fuzzing.
+
+ * [#76](https://github.com/mity/md4c/issues/76):
+   Fix handling of parenthesis in some corner cases of permissive URL autolink
+   and permissive WWW autolink extensions.
+
+
+## Version 0.3.2
+
+Changes:
+
+ * Changes mandated by CommonMark specification 0.29.
+
+   Most importantly, the white-space trimming rules for code spans have changed.
+   At most one space/newline is trimmed from beginning/end of the code span
+   (if the code span contains some non-space contents, and if it begins and
+   ends with space at the same time). In all other cases the spaces in the code
+   span are now left intact.
+
+   Other changes in behavior are in corner cases only. Refer to [CommonMark
+   0.29 notes](https://github.com/commonmark/commonmark-spec/releases/tag/0.29)
+   for more info.
+
+Fixes:
+
+ * [#68](https://github.com/mity/md4c/issues/68):
+   Some specific HTML blocks were not recognized when EOF follows without any
+   end-of-line character.
+
+ * [#69](https://github.com/mity/md4c/issues/69):
+   Strike-through span not working correctly when its opener mark is directly
+   followed by other opener mark; or when other closer mark directly precedes
+   its closer mark.
+
+
+## Version 0.3.1
+
+Fixes:
+
+ * [#58](https://github.com/mity/md4c/issues/58),
+   [#59](https://github.com/mity/md4c/issues/59),
+   [#60](https://github.com/mity/md4c/issues/60),
+   [#63](https://github.com/mity/md4c/issues/63),
+   [#66](https://github.com/mity/md4c/issues/66):
+   Some inputs could lead to quadratic parsing times. Thanks to Anders Kaseorg
+   for finding all those issues.
+
+ * [#61](https://github.com/mity/md4c/issues/61):
+   Flag `MD_FLAG_NOHTMLSPANS` erroneously affected also recognition of
+   CommonMark autolinks.
+
+
+## Version 0.3.0
+
+New features:
+
+ * Add extension for GitHub-style task lists:
+
+   ```
+    * [x] foo
+    * [x] bar
+    * [ ] baz
+   ```
+
+   (It has to be explicitly enabled with `MD_FLAG_TASKLISTS`.)
+
+ * Added support for building as a shared library. On non-Windows platforms,
+   this is now default behavior; on Windows static library is still the default.
+   The CMake option `BUILD_SHARED_LIBS` can be used to request one or the other
+   explicitly.
+
+   Contributed by Lisandro Damián Nicanor Pérez Meyer.
+
+ * Renamed structure `MD_RENDERER` to `MD_PARSER` and refactored its contents
+   a little bit. Note this is source-level incompatible and initialization code
+   in apps may need to be updated.
+
+   The aim of the change is to be more friendly for long-term ABI compatibility
+   we shall maintain, starting with this release.
+
+ * Added `CHANGELOG.md` (this file).
+
+ * Make sure `md_process_table_row()` reports the same count of table cells for
+   all table rows, no matter how broken the input is. The cell count is derived
+   from table underline line. Bogus cells in other rows are silently ignored.
+   Missing cells in other rows are reported as empty ones.
+
+Fixes:
+
+ * CID 1475544:
+   Calling `md_free_attribute()` on uninitialized data.
+
+ * [#47](https://github.com/mity/md4c/issues/47):
+   Using bad offsets in `md_is_entity_str()`, in some cases leading to buffer
+   overflow.
+
+ * [#51](https://github.com/mity/md4c/issues/51):
+   Segfault in `md_process_table_cell()`.
+
+ * [#53](https://github.com/mity/md4c/issues/53):
+   With `MD_FLAG_PERMISSIVEURLAUTOLINKS` or `MD_FLAG_PERMISSIVEWWWAUTOLINKS`
+   we could generate bad output for ordinary Markdown links, if a non-space
+   character immediately follows like e.g. in `[link](http://github.com)X`.
+
+
+## Version 0.2.7
+
+This was the last version before the changelog has been added.

+ 59 - 0
markdown.mod/md4c/CMakeLists.txt

@@ -0,0 +1,59 @@
+
+cmake_minimum_required(VERSION 3.4)
+project(MD4C C)
+
+set(MD_VERSION_MAJOR 0)
+set(MD_VERSION_MINOR 4)
+set(MD_VERSION_RELEASE 8)
+set(MD_VERSION "${MD_VERSION_MAJOR}.${MD_VERSION_MINOR}.${MD_VERSION_RELEASE}")
+
+set(PROJECT_VERSION "${MD_VERSION}")
+set(PROJECT_URL "https://github.com/mity/md4c")
+
+if(WIN32)
+    # On Windows, given there is no standard lib install dir etc., we rather
+    # by default build static lib.
+    option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)
+else()
+    # On Linux, MD4C is slowly being added to some distros which prefer
+    # shared lib.
+    option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" ON)
+endif()
+
+add_definitions(
+    -DMD_VERSION_MAJOR=${MD_VERSION_MAJOR}
+    -DMD_VERSION_MINOR=${MD_VERSION_MINOR}
+    -DMD_VERSION_RELEASE=${MD_VERSION_RELEASE}
+)
+
+set(CMAKE_CONFIGURATION_TYPES Debug Release RelWithDebInfo MinSizeRel)
+if("${CMAKE_BUILD_TYPE}" STREQUAL "")
+    set(CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE})
+
+    if("${CMAKE_BUILD_TYPE}" STREQUAL "")
+        set(CMAKE_BUILD_TYPE "Release")
+    endif()
+endif()
+
+
+if(${CMAKE_C_COMPILER_ID} MATCHES GNU|Clang)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
+elseif(MSVC)
+    # Disable warnings about the so-called unsecured functions:
+    add_definitions(/D_CRT_SECURE_NO_WARNINGS /W3)
+
+    # Strip any /MD, /MDd, /MT or /MTd and select the static C runtime per config:
+    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}")
+    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
+    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
+    string(REGEX REPLACE "/M[DT]d?" "" CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL}")
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
+    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")
+    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT")
+endif()
+
+include(GNUInstallDirs)
+
+add_subdirectory(src)
+add_subdirectory(md2html)

+ 22 - 0
markdown.mod/md4c/LICENSE.md

@@ -0,0 +1,22 @@
+
+# The MIT License (MIT)
+
+Copyright © 2016-2020 Martin Mitáš
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the “Software”),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.

+ 301 - 0
markdown.mod/md4c/README.md

@@ -0,0 +1,301 @@
+[![Linux Build Status (travis-ci.com)](https://img.shields.io/travis/mity/md4c/master.svg?logo=linux&label=linux%20build)](https://travis-ci.com/mity/md4c)
+[![Windows Build Status (appveyor.com)](https://img.shields.io/appveyor/ci/mity/md4c/master.svg?logo=windows&label=windows%20build)](https://ci.appveyor.com/project/mity/md4c/branch/master)
+[![Code Coverage Status (codecov.io)](https://img.shields.io/codecov/c/github/mity/md4c/master.svg?logo=codecov&label=code%20coverage)](https://codecov.io/github/mity/md4c)
+[![Coverity Scan Status](https://img.shields.io/coverity/scan/mity-md4c.svg?label=coverity%20scan)](https://scan.coverity.com/projects/mity-md4c)
+
+
+# MD4C Readme
+
+* Home: http://github.com/mity/md4c
+* Wiki: http://github.com/mity/md4c/wiki
+* Issue tracker: http://github.com/mity/md4c/issues
+
+MD4C stands for "Markdown for C" and that's exactly what this project is about.
+
+
+## What is Markdown
+
+In short, Markdown is the markup language this `README.md` file is written in.
+
+The following resources can explain more if you are unfamiliar with it:
+* [Wikipedia article](http://en.wikipedia.org/wiki/Markdown)
+* [CommonMark site](http://commonmark.org)
+
+
+## What is MD4C
+
+MD4C is a Markdown parser implementation in C, with the following features:
+
+* **Compliance:** Generally, MD4C aims to be compliant to the latest version of
+  [CommonMark specification](http://spec.commonmark.org/). Currently, we are
+  fully compliant to CommonMark 0.30.
+
+* **Extensions:** MD4C supports some commonly requested and accepted extensions.
+  See below.
+
+* **Performance:** MD4C is [very fast](https://talk.commonmark.org/t/2520).
+
+* **Compactness:** MD4C parser is implemented in one source file and one header
+  file. There are no dependencies other than standard C library.
+
+* **Embedding:** MD4C parser is easy to reuse in other projects, its API is
+  very straightforward: There is actually just one function, `md_parse()`.
+
+* **Push model:** MD4C parses the complete document and calls few callback
+  functions provided by the application to inform it about a start/end of
+  every block, a start/end of every span, and with any textual contents.
+
+* **Portability:** MD4C builds and works on Windows and POSIX-compliant OSes.
+  (It should be simple to make it run also on most other platforms, at least as
+  long as the platform provides C standard library, including a heap memory
+  management.)
+
+* **Encoding:** MD4C by default expects UTF-8 encoding of the input document.
+  But it can be compiled to recognize ASCII-only control characters (i.e. to
+  disable all Unicode-specific code), or (on Windows) to expect UTF-16 (i.e.
+  what is on Windows commonly called just "Unicode"). See more details below.
+
+* **Permissive license:** MD4C is available under the [MIT license](LICENSE.md).
+
+
+## Using MD4C
+
+### Parsing Markdown
+
+If you need just to parse a Markdown document, you need to include `md4c.h`
+and link against MD4C library (`-lmd4c`); or alternatively add `md4c.[hc]`
+directly to your code base as the parser is only implemented in the single C
+source file.
+
+The main provided function is `md_parse()`. It takes a text in the Markdown
+syntax and a pointer to a structure which provides pointers to several callback
+functions.
+
+As `md_parse()` processes the input, it calls the callbacks (when entering or
+leaving any Markdown block or span; and when outputting any textual content of
+the document), allowing application to convert it into another format or render
+it onto the screen.
+
+
+### Converting to HTML
+
+If you need to convert Markdown to HTML, include `md4c-html.h` and link against
+MD4C-HTML library (`-lmd4c-html`); or alternatively add the sources `md4c.[hc]`,
+`md4c-html.[hc]` and `entity.[hc]` into your code base.
+
+To convert a Markdown input, call `md_html()` function. It takes the Markdown
+input and calls the provided callback function. The callback is fed with
+chunks of the HTML output. Typical callback implementation just appends the
+chunks into a buffer or writes them to a file.
+
+
+## Markdown Extensions
+
+The default behavior is to recognize only Markdown syntax defined by the
+[CommonMark specification](http://spec.commonmark.org/).
+
+However, with appropriate flags, the behavior can be tuned to enable some
+extensions:
+
+* With the flag `MD_FLAG_COLLAPSEWHITESPACE`, a non-trivial whitespace is
+  collapsed into a single space.
+
+* With the flag `MD_FLAG_TABLES`, GitHub-style tables are supported.
+
+* With the flag `MD_FLAG_TASKLISTS`, GitHub-style task lists are supported.
+
+* With the flag `MD_FLAG_STRIKETHROUGH`, strike-through spans are enabled
+  (text enclosed in tilde marks, e.g. `~foo bar~`).
+
+* With the flag `MD_FLAG_PERMISSIVEURLAUTOLINKS` permissive URL autolinks
+  (not enclosed in `<` and `>`) are supported.
+
+* With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, permissive e-mail
+  autolinks (not enclosed in `<` and `>`) are supported.
+
+* With the flag `MD_FLAG_PERMISSIVEWWWAUTOLINKS` permissive WWW autolinks
+  without any scheme specified (e.g. `www.example.com`) are supported. MD4C
+  then assumes `http:` scheme.
+
+* With the flag `MD_FLAG_LATEXMATHSPANS` LaTeX math spans (`$...$`) and
+  LaTeX display math spans (`$$...$$`) are supported. (Note though that the
+  HTML renderer outputs them verbatim in a custom tag `<x-equation>`.)
+
+* With the flag `MD_FLAG_WIKILINKS`, wiki-style links (`[[link label]]` and
+  `[[target article|link label]]`) are supported. (Note that the HTML renderer
+  outputs them in a custom tag `<x-wikilink>`.)
+
+* With the flag `MD_FLAG_UNDERLINE`, underscore (`_`) denotes an underline
+  instead of an ordinary emphasis or strong emphasis.
+
+* With the flag `MD_FLAG_HEADINGAUTOID`, unique identifiers are generated for
+  headings. The HTML renderer outputs them as `id` in the heading tag. For example
+  `<h1 id="title">Title</h1>`.
+
+Few features of CommonMark (those some people see as mis-features) may be
+disabled with the following flags:
+
+* With the flag `MD_FLAG_NOHTMLSPANS` or `MD_FLAG_NOHTMLBLOCKS`, raw inline
+  HTML or raw HTML blocks respectively are disabled.
+
+* With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are
+  disabled.
+
+
+## Input/Output Encoding
+
+The CommonMark specification declares that any sequence of Unicode code points
+is a valid CommonMark document.
+
+But, on closer inspection, Unicode plays a role only in a few very specific
+situations when parsing Markdown documents:
+
+1. For detection of word boundaries when processing emphasis and strong
+   emphasis, some classification of Unicode characters (whether it is
+   a whitespace or a punctuation) is needed.
+
+2. For (case-insensitive) matching of a link reference label with the
+   corresponding link reference definition, Unicode case folding is used.
+
+3. For translating HTML entities (e.g. `&amp;`) and numeric character
+   references (e.g. `&#35;` or `&#xcab;`) into their Unicode equivalents.
+
+   However note MD4C leaves this translation on the renderer/application; as
+   the renderer is supposed to really know output encoding and whether it
+   really needs to perform this kind of translation. (For example, when the
+   renderer outputs HTML, it may leave the entities untranslated and defer the
+   work to a web browser.)
+
+MD4C relies on this property of the CommonMark and the implementation is, to
+a large degree, encoding-agnostic. Most of MD4C code only assumes that the
+encoding of your choice is compatible with ASCII. I.e. that the codepoints
+below 128 have the same numeric values as ASCII.
+
+Any input MD4C does not understand is simply seen as part of the document text
+and sent to the renderer's callback functions unchanged.
+
+The two situations (word boundary detection and link reference matching) where
+MD4C has to understand Unicode are handled as specified by the following
+preprocessor macros (as specified at the time MD4C is being built):
+
+* If preprocessor macro `MD4C_USE_UTF8` is defined, MD4C assumes UTF-8 for the
+  word boundary detection and for the case-insensitive matching of link labels.
+
+  When none of these macros is explicitly used, this is the default behavior.
+
+* On Windows, if preprocessor macro `MD4C_USE_UTF16` is defined, MD4C uses
+  `WCHAR` instead of `char` and assumes UTF-16 encoding in those situations.
+  (UTF-16 is what Windows developers usually call just "Unicode" and what
+  Win32API generally works with.)
+
+  Note that because this macro affects also the types in `md4c.h`, you have
+  to define the macro both when building MD4C as well as when including
+  `md4c.h`.
+
+  Also note this is only supported in the parser (`md4c.[hc]`). The HTML
+  renderer does not support this and you will have to write your own custom
+  renderer to use this feature.
+
+* If preprocessor macro `MD4C_USE_ASCII` is defined, MD4C assumes nothing but
+  an ASCII input.
+
+  That effectively means that non-ASCII whitespace or punctuation characters
+  won't be recognized as such and that link reference matching will work in
+  a case-insensitive way only for ASCII letters (`[a-zA-Z]`).
+
+
+## Documentation
+
+The API of the parser is quite well documented in the comments in the `md4c.h`.
+Similarly, the markdown-to-html API is described in its header `md4c-html.h`.
+
+There is also [project wiki](http://github.com/mity/md4c/wiki) which provides
+some more comprehensive documentation. However note it is incomplete and some
+details may be somewhat outdated.
+
+
+## FAQ
+
+**Q: How does MD4C compare to a parser XY?**
+
+**A:** Some other implementations combine Markdown parser and HTML generator
+into a single entangled code hidden behind an interface which just allows the
+conversion from Markdown to HTML. They are often unusable if you want to
+process the input in any other way.
+
+Even when the parsing is available as a standalone feature, most parsers (if
+not all of them; at least within the scope of C/C++ language) are full DOM-like
+parsers: They construct abstract syntax tree (AST) representation of the whole
+Markdown document. That takes time and it leads to bigger memory footprint.
+
+It's completely fine as long as you really need it. If you don't need the full
+AST, there is a very high chance that using MD4C will be substantially faster
+and less hungry in terms of memory consumption.
+
+Last but not least, some Markdown parsers are implemented in a naive way. When
+fed with a [smartly crafted input pattern](test/pathological_tests.py), they
+may exhibit quadratic (or even worse) parsing times. What MD4C can still parse
+in a fraction of second may turn into long minutes or possibly hours with them.
+Hence, when such a naive parser is used to process an input from an untrusted
+source, the possibility of denial-of-service attacks becomes a real danger.
+
+A lot of our effort went into providing linear parsing times no matter what
+kind of crazy input MD4C parser is fed with. (If you encounter an input pattern
+which leads to super-linear parsing times, please do not hesitate and report it
+as a bug.)
+
+**Q: Does MD4C perform any input validation?**
+
+**A:** No. And we are proud of it. :-)
+
+CommonMark specification states that any sequence of Unicode characters is
+a valid Markdown document. (In practice, this more or less always means UTF-8
+encoding.)
+
+In other words, according to the specification, it does not matter whether some
+Markdown syntax construction is in some way broken or not. If it is broken, it
+will simply not be recognized and the parser should see it just as a verbatim
+text.
+
+MD4C takes this a step further: It sees any sequence of bytes as a valid input,
+following completely the GIGO philosophy (garbage in, garbage out). I.e. any
+ill-formed UTF-8 byte sequence will propagate to the respective callback as
+a part of the text.
+
+If you need to validate that the input is, say, a well-formed UTF-8 document,
+you have to do it on your own. The easiest way how to do this is to simply
+validate the whole document before passing it to the MD4C parser.
+
+
+## License
+
+MD4C is covered with MIT license, see the file `LICENSE.md`.
+
+
+## Links to Related Projects
+
+Ports and bindings to other languages:
+
+* [commonmark-d](https://github.com/AuburnSounds/commonmark-d):
+  Port of MD4C to D language.
+
+* [markdown-wasm](https://github.com/rsms/markdown-wasm):
+  Port of MD4C to WebAssembly.
+
+* [PyMD4C](https://github.com/dominickpastore/pymd4c):
+  Python bindings for MD4C
+
+Software using MD4C:
+
+* [QOwnNotes](https://www.qownnotes.org/):
+  A plain-text file notepad and todo-list manager with markdown support and
+  ownCloud / Nextcloud integration.
+
+* [Qt](https://www.qt.io/):
+  Cross-platform C++ GUI framework.
+
+* [Textosaurus](https://github.com/martinrotter/textosaurus):
+  Cross-platform text editor based on Qt and Scintilla.
+
+* [8th](https://8th-dev.com/):
+  Cross-platform concatenative programming language.

+ 29 - 0
markdown.mod/md4c/appveyor.yml

@@ -0,0 +1,29 @@
+# YAML definition for Appveyor.com continuous integration.
+# See http://www.appveyor.com/docs/appveyor-yml
+
+version: '{branch}-{build}'
+
+before_build:
+  - 'cmake --version'
+  - 'if "%PLATFORM%"=="x64" cmake -G "Visual Studio 12 Win64" .'
+  - 'if not "%PLATFORM%"=="x64" cmake -G "Visual Studio 12" .'
+
+build:
+  project: md4c.sln
+  verbosity: detailed
+
+skip_tags: true
+
+os:
+  - Windows Server 2012 R2
+
+configuration:
+  - Debug
+  - Release
+
+platform:
+  - x64    # 64-bit build
+  - win32  # 32-bit build
+
+artifacts:
+  - path: $(configuration)/md2html/md2html.exe

+ 4 - 0
markdown.mod/md4c/codecov.yml

@@ -0,0 +1,4 @@
+# YAML definition for codecov.io code coverage reports.
+
+ignore:
+    - "md2html"

+ 22 - 0
markdown.mod/md4c/md2html/CMakeLists.txt

@@ -0,0 +1,22 @@
+
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DDEBUG")  # Append -DDEBUG to the C flags of the Debug configuration.
+
+
+# Build rules for md2html command line utility
+
+include_directories("${PROJECT_SOURCE_DIR}/src")  # Headers are looked up in the top-level src/ directory.
+add_executable(md2html cmdline.c cmdline.h md2html.c)
+target_link_libraries(md2html md4c-html)  # md2html links against the md4c-html target.
+
+
+# Install rules (the executable and its man page)
+
+install(
+    TARGETS md2html
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+install(FILES "md2html.1" DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
+

+ 205 - 0
markdown.mod/md4c/md2html/cmdline.c

@@ -0,0 +1,205 @@
+/*
+ * C Reusables
+ * <http://github.com/mity/c-reusables>
+ *
+ * Copyright (c) 2017-2020 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "cmdline.h"
+
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef _WIN32
+    #define snprintf    _snprintf
+#endif
+
+
+#define CMDLINE_AUXBUF_SIZE     32
+
+
+
+/* Handle the remaining characters of a grouped short option token such as
+ * the "bc" of "-abc".
+ *
+ * Every character is looked up among the short names in options[]. A known
+ * option which does not require an argument is reported to the callback with
+ * a NULL argument. A known option which requires an argument is reported as
+ * CMDLINE_OPTID_MISSINGARG and an unknown character as CMDLINE_OPTID_UNKNOWN;
+ * in both error cases the callback receives the text "-X" (X being the
+ * offending character).
+ *
+ * Stops at the first non-zero callback result and returns it; returns zero
+ * if the whole group has been processed.
+ */
+static int
+cmdline_handle_short_opt_group(const CMDLINE_OPTION* options, const char* arggroup,
+        int (*callback)(int /*optval*/, const char* /*arg*/, void* /*userdata*/),
+        void* userdata)
+{
+    const char* ch;
+
+    for(ch = arggroup; *ch != '\0'; ch++) {
+        const CMDLINE_OPTION* match = options;
+        int status;
+
+        /* Linear search; stops on the terminating record (id == 0) when
+         * no short name matches. */
+        while(match->id != 0  &&  match->shortname != *ch)
+            match++;
+
+        if(match->id == 0  ||  (match->flags & CMDLINE_OPTFLAG_REQUIREDARG)) {
+            /* Unknown option, or an option which cannot be grouped because
+             * it needs an argument. */
+            char reported[3];
+            int errid;
+
+            reported[0] = '-';
+            reported[1] = *ch;
+            reported[2] = '\0';
+            errid = (match->id == 0) ? CMDLINE_OPTID_UNKNOWN : CMDLINE_OPTID_MISSINGARG;
+            status = callback(errid, reported, userdata);
+        } else {
+            status = callback(match->id, NULL, userdata);
+        }
+
+        if(status != 0)
+            return status;
+    }
+
+    return 0;
+}
+
+int
+cmdline_read(const CMDLINE_OPTION* options, int argc, char** argv,
+        int (*callback)(int /*optval*/, const char* /*arg*/, void* /*userdata*/),
+        void* userdata)
+{
+    const CMDLINE_OPTION* opt;
+    char auxbuf[CMDLINE_AUXBUF_SIZE+1];     /* Scratch space for option names reported to the callback. */
+    int fast_optarg_decision = 1;           /* Non-zero if a token not starting with '-' is always a non-option. */
+    int after_doubledash = 0;               /* Set once the "--" token has been seen. */
+    int i = 1;                              /* argv[0] is skipped. */
+    int ret = 0;
+
+    auxbuf[CMDLINE_AUXBUF_SIZE] = '\0';     /* The snprintf()/strncpy() calls below write at most CMDLINE_AUXBUF_SIZE bytes, so this terminator is never overwritten by them. */
+
+    /* Check whether there is any CMDLINE_OPTFLAG_COMPILERLIKE option with
+     * a name not starting with '-'. If there is one, we can decide that
+     * a token is a non-option argument only after all such options have
+     * refused it. */
+    for(opt = options; opt->id != 0; opt++) {
+        if((opt->flags & CMDLINE_OPTFLAG_COMPILERLIKE)  &&  opt->longname[0] != '-')
+            fast_optarg_decision = 0;
+    }
+
+    while(i < argc) {
+        if(after_doubledash  ||  strcmp(argv[i], "-") == 0) {
+            /* Non-option argument.
+             * Standalone "-" usually means "read from stdin" or "write to
+             * stdout" so treat it always as a non-option. */
+            ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
+        } else if(strcmp(argv[i], "--") == 0) {
+            /* End of options. All the remaining tokens are non-options
+             * even if they start with a dash. */
+            after_doubledash = 1;
+        } else if(fast_optarg_decision  &&  argv[i][0] != '-') {
+            /* Non-option argument. */
+            ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
+        } else {
+            for(opt = options; opt->id != 0; opt++) {
+                if(opt->flags & CMDLINE_OPTFLAG_COMPILERLIKE) {
+                    size_t len = strlen(opt->longname);
+                    if(strncmp(argv[i], opt->longname, len) == 0) {
+                        /* Compiler-like option. */
+                        if(argv[i][len] != '\0')
+                            ret = callback(opt->id, argv[i] + len, userdata);
+                        else if(i+1 < argc)
+                            ret = callback(opt->id, argv[++i], userdata);   /* The next token is consumed as the argument. */
+                        else
+                            ret = callback(CMDLINE_OPTID_MISSINGARG, opt->longname, userdata);
+                        break;
+                    }
+                } else if(opt->longname != NULL  &&  strncmp(argv[i], "--", 2) == 0) {
+                    size_t len = strlen(opt->longname);
+                    if(strncmp(argv[i]+2, opt->longname, len) == 0) {
+                        /* Regular long option. */
+                        if(argv[i][2+len] == '\0') {
+                            /* with no argument provided. */
+                            if(!(opt->flags & CMDLINE_OPTFLAG_REQUIREDARG))
+                                ret = callback(opt->id, NULL, userdata);
+                            else
+                                ret = callback(CMDLINE_OPTID_MISSINGARG, argv[i], userdata);
+                            break;
+                        } else if(argv[i][2+len] == '=') {
+                            /* with an argument provided. */
+                            if(opt->flags & (CMDLINE_OPTFLAG_OPTIONALARG | CMDLINE_OPTFLAG_REQUIREDARG)) {
+                                ret = callback(opt->id, argv[i]+2+len+1, userdata);
+                            } else {
+                                snprintf(auxbuf, CMDLINE_AUXBUF_SIZE, "--%s", opt->longname);
+                                ret = callback(CMDLINE_OPTID_BOGUSARG, auxbuf, userdata);
+                            }
+                            break;
+                        } else {
+                            continue;   /* longname is only a proper prefix of the token; keep trying the other options. */
+                        }
+                    }
+                } else if(opt->shortname != '\0'  &&  argv[i][0] == '-') {
+                    if(argv[i][1] == opt->shortname) {
+                        /* Regular short option. */
+                        if(opt->flags & CMDLINE_OPTFLAG_REQUIREDARG) {
+                            if(argv[i][2] != '\0')
+                                ret = callback(opt->id, argv[i]+2, userdata);
+                            else if(i+1 < argc)
+                                ret = callback(opt->id, argv[++i], userdata);   /* The next token is consumed as the argument. */
+                            else
+                                ret = callback(CMDLINE_OPTID_MISSINGARG, argv[i], userdata);
+                            break;
+                        } else {
+                            ret = callback(opt->id, NULL, userdata);
+
+                            /* There might be more (argument-less) short options
+                             * grouped together. */
+                            if(ret == 0  &&  argv[i][2] != '\0')
+                                ret = cmdline_handle_short_opt_group(options, argv[i]+2, callback, userdata);
+                            break;
+                        }
+                    }
+                }
+            }
+
+            if(opt->id == 0) {  /* still not handled? (the loop above ran to the terminating record without a break) */
+                if(argv[i][0] != '-') {
+                    /* Non-option argument. */
+                    ret = callback(CMDLINE_OPTID_NONE, argv[i], userdata);
+                } else {
+                    /* Unknown option. */
+                    char* badoptname = argv[i];
+
+                    if(strncmp(badoptname, "--", 2) == 0) {
+                        /* Strip any argument from the long option. */
+                        char* assignment = strchr(badoptname, '=');
+                        if(assignment != NULL) {
+                            size_t len = assignment - badoptname;
+                            if(len > CMDLINE_AUXBUF_SIZE)
+                                len = CMDLINE_AUXBUF_SIZE;  /* Over-long names are reported truncated. */
+                            strncpy(auxbuf, badoptname, len);
+                            auxbuf[len] = '\0';
+                            badoptname = auxbuf;
+                        }
+                    }
+
+                    ret = callback(CMDLINE_OPTID_UNKNOWN, badoptname, userdata);
+                }
+            }
+        }
+
+        if(ret != 0)
+            return ret;
+        i++;
+    }
+
+    return ret;
+}
+

+ 153 - 0
markdown.mod/md4c/md2html/cmdline.h

@@ -0,0 +1,153 @@
+/*
+ * C Reusables
+ * <http://github.com/mity/c-reusables>
+ *
+ * Copyright (c) 2017 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef CRE_CMDLINE_H
+#define CRE_CMDLINE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* The option may have an argument. (Affects only long option.) */
+#define CMDLINE_OPTFLAG_OPTIONALARG     0x0001
+
+/* The option must have an argument.
+ * Such short option cannot be grouped within single '-abc'. */
+#define CMDLINE_OPTFLAG_REQUIREDARG     0x0002
+
+/* Enable special compiler-like mode for the long option.
+ *
+ * Note ::shortname is not supported with this flag. CMDLINE_OPTION::shortname
+ * is silently ignored if the flag is used.
+ *
+ * With this flag, CMDLINE_OPTION::longname is treated differently as follows:
+ *
+ * 1. The option matches if the CMDLINE_OPTION::longname is the exact prefix
+ *   of the argv[i] from commandline.
+ *
+ * 2. Double dash ("--") is not automatically prepended to
+ *    CMDLINE_OPTION::longname. (If you desire any leading dash, include it
+ *    explicitly in CMDLINE_OPTION initialization.)
+ *
+ * 3. An argument (optionally after a whitespace) is required (the flag
+ *    CMDLINE_OPTFLAG_COMPILERLIKE implicitly implies also the flag
+ *    CMDLINE_OPTFLAG_REQUIREDARG).
+ *
+ *    But there is no delimiter expected (no "=" between the option and its
+ *    argument). Whitespace is optional between the option and its argument.
+ *
+ *    Intended use is for options similar to what many compilers accept.
+ *    For example:
+ *      -DDEBUG=0               (-D is the option, DEBUG=0 is the argument).
+ *      -Isrc/include           (-I is the option, src/include is the argument).
+ *      -isystem /usr/include   (-isystem is the option, /usr/include is the argument).
+ *      -lmath                  (-l is the option, math is the argument).
+ */
+#define CMDLINE_OPTFLAG_COMPILERLIKE    0x0004
+
+
+/* Special (reserved) option IDs. Do not use these for any CMDLINE_OPTION::id.
+ * See documentation of cmdline_read() to get info about their meaning.
+ */
+#define CMDLINE_OPTID_NONE              0
+#define CMDLINE_OPTID_UNKNOWN           (-0x7fffffff + 0)
+#define CMDLINE_OPTID_MISSINGARG        (-0x7fffffff + 1)
+#define CMDLINE_OPTID_BOGUSARG          (-0x7fffffff + 2)
+
+
+typedef struct CMDLINE_OPTION {
+    char shortname;         /* Short (single char) option or 0 if the option has no short form. */
+    const char* longname;   /* Long name (after "--") or NULL. With CMDLINE_OPTFLAG_COMPILERLIKE it is matched as a literal prefix of the token and no "--" is prepended. */
+    int id;                 /* Non-zero ID to identify the option in the callback; or zero to denote end of options list. Must not be any of the reserved CMDLINE_OPTID_xxxx values. */
+    unsigned flags;         /* Bitmask of CMDLINE_OPTFLAG_xxxx flags. */
+} CMDLINE_OPTION;
+
+
+/* Parses all options and their arguments as specified by argc, argv accordingly
+ * with the given options (except argv[0] which is ignored).
+ *
+ * The caller must specify the list of supported options in the 1st parameter
+ * of the function. The array must end with a record whose CMDLINE_OPTION::id
+ * is zero.
+ *
+ * The provided callback function is called for each option on the command
+ * line so that:
+ *
+ *   -- the "id" refers to the id of the option as specified in options[].
+ *
+ *   -- the "arg" specifies an argument of the option or NULL if none is
+ *      provided.
+ *
+ *   -- the "userdata" just allows to pass in some caller's context into
+ *      the callback.
+ *
+ * Special cases (recognized via special "id" value) are reported to the
+ * callback as follows:
+ *
+ *   -- If id is CMDLINE_OPTID_NONE, the callback informs about a non-option
+ *      also known as a positional argument.
+ *
+ *      All argv[] tokens which are not interpreted as an option or an argument
+ *      of any option fall into this category.
+ *
+ *      Usually, programs interpret these as paths to files to process.
+ *
+ *   -- If id is CMDLINE_OPTID_UNKNOWN, the corresponding argv[] looks like an
+ *      option but it is not found in the options[] passed to cmdline_read().
+ *
+ *      The callback's parameter arg specifies the guilty command line token.
+ *      Usually, program writes down an error message and exits.
+ *
+ *   -- If id is CMDLINE_OPTID_MISSINGARG, the given option is valid but its
+ *      flag in options[] requires an argument; yet there is none on the
+ *      command line.
+ *
+ *      The callback's parameter arg specifies the guilty option name.
+ *      Usually, program writes down an error message and exits.
+ *
+ *   -- If id is CMDLINE_OPTID_BOGUSARG, the given option is valid but its
+ *      flag in options[] does not expect an argument; yet the command line
+ *      does provide one.
+ *
+ *      The callback's parameter arg specifies the guilty option name.
+ *      Usually, program writes down an error message and exits.
+ *
+ * On success, zero is returned.
+ *
+ * If the callback returns a non-zero value, cmdline_read() aborts immediately
+ * and propagates the same return value to the caller.
+ */
+
+int cmdline_read(const CMDLINE_OPTION* options, int argc, char** argv,
+        int (*callback)(int /*id*/, const char* /*arg*/, void* /*userdata*/),
+        void* userdata);
+
+
+#ifdef __cplusplus
+}  /* extern "C" { */
+#endif
+
+#endif  /* CRE_CMDLINE_H */

+ 113 - 0
markdown.mod/md4c/md2html/md2html.1

@@ -0,0 +1,113 @@
+.TH MD2HTML 1 "June 2019" "" "General Commands Manual"
+.nh
+.ad l
+.
+.SH NAME
+.
+md2html \- convert Markdown to HTML
+.
+.SH SYNOPSIS
+.
+.B md2html
+.RI [ OPTION ]...\&
+.RI [ FILE ]
+.
+.SH OPTIONS
+.
+.SS General options:
+.
+.TP
+.BR -o ", " --output= \fIOUTFILE\fR
+Write output to \fIOUTFILE\fR instead of \fBstdout\fR(3)
+.
+.TP
+.BR -f ", " --full-html
+Generate full HTML document, including header
+.
+.TP
+.BR -s ", " --stat
+Measure time of input parsing
+.
+.TP
+.BR -h ", " --help
+Display help and exit
+.
+.TP
+.BR -v ", " --version
+Display version and exit
+.
+.SS Markdown dialect options:
+.
+.TP
+.B --commonmark
+CommonMark (the default)
+.
+.TP
+.B --github
+Github Flavored Markdown
+.
+.PP
+Note: dialect options are equivalent to some combination of flags below.
+.
+.SS Markdown extension options:
+.
+.TP
+.B --fcollapse-whitespace
+Collapse non-trivial whitespace
+.
+.TP
+.B --fverbatim-entities
+Do not translate entities
+.
+.TP
+.B --fpermissive-atx-headers
+Allow ATX headers without delimiting space
+.
+.TP
+.B --fpermissive-url-autolinks
+Allow URL autolinks without "<" and ">" delimiters
+.
+.TP
+.B --fpermissive-www-autolinks
+Allow WWW autolinks without any scheme (e.g. "www.example.com")
+.
+.TP
+.B --fpermissive-email-autolinks
+Allow e-mail autolinks without "<", ">" and "mailto:"
+.
+.TP
+.B --fpermissive-autolinks
+Enable all 3 of the above permissive autolinks options
+.
+.TP
+.B --fno-indented-code
+Disable indented code blocks
+.
+.TP
+.B --fno-html-blocks
+Disable raw HTML blocks
+.
+.TP
+.B --fno-html-spans
+Disable raw HTML spans
+.
+.TP
+.B --fno-html
+Same as \fB--fno-html-blocks --fno-html-spans\fR
+.
+.TP
+.B --ftables
+Enable tables
+.
+.TP
+.B --fstrikethrough
+Enable strikethrough spans
+.
+.TP
+.B --ftasklists
+Enable task lists
+.
+.SH SEE ALSO
+.
+https://github.com/mity/md4c
+.

+ 417 - 0
markdown.mod/md4c/md2html/md2html.c

@@ -0,0 +1,417 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2020 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "md4c-html.h"
+#include "cmdline.h"
+
+
+
+/* Global options. All of them are filled in by cmdline_callback(). */
+static unsigned parser_flags = 0;   /* MD_FLAG_xxxx / MD_DIALECT_xxxx bits passed to md_html(). */
+#ifndef MD4C_USE_ASCII              /* Renderer flags passed to md_html(); BOM skipping is requested only when MD4C_USE_ASCII is not defined. */
+    static unsigned renderer_flags = MD_HTML_FLAG_DEBUG | MD_HTML_FLAG_SKIP_UTF8_BOM;
+#else
+    static unsigned renderer_flags = MD_HTML_FLAG_DEBUG;
+#endif
+static int want_fullhtml = 0;       /* -f: wrap the output into a complete HTML document. */
+static int want_xhtml = 0;          /* -x: emit the XHTML prolog and self-closing <meta>. */
+static int want_toc = 0;            /* -t: set by the option handler; NOTE(review): not read anywhere in the visible part of this file. */
+static int want_stat = 0;           /* -s: print the parsing time to stderr. */
+
+MD_TOC_OPTIONS toc_options = { 0, NULL};    /* Passed to md_html(); a depth of 0 is replaced by 3 when -t/--toc is handled. */
+
+/*********************************
+ ***  Simple grow-able buffer  ***
+ *********************************/
+
+/* We render to a memory buffer instead of directly outputting the rendered
+ * documents, as this allows using this utility for evaluating performance
+ * of MD4C (--stat option). This allows us to measure just time of the parser,
+ * without the I/O.
+ */
+
+struct membuffer {
+    char* data;     /* Heap block obtained from malloc()/realloc(). */
+    size_t asize;   /* Allocated size of data, in bytes. */
+    size_t size;    /* Number of bytes of data currently in use. */
+};
+
+/* Set up an empty buffer with new_asize bytes of capacity.
+ * Terminates the process with exit code 1 if the allocation fails. */
+static void
+membuf_init(struct membuffer* buf, MD_SIZE new_asize)
+{
+    char* block = malloc(new_asize);
+
+    if(block == NULL) {
+        fprintf(stderr, "membuf_init: malloc() failed.\n");
+        exit(1);
+    }
+
+    buf->data = block;
+    buf->asize = new_asize;
+    buf->size = 0;
+}
+
+/* Release the storage owned by the buffer (a NULL data pointer is fine). */
+static void
+membuf_fini(struct membuffer* buf)
+{
+    /* free() is a no-op for NULL, so no explicit check is needed. */
+    free(buf->data);
+}
+
+/* Change the capacity of the buffer to new_asize bytes.
+ * Terminates the process with exit code 1 if the reallocation fails. */
+static void
+membuf_grow(struct membuffer* buf, size_t new_asize)
+{
+    char* grown = realloc(buf->data, new_asize);
+
+    if(grown == NULL) {
+        fprintf(stderr, "membuf_grow: realloc() failed.\n");
+        exit(1);
+    }
+
+    buf->data = grown;
+    buf->asize = new_asize;
+}
+
+/* Append size bytes from data to the end of the buffer, enlarging it first
+ * when the bytes would not fit. */
+static void
+membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
+{
+    size_t needed = buf->size + size;
+
+    /* Grow to what is needed plus half of the current content, so that
+     * repeated small appends do not reallocate every time. */
+    if(needed > buf->asize)
+        membuf_grow(buf, needed + buf->size / 2);
+
+    memcpy(buf->data + buf->size, data, size);
+    buf->size = needed;
+}
+
+
+/**********************
+ ***  Main program  ***
+ **********************/
+
+/* Output callback handed to md_html(): collects each rendered chunk in the
+ * struct membuffer passed through userdata. */
+static void
+process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
+{
+    struct membuffer* sink = (struct membuffer*) userdata;
+
+    membuf_append(sink, text, size);
+}
+
+/* Read the whole input stream, render it with md_html() into a memory buffer
+ * and write the result to the output stream, optionally wrapped into a
+ * complete (X)HTML document (want_fullhtml, want_xhtml). With want_stat set,
+ * the time spent in md_html() is printed to stderr.
+ *
+ * Returns 0 on success, -1 if reading the input fails, or the non-zero value
+ * returned by md_html() if parsing fails.
+ */
+static int
+process_file(FILE* in, FILE* out)
+{
+    size_t n;
+    struct membuffer buf_in = {0};
+    struct membuffer buf_out = {0};
+    int ret = -1;
+    clock_t t0, t1;
+
+    membuf_init(&buf_in, 32 * 1024);
+
+    /* Read the input file into a buffer. */
+    while(1) {
+        if(buf_in.size >= buf_in.asize)
+            membuf_grow(&buf_in, buf_in.asize + buf_in.asize / 2);
+
+        n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
+        if(n == 0)
+            break;
+        buf_in.size += n;
+    }
+
+    /* fread() returns 0 both at end of file and on error. Do not silently
+     * render a truncated document if it was the latter. (buf_out is still
+     * zero-initialized here, so the cleanup below is safe.) */
+    if(ferror(in)) {
+        fprintf(stderr, "Reading the input failed.\n");
+        goto out;
+    }
+
+    /* Input size is good estimation of output size. Add some more reserve to
+     * deal with the HTML header/footer and tags. */
+    membuf_init(&buf_out, (MD_SIZE)(buf_in.size + buf_in.size/8 + 64));
+
+    /* Parse the document. This shall call our process_output() callback
+     * for every chunk of the rendered output. */
+    t0 = clock();
+
+    ret = md_html(buf_in.data, (MD_SIZE)buf_in.size, process_output, (void*) &buf_out,
+                    parser_flags, renderer_flags, &toc_options);
+
+    t1 = clock();
+    if(ret != 0) {
+        fprintf(stderr, "Parsing failed.\n");
+        goto out;
+    }
+
+    /* Write down the document in the HTML format. */
+    if(want_fullhtml) {
+        if(want_xhtml) {
+            fprintf(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+            fprintf(out, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" "
+                            "\"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n");
+            fprintf(out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
+        } else {
+            fprintf(out, "<!DOCTYPE html>\n");
+            fprintf(out, "<html>\n");
+        }
+        fprintf(out, "<head>\n");
+        fprintf(out, "<title></title>\n");
+        fprintf(out, "<meta name=\"generator\" content=\"md2html\"%s>\n", want_xhtml ? " /" : "");
+        fprintf(out, "</head>\n");
+        fprintf(out, "<body>\n");
+    }
+
+    fwrite(buf_out.data, 1, buf_out.size, out);
+
+    if(want_fullhtml) {
+        fprintf(out, "</body>\n");
+        fprintf(out, "</html>\n");
+    }
+
+    if(want_stat) {
+        /* clock() reports (clock_t)-1 when the processor time is not
+         * available; print nothing in that case. */
+        if(t0 != (clock_t)-1  &&  t1 != (clock_t)-1) {
+            double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
+            if (elapsed < 1)
+                fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
+            else
+                fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
+        }
+    }
+
+    /* Success if we have reached here. */
+    ret = 0;
+
+out:
+    membuf_fini(&buf_in);
+    membuf_fini(&buf_out);
+
+    return ret;
+}
+
+
+static const CMDLINE_OPTION cmdline_options[] = {   /* Each record is { shortname, longname, id, flags }; the id is what cmdline_callback() switches on. */
+    { 'o', "output",                        'o', CMDLINE_OPTFLAG_REQUIREDARG },
+    { 'f', "full-html",                     'f', 0 },
+    { 'x', "xhtml",                         'x', 0 },
+    { 't', "table-of-content",              't', CMDLINE_OPTFLAG_OPTIONALARG },   
+    {   0, "toc",                           't', CMDLINE_OPTFLAG_OPTIONALARG },   /* Same id as --table-of-content. */
+    {   0, "toc-depth",                     'd', CMDLINE_OPTFLAG_REQUIREDARG },
+    { 's', "stat",                          's', 0 },
+    { 'h', "help",                          'h', 0 },
+    { 'v', "version",                       'v', 0 },
+
+    {  0,  "commonmark",                    'c', 0 },   /* From here on the options have no short form (shortname 0). */
+    {  0,  "github",                        'g', 0 },
+
+    {  0,  "fcollapse-whitespace",          'W', 0 },
+    {  0,  "flatex-math",                   'L', 0 },
+    {  0,  "fpermissive-atx-headers",       'A', 0 },
+    {  0,  "fpermissive-autolinks",         'V', 0 },
+    {  0,  "fpermissive-email-autolinks",   '@', 0 },
+    {  0,  "fpermissive-url-autolinks",     'U', 0 },
+    {  0,  "fpermissive-www-autolinks",     '.', 0 },
+    {  0,  "fstrikethrough",                'S', 0 },
+    {  0,  "ftables",                       'T', 0 },
+    {  0,  "ftasklists",                    'X', 0 },
+    {  0,  "funderline",                    '_', 0 },
+    {  0,  "fverbatim-entities",            'E', 0 },
+    {  0,  "fwiki-links",                   'K', 0 },
+    {  0,  "fheading-auto-id",              '#', 0 },
+
+    {  0,  "fno-html-blocks",               'F', 0 },
+    {  0,  "fno-html-spans",                'G', 0 },
+    {  0,  "fno-html",                      'H', 0 },
+    {  0,  "fno-indented-code",             'I', 0 },
+
+    {  0,  NULL,                             0,  0 }    /* Terminating record (id == 0). */
+};
+
+static void
+usage(void)     /* Print the command line help text to standard output. */
+{
+    printf(
+        "Usage: md2html [OPTION]... [FILE]\n"
+        "Convert input FILE (or standard input) in Markdown format to HTML.\n"
+        "\n"
+        "General options:\n"
+        "  -o  --output=FILE    Output file (default is standard output)\n"
+        "  -f, --full-html      Generate full HTML document, including header\n"
+        "  -x, --xhtml          Generate XHTML instead of HTML\n"
+        "  -t, --table-of-content=MARK, --toc=MARK\n"
+        "                       Generate a table of content in place of MARK line\n"
+        "                       If no MARK is given, the toc is generated at start\n"
+        "      --toc-depth=D    Set the maximum level of heading in the table\n" 
+        "                       of content. 1 to 6. Default is 3\n"
+        "  -s, --stat           Measure time of input parsing\n"
+        "  -h, --help           Display this help and exit\n"
+        "  -v, --version        Display version and exit\n"
+        "\n"
+        "Markdown dialect options:\n"
+        "(note these are equivalent to some combinations of the flags below)\n"
+        "      --commonmark     CommonMark (this is default)\n"
+        "      --github         Github Flavored Markdown\n"
+        "\n"
+        "Markdown extension options:\n"
+        "      --fcollapse-whitespace\n"
+        "                       Collapse non-trivial whitespace\n"
+        "      --flatex-math    Enable LaTeX style mathematics spans\n"
+        "      --fpermissive-atx-headers\n"
+        "                       Allow ATX headers without delimiting space\n"
+        "      --fpermissive-url-autolinks\n"
+        "                       Allow URL autolinks without '<', '>'\n"
+        "      --fpermissive-www-autolinks\n"
+        "                       Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n"
+        "      --fpermissive-email-autolinks  \n"
+        "                       Allow e-mail autolinks without '<', '>' and 'mailto:'\n"
+        "      --fpermissive-autolinks\n"
+        "                       Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n"
+        "                       --fpermissive-email-autolinks\n"
+        "      --fstrikethrough Enable strike-through spans\n"
+        "      --ftables        Enable tables\n"
+        "      --ftasklists     Enable task lists\n"
+        "      --funderline     Enable underline spans\n"
+        "      --fwiki-links    Enable wiki links\n"
+        "      --fheading-auto-id\n"
+        "                       Enable heading auto identifier\n"
+        "\n"
+        "Markdown suppression options:\n"
+        "      --fno-html-blocks\n"
+        "                       Disable raw HTML blocks\n"
+        "      --fno-html-spans\n"
+        "                       Disable raw HTML spans\n"
+        "      --fno-html       Same as --fno-html-blocks --fno-html-spans\n"
+        "      --fno-indented-code\n"
+        "                       Disable indented code blocks\n"
+        "\n"
+        "HTML generator options:\n"
+        "      --fverbatim-entities\n"
+        "                       Do not translate entities\n"
+        "\n"
+    );
+}
+
+/* Print the MD4C version as "MAJOR.MINOR.RELEASE" on standard output. */
+static void
+version(void)
+{
+    fprintf(stdout, "%d.%d.%d\n",
+            MD_VERSION_MAJOR,
+            MD_VERSION_MINOR,
+            MD_VERSION_RELEASE);
+}
+
+static const char* input_path = NULL;   /* Positional FILE argument; NULL or "-" makes main() read stdin. */
+static const char* output_path = NULL;  /* Argument of -o/--output; NULL or "-" makes main() write to stdout. */
+
+/* Parse the argument of --toc-depth into toc_options.depth.
+ *
+ * The value must be exactly one digit in the range '1'..'6'. Anything else
+ * (NULL, empty string, other characters, or trailing characters such as in
+ * "12" or "3x") is rejected and toc_options.depth is left untouched.
+ *
+ * Returns non-zero on success, zero if the value is not acceptable.
+ */
+static int parse_toc_depth(char const* value){
+    if(value == NULL  ||  value[0] < '1'  ||  value[0] > '6'  ||  value[1] != '\0')
+        return 0;
+
+    toc_options.depth = value[0] - '0';
+    return 1;
+}
+
+/* Callback for cmdline_read(): applies one parsed command line item to the
+ * global option variables.
+ *
+ *   opt    -- id of the option from cmdline_options[], 0 for a positional
+ *             argument, or one of the reserved CMDLINE_OPTID_xxxx error ids.
+ *   value  -- argument of the option (may be NULL), the positional argument,
+ *             or the offending option name for the error ids.
+ *   data   -- unused.
+ *
+ * Always returns 0; every error is reported on stderr and terminates the
+ * process with exit code 1. --help and --version exit with code 0.
+ */
+static int
+cmdline_callback(int opt, char const* value, void* data)
+{
+    (void) data;
+
+    switch(opt) {
+        case 0:
+            if(input_path) {
+                fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
+                fprintf(stderr, "Use --help for more info.\n");
+                exit(1);
+            }
+            input_path = value;
+            break;
+
+        case 'o':   output_path = value; break;
+        case 'f':   want_fullhtml = 1; break;
+        case 'x':   want_xhtml = 1; renderer_flags |= MD_HTML_FLAG_XHTML; break;
+        case 't':
+            want_toc = 1;
+            parser_flags |= MD_FLAG_HEADINGAUTOID;
+            /* value is NULL when no MARK has been given. */
+            toc_options.toc_placeholder = value;
+            /* Keep a depth already set by --toc-depth; otherwise use the
+             * documented default. */
+            if(toc_options.depth == 0)
+                toc_options.depth = 3;
+            break;
+        case 'd':
+            if(!parse_toc_depth(value)){
+                fprintf(stderr, "Invalid toc-depth: %s\n", value);
+                fprintf(stderr, "Must be a number in the range 1-6\n");
+                exit(1);
+            }
+            break;
+        case 's':   want_stat = 1; break;
+        case 'h':   usage(); exit(0); break;
+        case 'v':   version(); exit(0); break;
+
+        case 'c':   parser_flags |= MD_DIALECT_COMMONMARK; break;
+        case 'g':   parser_flags |= MD_DIALECT_GITHUB; break;
+
+        case 'E':   renderer_flags |= MD_HTML_FLAG_VERBATIM_ENTITIES; break;
+        case 'A':   parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
+        case 'I':   parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break;
+        case 'F':   parser_flags |= MD_FLAG_NOHTMLBLOCKS; break;
+        case 'G':   parser_flags |= MD_FLAG_NOHTMLSPANS; break;
+        case 'H':   parser_flags |= MD_FLAG_NOHTML; break;
+        case 'W':   parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break;
+        case 'U':   parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break;
+        case '.':   parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break;
+        case '@':   parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
+        case 'V':   parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
+        case 'T':   parser_flags |= MD_FLAG_TABLES; break;
+        case 'S':   parser_flags |= MD_FLAG_STRIKETHROUGH; break;
+        case 'L':   parser_flags |= MD_FLAG_LATEXMATHSPANS; break;
+        case 'K':   parser_flags |= MD_FLAG_WIKILINKS; break;
+        case 'X':   parser_flags |= MD_FLAG_TASKLISTS; break;
+        case '_':   parser_flags |= MD_FLAG_UNDERLINE; break;
+        case '#':   parser_flags |= MD_FLAG_HEADINGAUTOID; break;
+
+        /* The option itself is known in the two cases below, so calling it
+         * "illegal" would point the user in the wrong direction. */
+        case CMDLINE_OPTID_MISSINGARG:
+            fprintf(stderr, "Option %s requires an argument.\n", value);
+            fprintf(stderr, "Use --help for more info.\n");
+            exit(1);
+            break;
+
+        case CMDLINE_OPTID_BOGUSARG:
+            fprintf(stderr, "Option %s does not take an argument.\n", value);
+            fprintf(stderr, "Use --help for more info.\n");
+            exit(1);
+            break;
+
+        default:    /* CMDLINE_OPTID_UNKNOWN and anything unexpected. */
+            fprintf(stderr, "Illegal option: %s\n", value);
+            fprintf(stderr, "Use --help for more info.\n");
+            exit(1);
+            break;
+    }
+
+    return 0;
+}
+
+/* Program entry point: parse the command line, open the input and output
+ * streams (stdin/stdout unless a path other than "-" has been given) and
+ * convert the document. Returns the result of process_file(). */
+int
+main(int argc, char** argv)
+{
+    FILE* src = stdin;
+    FILE* dst = stdout;
+    int use_stdin;
+    int use_stdout;
+    int status;
+
+    if(cmdline_read(cmdline_options, argc, argv, cmdline_callback, NULL) != 0) {
+        usage();
+        exit(1);
+    }
+
+    use_stdin = (input_path == NULL  ||  strcmp(input_path, "-") == 0);
+    if(!use_stdin) {
+        src = fopen(input_path, "rb");
+        if(src == NULL) {
+            fprintf(stderr, "Cannot open %s.\n", input_path);
+            exit(1);
+        }
+    }
+
+    use_stdout = (output_path == NULL  ||  strcmp(output_path, "-") == 0);
+    if(!use_stdout) {
+        dst = fopen(output_path, "wt");
+        if(dst == NULL) {
+            fprintf(stderr, "Cannot open %s.\n", output_path);
+            exit(1);
+        }
+    }
+
+    status = process_file(src, dst);
+
+    /* Only close the streams we have opened ourselves. */
+    if(src != stdin)
+        fclose(src);
+    if(dst != stdout)
+        fclose(dst);
+
+    return status;
+}

+ 120 - 0
markdown.mod/md4c/scripts/build_folding_map.py

@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__));
+f = open(self_path + "/unicode/CaseFolding.txt", "r")
+
+status_list = [ "C", "F" ]
+
+folding_list = [ dict(), dict(), dict() ]
+
+# Filter the foldings for "full" folding.
+for line in f:
+    comment_off = line.find("#")
+    if comment_off >= 0:
+        line = line[:comment_off]
+    line = line.strip()
+    if not line:
+        continue
+
+    raw_codepoint, status, raw_mapping, ignored_tail = line.split(";", 3)
+    if not status.strip() in status_list:
+        continue
+    codepoint = int(raw_codepoint.strip(), 16)
+    mapping = [int(it, 16) for it in raw_mapping.strip().split(" ")]
+    mapping_len = len(mapping)
+
+    if mapping_len in range(1, 4):
+        folding_list[mapping_len-1][codepoint] = mapping
+    else:
+        assert(False)
+f.close()
+
+
+# Decide whether the range starting at codepoint_list[index0] can be extended
+# to also cover codepoint_list[index]; i.e. assuming (index0 ... index-1)
+# already forms a range (as defined below), verify that adding index keeps it.
+#
+# Currently, we can handle ranges which:
+#
+# (1) either form a consecutive sequence of codepoints which is mapped to
+#     another consecutive sequence of codepoints (of the same length);
+#
+# (2) or form a sequence of codepoints with step 2 where each codepoint
+#     CP is mapped to the codepoint CP+1
+#     (e.g. 0x1234 -> 0x1235; 0x1236 -> 0x1237; 0x1238 -> 0x1239; ...).
+#
+# Note: When the codepoints in the range are mapped to multiple codepoints,
+# only the 1st mapped codepoint is considered. All the other ones have to be
+# identical for all the mappings covered by the range.
+def is_range_compatible(folding, codepoint_list, index0, index):
+    N = index - index0    # distance of the candidate from the range start, in list positions
+    codepoint0 = codepoint_list[index0]    # first member of the range
+    codepoint1 = codepoint_list[index0+1]    # second member; its step from the first selects the range type
+    codepointN = codepoint_list[index]    # candidate member
+    mapping0 = folding[codepoint0]
+    mapping1 = folding[codepoint1]
+    mappingN = folding[codepointN]
+
+    # Check the range type (1): codepoints and their 1st mapped codepoints both advance by 1.
+    if codepoint1 - codepoint0 == 1 and codepointN - codepoint0 == N                \
+            and mapping1[0] - mapping0[0] == 1 and mapping1[1:] == mapping0[1:]     \
+            and mappingN[0] - mapping0[0] == N and mappingN[1:] == mapping0[1:]:
+        return True
+
+    # Check the range type (2): codepoints advance by 2 and each one maps to itself + 1.
+    if codepoint1 - codepoint0 == 2 and codepointN - codepoint0 == 2 * N            \
+            and mapping0[0] - codepoint0 == 1                                       \
+            and mapping1[0] - codepoint1 == 1 and mapping1[1:] == mapping0[1:]      \
+            and mappingN[0] - codepointN == 1 and mappingN[1:] == mapping0[1:]:
+        return True
+
+    return False
+
+
+def mapping_str(list, mapping):
+    return ",".join("0x{:04x}".format(x) for x in mapping)
+
+for mapping_len in range(1, 4):
+    folding = folding_list[mapping_len-1]
+    codepoint_list = list(folding)
+
+    index0 = 0
+    count = len(folding)
+
+    records = list()
+    data_records = list()
+
+    while index0 < count:
+        index1 = index0 + 1
+        while index1 < count and is_range_compatible(folding, codepoint_list, index0, index1):
+            index1 += 1
+
+        if index1 - index0 > 2:
+            # Range of codepoints
+            records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+            data_records.append(mapping_str(data_records, folding[codepoint_list[index0]]))
+            data_records.append(mapping_str(data_records, folding[codepoint_list[index1-1]]))
+            index0 = index1
+        else:
+            # Single codepoint
+            records.append("S(0x{:04x})".format(codepoint_list[index0]))
+            data_records.append(mapping_str(data_records, folding[codepoint_list[index0]]))
+            index0 += 1
+
+    sys.stdout.write("static const unsigned FOLD_MAP_{}[] = {{\n".format(mapping_len))
+    sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+                        initial_indent = "    ", subsequent_indent="    ")))
+    sys.stdout.write("\n};\n")
+
+    sys.stdout.write("static const unsigned FOLD_MAP_{}_DATA[] = {{\n".format(mapping_len))
+    sys.stdout.write("\n".join(textwrap.wrap(", ".join(data_records), 110,
+                        initial_indent = "    ", subsequent_indent="    ")))
+    sys.stdout.write("\n};\n")
+
+
+

+ 66 - 0
markdown.mod/md4c/scripts/build_punct_map.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__));
+f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r")
+
+codepoint_list = []
+category_list = [ "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" ]
+
+# Filter codepoints falling in the right category:
+for line in f:
+    comment_off = line.find("#")
+    if comment_off >= 0:
+        line = line[:comment_off]
+    line = line.strip()
+    if not line:
+        continue
+
+    char_range, category = line.split(";")
+    char_range = char_range.strip()
+    category = category.strip()
+
+    if not category in category_list:
+        continue
+
+    delim_off = char_range.find("..")
+    if delim_off >= 0:
+        codepoint0 = int(char_range[:delim_off], 16)
+        codepoint1 = int(char_range[delim_off+2:], 16)
+        for codepoint in range(codepoint0, codepoint1 + 1):
+            codepoint_list.append(codepoint)
+    else:
+        codepoint = int(char_range, 16)
+        codepoint_list.append(codepoint)
+f.close()
+
+
+codepoint_list.sort()
+
+
+index0 = 0
+count = len(codepoint_list)
+
+records = list()
+while index0 < count:
+    index1 = index0 + 1
+    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
+        index1 += 1
+
+    if index1 - index0 > 1:
+        # Range of codepoints
+        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+    else:
+        # Single codepoint
+        records.append("S(0x{:04x})".format(codepoint_list[index0]))
+
+    index0 = index1
+
+sys.stdout.write("static const unsigned PUNCT_MAP[] = {\n")
+sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+                    initial_indent = "    ", subsequent_indent="    ")))
+sys.stdout.write("\n};\n\n")

+ 66 - 0
markdown.mod/md4c/scripts/build_symbol_map.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__));
+f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r")
+
+codepoint_list = []
+category_list = [ "Sm", "Sc", "Sk", "So" ]
+
+# Filter codepoints falling in the right category:
+for line in f:
+    comment_off = line.find("#")
+    if comment_off >= 0:
+        line = line[:comment_off]
+    line = line.strip()
+    if not line:
+        continue
+
+    char_range, category = line.split(";")
+    char_range = char_range.strip()
+    category = category.strip()
+
+    if not category in category_list:
+        continue
+
+    delim_off = char_range.find("..")
+    if delim_off >= 0:
+        codepoint0 = int(char_range[:delim_off], 16)
+        codepoint1 = int(char_range[delim_off+2:], 16)
+        for codepoint in range(codepoint0, codepoint1 + 1):
+            codepoint_list.append(codepoint)
+    else:
+        codepoint = int(char_range, 16)
+        codepoint_list.append(codepoint)
+f.close()
+
+
+codepoint_list.sort()
+
+
+index0 = 0
+count = len(codepoint_list)
+
+records = list()
+while index0 < count:
+    index1 = index0 + 1
+    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
+        index1 += 1
+
+    if index1 - index0 > 1:
+        # Range of codepoints
+        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+    else:
+        # Single codepoint
+        records.append("S(0x{:04x})".format(codepoint_list[index0]))
+
+    index0 = index1
+
+sys.stdout.write("static const unsigned SYMBOL_MAP[] = {\n")
+sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+                    initial_indent = "    ", subsequent_indent="    ")))
+sys.stdout.write("\n};\n\n")

+ 66 - 0
markdown.mod/md4c/scripts/build_whitespace_map.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import textwrap
+
+
+self_path = os.path.dirname(os.path.realpath(__file__));
+f = open(self_path + "/unicode/DerivedGeneralCategory.txt", "r")
+
+codepoint_list = []
+category_list = [ "Zs" ]
+
+# Filter codepoints falling in the right category:
+for line in f:
+    comment_off = line.find("#")
+    if comment_off >= 0:
+        line = line[:comment_off]
+    line = line.strip()
+    if not line:
+        continue
+
+    char_range, category = line.split(";")
+    char_range = char_range.strip()
+    category = category.strip()
+
+    if not category in category_list:
+        continue
+
+    delim_off = char_range.find("..")
+    if delim_off >= 0:
+        codepoint0 = int(char_range[:delim_off], 16)
+        codepoint1 = int(char_range[delim_off+2:], 16)
+        for codepoint in range(codepoint0, codepoint1 + 1):
+            codepoint_list.append(codepoint)
+    else:
+        codepoint = int(char_range, 16)
+        codepoint_list.append(codepoint)
+f.close()
+
+
+codepoint_list.sort()
+
+
+index0 = 0
+count = len(codepoint_list)
+
+records = list()
+while index0 < count:
+    index1 = index0 + 1
+    while index1 < count and codepoint_list[index1] == codepoint_list[index1-1] + 1:
+        index1 += 1
+
+    if index1 - index0 > 1:
+        # Range of codepoints
+        records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
+    else:
+        # Single codepoint
+        records.append("S(0x{:04x})".format(codepoint_list[index0]))
+
+    index0 = index1
+
+sys.stdout.write("static const unsigned WHITESPACE_MAP[] = {\n")
+sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
+                    initial_indent = "    ", subsequent_indent="    ")))
+sys.stdout.write("\n};\n\n")

+ 70 - 0
markdown.mod/md4c/scripts/coverity.sh

@@ -0,0 +1,70 @@
+#!/bin/sh
+#
+# This script attempts to build the project via the cov-build utility and to
+# prepare a package for uploading to the Coverity Scan service.
+#
+# (See http://scan.coverity.com for more info.)
+
+set -e
+
+# Check presence of the Coverity static analyzer.
+if ! which cov-build; then
+    echo "Utility cov-build not found in PATH."
+    exit 1
+fi
+
+# Choose a build system (ninja or make).
+if which ninja; then
+    BUILD_TOOL=ninja
+    GENERATOR=Ninja
+elif which make; then
+    BUILD_TOOL=make
+    # The "MSYS Makefiles" generator exists only in Windows builds of CMake;
+    # use the generic Makefile generator everywhere else.
+    case "$(uname -s)" in
+        MSYS*|MINGW*)   GENERATOR="MSYS Makefiles" ;;
+        *)              GENERATOR="Unix Makefiles" ;;
+    esac
+else
+    echo "No suitable build system found."
+    exit 1
+fi
+
+# Choose a zip tool.
+if which 7za; then
+    MKZIP="7za a -r -mx9"
+elif which 7z; then
+    MKZIP="7z a -r -mx9"
+elif which zip; then
+    MKZIP="zip -r"
+else
+    echo "No suitable zip utility found"
+    exit 1
+fi
+
+# Change dir to project root. (Quoted so that paths with spaces work.)
+cd "$(dirname "$0")/.."
+
+CWD=$(pwd)
+ROOT_DIR="$CWD"
+BUILD_DIR="$CWD/coverity"
+OUTPUT="$CWD/cov-int.zip"
+
+# Sanity checks.
+if [ ! -x "$ROOT_DIR/scripts/coverity.sh" ]; then
+    echo "There is some path mismatch."
+    exit 1
+fi
+if [ -e "$BUILD_DIR" ]; then
+    echo "Path $BUILD_DIR already exists. Delete it and retry."
+    exit 1
+fi
+if [ -e "$OUTPUT" ]; then
+    echo "Path $OUTPUT already exists. Delete it and retry."
+    exit 1
+fi
+
+# Build the project with the Coverity analysis enabled.
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+cmake -G "$GENERATOR" "$ROOT_DIR"
+cov-build --dir cov-int "$BUILD_TOOL"
+# $MKZIP is intentionally unquoted: it holds the tool name plus its options.
+$MKZIP "$OUTPUT" "cov-int"
+cd "$ROOT_DIR"
+rm -rf "$BUILD_DIR"
+

+ 91 - 0
markdown.mod/md4c/scripts/run-tests.sh

@@ -0,0 +1,91 @@
+#!/bin/sh
+#
+# Run this script from build directory.
+
+#set -e
+
+SELF_DIR=`dirname $0`
+PROJECT_DIR="$SELF_DIR/.."
+TEST_DIR="$PROJECT_DIR/test"
+
+
+PROGRAM="md2html/md2html"
+if [ ! -x "$PROGRAM" ]; then
+    echo "Cannot find the $PROGRAM." >&2
+    echo "You have to run this script from the build directory." >&2
+    exit 1
+fi
+
+if which py >>/dev/null 2>&1; then
+    PYTHON=py
+elif which python3 >>/dev/null 2>&1; then
+    PYTHON=python3
+elif which python >>/dev/null 2>&1; then
+    if [ `python --version | awk '{print $2}' | cut -d. -f1` -ge 3 ]; then
+        PYTHON=python
+    fi
+fi
+
+echo
+echo "CommonMark specification:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/spec.txt" -p "$PROGRAM"
+
+echo
+echo "Code coverage & regressions:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/coverage.txt" -p "$PROGRAM"
+
+echo
+echo "Permissive e-mail autolinks extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-email-autolinks.txt" -p "$PROGRAM --fpermissive-email-autolinks"
+
+echo
+echo "Permissive URL autolinks extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-url-autolinks.txt" -p "$PROGRAM --fpermissive-url-autolinks"
+
+echo
+echo "WWW autolinks extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-www-autolinks.txt" -p "$PROGRAM --fpermissive-www-autolinks"
+
+echo
+echo "Tables extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/tables.txt" -p "$PROGRAM --ftables"
+
+echo
+echo "Strikethrough extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough"
+
+echo
+echo "Task lists extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/tasklists.txt" -p "$PROGRAM --ftasklists"
+
+echo
+echo "LaTeX extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/latex-math.txt" -p "$PROGRAM --flatex-math"
+
+echo
+echo "Wiki links extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/wiki-links.txt" -p "$PROGRAM --fwiki-links --ftables"
+
+echo
+echo "Underline extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/underline.txt" -p "$PROGRAM --funderline"
+
+echo
+echo "Heading auto identifiers extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/heading-auto-identifier.txt" -p "$PROGRAM --fheading-auto-id"
+
+echo
+echo "Pathological input:"
+$PYTHON "$TEST_DIR/pathological_tests.py" -p "$PROGRAM"
+
+echo
+echo "Heading auto identifiers pathological input:"
+$PYTHON "$TEST_DIR/pathological_auto_ident_tests.py" -p "$PROGRAM --fheading-auto-id"
+
+echo
+echo "Table of content extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc.txt" -p "$PROGRAM --table-of-content"
+
+echo
+echo "Table of content placement extension:"
+$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/toc-mark.txt" -p "$PROGRAM --table-of-content=[[__TOC__]]"

+ 1584 - 0
markdown.mod/md4c/scripts/unicode/CaseFolding.txt

@@ -0,0 +1,1584 @@
+# CaseFolding-13.0.0.txt
+# Date: 2019-09-08, 23:30:59 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# All code points not listed in this file map to themselves.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, including how to have case folding
+# preserve normalization formats, see Section 3.13 Default Case Algorithms in
+# The Unicode Standard.
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+#    - For non-Turkic languages, this mapping is normally not used.
+#    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+#      Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
+#      See the discussions of case mapping in the Unicode Standard for more information.
+#
+# Usage:
+#  A. To do a simple case folding, use the mappings with status C + S.
+#  B. To do a full case folding, use the mappings with status C + F.
+#
+#    The mappings with status T can be used or omitted depending on the desired case-folding
+#    behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+# Property: Case_Folding
+
+#  All code points not explicitly listed for Case_Folding
+#  have the value C for the status field, and the code point itself for the mapping field.
+
+# =================================================================
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
+023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
+023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
+023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
+0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
+0244; C; 0289; # LATIN CAPITAL LETTER U BAR
+0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
+0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
+0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
+024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
+024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0370; C; 0371; # GREEK CAPITAL LETTER HETA
+0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
+0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+037F; C; 03F3; # GREEK CAPITAL LETTER YOT
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
+03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
+03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
+03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
+0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
+0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
+0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
+0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
+051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
+051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
+051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
+0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
+052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
+052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
+052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
+10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
+10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
+10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
+10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
+10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
+10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
+10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
+10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
+10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
+10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
+10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
+10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
+10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
+10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
+10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
+10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
+10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
+10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
+10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
+10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
+10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
+10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
+10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
+10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
+10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
+10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
+10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
+10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
+10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
+10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
+10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
+10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
+10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
+10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
+10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
+10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
+10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
+10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
+13F8; C; 13F0; # CHEROKEE SMALL LETTER YE
+13F9; C; 13F1; # CHEROKEE SMALL LETTER YI
+13FA; C; 13F2; # CHEROKEE SMALL LETTER YO
+13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
+13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
+13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
+1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
+1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
+1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
+1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
+1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
+1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
+1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
+1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
+1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
+1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
+1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
+1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
+1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON
+1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN
+1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN
+1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN
+1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN
+1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN
+1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN
+1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS
+1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN
+1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR
+1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON
+1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR
+1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR
+1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE
+1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN
+1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR
+1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN
+1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR
+1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR
+1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN
+1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR
+1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN
+1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN
+1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN
+1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL
+1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL
+1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR
+1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN
+1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN
+1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE
+1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE
+1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE
+1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE
+1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR
+1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE
+1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI
+1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN
+1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI
+1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN
+1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN
+1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN
+1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
+1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
+1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2132; C; 214E; # TURNED CAPITAL F
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
+2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
+2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
+2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
+2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
+2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
+2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
+2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
+2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
+2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
+2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
+2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
+2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
+2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
+2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
+2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
+2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
+2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
+2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
+2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
+2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
+2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
+2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
+2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
+2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
+2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
+2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
+2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
+2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
+2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
+2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
+2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
+2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
+2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
+2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
+2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
+2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
+2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
+2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
+2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
+2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
+2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
+2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
+2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
+2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
+2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
+2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
+2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
+2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
+2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
+2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
+2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
+2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
+2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
+2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
+2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
+2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
+2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
+2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
+2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
+2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
+2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
+2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
+2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
+2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
+2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
+2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
+2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
+2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
+2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
+2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
+2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
+2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
+2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
+2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
+2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
+2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
+2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
+A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
+A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
+A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
+A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
+A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
+A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
+A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
+A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
+A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
+A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
+A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
+A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
+A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
+A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
+A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
+A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
+A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
+A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
+A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
+A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
+A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
+A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
+A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
+A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
+A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
+A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
+A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
+A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
+A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
+A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A726; C; A727; # LATIN CAPITAL LETTER HENG
+A728; C; A729; # LATIN CAPITAL LETTER TZ
+A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
+A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
+A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A732; C; A733; # LATIN CAPITAL LETTER AA
+A734; C; A735; # LATIN CAPITAL LETTER AO
+A736; C; A737; # LATIN CAPITAL LETTER AU
+A738; C; A739; # LATIN CAPITAL LETTER AV
+A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73C; C; A73D; # LATIN CAPITAL LETTER AY
+A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
+A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
+A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
+A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
+A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
+A74E; C; A74F; # LATIN CAPITAL LETTER OO
+A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
+A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
+A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
+A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A760; C; A761; # LATIN CAPITAL LETTER VY
+A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
+A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
+A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768; C; A769; # LATIN CAPITAL LETTER VEND
+A76A; C; A76B; # LATIN CAPITAL LETTER ET
+A76C; C; A76D; # LATIN CAPITAL LETTER IS
+A76E; C; A76F; # LATIN CAPITAL LETTER CON
+A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
+A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
+A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
+A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
+A780; C; A781; # LATIN CAPITAL LETTER TURNED L
+A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
+A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
+A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
+A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
+A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
+A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
+A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
+A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
+A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
+A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
+A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
+A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
+A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
+A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
+A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
+A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
+A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
+A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
+A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
+A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
+A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
+A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
+A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
+A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
+A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
+A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
+A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
+A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
+A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
+A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
+A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
+A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
+AB70; C; 13A0; # CHEROKEE SMALL LETTER A
+AB71; C; 13A1; # CHEROKEE SMALL LETTER E
+AB72; C; 13A2; # CHEROKEE SMALL LETTER I
+AB73; C; 13A3; # CHEROKEE SMALL LETTER O
+AB74; C; 13A4; # CHEROKEE SMALL LETTER U
+AB75; C; 13A5; # CHEROKEE SMALL LETTER V
+AB76; C; 13A6; # CHEROKEE SMALL LETTER GA
+AB77; C; 13A7; # CHEROKEE SMALL LETTER KA
+AB78; C; 13A8; # CHEROKEE SMALL LETTER GE
+AB79; C; 13A9; # CHEROKEE SMALL LETTER GI
+AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO
+AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU
+AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV
+AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA
+AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE
+AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI
+AB80; C; 13B0; # CHEROKEE SMALL LETTER HO
+AB81; C; 13B1; # CHEROKEE SMALL LETTER HU
+AB82; C; 13B2; # CHEROKEE SMALL LETTER HV
+AB83; C; 13B3; # CHEROKEE SMALL LETTER LA
+AB84; C; 13B4; # CHEROKEE SMALL LETTER LE
+AB85; C; 13B5; # CHEROKEE SMALL LETTER LI
+AB86; C; 13B6; # CHEROKEE SMALL LETTER LO
+AB87; C; 13B7; # CHEROKEE SMALL LETTER LU
+AB88; C; 13B8; # CHEROKEE SMALL LETTER LV
+AB89; C; 13B9; # CHEROKEE SMALL LETTER MA
+AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME
+AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI
+AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO
+AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU
+AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA
+AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA
+AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH
+AB91; C; 13C1; # CHEROKEE SMALL LETTER NE
+AB92; C; 13C2; # CHEROKEE SMALL LETTER NI
+AB93; C; 13C3; # CHEROKEE SMALL LETTER NO
+AB94; C; 13C4; # CHEROKEE SMALL LETTER NU
+AB95; C; 13C5; # CHEROKEE SMALL LETTER NV
+AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA
+AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE
+AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI
+AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO
+AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU
+AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV
+AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA
+AB9D; C; 13CD; # CHEROKEE SMALL LETTER S
+AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE
+AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI
+ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO
+ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU
+ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV
+ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA
+ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA
+ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE
+ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE
+ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI
+ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI
+ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO
+ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU
+ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV
+ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA
+ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA
+ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE
+ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI
+ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO
+ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU
+ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV
+ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA
+ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE
+ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI
+ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO
+ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU
+ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV
+ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA
+ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE
+ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI
+ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO
+ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU
+ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV
+ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
+10426; C; 1044E; # DESERET CAPITAL LETTER OI
+10427; C; 1044F; # DESERET CAPITAL LETTER EW
+104B0; C; 104D8; # OSAGE CAPITAL LETTER A
+104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
+104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
+104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
+104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
+104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
+104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
+104B7; C; 104DF; # OSAGE CAPITAL LETTER E
+104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
+104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
+104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
+104BB; C; 104E3; # OSAGE CAPITAL LETTER I
+104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
+104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
+104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
+104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
+104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
+104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
+104C2; C; 104EA; # OSAGE CAPITAL LETTER O
+104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
+104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
+104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
+104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
+104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
+104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
+104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
+104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
+104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
+104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
+104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
+104CE; C; 104F6; # OSAGE CAPITAL LETTER U
+104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
+104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
+104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
+104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
+104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
+10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
+10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
+10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
+10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB
+10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC
+10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC
+10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS
+10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED
+10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND
+10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E
+10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E
+10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE
+10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF
+10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG
+10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY
+10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH
+10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I
+10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II
+10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ
+10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK
+10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK
+10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK
+10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL
+10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY
+10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM
+10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN
+10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY
+10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O
+10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO
+10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
+10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
+10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE
+10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP
+10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP
+10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER
+10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER
+10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES
+10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ
+10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET
+10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT
+10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY
+10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH
+10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U
+10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU
+10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
+10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
+10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV
+10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ
+10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
+10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
+10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
+118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
+118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
+118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
+118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
+118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
+118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
+118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
+118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
+118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
+118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
+118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
+118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
+118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
+118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
+118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
+118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
+118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
+118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
+118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
+118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
+118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
+118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
+118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
+118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
+118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
+118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
+118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
+118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
+118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
+118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
+118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
+118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
+16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M
+16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S
+16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V
+16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W
+16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU
+16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z
+16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP
+16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P
+16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T
+16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G
+16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F
+16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I
+16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K
+16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A
+16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J
+16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E
+16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B
+16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C
+16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U
+16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU
+16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L
+16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q
+16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP
+16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY
+16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X
+16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D
+16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE
+16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N
+16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R
+16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
+16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
+16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
+1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
+1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
+1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
+1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
+1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
+1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
+1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
+1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
+1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
+1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
+1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
+1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
+1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
+1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
+1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
+1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
+1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
+1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
+1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
+1E913; C; 1E935; # ADLAM CAPITAL LETTER U
+1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
+1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
+1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
+1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
+1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
+1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
+1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
+1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
+1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
+1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
+1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
+1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
+1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
+1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
+#
+# EOF

+ 4100 - 0
markdown.mod/md4c/scripts/unicode/DerivedGeneralCategory.txt

@@ -0,0 +1,4100 @@
+# DerivedGeneralCategory-13.0.0.txt
+# Date: 2019-10-21, 14:30:32 GMT
+# © 2019 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+#   For documentation, see http://www.unicode.org/reports/tr44/
+
+# ================================================
+
+# Property:	General_Category
+
+# ================================================
+
+# General_Category=Unassigned
+
+0378..0379    ; Cn #   [2] <reserved-0378>..<reserved-0379>
+0380..0383    ; Cn #   [4] <reserved-0380>..<reserved-0383>
+038B          ; Cn #       <reserved-038B>
+038D          ; Cn #       <reserved-038D>
+03A2          ; Cn #       <reserved-03A2>
+0530          ; Cn #       <reserved-0530>
+0557..0558    ; Cn #   [2] <reserved-0557>..<reserved-0558>
+058B..058C    ; Cn #   [2] <reserved-058B>..<reserved-058C>
+0590          ; Cn #       <reserved-0590>
+05C8..05CF    ; Cn #   [8] <reserved-05C8>..<reserved-05CF>
+05EB..05EE    ; Cn #   [4] <reserved-05EB>..<reserved-05EE>
+05F5..05FF    ; Cn #  [11] <reserved-05F5>..<reserved-05FF>
+061D          ; Cn #       <reserved-061D>
+070E          ; Cn #       <reserved-070E>
+074B..074C    ; Cn #   [2] <reserved-074B>..<reserved-074C>
+07B2..07BF    ; Cn #  [14] <reserved-07B2>..<reserved-07BF>
+07FB..07FC    ; Cn #   [2] <reserved-07FB>..<reserved-07FC>
+082E..082F    ; Cn #   [2] <reserved-082E>..<reserved-082F>
+083F          ; Cn #       <reserved-083F>
+085C..085D    ; Cn #   [2] <reserved-085C>..<reserved-085D>
+085F          ; Cn #       <reserved-085F>
+086B..089F    ; Cn #  [53] <reserved-086B>..<reserved-089F>
+08B5          ; Cn #       <reserved-08B5>
+08C8..08D2    ; Cn #  [11] <reserved-08C8>..<reserved-08D2>
+0984          ; Cn #       <reserved-0984>
+098D..098E    ; Cn #   [2] <reserved-098D>..<reserved-098E>
+0991..0992    ; Cn #   [2] <reserved-0991>..<reserved-0992>
+09A9          ; Cn #       <reserved-09A9>
+09B1          ; Cn #       <reserved-09B1>
+09B3..09B5    ; Cn #   [3] <reserved-09B3>..<reserved-09B5>
+09BA..09BB    ; Cn #   [2] <reserved-09BA>..<reserved-09BB>
+09C5..09C6    ; Cn #   [2] <reserved-09C5>..<reserved-09C6>
+09C9..09CA    ; Cn #   [2] <reserved-09C9>..<reserved-09CA>
+09CF..09D6    ; Cn #   [8] <reserved-09CF>..<reserved-09D6>
+09D8..09DB    ; Cn #   [4] <reserved-09D8>..<reserved-09DB>
+09DE          ; Cn #       <reserved-09DE>
+09E4..09E5    ; Cn #   [2] <reserved-09E4>..<reserved-09E5>
+09FF..0A00    ; Cn #   [2] <reserved-09FF>..<reserved-0A00>
+0A04          ; Cn #       <reserved-0A04>
+0A0B..0A0E    ; Cn #   [4] <reserved-0A0B>..<reserved-0A0E>
+0A11..0A12    ; Cn #   [2] <reserved-0A11>..<reserved-0A12>
+0A29          ; Cn #       <reserved-0A29>
+0A31          ; Cn #       <reserved-0A31>
+0A34          ; Cn #       <reserved-0A34>
+0A37          ; Cn #       <reserved-0A37>
+0A3A..0A3B    ; Cn #   [2] <reserved-0A3A>..<reserved-0A3B>
+0A3D          ; Cn #       <reserved-0A3D>
+0A43..0A46    ; Cn #   [4] <reserved-0A43>..<reserved-0A46>
+0A49..0A4A    ; Cn #   [2] <reserved-0A49>..<reserved-0A4A>
+0A4E..0A50    ; Cn #   [3] <reserved-0A4E>..<reserved-0A50>
+0A52..0A58    ; Cn #   [7] <reserved-0A52>..<reserved-0A58>
+0A5D          ; Cn #       <reserved-0A5D>
+0A5F..0A65    ; Cn #   [7] <reserved-0A5F>..<reserved-0A65>
+0A77..0A80    ; Cn #  [10] <reserved-0A77>..<reserved-0A80>
+0A84          ; Cn #       <reserved-0A84>
+0A8E          ; Cn #       <reserved-0A8E>
+0A92          ; Cn #       <reserved-0A92>
+0AA9          ; Cn #       <reserved-0AA9>
+0AB1          ; Cn #       <reserved-0AB1>
+0AB4          ; Cn #       <reserved-0AB4>
+0ABA..0ABB    ; Cn #   [2] <reserved-0ABA>..<reserved-0ABB>
+0AC6          ; Cn #       <reserved-0AC6>
+0ACA          ; Cn #       <reserved-0ACA>
+0ACE..0ACF    ; Cn #   [2] <reserved-0ACE>..<reserved-0ACF>
+0AD1..0ADF    ; Cn #  [15] <reserved-0AD1>..<reserved-0ADF>
+0AE4..0AE5    ; Cn #   [2] <reserved-0AE4>..<reserved-0AE5>
+0AF2..0AF8    ; Cn #   [7] <reserved-0AF2>..<reserved-0AF8>
+0B00          ; Cn #       <reserved-0B00>
+0B04          ; Cn #       <reserved-0B04>
+0B0D..0B0E    ; Cn #   [2] <reserved-0B0D>..<reserved-0B0E>
+0B11..0B12    ; Cn #   [2] <reserved-0B11>..<reserved-0B12>
+0B29          ; Cn #       <reserved-0B29>
+0B31          ; Cn #       <reserved-0B31>
+0B34          ; Cn #       <reserved-0B34>
+0B3A..0B3B    ; Cn #   [2] <reserved-0B3A>..<reserved-0B3B>
+0B45..0B46    ; Cn #   [2] <reserved-0B45>..<reserved-0B46>
+0B49..0B4A    ; Cn #   [2] <reserved-0B49>..<reserved-0B4A>
+0B4E..0B54    ; Cn #   [7] <reserved-0B4E>..<reserved-0B54>
+0B58..0B5B    ; Cn #   [4] <reserved-0B58>..<reserved-0B5B>
+0B5E          ; Cn #       <reserved-0B5E>
+0B64..0B65    ; Cn #   [2] <reserved-0B64>..<reserved-0B65>
+0B78..0B81    ; Cn #  [10] <reserved-0B78>..<reserved-0B81>
+0B84          ; Cn #       <reserved-0B84>
+0B8B..0B8D    ; Cn #   [3] <reserved-0B8B>..<reserved-0B8D>
+0B91          ; Cn #       <reserved-0B91>
+0B96..0B98    ; Cn #   [3] <reserved-0B96>..<reserved-0B98>
+0B9B          ; Cn #       <reserved-0B9B>
+0B9D          ; Cn #       <reserved-0B9D>
+0BA0..0BA2    ; Cn #   [3] <reserved-0BA0>..<reserved-0BA2>
+0BA5..0BA7    ; Cn #   [3] <reserved-0BA5>..<reserved-0BA7>
+0BAB..0BAD    ; Cn #   [3] <reserved-0BAB>..<reserved-0BAD>
+0BBA..0BBD    ; Cn #   [4] <reserved-0BBA>..<reserved-0BBD>
+0BC3..0BC5    ; Cn #   [3] <reserved-0BC3>..<reserved-0BC5>
+0BC9          ; Cn #       <reserved-0BC9>
+0BCE..0BCF    ; Cn #   [2] <reserved-0BCE>..<reserved-0BCF>
+0BD1..0BD6    ; Cn #   [6] <reserved-0BD1>..<reserved-0BD6>
+0BD8..0BE5    ; Cn #  [14] <reserved-0BD8>..<reserved-0BE5>
+0BFB..0BFF    ; Cn #   [5] <reserved-0BFB>..<reserved-0BFF>
+0C0D          ; Cn #       <reserved-0C0D>
+0C11          ; Cn #       <reserved-0C11>
+0C29          ; Cn #       <reserved-0C29>
+0C3A..0C3C    ; Cn #   [3] <reserved-0C3A>..<reserved-0C3C>
+0C45          ; Cn #       <reserved-0C45>
+0C49          ; Cn #       <reserved-0C49>
+0C4E..0C54    ; Cn #   [7] <reserved-0C4E>..<reserved-0C54>
+0C57          ; Cn #       <reserved-0C57>
+0C5B..0C5F    ; Cn #   [5] <reserved-0C5B>..<reserved-0C5F>
+0C64..0C65    ; Cn #   [2] <reserved-0C64>..<reserved-0C65>
+0C70..0C76    ; Cn #   [7] <reserved-0C70>..<reserved-0C76>
+0C8D          ; Cn #       <reserved-0C8D>
+0C91          ; Cn #       <reserved-0C91>
+0CA9          ; Cn #       <reserved-0CA9>
+0CB4          ; Cn #       <reserved-0CB4>
+0CBA..0CBB    ; Cn #   [2] <reserved-0CBA>..<reserved-0CBB>
+0CC5          ; Cn #       <reserved-0CC5>
+0CC9          ; Cn #       <reserved-0CC9>
+0CCE..0CD4    ; Cn #   [7] <reserved-0CCE>..<reserved-0CD4>
+0CD7..0CDD    ; Cn #   [7] <reserved-0CD7>..<reserved-0CDD>
+0CDF          ; Cn #       <reserved-0CDF>
+0CE4..0CE5    ; Cn #   [2] <reserved-0CE4>..<reserved-0CE5>
+0CF0          ; Cn #       <reserved-0CF0>
+0CF3..0CFF    ; Cn #  [13] <reserved-0CF3>..<reserved-0CFF>
+0D0D          ; Cn #       <reserved-0D0D>
+0D11          ; Cn #       <reserved-0D11>
+0D45          ; Cn #       <reserved-0D45>
+0D49          ; Cn #       <reserved-0D49>
+0D50..0D53    ; Cn #   [4] <reserved-0D50>..<reserved-0D53>
+0D64..0D65    ; Cn #   [2] <reserved-0D64>..<reserved-0D65>
+0D80          ; Cn #       <reserved-0D80>
+0D84          ; Cn #       <reserved-0D84>
+0D97..0D99    ; Cn #   [3] <reserved-0D97>..<reserved-0D99>
+0DB2          ; Cn #       <reserved-0DB2>
+0DBC          ; Cn #       <reserved-0DBC>
+0DBE..0DBF    ; Cn #   [2] <reserved-0DBE>..<reserved-0DBF>
+0DC7..0DC9    ; Cn #   [3] <reserved-0DC7>..<reserved-0DC9>
+0DCB..0DCE    ; Cn #   [4] <reserved-0DCB>..<reserved-0DCE>
+0DD5          ; Cn #       <reserved-0DD5>
+0DD7          ; Cn #       <reserved-0DD7>
+0DE0..0DE5    ; Cn #   [6] <reserved-0DE0>..<reserved-0DE5>
+0DF0..0DF1    ; Cn #   [2] <reserved-0DF0>..<reserved-0DF1>
+0DF5..0E00    ; Cn #  [12] <reserved-0DF5>..<reserved-0E00>
+0E3B..0E3E    ; Cn #   [4] <reserved-0E3B>..<reserved-0E3E>
+0E5C..0E80    ; Cn #  [37] <reserved-0E5C>..<reserved-0E80>
+0E83          ; Cn #       <reserved-0E83>
+0E85          ; Cn #       <reserved-0E85>
+0E8B          ; Cn #       <reserved-0E8B>
+0EA4          ; Cn #       <reserved-0EA4>
+0EA6          ; Cn #       <reserved-0EA6>
+0EBE..0EBF    ; Cn #   [2] <reserved-0EBE>..<reserved-0EBF>
+0EC5          ; Cn #       <reserved-0EC5>
+0EC7          ; Cn #       <reserved-0EC7>
+0ECE..0ECF    ; Cn #   [2] <reserved-0ECE>..<reserved-0ECF>
+0EDA..0EDB    ; Cn #   [2] <reserved-0EDA>..<reserved-0EDB>
+0EE0..0EFF    ; Cn #  [32] <reserved-0EE0>..<reserved-0EFF>
+0F48          ; Cn #       <reserved-0F48>
+0F6D..0F70    ; Cn #   [4] <reserved-0F6D>..<reserved-0F70>
+0F98          ; Cn #       <reserved-0F98>
+0FBD          ; Cn #       <reserved-0FBD>
+0FCD          ; Cn #       <reserved-0FCD>
+0FDB..0FFF    ; Cn #  [37] <reserved-0FDB>..<reserved-0FFF>
+10C6          ; Cn #       <reserved-10C6>
+10C8..10CC    ; Cn #   [5] <reserved-10C8>..<reserved-10CC>
+10CE..10CF    ; Cn #   [2] <reserved-10CE>..<reserved-10CF>
+1249          ; Cn #       <reserved-1249>
+124E..124F    ; Cn #   [2] <reserved-124E>..<reserved-124F>
+1257          ; Cn #       <reserved-1257>
+1259          ; Cn #       <reserved-1259>
+125E..125F    ; Cn #   [2] <reserved-125E>..<reserved-125F>
+1289          ; Cn #       <reserved-1289>
+128E..128F    ; Cn #   [2] <reserved-128E>..<reserved-128F>
+12B1          ; Cn #       <reserved-12B1>
+12B6..12B7    ; Cn #   [2] <reserved-12B6>..<reserved-12B7>
+12BF          ; Cn #       <reserved-12BF>
+12C1          ; Cn #       <reserved-12C1>
+12C6..12C7    ; Cn #   [2] <reserved-12C6>..<reserved-12C7>
+12D7          ; Cn #       <reserved-12D7>
+1311          ; Cn #       <reserved-1311>
+1316..1317    ; Cn #   [2] <reserved-1316>..<reserved-1317>
+135B..135C    ; Cn #   [2] <reserved-135B>..<reserved-135C>
+137D..137F    ; Cn #   [3] <reserved-137D>..<reserved-137F>
+139A..139F    ; Cn #   [6] <reserved-139A>..<reserved-139F>
+13F6..13F7    ; Cn #   [2] <reserved-13F6>..<reserved-13F7>
+13FE..13FF    ; Cn #   [2] <reserved-13FE>..<reserved-13FF>
+169D..169F    ; Cn #   [3] <reserved-169D>..<reserved-169F>
+16F9..16FF    ; Cn #   [7] <reserved-16F9>..<reserved-16FF>
+170D          ; Cn #       <reserved-170D>
+1715..171F    ; Cn #  [11] <reserved-1715>..<reserved-171F>
+1737..173F    ; Cn #   [9] <reserved-1737>..<reserved-173F>
+1754..175F    ; Cn #  [12] <reserved-1754>..<reserved-175F>
+176D          ; Cn #       <reserved-176D>
+1771          ; Cn #       <reserved-1771>
+1774..177F    ; Cn #  [12] <reserved-1774>..<reserved-177F>
+17DE..17DF    ; Cn #   [2] <reserved-17DE>..<reserved-17DF>
+17EA..17EF    ; Cn #   [6] <reserved-17EA>..<reserved-17EF>
+17FA..17FF    ; Cn #   [6] <reserved-17FA>..<reserved-17FF>
+180F          ; Cn #       <reserved-180F>
+181A..181F    ; Cn #   [6] <reserved-181A>..<reserved-181F>
+1879..187F    ; Cn #   [7] <reserved-1879>..<reserved-187F>
+18AB..18AF    ; Cn #   [5] <reserved-18AB>..<reserved-18AF>
+18F6..18FF    ; Cn #  [10] <reserved-18F6>..<reserved-18FF>
+191F          ; Cn #       <reserved-191F>
+192C..192F    ; Cn #   [4] <reserved-192C>..<reserved-192F>
+193C..193F    ; Cn #   [4] <reserved-193C>..<reserved-193F>
+1941..1943    ; Cn #   [3] <reserved-1941>..<reserved-1943>
+196E..196F    ; Cn #   [2] <reserved-196E>..<reserved-196F>
+1975..197F    ; Cn #  [11] <reserved-1975>..<reserved-197F>
+19AC..19AF    ; Cn #   [4] <reserved-19AC>..<reserved-19AF>
+19CA..19CF    ; Cn #   [6] <reserved-19CA>..<reserved-19CF>
+19DB..19DD    ; Cn #   [3] <reserved-19DB>..<reserved-19DD>
+1A1C..1A1D    ; Cn #   [2] <reserved-1A1C>..<reserved-1A1D>
+1A5F          ; Cn #       <reserved-1A5F>
+1A7D..1A7E    ; Cn #   [2] <reserved-1A7D>..<reserved-1A7E>
+1A8A..1A8F    ; Cn #   [6] <reserved-1A8A>..<reserved-1A8F>
+1A9A..1A9F    ; Cn #   [6] <reserved-1A9A>..<reserved-1A9F>
+1AAE..1AAF    ; Cn #   [2] <reserved-1AAE>..<reserved-1AAF>
+1AC1..1AFF    ; Cn #  [63] <reserved-1AC1>..<reserved-1AFF>
+1B4C..1B4F    ; Cn #   [4] <reserved-1B4C>..<reserved-1B4F>
+1B7D..1B7F    ; Cn #   [3] <reserved-1B7D>..<reserved-1B7F>
+1BF4..1BFB    ; Cn #   [8] <reserved-1BF4>..<reserved-1BFB>
+1C38..1C3A    ; Cn #   [3] <reserved-1C38>..<reserved-1C3A>
+1C4A..1C4C    ; Cn #   [3] <reserved-1C4A>..<reserved-1C4C>
+1C89..1C8F    ; Cn #   [7] <reserved-1C89>..<reserved-1C8F>
+1CBB..1CBC    ; Cn #   [2] <reserved-1CBB>..<reserved-1CBC>
+1CC8..1CCF    ; Cn #   [8] <reserved-1CC8>..<reserved-1CCF>
+1CFB..1CFF    ; Cn #   [5] <reserved-1CFB>..<reserved-1CFF>
+1DFA          ; Cn #       <reserved-1DFA>
+1F16..1F17    ; Cn #   [2] <reserved-1F16>..<reserved-1F17>
+1F1E..1F1F    ; Cn #   [2] <reserved-1F1E>..<reserved-1F1F>
+1F46..1F47    ; Cn #   [2] <reserved-1F46>..<reserved-1F47>
+1F4E..1F4F    ; Cn #   [2] <reserved-1F4E>..<reserved-1F4F>
+1F58          ; Cn #       <reserved-1F58>
+1F5A          ; Cn #       <reserved-1F5A>
+1F5C          ; Cn #       <reserved-1F5C>
+1F5E          ; Cn #       <reserved-1F5E>
+1F7E..1F7F    ; Cn #   [2] <reserved-1F7E>..<reserved-1F7F>
+1FB5          ; Cn #       <reserved-1FB5>
+1FC5          ; Cn #       <reserved-1FC5>
+1FD4..1FD5    ; Cn #   [2] <reserved-1FD4>..<reserved-1FD5>
+1FDC          ; Cn #       <reserved-1FDC>
+1FF0..1FF1    ; Cn #   [2] <reserved-1FF0>..<reserved-1FF1>
+1FF5          ; Cn #       <reserved-1FF5>
+1FFF          ; Cn #       <reserved-1FFF>
+2065          ; Cn #       <reserved-2065>
+2072..2073    ; Cn #   [2] <reserved-2072>..<reserved-2073>
+208F          ; Cn #       <reserved-208F>
+209D..209F    ; Cn #   [3] <reserved-209D>..<reserved-209F>
+20C0..20CF    ; Cn #  [16] <reserved-20C0>..<reserved-20CF>
+20F1..20FF    ; Cn #  [15] <reserved-20F1>..<reserved-20FF>
+218C..218F    ; Cn #   [4] <reserved-218C>..<reserved-218F>
+2427..243F    ; Cn #  [25] <reserved-2427>..<reserved-243F>
+244B..245F    ; Cn #  [21] <reserved-244B>..<reserved-245F>
+2B74..2B75    ; Cn #   [2] <reserved-2B74>..<reserved-2B75>
+2B96          ; Cn #       <reserved-2B96>
+2C2F          ; Cn #       <reserved-2C2F>
+2C5F          ; Cn #       <reserved-2C5F>
+2CF4..2CF8    ; Cn #   [5] <reserved-2CF4>..<reserved-2CF8>
+2D26          ; Cn #       <reserved-2D26>
+2D28..2D2C    ; Cn #   [5] <reserved-2D28>..<reserved-2D2C>
+2D2E..2D2F    ; Cn #   [2] <reserved-2D2E>..<reserved-2D2F>
+2D68..2D6E    ; Cn #   [7] <reserved-2D68>..<reserved-2D6E>
+2D71..2D7E    ; Cn #  [14] <reserved-2D71>..<reserved-2D7E>
+2D97..2D9F    ; Cn #   [9] <reserved-2D97>..<reserved-2D9F>
+2DA7          ; Cn #       <reserved-2DA7>
+2DAF          ; Cn #       <reserved-2DAF>
+2DB7          ; Cn #       <reserved-2DB7>
+2DBF          ; Cn #       <reserved-2DBF>
+2DC7          ; Cn #       <reserved-2DC7>
+2DCF          ; Cn #       <reserved-2DCF>
+2DD7          ; Cn #       <reserved-2DD7>
+2DDF          ; Cn #       <reserved-2DDF>
+2E53..2E7F    ; Cn #  [45] <reserved-2E53>..<reserved-2E7F>
+2E9A          ; Cn #       <reserved-2E9A>
+2EF4..2EFF    ; Cn #  [12] <reserved-2EF4>..<reserved-2EFF>
+2FD6..2FEF    ; Cn #  [26] <reserved-2FD6>..<reserved-2FEF>
+2FFC..2FFF    ; Cn #   [4] <reserved-2FFC>..<reserved-2FFF>
+3040          ; Cn #       <reserved-3040>
+3097..3098    ; Cn #   [2] <reserved-3097>..<reserved-3098>
+3100..3104    ; Cn #   [5] <reserved-3100>..<reserved-3104>
+3130          ; Cn #       <reserved-3130>
+318F          ; Cn #       <reserved-318F>
+31E4..31EF    ; Cn #  [12] <reserved-31E4>..<reserved-31EF>
+321F          ; Cn #       <reserved-321F>
+9FFD..9FFF    ; Cn #   [3] <reserved-9FFD>..<reserved-9FFF>
+A48D..A48F    ; Cn #   [3] <reserved-A48D>..<reserved-A48F>
+A4C7..A4CF    ; Cn #   [9] <reserved-A4C7>..<reserved-A4CF>
+A62C..A63F    ; Cn #  [20] <reserved-A62C>..<reserved-A63F>
+A6F8..A6FF    ; Cn #   [8] <reserved-A6F8>..<reserved-A6FF>
+A7C0..A7C1    ; Cn #   [2] <reserved-A7C0>..<reserved-A7C1>
+A7CB..A7F4    ; Cn #  [42] <reserved-A7CB>..<reserved-A7F4>
+A82D..A82F    ; Cn #   [3] <reserved-A82D>..<reserved-A82F>
+A83A..A83F    ; Cn #   [6] <reserved-A83A>..<reserved-A83F>
+A878..A87F    ; Cn #   [8] <reserved-A878>..<reserved-A87F>
+A8C6..A8CD    ; Cn #   [8] <reserved-A8C6>..<reserved-A8CD>
+A8DA..A8DF    ; Cn #   [6] <reserved-A8DA>..<reserved-A8DF>
+A954..A95E    ; Cn #  [11] <reserved-A954>..<reserved-A95E>
+A97D..A97F    ; Cn #   [3] <reserved-A97D>..<reserved-A97F>
+A9CE          ; Cn #       <reserved-A9CE>
+A9DA..A9DD    ; Cn #   [4] <reserved-A9DA>..<reserved-A9DD>
+A9FF          ; Cn #       <reserved-A9FF>
+AA37..AA3F    ; Cn #   [9] <reserved-AA37>..<reserved-AA3F>
+AA4E..AA4F    ; Cn #   [2] <reserved-AA4E>..<reserved-AA4F>
+AA5A..AA5B    ; Cn #   [2] <reserved-AA5A>..<reserved-AA5B>
+AAC3..AADA    ; Cn #  [24] <reserved-AAC3>..<reserved-AADA>
+AAF7..AB00    ; Cn #  [10] <reserved-AAF7>..<reserved-AB00>
+AB07..AB08    ; Cn #   [2] <reserved-AB07>..<reserved-AB08>
+AB0F..AB10    ; Cn #   [2] <reserved-AB0F>..<reserved-AB10>
+AB17..AB1F    ; Cn #   [9] <reserved-AB17>..<reserved-AB1F>
+AB27          ; Cn #       <reserved-AB27>
+AB2F          ; Cn #       <reserved-AB2F>
+AB6C..AB6F    ; Cn #   [4] <reserved-AB6C>..<reserved-AB6F>
+ABEE..ABEF    ; Cn #   [2] <reserved-ABEE>..<reserved-ABEF>
+ABFA..ABFF    ; Cn #   [6] <reserved-ABFA>..<reserved-ABFF>
+D7A4..D7AF    ; Cn #  [12] <reserved-D7A4>..<reserved-D7AF>
+D7C7..D7CA    ; Cn #   [4] <reserved-D7C7>..<reserved-D7CA>
+D7FC..D7FF    ; Cn #   [4] <reserved-D7FC>..<reserved-D7FF>
+FA6E..FA6F    ; Cn #   [2] <reserved-FA6E>..<reserved-FA6F>
+FADA..FAFF    ; Cn #  [38] <reserved-FADA>..<reserved-FAFF>
+FB07..FB12    ; Cn #  [12] <reserved-FB07>..<reserved-FB12>
+FB18..FB1C    ; Cn #   [5] <reserved-FB18>..<reserved-FB1C>
+FB37          ; Cn #       <reserved-FB37>
+FB3D          ; Cn #       <reserved-FB3D>
+FB3F          ; Cn #       <reserved-FB3F>
+FB42          ; Cn #       <reserved-FB42>
+FB45          ; Cn #       <reserved-FB45>
+FBC2..FBD2    ; Cn #  [17] <reserved-FBC2>..<reserved-FBD2>
+FD40..FD4F    ; Cn #  [16] <reserved-FD40>..<reserved-FD4F>
+FD90..FD91    ; Cn #   [2] <reserved-FD90>..<reserved-FD91>
+FDC8..FDEF    ; Cn #  [40] <reserved-FDC8>..<noncharacter-FDEF>
+FDFE..FDFF    ; Cn #   [2] <reserved-FDFE>..<reserved-FDFF>
+FE1A..FE1F    ; Cn #   [6] <reserved-FE1A>..<reserved-FE1F>
+FE53          ; Cn #       <reserved-FE53>
+FE67          ; Cn #       <reserved-FE67>
+FE6C..FE6F    ; Cn #   [4] <reserved-FE6C>..<reserved-FE6F>
+FE75          ; Cn #       <reserved-FE75>
+FEFD..FEFE    ; Cn #   [2] <reserved-FEFD>..<reserved-FEFE>
+FF00          ; Cn #       <reserved-FF00>
+FFBF..FFC1    ; Cn #   [3] <reserved-FFBF>..<reserved-FFC1>
+FFC8..FFC9    ; Cn #   [2] <reserved-FFC8>..<reserved-FFC9>
+FFD0..FFD1    ; Cn #   [2] <reserved-FFD0>..<reserved-FFD1>
+FFD8..FFD9    ; Cn #   [2] <reserved-FFD8>..<reserved-FFD9>
+FFDD..FFDF    ; Cn #   [3] <reserved-FFDD>..<reserved-FFDF>
+FFE7          ; Cn #       <reserved-FFE7>
+FFEF..FFF8    ; Cn #  [10] <reserved-FFEF>..<reserved-FFF8>
+FFFE..FFFF    ; Cn #   [2] <noncharacter-FFFE>..<noncharacter-FFFF>
+1000C         ; Cn #       <reserved-1000C>
+10027         ; Cn #       <reserved-10027>
+1003B         ; Cn #       <reserved-1003B>
+1003E         ; Cn #       <reserved-1003E>
+1004E..1004F  ; Cn #   [2] <reserved-1004E>..<reserved-1004F>
+1005E..1007F  ; Cn #  [34] <reserved-1005E>..<reserved-1007F>
+100FB..100FF  ; Cn #   [5] <reserved-100FB>..<reserved-100FF>
+10103..10106  ; Cn #   [4] <reserved-10103>..<reserved-10106>
+10134..10136  ; Cn #   [3] <reserved-10134>..<reserved-10136>
+1018F         ; Cn #       <reserved-1018F>
+1019D..1019F  ; Cn #   [3] <reserved-1019D>..<reserved-1019F>
+101A1..101CF  ; Cn #  [47] <reserved-101A1>..<reserved-101CF>
+101FE..1027F  ; Cn # [130] <reserved-101FE>..<reserved-1027F>
+1029D..1029F  ; Cn #   [3] <reserved-1029D>..<reserved-1029F>
+102D1..102DF  ; Cn #  [15] <reserved-102D1>..<reserved-102DF>
+102FC..102FF  ; Cn #   [4] <reserved-102FC>..<reserved-102FF>
+10324..1032C  ; Cn #   [9] <reserved-10324>..<reserved-1032C>
+1034B..1034F  ; Cn #   [5] <reserved-1034B>..<reserved-1034F>
+1037B..1037F  ; Cn #   [5] <reserved-1037B>..<reserved-1037F>
+1039E         ; Cn #       <reserved-1039E>
+103C4..103C7  ; Cn #   [4] <reserved-103C4>..<reserved-103C7>
+103D6..103FF  ; Cn #  [42] <reserved-103D6>..<reserved-103FF>
+1049E..1049F  ; Cn #   [2] <reserved-1049E>..<reserved-1049F>
+104AA..104AF  ; Cn #   [6] <reserved-104AA>..<reserved-104AF>
+104D4..104D7  ; Cn #   [4] <reserved-104D4>..<reserved-104D7>
+104FC..104FF  ; Cn #   [4] <reserved-104FC>..<reserved-104FF>
+10528..1052F  ; Cn #   [8] <reserved-10528>..<reserved-1052F>
+10564..1056E  ; Cn #  [11] <reserved-10564>..<reserved-1056E>
+10570..105FF  ; Cn # [144] <reserved-10570>..<reserved-105FF>
+10737..1073F  ; Cn #   [9] <reserved-10737>..<reserved-1073F>
+10756..1075F  ; Cn #  [10] <reserved-10756>..<reserved-1075F>
+10768..107FF  ; Cn # [152] <reserved-10768>..<reserved-107FF>
+10806..10807  ; Cn #   [2] <reserved-10806>..<reserved-10807>
+10809         ; Cn #       <reserved-10809>
+10836         ; Cn #       <reserved-10836>
+10839..1083B  ; Cn #   [3] <reserved-10839>..<reserved-1083B>
+1083D..1083E  ; Cn #   [2] <reserved-1083D>..<reserved-1083E>
+10856         ; Cn #       <reserved-10856>
+1089F..108A6  ; Cn #   [8] <reserved-1089F>..<reserved-108A6>
+108B0..108DF  ; Cn #  [48] <reserved-108B0>..<reserved-108DF>
+108F3         ; Cn #       <reserved-108F3>
+108F6..108FA  ; Cn #   [5] <reserved-108F6>..<reserved-108FA>
+1091C..1091E  ; Cn #   [3] <reserved-1091C>..<reserved-1091E>
+1093A..1093E  ; Cn #   [5] <reserved-1093A>..<reserved-1093E>
+10940..1097F  ; Cn #  [64] <reserved-10940>..<reserved-1097F>
+109B8..109BB  ; Cn #   [4] <reserved-109B8>..<reserved-109BB>
+109D0..109D1  ; Cn #   [2] <reserved-109D0>..<reserved-109D1>
+10A04         ; Cn #       <reserved-10A04>
+10A07..10A0B  ; Cn #   [5] <reserved-10A07>..<reserved-10A0B>
+10A14         ; Cn #       <reserved-10A14>
+10A18         ; Cn #       <reserved-10A18>
+10A36..10A37  ; Cn #   [2] <reserved-10A36>..<reserved-10A37>
+10A3B..10A3E  ; Cn #   [4] <reserved-10A3B>..<reserved-10A3E>
+10A49..10A4F  ; Cn #   [7] <reserved-10A49>..<reserved-10A4F>
+10A59..10A5F  ; Cn #   [7] <reserved-10A59>..<reserved-10A5F>
+10AA0..10ABF  ; Cn #  [32] <reserved-10AA0>..<reserved-10ABF>
+10AE7..10AEA  ; Cn #   [4] <reserved-10AE7>..<reserved-10AEA>
+10AF7..10AFF  ; Cn #   [9] <reserved-10AF7>..<reserved-10AFF>
+10B36..10B38  ; Cn #   [3] <reserved-10B36>..<reserved-10B38>
+10B56..10B57  ; Cn #   [2] <reserved-10B56>..<reserved-10B57>
+10B73..10B77  ; Cn #   [5] <reserved-10B73>..<reserved-10B77>
+10B92..10B98  ; Cn #   [7] <reserved-10B92>..<reserved-10B98>
+10B9D..10BA8  ; Cn #  [12] <reserved-10B9D>..<reserved-10BA8>
+10BB0..10BFF  ; Cn #  [80] <reserved-10BB0>..<reserved-10BFF>
+10C49..10C7F  ; Cn #  [55] <reserved-10C49>..<reserved-10C7F>
+10CB3..10CBF  ; Cn #  [13] <reserved-10CB3>..<reserved-10CBF>
+10CF3..10CF9  ; Cn #   [7] <reserved-10CF3>..<reserved-10CF9>
+10D28..10D2F  ; Cn #   [8] <reserved-10D28>..<reserved-10D2F>
+10D3A..10E5F  ; Cn # [294] <reserved-10D3A>..<reserved-10E5F>
+10E7F         ; Cn #       <reserved-10E7F>
+10EAA         ; Cn #       <reserved-10EAA>
+10EAE..10EAF  ; Cn #   [2] <reserved-10EAE>..<reserved-10EAF>
+10EB2..10EFF  ; Cn #  [78] <reserved-10EB2>..<reserved-10EFF>
+10F28..10F2F  ; Cn #   [8] <reserved-10F28>..<reserved-10F2F>
+10F5A..10FAF  ; Cn #  [86] <reserved-10F5A>..<reserved-10FAF>
+10FCC..10FDF  ; Cn #  [20] <reserved-10FCC>..<reserved-10FDF>
+10FF7..10FFF  ; Cn #   [9] <reserved-10FF7>..<reserved-10FFF>
+1104E..11051  ; Cn #   [4] <reserved-1104E>..<reserved-11051>
+11070..1107E  ; Cn #  [15] <reserved-11070>..<reserved-1107E>
+110C2..110CC  ; Cn #  [11] <reserved-110C2>..<reserved-110CC>
+110CE..110CF  ; Cn #   [2] <reserved-110CE>..<reserved-110CF>
+110E9..110EF  ; Cn #   [7] <reserved-110E9>..<reserved-110EF>
+110FA..110FF  ; Cn #   [6] <reserved-110FA>..<reserved-110FF>
+11135         ; Cn #       <reserved-11135>
+11148..1114F  ; Cn #   [8] <reserved-11148>..<reserved-1114F>
+11177..1117F  ; Cn #   [9] <reserved-11177>..<reserved-1117F>
+111E0         ; Cn #       <reserved-111E0>
+111F5..111FF  ; Cn #  [11] <reserved-111F5>..<reserved-111FF>
+11212         ; Cn #       <reserved-11212>
+1123F..1127F  ; Cn #  [65] <reserved-1123F>..<reserved-1127F>
+11287         ; Cn #       <reserved-11287>
+11289         ; Cn #       <reserved-11289>
+1128E         ; Cn #       <reserved-1128E>
+1129E         ; Cn #       <reserved-1129E>
+112AA..112AF  ; Cn #   [6] <reserved-112AA>..<reserved-112AF>
+112EB..112EF  ; Cn #   [5] <reserved-112EB>..<reserved-112EF>
+112FA..112FF  ; Cn #   [6] <reserved-112FA>..<reserved-112FF>
+11304         ; Cn #       <reserved-11304>
+1130D..1130E  ; Cn #   [2] <reserved-1130D>..<reserved-1130E>
+11311..11312  ; Cn #   [2] <reserved-11311>..<reserved-11312>
+11329         ; Cn #       <reserved-11329>
+11331         ; Cn #       <reserved-11331>
+11334         ; Cn #       <reserved-11334>
+1133A         ; Cn #       <reserved-1133A>
+11345..11346  ; Cn #   [2] <reserved-11345>..<reserved-11346>
+11349..1134A  ; Cn #   [2] <reserved-11349>..<reserved-1134A>
+1134E..1134F  ; Cn #   [2] <reserved-1134E>..<reserved-1134F>
+11351..11356  ; Cn #   [6] <reserved-11351>..<reserved-11356>
+11358..1135C  ; Cn #   [5] <reserved-11358>..<reserved-1135C>
+11364..11365  ; Cn #   [2] <reserved-11364>..<reserved-11365>
+1136D..1136F  ; Cn #   [3] <reserved-1136D>..<reserved-1136F>
+11375..113FF  ; Cn # [139] <reserved-11375>..<reserved-113FF>
+1145C         ; Cn #       <reserved-1145C>
+11462..1147F  ; Cn #  [30] <reserved-11462>..<reserved-1147F>
+114C8..114CF  ; Cn #   [8] <reserved-114C8>..<reserved-114CF>
+114DA..1157F  ; Cn # [166] <reserved-114DA>..<reserved-1157F>
+115B6..115B7  ; Cn #   [2] <reserved-115B6>..<reserved-115B7>
+115DE..115FF  ; Cn #  [34] <reserved-115DE>..<reserved-115FF>
+11645..1164F  ; Cn #  [11] <reserved-11645>..<reserved-1164F>
+1165A..1165F  ; Cn #   [6] <reserved-1165A>..<reserved-1165F>
+1166D..1167F  ; Cn #  [19] <reserved-1166D>..<reserved-1167F>
+116B9..116BF  ; Cn #   [7] <reserved-116B9>..<reserved-116BF>
+116CA..116FF  ; Cn #  [54] <reserved-116CA>..<reserved-116FF>
+1171B..1171C  ; Cn #   [2] <reserved-1171B>..<reserved-1171C>
+1172C..1172F  ; Cn #   [4] <reserved-1172C>..<reserved-1172F>
+11740..117FF  ; Cn # [192] <reserved-11740>..<reserved-117FF>
+1183C..1189F  ; Cn # [100] <reserved-1183C>..<reserved-1189F>
+118F3..118FE  ; Cn #  [12] <reserved-118F3>..<reserved-118FE>
+11907..11908  ; Cn #   [2] <reserved-11907>..<reserved-11908>
+1190A..1190B  ; Cn #   [2] <reserved-1190A>..<reserved-1190B>
+11914         ; Cn #       <reserved-11914>
+11917         ; Cn #       <reserved-11917>
+11936         ; Cn #       <reserved-11936>
+11939..1193A  ; Cn #   [2] <reserved-11939>..<reserved-1193A>
+11947..1194F  ; Cn #   [9] <reserved-11947>..<reserved-1194F>
+1195A..1199F  ; Cn #  [70] <reserved-1195A>..<reserved-1199F>
+119A8..119A9  ; Cn #   [2] <reserved-119A8>..<reserved-119A9>
+119D8..119D9  ; Cn #   [2] <reserved-119D8>..<reserved-119D9>
+119E5..119FF  ; Cn #  [27] <reserved-119E5>..<reserved-119FF>
+11A48..11A4F  ; Cn #   [8] <reserved-11A48>..<reserved-11A4F>
+11AA3..11ABF  ; Cn #  [29] <reserved-11AA3>..<reserved-11ABF>
+11AF9..11BFF  ; Cn # [263] <reserved-11AF9>..<reserved-11BFF>
+11C09         ; Cn #       <reserved-11C09>
+11C37         ; Cn #       <reserved-11C37>
+11C46..11C4F  ; Cn #  [10] <reserved-11C46>..<reserved-11C4F>
+11C6D..11C6F  ; Cn #   [3] <reserved-11C6D>..<reserved-11C6F>
+11C90..11C91  ; Cn #   [2] <reserved-11C90>..<reserved-11C91>
+11CA8         ; Cn #       <reserved-11CA8>
+11CB7..11CFF  ; Cn #  [73] <reserved-11CB7>..<reserved-11CFF>
+11D07         ; Cn #       <reserved-11D07>
+11D0A         ; Cn #       <reserved-11D0A>
+11D37..11D39  ; Cn #   [3] <reserved-11D37>..<reserved-11D39>
+11D3B         ; Cn #       <reserved-11D3B>
+11D3E         ; Cn #       <reserved-11D3E>
+11D48..11D4F  ; Cn #   [8] <reserved-11D48>..<reserved-11D4F>
+11D5A..11D5F  ; Cn #   [6] <reserved-11D5A>..<reserved-11D5F>
+11D66         ; Cn #       <reserved-11D66>
+11D69         ; Cn #       <reserved-11D69>
+11D8F         ; Cn #       <reserved-11D8F>
+11D92         ; Cn #       <reserved-11D92>
+11D99..11D9F  ; Cn #   [7] <reserved-11D99>..<reserved-11D9F>
+11DAA..11EDF  ; Cn # [310] <reserved-11DAA>..<reserved-11EDF>
+11EF9..11FAF  ; Cn # [183] <reserved-11EF9>..<reserved-11FAF>
+11FB1..11FBF  ; Cn #  [15] <reserved-11FB1>..<reserved-11FBF>
+11FF2..11FFE  ; Cn #  [13] <reserved-11FF2>..<reserved-11FFE>
+1239A..123FF  ; Cn # [102] <reserved-1239A>..<reserved-123FF>
+1246F         ; Cn #       <reserved-1246F>
+12475..1247F  ; Cn #  [11] <reserved-12475>..<reserved-1247F>
+12544..12FFF  ; Cn # [2748] <reserved-12544>..<reserved-12FFF>
+1342F         ; Cn #       <reserved-1342F>
+13439..143FF  ; Cn # [4039] <reserved-13439>..<reserved-143FF>
+14647..167FF  ; Cn # [8633] <reserved-14647>..<reserved-167FF>
+16A39..16A3F  ; Cn #   [7] <reserved-16A39>..<reserved-16A3F>
+16A5F         ; Cn #       <reserved-16A5F>
+16A6A..16A6D  ; Cn #   [4] <reserved-16A6A>..<reserved-16A6D>
+16A70..16ACF  ; Cn #  [96] <reserved-16A70>..<reserved-16ACF>
+16AEE..16AEF  ; Cn #   [2] <reserved-16AEE>..<reserved-16AEF>
+16AF6..16AFF  ; Cn #  [10] <reserved-16AF6>..<reserved-16AFF>
+16B46..16B4F  ; Cn #  [10] <reserved-16B46>..<reserved-16B4F>
+16B5A         ; Cn #       <reserved-16B5A>
+16B62         ; Cn #       <reserved-16B62>
+16B78..16B7C  ; Cn #   [5] <reserved-16B78>..<reserved-16B7C>
+16B90..16E3F  ; Cn # [688] <reserved-16B90>..<reserved-16E3F>
+16E9B..16EFF  ; Cn # [101] <reserved-16E9B>..<reserved-16EFF>
+16F4B..16F4E  ; Cn #   [4] <reserved-16F4B>..<reserved-16F4E>
+16F88..16F8E  ; Cn #   [7] <reserved-16F88>..<reserved-16F8E>
+16FA0..16FDF  ; Cn #  [64] <reserved-16FA0>..<reserved-16FDF>
+16FE5..16FEF  ; Cn #  [11] <reserved-16FE5>..<reserved-16FEF>
+16FF2..16FFF  ; Cn #  [14] <reserved-16FF2>..<reserved-16FFF>
+187F8..187FF  ; Cn #   [8] <reserved-187F8>..<reserved-187FF>
+18CD6..18CFF  ; Cn #  [42] <reserved-18CD6>..<reserved-18CFF>
+18D09..1AFFF  ; Cn # [8951] <reserved-18D09>..<reserved-1AFFF>
+1B11F..1B14F  ; Cn #  [49] <reserved-1B11F>..<reserved-1B14F>
+1B153..1B163  ; Cn #  [17] <reserved-1B153>..<reserved-1B163>
+1B168..1B16F  ; Cn #   [8] <reserved-1B168>..<reserved-1B16F>
+1B2FC..1BBFF  ; Cn # [2308] <reserved-1B2FC>..<reserved-1BBFF>
+1BC6B..1BC6F  ; Cn #   [5] <reserved-1BC6B>..<reserved-1BC6F>
+1BC7D..1BC7F  ; Cn #   [3] <reserved-1BC7D>..<reserved-1BC7F>
+1BC89..1BC8F  ; Cn #   [7] <reserved-1BC89>..<reserved-1BC8F>
+1BC9A..1BC9B  ; Cn #   [2] <reserved-1BC9A>..<reserved-1BC9B>
+1BCA4..1CFFF  ; Cn # [4956] <reserved-1BCA4>..<reserved-1CFFF>
+1D0F6..1D0FF  ; Cn #  [10] <reserved-1D0F6>..<reserved-1D0FF>
+1D127..1D128  ; Cn #   [2] <reserved-1D127>..<reserved-1D128>
+1D1E9..1D1FF  ; Cn #  [23] <reserved-1D1E9>..<reserved-1D1FF>
+1D246..1D2DF  ; Cn # [154] <reserved-1D246>..<reserved-1D2DF>
+1D2F4..1D2FF  ; Cn #  [12] <reserved-1D2F4>..<reserved-1D2FF>
+1D357..1D35F  ; Cn #   [9] <reserved-1D357>..<reserved-1D35F>
+1D379..1D3FF  ; Cn # [135] <reserved-1D379>..<reserved-1D3FF>
+1D455         ; Cn #       <reserved-1D455>
+1D49D         ; Cn #       <reserved-1D49D>
+1D4A0..1D4A1  ; Cn #   [2] <reserved-1D4A0>..<reserved-1D4A1>
+1D4A3..1D4A4  ; Cn #   [2] <reserved-1D4A3>..<reserved-1D4A4>
+1D4A7..1D4A8  ; Cn #   [2] <reserved-1D4A7>..<reserved-1D4A8>
+1D4AD         ; Cn #       <reserved-1D4AD>
+1D4BA         ; Cn #       <reserved-1D4BA>
+1D4BC         ; Cn #       <reserved-1D4BC>
+1D4C4         ; Cn #       <reserved-1D4C4>
+1D506         ; Cn #       <reserved-1D506>
+1D50B..1D50C  ; Cn #   [2] <reserved-1D50B>..<reserved-1D50C>
+1D515         ; Cn #       <reserved-1D515>
+1D51D         ; Cn #       <reserved-1D51D>
+1D53A         ; Cn #       <reserved-1D53A>
+1D53F         ; Cn #       <reserved-1D53F>
+1D545         ; Cn #       <reserved-1D545>
+1D547..1D549  ; Cn #   [3] <reserved-1D547>..<reserved-1D549>
+1D551         ; Cn #       <reserved-1D551>
+1D6A6..1D6A7  ; Cn #   [2] <reserved-1D6A6>..<reserved-1D6A7>
+1D7CC..1D7CD  ; Cn #   [2] <reserved-1D7CC>..<reserved-1D7CD>
+1DA8C..1DA9A  ; Cn #  [15] <reserved-1DA8C>..<reserved-1DA9A>
+1DAA0         ; Cn #       <reserved-1DAA0>
+1DAB0..1DFFF  ; Cn # [1360] <reserved-1DAB0>..<reserved-1DFFF>
+1E007         ; Cn #       <reserved-1E007>
+1E019..1E01A  ; Cn #   [2] <reserved-1E019>..<reserved-1E01A>
+1E022         ; Cn #       <reserved-1E022>
+1E025         ; Cn #       <reserved-1E025>
+1E02B..1E0FF  ; Cn # [213] <reserved-1E02B>..<reserved-1E0FF>
+1E12D..1E12F  ; Cn #   [3] <reserved-1E12D>..<reserved-1E12F>
+1E13E..1E13F  ; Cn #   [2] <reserved-1E13E>..<reserved-1E13F>
+1E14A..1E14D  ; Cn #   [4] <reserved-1E14A>..<reserved-1E14D>
+1E150..1E2BF  ; Cn # [368] <reserved-1E150>..<reserved-1E2BF>
+1E2FA..1E2FE  ; Cn #   [5] <reserved-1E2FA>..<reserved-1E2FE>
+1E300..1E7FF  ; Cn # [1280] <reserved-1E300>..<reserved-1E7FF>
+1E8C5..1E8C6  ; Cn #   [2] <reserved-1E8C5>..<reserved-1E8C6>
+1E8D7..1E8FF  ; Cn #  [41] <reserved-1E8D7>..<reserved-1E8FF>
+1E94C..1E94F  ; Cn #   [4] <reserved-1E94C>..<reserved-1E94F>
+1E95A..1E95D  ; Cn #   [4] <reserved-1E95A>..<reserved-1E95D>
+1E960..1EC70  ; Cn # [785] <reserved-1E960>..<reserved-1EC70>
+1ECB5..1ED00  ; Cn #  [76] <reserved-1ECB5>..<reserved-1ED00>
+1ED3E..1EDFF  ; Cn # [194] <reserved-1ED3E>..<reserved-1EDFF>
+1EE04         ; Cn #       <reserved-1EE04>
+1EE20         ; Cn #       <reserved-1EE20>
+1EE23         ; Cn #       <reserved-1EE23>
+1EE25..1EE26  ; Cn #   [2] <reserved-1EE25>..<reserved-1EE26>
+1EE28         ; Cn #       <reserved-1EE28>
+1EE33         ; Cn #       <reserved-1EE33>
+1EE38         ; Cn #       <reserved-1EE38>
+1EE3A         ; Cn #       <reserved-1EE3A>
+1EE3C..1EE41  ; Cn #   [6] <reserved-1EE3C>..<reserved-1EE41>
+1EE43..1EE46  ; Cn #   [4] <reserved-1EE43>..<reserved-1EE46>
+1EE48         ; Cn #       <reserved-1EE48>
+1EE4A         ; Cn #       <reserved-1EE4A>
+1EE4C         ; Cn #       <reserved-1EE4C>
+1EE50         ; Cn #       <reserved-1EE50>
+1EE53         ; Cn #       <reserved-1EE53>
+1EE55..1EE56  ; Cn #   [2] <reserved-1EE55>..<reserved-1EE56>
+1EE58         ; Cn #       <reserved-1EE58>
+1EE5A         ; Cn #       <reserved-1EE5A>
+1EE5C         ; Cn #       <reserved-1EE5C>
+1EE5E         ; Cn #       <reserved-1EE5E>
+1EE60         ; Cn #       <reserved-1EE60>
+1EE63         ; Cn #       <reserved-1EE63>
+1EE65..1EE66  ; Cn #   [2] <reserved-1EE65>..<reserved-1EE66>
+1EE6B         ; Cn #       <reserved-1EE6B>
+1EE73         ; Cn #       <reserved-1EE73>
+1EE78         ; Cn #       <reserved-1EE78>
+1EE7D         ; Cn #       <reserved-1EE7D>
+1EE7F         ; Cn #       <reserved-1EE7F>
+1EE8A         ; Cn #       <reserved-1EE8A>
+1EE9C..1EEA0  ; Cn #   [5] <reserved-1EE9C>..<reserved-1EEA0>
+1EEA4         ; Cn #       <reserved-1EEA4>
+1EEAA         ; Cn #       <reserved-1EEAA>
+1EEBC..1EEEF  ; Cn #  [52] <reserved-1EEBC>..<reserved-1EEEF>
+1EEF2..1EFFF  ; Cn # [270] <reserved-1EEF2>..<reserved-1EFFF>
+1F02C..1F02F  ; Cn #   [4] <reserved-1F02C>..<reserved-1F02F>
+1F094..1F09F  ; Cn #  [12] <reserved-1F094>..<reserved-1F09F>
+1F0AF..1F0B0  ; Cn #   [2] <reserved-1F0AF>..<reserved-1F0B0>
+1F0C0         ; Cn #       <reserved-1F0C0>
+1F0D0         ; Cn #       <reserved-1F0D0>
+1F0F6..1F0FF  ; Cn #  [10] <reserved-1F0F6>..<reserved-1F0FF>
+1F1AE..1F1E5  ; Cn #  [56] <reserved-1F1AE>..<reserved-1F1E5>
+1F203..1F20F  ; Cn #  [13] <reserved-1F203>..<reserved-1F20F>
+1F23C..1F23F  ; Cn #   [4] <reserved-1F23C>..<reserved-1F23F>
+1F249..1F24F  ; Cn #   [7] <reserved-1F249>..<reserved-1F24F>
+1F252..1F25F  ; Cn #  [14] <reserved-1F252>..<reserved-1F25F>
+1F266..1F2FF  ; Cn # [154] <reserved-1F266>..<reserved-1F2FF>
+1F6D8..1F6DF  ; Cn #   [8] <reserved-1F6D8>..<reserved-1F6DF>
+1F6ED..1F6EF  ; Cn #   [3] <reserved-1F6ED>..<reserved-1F6EF>
+1F6FD..1F6FF  ; Cn #   [3] <reserved-1F6FD>..<reserved-1F6FF>
+1F774..1F77F  ; Cn #  [12] <reserved-1F774>..<reserved-1F77F>
+1F7D9..1F7DF  ; Cn #   [7] <reserved-1F7D9>..<reserved-1F7DF>
+1F7EC..1F7FF  ; Cn #  [20] <reserved-1F7EC>..<reserved-1F7FF>
+1F80C..1F80F  ; Cn #   [4] <reserved-1F80C>..<reserved-1F80F>
+1F848..1F84F  ; Cn #   [8] <reserved-1F848>..<reserved-1F84F>
+1F85A..1F85F  ; Cn #   [6] <reserved-1F85A>..<reserved-1F85F>
+1F888..1F88F  ; Cn #   [8] <reserved-1F888>..<reserved-1F88F>
+1F8AE..1F8AF  ; Cn #   [2] <reserved-1F8AE>..<reserved-1F8AF>
+1F8B2..1F8FF  ; Cn #  [78] <reserved-1F8B2>..<reserved-1F8FF>
+1F979         ; Cn #       <reserved-1F979>
+1F9CC         ; Cn #       <reserved-1F9CC>
+1FA54..1FA5F  ; Cn #  [12] <reserved-1FA54>..<reserved-1FA5F>
+1FA6E..1FA6F  ; Cn #   [2] <reserved-1FA6E>..<reserved-1FA6F>
+1FA75..1FA77  ; Cn #   [3] <reserved-1FA75>..<reserved-1FA77>
+1FA7B..1FA7F  ; Cn #   [5] <reserved-1FA7B>..<reserved-1FA7F>
+1FA87..1FA8F  ; Cn #   [9] <reserved-1FA87>..<reserved-1FA8F>
+1FAA9..1FAAF  ; Cn #   [7] <reserved-1FAA9>..<reserved-1FAAF>
+1FAB7..1FABF  ; Cn #   [9] <reserved-1FAB7>..<reserved-1FABF>
+1FAC3..1FACF  ; Cn #  [13] <reserved-1FAC3>..<reserved-1FACF>
+1FAD7..1FAFF  ; Cn #  [41] <reserved-1FAD7>..<reserved-1FAFF>
+1FB93         ; Cn #       <reserved-1FB93>
+1FBCB..1FBEF  ; Cn #  [37] <reserved-1FBCB>..<reserved-1FBEF>
+1FBFA..1FFFF  ; Cn # [1030] <reserved-1FBFA>..<noncharacter-1FFFF>
+2A6DE..2A6FF  ; Cn #  [34] <reserved-2A6DE>..<reserved-2A6FF>
+2B735..2B73F  ; Cn #  [11] <reserved-2B735>..<reserved-2B73F>
+2B81E..2B81F  ; Cn #   [2] <reserved-2B81E>..<reserved-2B81F>
+2CEA2..2CEAF  ; Cn #  [14] <reserved-2CEA2>..<reserved-2CEAF>
+2EBE1..2F7FF  ; Cn # [3103] <reserved-2EBE1>..<reserved-2F7FF>
+2FA1E..2FFFF  ; Cn # [1506] <reserved-2FA1E>..<noncharacter-2FFFF>
+3134B..E0000  ; Cn # [715958] <reserved-3134B>..<reserved-E0000>
+E0002..E001F  ; Cn #  [30] <reserved-E0002>..<reserved-E001F>
+E0080..E00FF  ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
+E01F0..EFFFF  ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
+FFFFE..FFFFF  ; Cn #   [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
+10FFFE..10FFFF; Cn #   [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
+
+# Total code points: 830672
+
+# ================================================
+
+# General_Category=Uppercase_Letter
+
+0041..005A    ; Lu #  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
+00C0..00D6    ; Lu #  [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8..00DE    ; Lu #   [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN
+0100          ; Lu #       LATIN CAPITAL LETTER A WITH MACRON
+0102          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE
+0104          ; Lu #       LATIN CAPITAL LETTER A WITH OGONEK
+0106          ; Lu #       LATIN CAPITAL LETTER C WITH ACUTE
+0108          ; Lu #       LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A          ; Lu #       LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C          ; Lu #       LATIN CAPITAL LETTER C WITH CARON
+010E          ; Lu #       LATIN CAPITAL LETTER D WITH CARON
+0110          ; Lu #       LATIN CAPITAL LETTER D WITH STROKE
+0112          ; Lu #       LATIN CAPITAL LETTER E WITH MACRON
+0114          ; Lu #       LATIN CAPITAL LETTER E WITH BREVE
+0116          ; Lu #       LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118          ; Lu #       LATIN CAPITAL LETTER E WITH OGONEK
+011A          ; Lu #       LATIN CAPITAL LETTER E WITH CARON
+011C          ; Lu #       LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E          ; Lu #       LATIN CAPITAL LETTER G WITH BREVE
+0120          ; Lu #       LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122          ; Lu #       LATIN CAPITAL LETTER G WITH CEDILLA
+0124          ; Lu #       LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126          ; Lu #       LATIN CAPITAL LETTER H WITH STROKE
+0128          ; Lu #       LATIN CAPITAL LETTER I WITH TILDE
+012A          ; Lu #       LATIN CAPITAL LETTER I WITH MACRON
+012C          ; Lu #       LATIN CAPITAL LETTER I WITH BREVE
+012E          ; Lu #       LATIN CAPITAL LETTER I WITH OGONEK
+0130          ; Lu #       LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132          ; Lu #       LATIN CAPITAL LIGATURE IJ
+0134          ; Lu #       LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136          ; Lu #       LATIN CAPITAL LETTER K WITH CEDILLA
+0139          ; Lu #       LATIN CAPITAL LETTER L WITH ACUTE
+013B          ; Lu #       LATIN CAPITAL LETTER L WITH CEDILLA
+013D          ; Lu #       LATIN CAPITAL LETTER L WITH CARON
+013F          ; Lu #       LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141          ; Lu #       LATIN CAPITAL LETTER L WITH STROKE
+0143          ; Lu #       LATIN CAPITAL LETTER N WITH ACUTE
+0145          ; Lu #       LATIN CAPITAL LETTER N WITH CEDILLA
+0147          ; Lu #       LATIN CAPITAL LETTER N WITH CARON
+014A          ; Lu #       LATIN CAPITAL LETTER ENG
+014C          ; Lu #       LATIN CAPITAL LETTER O WITH MACRON
+014E          ; Lu #       LATIN CAPITAL LETTER O WITH BREVE
+0150          ; Lu #       LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152          ; Lu #       LATIN CAPITAL LIGATURE OE
+0154          ; Lu #       LATIN CAPITAL LETTER R WITH ACUTE
+0156          ; Lu #       LATIN CAPITAL LETTER R WITH CEDILLA
+0158          ; Lu #       LATIN CAPITAL LETTER R WITH CARON
+015A          ; Lu #       LATIN CAPITAL LETTER S WITH ACUTE
+015C          ; Lu #       LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E          ; Lu #       LATIN CAPITAL LETTER S WITH CEDILLA
+0160          ; Lu #       LATIN CAPITAL LETTER S WITH CARON
+0162          ; Lu #       LATIN CAPITAL LETTER T WITH CEDILLA
+0164          ; Lu #       LATIN CAPITAL LETTER T WITH CARON
+0166          ; Lu #       LATIN CAPITAL LETTER T WITH STROKE
+0168          ; Lu #       LATIN CAPITAL LETTER U WITH TILDE
+016A          ; Lu #       LATIN CAPITAL LETTER U WITH MACRON
+016C          ; Lu #       LATIN CAPITAL LETTER U WITH BREVE
+016E          ; Lu #       LATIN CAPITAL LETTER U WITH RING ABOVE
+0170          ; Lu #       LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172          ; Lu #       LATIN CAPITAL LETTER U WITH OGONEK
+0174          ; Lu #       LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176          ; Lu #       LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178..0179    ; Lu #   [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE
+017B          ; Lu #       LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D          ; Lu #       LATIN CAPITAL LETTER Z WITH CARON
+0181..0182    ; Lu #   [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR
+0184          ; Lu #       LATIN CAPITAL LETTER TONE SIX
+0186..0187    ; Lu #   [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK
+0189..018B    ; Lu #   [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR
+018E..0191    ; Lu #   [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK
+0193..0194    ; Lu #   [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA
+0196..0198    ; Lu #   [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK
+019C..019D    ; Lu #   [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F..01A0    ; Lu #   [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN
+01A2          ; Lu #       LATIN CAPITAL LETTER OI
+01A4          ; Lu #       LATIN CAPITAL LETTER P WITH HOOK
+01A6..01A7    ; Lu #   [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO
+01A9          ; Lu #       LATIN CAPITAL LETTER ESH
+01AC          ; Lu #       LATIN CAPITAL LETTER T WITH HOOK
+01AE..01AF    ; Lu #   [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN
+01B1..01B3    ; Lu #   [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK
+01B5          ; Lu #       LATIN CAPITAL LETTER Z WITH STROKE
+01B7..01B8    ; Lu #   [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED
+01BC          ; Lu #       LATIN CAPITAL LETTER TONE FIVE
+01C4          ; Lu #       LATIN CAPITAL LETTER DZ WITH CARON
+01C7          ; Lu #       LATIN CAPITAL LETTER LJ
+01CA          ; Lu #       LATIN CAPITAL LETTER NJ
+01CD          ; Lu #       LATIN CAPITAL LETTER A WITH CARON
+01CF          ; Lu #       LATIN CAPITAL LETTER I WITH CARON
+01D1          ; Lu #       LATIN CAPITAL LETTER O WITH CARON
+01D3          ; Lu #       LATIN CAPITAL LETTER U WITH CARON
+01D5          ; Lu #       LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7          ; Lu #       LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9          ; Lu #       LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB          ; Lu #       LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE          ; Lu #       LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0          ; Lu #       LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2          ; Lu #       LATIN CAPITAL LETTER AE WITH MACRON
+01E4          ; Lu #       LATIN CAPITAL LETTER G WITH STROKE
+01E6          ; Lu #       LATIN CAPITAL LETTER G WITH CARON
+01E8          ; Lu #       LATIN CAPITAL LETTER K WITH CARON
+01EA          ; Lu #       LATIN CAPITAL LETTER O WITH OGONEK
+01EC          ; Lu #       LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE          ; Lu #       LATIN CAPITAL LETTER EZH WITH CARON
+01F1          ; Lu #       LATIN CAPITAL LETTER DZ
+01F4          ; Lu #       LATIN CAPITAL LETTER G WITH ACUTE
+01F6..01F8    ; Lu #   [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE
+01FA          ; Lu #       LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC          ; Lu #       LATIN CAPITAL LETTER AE WITH ACUTE
+01FE          ; Lu #       LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200          ; Lu #       LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202          ; Lu #       LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204          ; Lu #       LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206          ; Lu #       LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208          ; Lu #       LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A          ; Lu #       LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C          ; Lu #       LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E          ; Lu #       LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210          ; Lu #       LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212          ; Lu #       LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214          ; Lu #       LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216          ; Lu #       LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218          ; Lu #       LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A          ; Lu #       LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C          ; Lu #       LATIN CAPITAL LETTER YOGH
+021E          ; Lu #       LATIN CAPITAL LETTER H WITH CARON
+0220          ; Lu #       LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222          ; Lu #       LATIN CAPITAL LETTER OU
+0224          ; Lu #       LATIN CAPITAL LETTER Z WITH HOOK
+0226          ; Lu #       LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228          ; Lu #       LATIN CAPITAL LETTER E WITH CEDILLA
+022A          ; Lu #       LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C          ; Lu #       LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E          ; Lu #       LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230          ; Lu #       LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232          ; Lu #       LATIN CAPITAL LETTER Y WITH MACRON
+023A..023B    ; Lu #   [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE
+023D..023E    ; Lu #   [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241          ; Lu #       LATIN CAPITAL LETTER GLOTTAL STOP
+0243..0246    ; Lu #   [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE
+0248          ; Lu #       LATIN CAPITAL LETTER J WITH STROKE
+024A          ; Lu #       LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C          ; Lu #       LATIN CAPITAL LETTER R WITH STROKE
+024E          ; Lu #       LATIN CAPITAL LETTER Y WITH STROKE
+0370          ; Lu #       GREEK CAPITAL LETTER HETA
+0372          ; Lu #       GREEK CAPITAL LETTER ARCHAIC SAMPI
+0376          ; Lu #       GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+037F          ; Lu #       GREEK CAPITAL LETTER YOT
+0386          ; Lu #       GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388..038A    ; Lu #   [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+038C          ; Lu #       GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E..038F    ; Lu #   [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS
+0391..03A1    ; Lu #  [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
+03A3..03AB    ; Lu #   [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03CF          ; Lu #       GREEK CAPITAL KAI SYMBOL
+03D2..03D4    ; Lu #   [3] GREEK UPSILON WITH HOOK SYMBOL..GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL
+03D8          ; Lu #       GREEK LETTER ARCHAIC KOPPA
+03DA          ; Lu #       GREEK LETTER STIGMA
+03DC          ; Lu #       GREEK LETTER DIGAMMA
+03DE          ; Lu #       GREEK LETTER KOPPA
+03E0          ; Lu #       GREEK LETTER SAMPI
+03E2          ; Lu #       COPTIC CAPITAL LETTER SHEI
+03E4          ; Lu #       COPTIC CAPITAL LETTER FEI
+03E6          ; Lu #       COPTIC CAPITAL LETTER KHEI
+03E8          ; Lu #       COPTIC CAPITAL LETTER HORI
+03EA          ; Lu #       COPTIC CAPITAL LETTER GANGIA
+03EC          ; Lu #       COPTIC CAPITAL LETTER SHIMA
+03EE          ; Lu #       COPTIC CAPITAL LETTER DEI
+03F4          ; Lu #       GREEK CAPITAL THETA SYMBOL
+03F7          ; Lu #       GREEK CAPITAL LETTER SHO
+03F9..03FA    ; Lu #   [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN
+03FD..042F    ; Lu #  [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA
+0460          ; Lu #       CYRILLIC CAPITAL LETTER OMEGA
+0462          ; Lu #       CYRILLIC CAPITAL LETTER YAT
+0464          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED E
+0466          ; Lu #       CYRILLIC CAPITAL LETTER LITTLE YUS
+0468          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A          ; Lu #       CYRILLIC CAPITAL LETTER BIG YUS
+046C          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E          ; Lu #       CYRILLIC CAPITAL LETTER KSI
+0470          ; Lu #       CYRILLIC CAPITAL LETTER PSI
+0472          ; Lu #       CYRILLIC CAPITAL LETTER FITA
+0474          ; Lu #       CYRILLIC CAPITAL LETTER IZHITSA
+0476          ; Lu #       CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478          ; Lu #       CYRILLIC CAPITAL LETTER UK
+047A          ; Lu #       CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C          ; Lu #       CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E          ; Lu #       CYRILLIC CAPITAL LETTER OT
+0480          ; Lu #       CYRILLIC CAPITAL LETTER KOPPA
+048A          ; Lu #       CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C          ; Lu #       CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E          ; Lu #       CYRILLIC CAPITAL LETTER ER WITH TICK
+0490          ; Lu #       CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492          ; Lu #       CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494          ; Lu #       CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496          ; Lu #       CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498          ; Lu #       CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A          ; Lu #       CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C          ; Lu #       CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E          ; Lu #       CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0          ; Lu #       CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2          ; Lu #       CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4          ; Lu #       CYRILLIC CAPITAL LIGATURE EN GHE
+04A6          ; Lu #       CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8          ; Lu #       CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA          ; Lu #       CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC          ; Lu #       CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE          ; Lu #       CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0          ; Lu #       CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2          ; Lu #       CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4          ; Lu #       CYRILLIC CAPITAL LIGATURE TE TSE
+04B6          ; Lu #       CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8          ; Lu #       CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA          ; Lu #       CYRILLIC CAPITAL LETTER SHHA
+04BC          ; Lu #       CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE          ; Lu #       CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0..04C1    ; Lu #   [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3          ; Lu #       CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5          ; Lu #       CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7          ; Lu #       CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9          ; Lu #       CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB          ; Lu #       CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD          ; Lu #       CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0          ; Lu #       CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2          ; Lu #       CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4          ; Lu #       CYRILLIC CAPITAL LIGATURE A IE
+04D6          ; Lu #       CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8          ; Lu #       CYRILLIC CAPITAL LETTER SCHWA
+04DA          ; Lu #       CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC          ; Lu #       CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE          ; Lu #       CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0          ; Lu #       CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2          ; Lu #       CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4          ; Lu #       CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6          ; Lu #       CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8          ; Lu #       CYRILLIC CAPITAL LETTER BARRED O
+04EA          ; Lu #       CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC          ; Lu #       CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE          ; Lu #       CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0          ; Lu #       CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2          ; Lu #       CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4          ; Lu #       CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6          ; Lu #       CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8          ; Lu #       CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA          ; Lu #       CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC          ; Lu #       CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE          ; Lu #       CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500          ; Lu #       CYRILLIC CAPITAL LETTER KOMI DE
+0502          ; Lu #       CYRILLIC CAPITAL LETTER KOMI DJE
+0504          ; Lu #       CYRILLIC CAPITAL LETTER KOMI ZJE
+0506          ; Lu #       CYRILLIC CAPITAL LETTER KOMI DZJE
+0508          ; Lu #       CYRILLIC CAPITAL LETTER KOMI LJE
+050A          ; Lu #       CYRILLIC CAPITAL LETTER KOMI NJE
+050C          ; Lu #       CYRILLIC CAPITAL LETTER KOMI SJE
+050E          ; Lu #       CYRILLIC CAPITAL LETTER KOMI TJE
+0510          ; Lu #       CYRILLIC CAPITAL LETTER REVERSED ZE
+0512          ; Lu #       CYRILLIC CAPITAL LETTER EL WITH HOOK
+0514          ; Lu #       CYRILLIC CAPITAL LETTER LHA
+0516          ; Lu #       CYRILLIC CAPITAL LETTER RHA
+0518          ; Lu #       CYRILLIC CAPITAL LETTER YAE
+051A          ; Lu #       CYRILLIC CAPITAL LETTER QA
+051C          ; Lu #       CYRILLIC CAPITAL LETTER WE
+051E          ; Lu #       CYRILLIC CAPITAL LETTER ALEUT KA
+0520          ; Lu #       CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0522          ; Lu #       CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0524          ; Lu #       CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0526          ; Lu #       CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+0528          ; Lu #       CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
+052A          ; Lu #       CYRILLIC CAPITAL LETTER DZZHE
+052C          ; Lu #       CYRILLIC CAPITAL LETTER DCHE
+052E          ; Lu #       CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+0531..0556    ; Lu #  [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+10A0..10C5    ; Lu #  [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
+10C7          ; Lu #       GEORGIAN CAPITAL LETTER YN
+10CD          ; Lu #       GEORGIAN CAPITAL LETTER AEN
+13A0..13F5    ; Lu #  [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C90..1CBA    ; Lu #  [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
+1CBD..1CBF    ; Lu #   [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+1E00          ; Lu #       LATIN CAPITAL LETTER A WITH RING BELOW
+1E02          ; Lu #       LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04          ; Lu #       LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06          ; Lu #       LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08          ; Lu #       LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A          ; Lu #       LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C          ; Lu #       LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E          ; Lu #       LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10          ; Lu #       LATIN CAPITAL LETTER D WITH CEDILLA
+1E12          ; Lu #       LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14          ; Lu #       LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16          ; Lu #       LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A          ; Lu #       LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C          ; Lu #       LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E          ; Lu #       LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20          ; Lu #       LATIN CAPITAL LETTER G WITH MACRON
+1E22          ; Lu #       LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24          ; Lu #       LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26          ; Lu #       LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28          ; Lu #       LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A          ; Lu #       LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C          ; Lu #       LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E          ; Lu #       LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30          ; Lu #       LATIN CAPITAL LETTER K WITH ACUTE
+1E32          ; Lu #       LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34          ; Lu #       LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36          ; Lu #       LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38          ; Lu #       LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A          ; Lu #       LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C          ; Lu #       LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E          ; Lu #       LATIN CAPITAL LETTER M WITH ACUTE
+1E40          ; Lu #       LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42          ; Lu #       LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44          ; Lu #       LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46          ; Lu #       LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48          ; Lu #       LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A          ; Lu #       LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C          ; Lu #       LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E          ; Lu #       LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50          ; Lu #       LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52          ; Lu #       LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54          ; Lu #       LATIN CAPITAL LETTER P WITH ACUTE
+1E56          ; Lu #       LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58          ; Lu #       LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A          ; Lu #       LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C          ; Lu #       LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E          ; Lu #       LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60          ; Lu #       LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62          ; Lu #       LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64          ; Lu #       LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66          ; Lu #       LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68          ; Lu #       LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A          ; Lu #       LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C          ; Lu #       LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E          ; Lu #       LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70          ; Lu #       LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72          ; Lu #       LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74          ; Lu #       LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76          ; Lu #       LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78          ; Lu #       LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A          ; Lu #       LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C          ; Lu #       LATIN CAPITAL LETTER V WITH TILDE
+1E7E          ; Lu #       LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80          ; Lu #       LATIN CAPITAL LETTER W WITH GRAVE
+1E82          ; Lu #       LATIN CAPITAL LETTER W WITH ACUTE
+1E84          ; Lu #       LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86          ; Lu #       LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88          ; Lu #       LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A          ; Lu #       LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C          ; Lu #       LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E          ; Lu #       LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90          ; Lu #       LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92          ; Lu #       LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94          ; Lu #       LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E9E          ; Lu #       LATIN CAPITAL LETTER SHARP S
+1EA0          ; Lu #       LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2          ; Lu #       LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4          ; Lu #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6          ; Lu #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8          ; Lu #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA          ; Lu #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC          ; Lu #       LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6          ; Lu #       LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8          ; Lu #       LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA          ; Lu #       LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC          ; Lu #       LATIN CAPITAL LETTER E WITH TILDE
+1EBE          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6          ; Lu #       LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8          ; Lu #       LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA          ; Lu #       LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC          ; Lu #       LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE          ; Lu #       LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0          ; Lu #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2          ; Lu #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4          ; Lu #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6          ; Lu #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8          ; Lu #       LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA          ; Lu #       LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC          ; Lu #       LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE          ; Lu #       LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0          ; Lu #       LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2          ; Lu #       LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4          ; Lu #       LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6          ; Lu #       LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8          ; Lu #       LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA          ; Lu #       LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC          ; Lu #       LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE          ; Lu #       LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0          ; Lu #       LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2          ; Lu #       LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4          ; Lu #       LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6          ; Lu #       LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8          ; Lu #       LATIN CAPITAL LETTER Y WITH TILDE
+1EFA          ; Lu #       LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFC          ; Lu #       LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFE          ; Lu #       LATIN CAPITAL LETTER Y WITH LOOP
+1F08..1F0F    ; Lu #   [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18..1F1D    ; Lu #   [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28..1F2F    ; Lu #   [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38..1F3F    ; Lu #   [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48..1F4D    ; Lu #   [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F59          ; Lu #       GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B          ; Lu #       GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D          ; Lu #       GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F          ; Lu #       GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68..1F6F    ; Lu #   [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1FB8..1FBB    ; Lu #   [4] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FC8..1FCB    ; Lu #   [4] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH OXIA
+1FD8..1FDB    ; Lu #   [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE8..1FEC    ; Lu #   [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
+1FF8..1FFB    ; Lu #   [4] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH OXIA
+2102          ; Lu #       DOUBLE-STRUCK CAPITAL C
+2107          ; Lu #       EULER CONSTANT
+210B..210D    ; Lu #   [3] SCRIPT CAPITAL H..DOUBLE-STRUCK CAPITAL H
+2110..2112    ; Lu #   [3] SCRIPT CAPITAL I..SCRIPT CAPITAL L
+2115          ; Lu #       DOUBLE-STRUCK CAPITAL N
+2119..211D    ; Lu #   [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+2124          ; Lu #       DOUBLE-STRUCK CAPITAL Z
+2126          ; Lu #       OHM SIGN
+2128          ; Lu #       BLACK-LETTER CAPITAL Z
+212A..212D    ; Lu #   [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
+2130..2133    ; Lu #   [4] SCRIPT CAPITAL E..SCRIPT CAPITAL M
+213E..213F    ; Lu #   [2] DOUBLE-STRUCK CAPITAL GAMMA..DOUBLE-STRUCK CAPITAL PI
+2145          ; Lu #       DOUBLE-STRUCK ITALIC CAPITAL D
+2183          ; Lu #       ROMAN NUMERAL REVERSED ONE HUNDRED
+2C00..2C2E    ; Lu #  [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C60          ; Lu #       LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62..2C64    ; Lu #   [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL
+2C67          ; Lu #       LATIN CAPITAL LETTER H WITH DESCENDER
+2C69          ; Lu #       LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B          ; Lu #       LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6D..2C70    ; Lu #   [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA
+2C72          ; Lu #       LATIN CAPITAL LETTER W WITH HOOK
+2C75          ; Lu #       LATIN CAPITAL LETTER HALF H
+2C7E..2C80    ; Lu #   [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA
+2C82          ; Lu #       COPTIC CAPITAL LETTER VIDA
+2C84          ; Lu #       COPTIC CAPITAL LETTER GAMMA
+2C86          ; Lu #       COPTIC CAPITAL LETTER DALDA
+2C88          ; Lu #       COPTIC CAPITAL LETTER EIE
+2C8A          ; Lu #       COPTIC CAPITAL LETTER SOU
+2C8C          ; Lu #       COPTIC CAPITAL LETTER ZATA
+2C8E          ; Lu #       COPTIC CAPITAL LETTER HATE
+2C90          ; Lu #       COPTIC CAPITAL LETTER THETHE
+2C92          ; Lu #       COPTIC CAPITAL LETTER IAUDA
+2C94          ; Lu #       COPTIC CAPITAL LETTER KAPA
+2C96          ; Lu #       COPTIC CAPITAL LETTER LAULA
+2C98          ; Lu #       COPTIC CAPITAL LETTER MI
+2C9A          ; Lu #       COPTIC CAPITAL LETTER NI
+2C9C          ; Lu #       COPTIC CAPITAL LETTER KSI
+2C9E          ; Lu #       COPTIC CAPITAL LETTER O
+2CA0          ; Lu #       COPTIC CAPITAL LETTER PI
+2CA2          ; Lu #       COPTIC CAPITAL LETTER RO
+2CA4          ; Lu #       COPTIC CAPITAL LETTER SIMA
+2CA6          ; Lu #       COPTIC CAPITAL LETTER TAU
+2CA8          ; Lu #       COPTIC CAPITAL LETTER UA
+2CAA          ; Lu #       COPTIC CAPITAL LETTER FI
+2CAC          ; Lu #       COPTIC CAPITAL LETTER KHI
+2CAE          ; Lu #       COPTIC CAPITAL LETTER PSI
+2CB0          ; Lu #       COPTIC CAPITAL LETTER OOU
+2CB2          ; Lu #       COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6          ; Lu #       COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8          ; Lu #       COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA          ; Lu #       COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC          ; Lu #       COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0          ; Lu #       COPTIC CAPITAL LETTER SAMPI
+2CC2          ; Lu #       COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8          ; Lu #       COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA          ; Lu #       COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0          ; Lu #       COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA          ; Lu #       COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC          ; Lu #       COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE          ; Lu #       COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0          ; Lu #       COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2          ; Lu #       COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CEB          ; Lu #       COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CED          ; Lu #       COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CF2          ; Lu #       COPTIC CAPITAL LETTER BOHAIRIC KHEI
+A640          ; Lu #       CYRILLIC CAPITAL LETTER ZEMLYA
+A642          ; Lu #       CYRILLIC CAPITAL LETTER DZELO
+A644          ; Lu #       CYRILLIC CAPITAL LETTER REVERSED DZE
+A646          ; Lu #       CYRILLIC CAPITAL LETTER IOTA
+A648          ; Lu #       CYRILLIC CAPITAL LETTER DJERV
+A64A          ; Lu #       CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64C          ; Lu #       CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64E          ; Lu #       CYRILLIC CAPITAL LETTER NEUTRAL YER
+A650          ; Lu #       CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A652          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A654          ; Lu #       CYRILLIC CAPITAL LETTER REVERSED YU
+A656          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED A
+A658          ; Lu #       CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A65A          ; Lu #       CYRILLIC CAPITAL LETTER BLENDED YUS
+A65C          ; Lu #       CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E          ; Lu #       CYRILLIC CAPITAL LETTER YN
+A660          ; Lu #       CYRILLIC CAPITAL LETTER REVERSED TSE
+A662          ; Lu #       CYRILLIC CAPITAL LETTER SOFT DE
+A664          ; Lu #       CYRILLIC CAPITAL LETTER SOFT EL
+A666          ; Lu #       CYRILLIC CAPITAL LETTER SOFT EM
+A668          ; Lu #       CYRILLIC CAPITAL LETTER MONOCULAR O
+A66A          ; Lu #       CYRILLIC CAPITAL LETTER BINOCULAR O
+A66C          ; Lu #       CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A680          ; Lu #       CYRILLIC CAPITAL LETTER DWE
+A682          ; Lu #       CYRILLIC CAPITAL LETTER DZWE
+A684          ; Lu #       CYRILLIC CAPITAL LETTER ZHWE
+A686          ; Lu #       CYRILLIC CAPITAL LETTER CCHE
+A688          ; Lu #       CYRILLIC CAPITAL LETTER DZZE
+A68A          ; Lu #       CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68C          ; Lu #       CYRILLIC CAPITAL LETTER TWE
+A68E          ; Lu #       CYRILLIC CAPITAL LETTER TSWE
+A690          ; Lu #       CYRILLIC CAPITAL LETTER TSSE
+A692          ; Lu #       CYRILLIC CAPITAL LETTER TCHE
+A694          ; Lu #       CYRILLIC CAPITAL LETTER HWE
+A696          ; Lu #       CYRILLIC CAPITAL LETTER SHWE
+A698          ; Lu #       CYRILLIC CAPITAL LETTER DOUBLE O
+A69A          ; Lu #       CYRILLIC CAPITAL LETTER CROSSED O
+A722          ; Lu #       LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A724          ; Lu #       LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A726          ; Lu #       LATIN CAPITAL LETTER HENG
+A728          ; Lu #       LATIN CAPITAL LETTER TZ
+A72A          ; Lu #       LATIN CAPITAL LETTER TRESILLO
+A72C          ; Lu #       LATIN CAPITAL LETTER CUATRILLO
+A72E          ; Lu #       LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A732          ; Lu #       LATIN CAPITAL LETTER AA
+A734          ; Lu #       LATIN CAPITAL LETTER AO
+A736          ; Lu #       LATIN CAPITAL LETTER AU
+A738          ; Lu #       LATIN CAPITAL LETTER AV
+A73A          ; Lu #       LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73C          ; Lu #       LATIN CAPITAL LETTER AY
+A73E          ; Lu #       LATIN CAPITAL LETTER REVERSED C WITH DOT
+A740          ; Lu #       LATIN CAPITAL LETTER K WITH STROKE
+A742          ; Lu #       LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A744          ; Lu #       LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746          ; Lu #       LATIN CAPITAL LETTER BROKEN L
+A748          ; Lu #       LATIN CAPITAL LETTER L WITH HIGH STROKE
+A74A          ; Lu #       LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74C          ; Lu #       LATIN CAPITAL LETTER O WITH LOOP
+A74E          ; Lu #       LATIN CAPITAL LETTER OO
+A750          ; Lu #       LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A752          ; Lu #       LATIN CAPITAL LETTER P WITH FLOURISH
+A754          ; Lu #       LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A756          ; Lu #       LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A758          ; Lu #       LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A75A          ; Lu #       LATIN CAPITAL LETTER R ROTUNDA
+A75C          ; Lu #       LATIN CAPITAL LETTER RUM ROTUNDA
+A75E          ; Lu #       LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A760          ; Lu #       LATIN CAPITAL LETTER VY
+A762          ; Lu #       LATIN CAPITAL LETTER VISIGOTHIC Z
+A764          ; Lu #       LATIN CAPITAL LETTER THORN WITH STROKE
+A766          ; Lu #       LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768          ; Lu #       LATIN CAPITAL LETTER VEND
+A76A          ; Lu #       LATIN CAPITAL LETTER ET
+A76C          ; Lu #       LATIN CAPITAL LETTER IS
+A76E          ; Lu #       LATIN CAPITAL LETTER CON
+A779          ; Lu #       LATIN CAPITAL LETTER INSULAR D
+A77B          ; Lu #       LATIN CAPITAL LETTER INSULAR F
+A77D..A77E    ; Lu #   [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G
+A780          ; Lu #       LATIN CAPITAL LETTER TURNED L
+A782          ; Lu #       LATIN CAPITAL LETTER INSULAR R
+A784          ; Lu #       LATIN CAPITAL LETTER INSULAR S
+A786          ; Lu #       LATIN CAPITAL LETTER INSULAR T
+A78B          ; Lu #       LATIN CAPITAL LETTER SALTILLO
+A78D          ; Lu #       LATIN CAPITAL LETTER TURNED H
+A790          ; Lu #       LATIN CAPITAL LETTER N WITH DESCENDER
+A792          ; Lu #       LATIN CAPITAL LETTER C WITH BAR
+A796          ; Lu #       LATIN CAPITAL LETTER B WITH FLOURISH
+A798          ; Lu #       LATIN CAPITAL LETTER F WITH STROKE
+A79A          ; Lu #       LATIN CAPITAL LETTER VOLAPUK AE
+A79C          ; Lu #       LATIN CAPITAL LETTER VOLAPUK OE
+A79E          ; Lu #       LATIN CAPITAL LETTER VOLAPUK UE
+A7A0          ; Lu #       LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+A7A2          ; Lu #       LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+A7A4          ; Lu #       LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+A7A6          ; Lu #       LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+A7A8          ; Lu #       LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+A7AA..A7AE    ; Lu #   [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I
+A7B0..A7B4    ; Lu #   [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA
+A7B6          ; Lu #       LATIN CAPITAL LETTER OMEGA
+A7B8          ; Lu #       LATIN CAPITAL LETTER U WITH STROKE
+A7BA          ; Lu #       LATIN CAPITAL LETTER GLOTTAL A
+A7BC          ; Lu #       LATIN CAPITAL LETTER GLOTTAL I
+A7BE          ; Lu #       LATIN CAPITAL LETTER GLOTTAL U
+A7C2          ; Lu #       LATIN CAPITAL LETTER ANGLICANA W
+A7C4..A7C7    ; Lu #   [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
+A7C9          ; Lu #       LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
+A7F5          ; Lu #       LATIN CAPITAL LETTER REVERSED HALF H
+FF21..FF3A    ; Lu #  [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
+10400..10427  ; Lu #  [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
+104B0..104D3  ; Lu #  [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
+10C80..10CB2  ; Lu #  [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US
+118A0..118BF  ; Lu #  [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO
+16E40..16E5F  ; Lu #  [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
+1D400..1D419  ; Lu #  [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z
+1D434..1D44D  ; Lu #  [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z
+1D468..1D481  ; Lu #  [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z
+1D49C         ; Lu #       MATHEMATICAL SCRIPT CAPITAL A
+1D49E..1D49F  ; Lu #   [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+1D4A2         ; Lu #       MATHEMATICAL SCRIPT CAPITAL G
+1D4A5..1D4A6  ; Lu #   [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+1D4A9..1D4AC  ; Lu #   [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+1D4AE..1D4B5  ; Lu #   [8] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT CAPITAL Z
+1D4D0..1D4E9  ; Lu #  [26] MATHEMATICAL BOLD SCRIPT CAPITAL A..MATHEMATICAL BOLD SCRIPT CAPITAL Z
+1D504..1D505  ; Lu #   [2] MATHEMATICAL FRAKTUR CAPITAL A..MATHEMATICAL FRAKTUR CAPITAL B
+1D507..1D50A  ; Lu #   [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+1D50D..1D514  ; Lu #   [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+1D516..1D51C  ; Lu #   [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+1D538..1D539  ; Lu #   [2] MATHEMATICAL DOUBLE-STRUCK CAPITAL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+1D53B..1D53E  ; Lu #   [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+1D540..1D544  ; Lu #   [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+1D546         ; Lu #       MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+1D54A..1D550  ; Lu #   [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+1D56C..1D585  ; Lu #  [26] MATHEMATICAL BOLD FRAKTUR CAPITAL A..MATHEMATICAL BOLD FRAKTUR CAPITAL Z
+1D5A0..1D5B9  ; Lu #  [26] MATHEMATICAL SANS-SERIF CAPITAL A..MATHEMATICAL SANS-SERIF CAPITAL Z
+1D5D4..1D5ED  ; Lu #  [26] MATHEMATICAL SANS-SERIF BOLD CAPITAL A..MATHEMATICAL SANS-SERIF BOLD CAPITAL Z
+1D608..1D621  ; Lu #  [26] MATHEMATICAL SANS-SERIF ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF ITALIC CAPITAL Z
+1D63C..1D655  ; Lu #  [26] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Z
+1D670..1D689  ; Lu #  [26] MATHEMATICAL MONOSPACE CAPITAL A..MATHEMATICAL MONOSPACE CAPITAL Z
+1D6A8..1D6C0  ; Lu #  [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+1D6E2..1D6FA  ; Lu #  [25] MATHEMATICAL ITALIC CAPITAL ALPHA..MATHEMATICAL ITALIC CAPITAL OMEGA
+1D71C..1D734  ; Lu #  [25] MATHEMATICAL BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+1D756..1D76E  ; Lu #  [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+1D790..1D7A8  ; Lu #  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+1D7CA         ; Lu #       MATHEMATICAL BOLD CAPITAL DIGAMMA
+1E900..1E921  ; Lu #  [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
+
+# Total code points: 1791
+
+# ================================================
+
+# General_Category=Lowercase_Letter
+
+0061..007A    ; Ll #  [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
+00B5          ; Ll #       MICRO SIGN
+00DF..00F6    ; Ll #  [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS
+00F8..00FF    ; Ll #   [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS
+0101          ; Ll #       LATIN SMALL LETTER A WITH MACRON
+0103          ; Ll #       LATIN SMALL LETTER A WITH BREVE
+0105          ; Ll #       LATIN SMALL LETTER A WITH OGONEK
+0107          ; Ll #       LATIN SMALL LETTER C WITH ACUTE
+0109          ; Ll #       LATIN SMALL LETTER C WITH CIRCUMFLEX
+010B          ; Ll #       LATIN SMALL LETTER C WITH DOT ABOVE
+010D          ; Ll #       LATIN SMALL LETTER C WITH CARON
+010F          ; Ll #       LATIN SMALL LETTER D WITH CARON
+0111          ; Ll #       LATIN SMALL LETTER D WITH STROKE
+0113          ; Ll #       LATIN SMALL LETTER E WITH MACRON
+0115          ; Ll #       LATIN SMALL LETTER E WITH BREVE
+0117          ; Ll #       LATIN SMALL LETTER E WITH DOT ABOVE
+0119          ; Ll #       LATIN SMALL LETTER E WITH OGONEK
+011B          ; Ll #       LATIN SMALL LETTER E WITH CARON
+011D          ; Ll #       LATIN SMALL LETTER G WITH CIRCUMFLEX
+011F          ; Ll #       LATIN SMALL LETTER G WITH BREVE
+0121          ; Ll #       LATIN SMALL LETTER G WITH DOT ABOVE
+0123          ; Ll #       LATIN SMALL LETTER G WITH CEDILLA
+0125          ; Ll #       LATIN SMALL LETTER H WITH CIRCUMFLEX
+0127          ; Ll #       LATIN SMALL LETTER H WITH STROKE
+0129          ; Ll #       LATIN SMALL LETTER I WITH TILDE
+012B          ; Ll #       LATIN SMALL LETTER I WITH MACRON
+012D          ; Ll #       LATIN SMALL LETTER I WITH BREVE
+012F          ; Ll #       LATIN SMALL LETTER I WITH OGONEK
+0131          ; Ll #       LATIN SMALL LETTER DOTLESS I
+0133          ; Ll #       LATIN SMALL LIGATURE IJ
+0135          ; Ll #       LATIN SMALL LETTER J WITH CIRCUMFLEX
+0137..0138    ; Ll #   [2] LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA
+013A          ; Ll #       LATIN SMALL LETTER L WITH ACUTE
+013C          ; Ll #       LATIN SMALL LETTER L WITH CEDILLA
+013E          ; Ll #       LATIN SMALL LETTER L WITH CARON
+0140          ; Ll #       LATIN SMALL LETTER L WITH MIDDLE DOT
+0142          ; Ll #       LATIN SMALL LETTER L WITH STROKE
+0144          ; Ll #       LATIN SMALL LETTER N WITH ACUTE
+0146          ; Ll #       LATIN SMALL LETTER N WITH CEDILLA
+0148..0149    ; Ll #   [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014B          ; Ll #       LATIN SMALL LETTER ENG
+014D          ; Ll #       LATIN SMALL LETTER O WITH MACRON
+014F          ; Ll #       LATIN SMALL LETTER O WITH BREVE
+0151          ; Ll #       LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0153          ; Ll #       LATIN SMALL LIGATURE OE
+0155          ; Ll #       LATIN SMALL LETTER R WITH ACUTE
+0157          ; Ll #       LATIN SMALL LETTER R WITH CEDILLA
+0159          ; Ll #       LATIN SMALL LETTER R WITH CARON
+015B          ; Ll #       LATIN SMALL LETTER S WITH ACUTE
+015D          ; Ll #       LATIN SMALL LETTER S WITH CIRCUMFLEX
+015F          ; Ll #       LATIN SMALL LETTER S WITH CEDILLA
+0161          ; Ll #       LATIN SMALL LETTER S WITH CARON
+0163          ; Ll #       LATIN SMALL LETTER T WITH CEDILLA
+0165          ; Ll #       LATIN SMALL LETTER T WITH CARON
+0167          ; Ll #       LATIN SMALL LETTER T WITH STROKE
+0169          ; Ll #       LATIN SMALL LETTER U WITH TILDE
+016B          ; Ll #       LATIN SMALL LETTER U WITH MACRON
+016D          ; Ll #       LATIN SMALL LETTER U WITH BREVE
+016F          ; Ll #       LATIN SMALL LETTER U WITH RING ABOVE
+0171          ; Ll #       LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0173          ; Ll #       LATIN SMALL LETTER U WITH OGONEK
+0175          ; Ll #       LATIN SMALL LETTER W WITH CIRCUMFLEX
+0177          ; Ll #       LATIN SMALL LETTER Y WITH CIRCUMFLEX
+017A          ; Ll #       LATIN SMALL LETTER Z WITH ACUTE
+017C          ; Ll #       LATIN SMALL LETTER Z WITH DOT ABOVE
+017E..0180    ; Ll #   [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE
+0183          ; Ll #       LATIN SMALL LETTER B WITH TOPBAR
+0185          ; Ll #       LATIN SMALL LETTER TONE SIX
+0188          ; Ll #       LATIN SMALL LETTER C WITH HOOK
+018C..018D    ; Ll #   [2] LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA
+0192          ; Ll #       LATIN SMALL LETTER F WITH HOOK
+0195          ; Ll #       LATIN SMALL LETTER HV
+0199..019B    ; Ll #   [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE
+019E          ; Ll #       LATIN SMALL LETTER N WITH LONG RIGHT LEG
+01A1          ; Ll #       LATIN SMALL LETTER O WITH HORN
+01A3          ; Ll #       LATIN SMALL LETTER OI
+01A5          ; Ll #       LATIN SMALL LETTER P WITH HOOK
+01A8          ; Ll #       LATIN SMALL LETTER TONE TWO
+01AA..01AB    ; Ll #   [2] LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK
+01AD          ; Ll #       LATIN SMALL LETTER T WITH HOOK
+01B0          ; Ll #       LATIN SMALL LETTER U WITH HORN
+01B4          ; Ll #       LATIN SMALL LETTER Y WITH HOOK
+01B6          ; Ll #       LATIN SMALL LETTER Z WITH STROKE
+01B9..01BA    ; Ll #   [2] LATIN SMALL LETTER EZH REVERSED..LATIN SMALL LETTER EZH WITH TAIL
+01BD..01BF    ; Ll #   [3] LATIN SMALL LETTER TONE FIVE..LATIN LETTER WYNN
+01C6          ; Ll #       LATIN SMALL LETTER DZ WITH CARON
+01C9          ; Ll #       LATIN SMALL LETTER LJ
+01CC          ; Ll #       LATIN SMALL LETTER NJ
+01CE          ; Ll #       LATIN SMALL LETTER A WITH CARON
+01D0          ; Ll #       LATIN SMALL LETTER I WITH CARON
+01D2          ; Ll #       LATIN SMALL LETTER O WITH CARON
+01D4          ; Ll #       LATIN SMALL LETTER U WITH CARON
+01D6          ; Ll #       LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+01D8          ; Ll #       LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+01DA          ; Ll #       LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+01DC..01DD    ; Ll #   [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E
+01DF          ; Ll #       LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
+01E1          ; Ll #       LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+01E3          ; Ll #       LATIN SMALL LETTER AE WITH MACRON
+01E5          ; Ll #       LATIN SMALL LETTER G WITH STROKE
+01E7          ; Ll #       LATIN SMALL LETTER G WITH CARON
+01E9          ; Ll #       LATIN SMALL LETTER K WITH CARON
+01EB          ; Ll #       LATIN SMALL LETTER O WITH OGONEK
+01ED          ; Ll #       LATIN SMALL LETTER O WITH OGONEK AND MACRON
+01EF..01F0    ; Ll #   [2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON
+01F3          ; Ll #       LATIN SMALL LETTER DZ
+01F5          ; Ll #       LATIN SMALL LETTER G WITH ACUTE
+01F9          ; Ll #       LATIN SMALL LETTER N WITH GRAVE
+01FB          ; Ll #       LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
+01FD          ; Ll #       LATIN SMALL LETTER AE WITH ACUTE
+01FF          ; Ll #       LATIN SMALL LETTER O WITH STROKE AND ACUTE
+0201          ; Ll #       LATIN SMALL LETTER A WITH DOUBLE GRAVE
+0203          ; Ll #       LATIN SMALL LETTER A WITH INVERTED BREVE
+0205          ; Ll #       LATIN SMALL LETTER E WITH DOUBLE GRAVE
+0207          ; Ll #       LATIN SMALL LETTER E WITH INVERTED BREVE
+0209          ; Ll #       LATIN SMALL LETTER I WITH DOUBLE GRAVE
+020B          ; Ll #       LATIN SMALL LETTER I WITH INVERTED BREVE
+020D          ; Ll #       LATIN SMALL LETTER O WITH DOUBLE GRAVE
+020F          ; Ll #       LATIN SMALL LETTER O WITH INVERTED BREVE
+0211          ; Ll #       LATIN SMALL LETTER R WITH DOUBLE GRAVE
+0213          ; Ll #       LATIN SMALL LETTER R WITH INVERTED BREVE
+0215          ; Ll #       LATIN SMALL LETTER U WITH DOUBLE GRAVE
+0217          ; Ll #       LATIN SMALL LETTER U WITH INVERTED BREVE
+0219          ; Ll #       LATIN SMALL LETTER S WITH COMMA BELOW
+021B          ; Ll #       LATIN SMALL LETTER T WITH COMMA BELOW
+021D          ; Ll #       LATIN SMALL LETTER YOGH
+021F          ; Ll #       LATIN SMALL LETTER H WITH CARON
+0221          ; Ll #       LATIN SMALL LETTER D WITH CURL
+0223          ; Ll #       LATIN SMALL LETTER OU
+0225          ; Ll #       LATIN SMALL LETTER Z WITH HOOK
+0227          ; Ll #       LATIN SMALL LETTER A WITH DOT ABOVE
+0229          ; Ll #       LATIN SMALL LETTER E WITH CEDILLA
+022B          ; Ll #       LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
+022D          ; Ll #       LATIN SMALL LETTER O WITH TILDE AND MACRON
+022F          ; Ll #       LATIN SMALL LETTER O WITH DOT ABOVE
+0231          ; Ll #       LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
+0233..0239    ; Ll #   [7] LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH
+023C          ; Ll #       LATIN SMALL LETTER C WITH STROKE
+023F..0240    ; Ll #   [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL
+0242          ; Ll #       LATIN SMALL LETTER GLOTTAL STOP
+0247          ; Ll #       LATIN SMALL LETTER E WITH STROKE
+0249          ; Ll #       LATIN SMALL LETTER J WITH STROKE
+024B          ; Ll #       LATIN SMALL LETTER Q WITH HOOK TAIL
+024D          ; Ll #       LATIN SMALL LETTER R WITH STROKE
+024F..0293    ; Ll #  [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL
+0295..02AF    ; Ll #  [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+0371          ; Ll #       GREEK SMALL LETTER HETA
+0373          ; Ll #       GREEK SMALL LETTER ARCHAIC SAMPI
+0377          ; Ll #       GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+037B..037D    ; Ll #   [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0390          ; Ll #       GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+03AC..03CE    ; Ll #  [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS
+03D0..03D1    ; Ll #   [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL
+03D5..03D7    ; Ll #   [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL
+03D9          ; Ll #       GREEK SMALL LETTER ARCHAIC KOPPA
+03DB          ; Ll #       GREEK SMALL LETTER STIGMA
+03DD          ; Ll #       GREEK SMALL LETTER DIGAMMA
+03DF          ; Ll #       GREEK SMALL LETTER KOPPA
+03E1          ; Ll #       GREEK SMALL LETTER SAMPI
+03E3          ; Ll #       COPTIC SMALL LETTER SHEI
+03E5          ; Ll #       COPTIC SMALL LETTER FEI
+03E7          ; Ll #       COPTIC SMALL LETTER KHEI
+03E9          ; Ll #       COPTIC SMALL LETTER HORI
+03EB          ; Ll #       COPTIC SMALL LETTER GANGIA
+03ED          ; Ll #       COPTIC SMALL LETTER SHIMA
+03EF..03F3    ; Ll #   [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT
+03F5          ; Ll #       GREEK LUNATE EPSILON SYMBOL
+03F8          ; Ll #       GREEK SMALL LETTER SHO
+03FB..03FC    ; Ll #   [2] GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL
+0430..045F    ; Ll #  [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE
+0461          ; Ll #       CYRILLIC SMALL LETTER OMEGA
+0463          ; Ll #       CYRILLIC SMALL LETTER YAT
+0465          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED E
+0467          ; Ll #       CYRILLIC SMALL LETTER LITTLE YUS
+0469          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
+046B          ; Ll #       CYRILLIC SMALL LETTER BIG YUS
+046D          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED BIG YUS
+046F          ; Ll #       CYRILLIC SMALL LETTER KSI
+0471          ; Ll #       CYRILLIC SMALL LETTER PSI
+0473          ; Ll #       CYRILLIC SMALL LETTER FITA
+0475          ; Ll #       CYRILLIC SMALL LETTER IZHITSA
+0477          ; Ll #       CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0479          ; Ll #       CYRILLIC SMALL LETTER UK
+047B          ; Ll #       CYRILLIC SMALL LETTER ROUND OMEGA
+047D          ; Ll #       CYRILLIC SMALL LETTER OMEGA WITH TITLO
+047F          ; Ll #       CYRILLIC SMALL LETTER OT
+0481          ; Ll #       CYRILLIC SMALL LETTER KOPPA
+048B          ; Ll #       CYRILLIC SMALL LETTER SHORT I WITH TAIL
+048D          ; Ll #       CYRILLIC SMALL LETTER SEMISOFT SIGN
+048F          ; Ll #       CYRILLIC SMALL LETTER ER WITH TICK
+0491          ; Ll #       CYRILLIC SMALL LETTER GHE WITH UPTURN
+0493          ; Ll #       CYRILLIC SMALL LETTER GHE WITH STROKE
+0495          ; Ll #       CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
+0497          ; Ll #       CYRILLIC SMALL LETTER ZHE WITH DESCENDER
+0499          ; Ll #       CYRILLIC SMALL LETTER ZE WITH DESCENDER
+049B          ; Ll #       CYRILLIC SMALL LETTER KA WITH DESCENDER
+049D          ; Ll #       CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
+049F          ; Ll #       CYRILLIC SMALL LETTER KA WITH STROKE
+04A1          ; Ll #       CYRILLIC SMALL LETTER BASHKIR KA
+04A3          ; Ll #       CYRILLIC SMALL LETTER EN WITH DESCENDER
+04A5          ; Ll #       CYRILLIC SMALL LIGATURE EN GHE
+04A7          ; Ll #       CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
+04A9          ; Ll #       CYRILLIC SMALL LETTER ABKHASIAN HA
+04AB          ; Ll #       CYRILLIC SMALL LETTER ES WITH DESCENDER
+04AD          ; Ll #       CYRILLIC SMALL LETTER TE WITH DESCENDER
+04AF          ; Ll #       CYRILLIC SMALL LETTER STRAIGHT U
+04B1          ; Ll #       CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
+04B3          ; Ll #       CYRILLIC SMALL LETTER HA WITH DESCENDER
+04B5          ; Ll #       CYRILLIC SMALL LIGATURE TE TSE
+04B7          ; Ll #       CYRILLIC SMALL LETTER CHE WITH DESCENDER
+04B9          ; Ll #       CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
+04BB          ; Ll #       CYRILLIC SMALL LETTER SHHA
+04BD          ; Ll #       CYRILLIC SMALL LETTER ABKHASIAN CHE
+04BF          ; Ll #       CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
+04C2          ; Ll #       CYRILLIC SMALL LETTER ZHE WITH BREVE
+04C4          ; Ll #       CYRILLIC SMALL LETTER KA WITH HOOK
+04C6          ; Ll #       CYRILLIC SMALL LETTER EL WITH TAIL
+04C8          ; Ll #       CYRILLIC SMALL LETTER EN WITH HOOK
+04CA          ; Ll #       CYRILLIC SMALL LETTER EN WITH TAIL
+04CC          ; Ll #       CYRILLIC SMALL LETTER KHAKASSIAN CHE
+04CE..04CF    ; Ll #   [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA
+04D1          ; Ll #       CYRILLIC SMALL LETTER A WITH BREVE
+04D3          ; Ll #       CYRILLIC SMALL LETTER A WITH DIAERESIS
+04D5          ; Ll #       CYRILLIC SMALL LIGATURE A IE
+04D7          ; Ll #       CYRILLIC SMALL LETTER IE WITH BREVE
+04D9          ; Ll #       CYRILLIC SMALL LETTER SCHWA
+04DB          ; Ll #       CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS
+04DD          ; Ll #       CYRILLIC SMALL LETTER ZHE WITH DIAERESIS
+04DF          ; Ll #       CYRILLIC SMALL LETTER ZE WITH DIAERESIS
+04E1          ; Ll #       CYRILLIC SMALL LETTER ABKHASIAN DZE
+04E3          ; Ll #       CYRILLIC SMALL LETTER I WITH MACRON
+04E5          ; Ll #       CYRILLIC SMALL LETTER I WITH DIAERESIS
+04E7          ; Ll #       CYRILLIC SMALL LETTER O WITH DIAERESIS
+04E9          ; Ll #       CYRILLIC SMALL LETTER BARRED O
+04EB          ; Ll #       CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS
+04ED          ; Ll #       CYRILLIC SMALL LETTER E WITH DIAERESIS
+04EF          ; Ll #       CYRILLIC SMALL LETTER U WITH MACRON
+04F1          ; Ll #       CYRILLIC SMALL LETTER U WITH DIAERESIS
+04F3          ; Ll #       CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE
+04F5          ; Ll #       CYRILLIC SMALL LETTER CHE WITH DIAERESIS
+04F7          ; Ll #       CYRILLIC SMALL LETTER GHE WITH DESCENDER
+04F9          ; Ll #       CYRILLIC SMALL LETTER YERU WITH DIAERESIS
+04FB          ; Ll #       CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK
+04FD          ; Ll #       CYRILLIC SMALL LETTER HA WITH HOOK
+04FF          ; Ll #       CYRILLIC SMALL LETTER HA WITH STROKE
+0501          ; Ll #       CYRILLIC SMALL LETTER KOMI DE
+0503          ; Ll #       CYRILLIC SMALL LETTER KOMI DJE
+0505          ; Ll #       CYRILLIC SMALL LETTER KOMI ZJE
+0507          ; Ll #       CYRILLIC SMALL LETTER KOMI DZJE
+0509          ; Ll #       CYRILLIC SMALL LETTER KOMI LJE
+050B          ; Ll #       CYRILLIC SMALL LETTER KOMI NJE
+050D          ; Ll #       CYRILLIC SMALL LETTER KOMI SJE
+050F          ; Ll #       CYRILLIC SMALL LETTER KOMI TJE
+0511          ; Ll #       CYRILLIC SMALL LETTER REVERSED ZE
+0513          ; Ll #       CYRILLIC SMALL LETTER EL WITH HOOK
+0515          ; Ll #       CYRILLIC SMALL LETTER LHA
+0517          ; Ll #       CYRILLIC SMALL LETTER RHA
+0519          ; Ll #       CYRILLIC SMALL LETTER YAE
+051B          ; Ll #       CYRILLIC SMALL LETTER QA
+051D          ; Ll #       CYRILLIC SMALL LETTER WE
+051F          ; Ll #       CYRILLIC SMALL LETTER ALEUT KA
+0521          ; Ll #       CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK
+0523          ; Ll #       CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
+0525          ; Ll #       CYRILLIC SMALL LETTER PE WITH DESCENDER
+0527          ; Ll #       CYRILLIC SMALL LETTER SHHA WITH DESCENDER
+0529          ; Ll #       CYRILLIC SMALL LETTER EN WITH LEFT HOOK
+052B          ; Ll #       CYRILLIC SMALL LETTER DZZHE
+052D          ; Ll #       CYRILLIC SMALL LETTER DCHE
+052F          ; Ll #       CYRILLIC SMALL LETTER EL WITH DESCENDER
+0560..0588    ; Ll #  [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
+10D0..10FA    ; Ll #  [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+10FD..10FF    ; Ll #   [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
+13F8..13FD    ; Ll #   [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
+1C80..1C88    ; Ll #   [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1D00..1D2B    ; Ll #  [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
+1D6B..1D77    ; Ll #  [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+1D79..1D9A    ; Ll #  [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+1E01          ; Ll #       LATIN SMALL LETTER A WITH RING BELOW
+1E03          ; Ll #       LATIN SMALL LETTER B WITH DOT ABOVE
+1E05          ; Ll #       LATIN SMALL LETTER B WITH DOT BELOW
+1E07          ; Ll #       LATIN SMALL LETTER B WITH LINE BELOW
+1E09          ; Ll #       LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
+1E0B          ; Ll #       LATIN SMALL LETTER D WITH DOT ABOVE
+1E0D          ; Ll #       LATIN SMALL LETTER D WITH DOT BELOW
+1E0F          ; Ll #       LATIN SMALL LETTER D WITH LINE BELOW
+1E11          ; Ll #       LATIN SMALL LETTER D WITH CEDILLA
+1E13          ; Ll #       LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
+1E15          ; Ll #       LATIN SMALL LETTER E WITH MACRON AND GRAVE
+1E17          ; Ll #       LATIN SMALL LETTER E WITH MACRON AND ACUTE
+1E19          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
+1E1B          ; Ll #       LATIN SMALL LETTER E WITH TILDE BELOW
+1E1D          ; Ll #       LATIN SMALL LETTER E WITH CEDILLA AND BREVE
+1E1F          ; Ll #       LATIN SMALL LETTER F WITH DOT ABOVE
+1E21          ; Ll #       LATIN SMALL LETTER G WITH MACRON
+1E23          ; Ll #       LATIN SMALL LETTER H WITH DOT ABOVE
+1E25          ; Ll #       LATIN SMALL LETTER H WITH DOT BELOW
+1E27          ; Ll #       LATIN SMALL LETTER H WITH DIAERESIS
+1E29          ; Ll #       LATIN SMALL LETTER H WITH CEDILLA
+1E2B          ; Ll #       LATIN SMALL LETTER H WITH BREVE BELOW
+1E2D          ; Ll #       LATIN SMALL LETTER I WITH TILDE BELOW
+1E2F          ; Ll #       LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
+1E31          ; Ll #       LATIN SMALL LETTER K WITH ACUTE
+1E33          ; Ll #       LATIN SMALL LETTER K WITH DOT BELOW
+1E35          ; Ll #       LATIN SMALL LETTER K WITH LINE BELOW
+1E37          ; Ll #       LATIN SMALL LETTER L WITH DOT BELOW
+1E39          ; Ll #       LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
+1E3B          ; Ll #       LATIN SMALL LETTER L WITH LINE BELOW
+1E3D          ; Ll #       LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
+1E3F          ; Ll #       LATIN SMALL LETTER M WITH ACUTE
+1E41          ; Ll #       LATIN SMALL LETTER M WITH DOT ABOVE
+1E43          ; Ll #       LATIN SMALL LETTER M WITH DOT BELOW
+1E45          ; Ll #       LATIN SMALL LETTER N WITH DOT ABOVE
+1E47          ; Ll #       LATIN SMALL LETTER N WITH DOT BELOW
+1E49          ; Ll #       LATIN SMALL LETTER N WITH LINE BELOW
+1E4B          ; Ll #       LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
+1E4D          ; Ll #       LATIN SMALL LETTER O WITH TILDE AND ACUTE
+1E4F          ; Ll #       LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
+1E51          ; Ll #       LATIN SMALL LETTER O WITH MACRON AND GRAVE
+1E53          ; Ll #       LATIN SMALL LETTER O WITH MACRON AND ACUTE
+1E55          ; Ll #       LATIN SMALL LETTER P WITH ACUTE
+1E57          ; Ll #       LATIN SMALL LETTER P WITH DOT ABOVE
+1E59          ; Ll #       LATIN SMALL LETTER R WITH DOT ABOVE
+1E5B          ; Ll #       LATIN SMALL LETTER R WITH DOT BELOW
+1E5D          ; Ll #       LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
+1E5F          ; Ll #       LATIN SMALL LETTER R WITH LINE BELOW
+1E61          ; Ll #       LATIN SMALL LETTER S WITH DOT ABOVE
+1E63          ; Ll #       LATIN SMALL LETTER S WITH DOT BELOW
+1E65          ; Ll #       LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
+1E67          ; Ll #       LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
+1E69          ; Ll #       LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6B          ; Ll #       LATIN SMALL LETTER T WITH DOT ABOVE
+1E6D          ; Ll #       LATIN SMALL LETTER T WITH DOT BELOW
+1E6F          ; Ll #       LATIN SMALL LETTER T WITH LINE BELOW
+1E71          ; Ll #       LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
+1E73          ; Ll #       LATIN SMALL LETTER U WITH DIAERESIS BELOW
+1E75          ; Ll #       LATIN SMALL LETTER U WITH TILDE BELOW
+1E77          ; Ll #       LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
+1E79          ; Ll #       LATIN SMALL LETTER U WITH TILDE AND ACUTE
+1E7B          ; Ll #       LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
+1E7D          ; Ll #       LATIN SMALL LETTER V WITH TILDE
+1E7F          ; Ll #       LATIN SMALL LETTER V WITH DOT BELOW
+1E81          ; Ll #       LATIN SMALL LETTER W WITH GRAVE
+1E83          ; Ll #       LATIN SMALL LETTER W WITH ACUTE
+1E85          ; Ll #       LATIN SMALL LETTER W WITH DIAERESIS
+1E87          ; Ll #       LATIN SMALL LETTER W WITH DOT ABOVE
+1E89          ; Ll #       LATIN SMALL LETTER W WITH DOT BELOW
+1E8B          ; Ll #       LATIN SMALL LETTER X WITH DOT ABOVE
+1E8D          ; Ll #       LATIN SMALL LETTER X WITH DIAERESIS
+1E8F          ; Ll #       LATIN SMALL LETTER Y WITH DOT ABOVE
+1E91          ; Ll #       LATIN SMALL LETTER Z WITH CIRCUMFLEX
+1E93          ; Ll #       LATIN SMALL LETTER Z WITH DOT BELOW
+1E95..1E9D    ; Ll #   [9] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH HIGH STROKE
+1E9F          ; Ll #       LATIN SMALL LETTER DELTA
+1EA1          ; Ll #       LATIN SMALL LETTER A WITH DOT BELOW
+1EA3          ; Ll #       LATIN SMALL LETTER A WITH HOOK ABOVE
+1EA5          ; Ll #       LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA7          ; Ll #       LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA9          ; Ll #       LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAB          ; Ll #       LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAD          ; Ll #       LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAF          ; Ll #       LATIN SMALL LETTER A WITH BREVE AND ACUTE
+1EB1          ; Ll #       LATIN SMALL LETTER A WITH BREVE AND GRAVE
+1EB3          ; Ll #       LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
+1EB5          ; Ll #       LATIN SMALL LETTER A WITH BREVE AND TILDE
+1EB7          ; Ll #       LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
+1EB9          ; Ll #       LATIN SMALL LETTER E WITH DOT BELOW
+1EBB          ; Ll #       LATIN SMALL LETTER E WITH HOOK ABOVE
+1EBD          ; Ll #       LATIN SMALL LETTER E WITH TILDE
+1EBF          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC1          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC3          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC5          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC7          ; Ll #       LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC9          ; Ll #       LATIN SMALL LETTER I WITH HOOK ABOVE
+1ECB          ; Ll #       LATIN SMALL LETTER I WITH DOT BELOW
+1ECD          ; Ll #       LATIN SMALL LETTER O WITH DOT BELOW
+1ECF          ; Ll #       LATIN SMALL LETTER O WITH HOOK ABOVE
+1ED1          ; Ll #       LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED3          ; Ll #       LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED5          ; Ll #       LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED7          ; Ll #       LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED9          ; Ll #       LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDB          ; Ll #       LATIN SMALL LETTER O WITH HORN AND ACUTE
+1EDD          ; Ll #       LATIN SMALL LETTER O WITH HORN AND GRAVE
+1EDF          ; Ll #       LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
+1EE1          ; Ll #       LATIN SMALL LETTER O WITH HORN AND TILDE
+1EE3          ; Ll #       LATIN SMALL LETTER O WITH HORN AND DOT BELOW
+1EE5          ; Ll #       LATIN SMALL LETTER U WITH DOT BELOW
+1EE7          ; Ll #       LATIN SMALL LETTER U WITH HOOK ABOVE
+1EE9          ; Ll #       LATIN SMALL LETTER U WITH HORN AND ACUTE
+1EEB          ; Ll #       LATIN SMALL LETTER U WITH HORN AND GRAVE
+1EED          ; Ll #       LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
+1EEF          ; Ll #       LATIN SMALL LETTER U WITH HORN AND TILDE
+1EF1          ; Ll #       LATIN SMALL LETTER U WITH HORN AND DOT BELOW
+1EF3          ; Ll #       LATIN SMALL LETTER Y WITH GRAVE
+1EF5          ; Ll #       LATIN SMALL LETTER Y WITH DOT BELOW
+1EF7          ; Ll #       LATIN SMALL LETTER Y WITH HOOK ABOVE
+1EF9          ; Ll #       LATIN SMALL LETTER Y WITH TILDE
+1EFB          ; Ll #       LATIN SMALL LETTER MIDDLE-WELSH LL
+1EFD          ; Ll #       LATIN SMALL LETTER MIDDLE-WELSH V
+1EFF..1F07    ; Ll #   [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F10..1F15    ; Ll #   [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+1F20..1F27    ; Ll #   [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI
+1F30..1F37    ; Ll #   [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F40..1F45    ; Ll #   [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+1F50..1F57    ; Ll #   [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F60..1F67    ; Ll #   [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F70..1F7D    ; Ll #  [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA
+1F80..1F87    ; Ll #   [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F90..1F97    ; Ll #   [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA0..1FA7    ; Ll #   [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FB0..1FB4    ; Ll #   [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6..1FB7    ; Ll #   [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FBE          ; Ll #       GREEK PROSGEGRAMMENI
+1FC2..1FC4    ; Ll #   [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6..1FC7    ; Ll #   [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FD0..1FD3    ; Ll #   [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6..1FD7    ; Ll #   [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FE0..1FE7    ; Ll #   [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FF2..1FF4    ; Ll #   [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6..1FF7    ; Ll #   [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+210A          ; Ll #       SCRIPT SMALL G
+210E..210F    ; Ll #   [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI
+2113          ; Ll #       SCRIPT SMALL L
+212F          ; Ll #       SCRIPT SMALL E
+2134          ; Ll #       SCRIPT SMALL O
+2139          ; Ll #       INFORMATION SOURCE
+213C..213D    ; Ll #   [2] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK SMALL GAMMA
+2146..2149    ; Ll #   [4] DOUBLE-STRUCK ITALIC SMALL D..DOUBLE-STRUCK ITALIC SMALL J
+214E          ; Ll #       TURNED SMALL F
+2184          ; Ll #       LATIN SMALL LETTER REVERSED C
+2C30..2C5E    ; Ll #  [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
+2C61          ; Ll #       LATIN SMALL LETTER L WITH DOUBLE BAR
+2C65..2C66    ; Ll #   [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE
+2C68          ; Ll #       LATIN SMALL LETTER H WITH DESCENDER
+2C6A          ; Ll #       LATIN SMALL LETTER K WITH DESCENDER
+2C6C          ; Ll #       LATIN SMALL LETTER Z WITH DESCENDER
+2C71          ; Ll #       LATIN SMALL LETTER V WITH RIGHT HOOK
+2C73..2C74    ; Ll #   [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL
+2C76..2C7B    ; Ll #   [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E
+2C81          ; Ll #       COPTIC SMALL LETTER ALFA
+2C83          ; Ll #       COPTIC SMALL LETTER VIDA
+2C85          ; Ll #       COPTIC SMALL LETTER GAMMA
+2C87          ; Ll #       COPTIC SMALL LETTER DALDA
+2C89          ; Ll #       COPTIC SMALL LETTER EIE
+2C8B          ; Ll #       COPTIC SMALL LETTER SOU
+2C8D          ; Ll #       COPTIC SMALL LETTER ZATA
+2C8F          ; Ll #       COPTIC SMALL LETTER HATE
+2C91          ; Ll #       COPTIC SMALL LETTER THETHE
+2C93          ; Ll #       COPTIC SMALL LETTER IAUDA
+2C95          ; Ll #       COPTIC SMALL LETTER KAPA
+2C97          ; Ll #       COPTIC SMALL LETTER LAULA
+2C99          ; Ll #       COPTIC SMALL LETTER MI
+2C9B          ; Ll #       COPTIC SMALL LETTER NI
+2C9D          ; Ll #       COPTIC SMALL LETTER KSI
+2C9F          ; Ll #       COPTIC SMALL LETTER O
+2CA1          ; Ll #       COPTIC SMALL LETTER PI
+2CA3          ; Ll #       COPTIC SMALL LETTER RO
+2CA5          ; Ll #       COPTIC SMALL LETTER SIMA
+2CA7          ; Ll #       COPTIC SMALL LETTER TAU
+2CA9          ; Ll #       COPTIC SMALL LETTER UA
+2CAB          ; Ll #       COPTIC SMALL LETTER FI
+2CAD          ; Ll #       COPTIC SMALL LETTER KHI
+2CAF          ; Ll #       COPTIC SMALL LETTER PSI
+2CB1          ; Ll #       COPTIC SMALL LETTER OOU
+2CB3          ; Ll #       COPTIC SMALL LETTER DIALECT-P ALEF
+2CB5          ; Ll #       COPTIC SMALL LETTER OLD COPTIC AIN
+2CB7          ; Ll #       COPTIC SMALL LETTER CRYPTOGRAMMIC EIE
+2CB9          ; Ll #       COPTIC SMALL LETTER DIALECT-P KAPA
+2CBB          ; Ll #       COPTIC SMALL LETTER DIALECT-P NI
+2CBD          ; Ll #       COPTIC SMALL LETTER CRYPTOGRAMMIC NI
+2CBF          ; Ll #       COPTIC SMALL LETTER OLD COPTIC OOU
+2CC1          ; Ll #       COPTIC SMALL LETTER SAMPI
+2CC3          ; Ll #       COPTIC SMALL LETTER CROSSED SHEI
+2CC5          ; Ll #       COPTIC SMALL LETTER OLD COPTIC SHEI
+2CC7          ; Ll #       COPTIC SMALL LETTER OLD COPTIC ESH
+2CC9          ; Ll #       COPTIC SMALL LETTER AKHMIMIC KHEI
+2CCB          ; Ll #       COPTIC SMALL LETTER DIALECT-P HORI
+2CCD          ; Ll #       COPTIC SMALL LETTER OLD COPTIC HORI
+2CCF          ; Ll #       COPTIC SMALL LETTER OLD COPTIC HA
+2CD1          ; Ll #       COPTIC SMALL LETTER L-SHAPED HA
+2CD3          ; Ll #       COPTIC SMALL LETTER OLD COPTIC HEI
+2CD5          ; Ll #       COPTIC SMALL LETTER OLD COPTIC HAT
+2CD7          ; Ll #       COPTIC SMALL LETTER OLD COPTIC GANGIA
+2CD9          ; Ll #       COPTIC SMALL LETTER OLD COPTIC DJA
+2CDB          ; Ll #       COPTIC SMALL LETTER OLD COPTIC SHIMA
+2CDD          ; Ll #       COPTIC SMALL LETTER OLD NUBIAN SHIMA
+2CDF          ; Ll #       COPTIC SMALL LETTER OLD NUBIAN NGI
+2CE1          ; Ll #       COPTIC SMALL LETTER OLD NUBIAN NYI
+2CE3..2CE4    ; Ll #   [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI
+2CEC          ; Ll #       COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI
+2CEE          ; Ll #       COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
+2CF3          ; Ll #       COPTIC SMALL LETTER BOHAIRIC KHEI
+2D00..2D25    ; Ll #  [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
+2D27          ; Ll #       GEORGIAN SMALL LETTER YN
+2D2D          ; Ll #       GEORGIAN SMALL LETTER AEN
+A641          ; Ll #       CYRILLIC SMALL LETTER ZEMLYA
+A643          ; Ll #       CYRILLIC SMALL LETTER DZELO
+A645          ; Ll #       CYRILLIC SMALL LETTER REVERSED DZE
+A647          ; Ll #       CYRILLIC SMALL LETTER IOTA
+A649          ; Ll #       CYRILLIC SMALL LETTER DJERV
+A64B          ; Ll #       CYRILLIC SMALL LETTER MONOGRAPH UK
+A64D          ; Ll #       CYRILLIC SMALL LETTER BROAD OMEGA
+A64F          ; Ll #       CYRILLIC SMALL LETTER NEUTRAL YER
+A651          ; Ll #       CYRILLIC SMALL LETTER YERU WITH BACK YER
+A653          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED YAT
+A655          ; Ll #       CYRILLIC SMALL LETTER REVERSED YU
+A657          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED A
+A659          ; Ll #       CYRILLIC SMALL LETTER CLOSED LITTLE YUS
+A65B          ; Ll #       CYRILLIC SMALL LETTER BLENDED YUS
+A65D          ; Ll #       CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS
+A65F          ; Ll #       CYRILLIC SMALL LETTER YN
+A661          ; Ll #       CYRILLIC SMALL LETTER REVERSED TSE
+A663          ; Ll #       CYRILLIC SMALL LETTER SOFT DE
+A665          ; Ll #       CYRILLIC SMALL LETTER SOFT EL
+A667          ; Ll #       CYRILLIC SMALL LETTER SOFT EM
+A669          ; Ll #       CYRILLIC SMALL LETTER MONOCULAR O
+A66B          ; Ll #       CYRILLIC SMALL LETTER BINOCULAR O
+A66D          ; Ll #       CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
+A681          ; Ll #       CYRILLIC SMALL LETTER DWE
+A683          ; Ll #       CYRILLIC SMALL LETTER DZWE
+A685          ; Ll #       CYRILLIC SMALL LETTER ZHWE
+A687          ; Ll #       CYRILLIC SMALL LETTER CCHE
+A689          ; Ll #       CYRILLIC SMALL LETTER DZZE
+A68B          ; Ll #       CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK
+A68D          ; Ll #       CYRILLIC SMALL LETTER TWE
+A68F          ; Ll #       CYRILLIC SMALL LETTER TSWE
+A691          ; Ll #       CYRILLIC SMALL LETTER TSSE
+A693          ; Ll #       CYRILLIC SMALL LETTER TCHE
+A695          ; Ll #       CYRILLIC SMALL LETTER HWE
+A697          ; Ll #       CYRILLIC SMALL LETTER SHWE
+A699          ; Ll #       CYRILLIC SMALL LETTER DOUBLE O
+A69B          ; Ll #       CYRILLIC SMALL LETTER CROSSED O
+A723          ; Ll #       LATIN SMALL LETTER EGYPTOLOGICAL ALEF
+A725          ; Ll #       LATIN SMALL LETTER EGYPTOLOGICAL AIN
+A727          ; Ll #       LATIN SMALL LETTER HENG
+A729          ; Ll #       LATIN SMALL LETTER TZ
+A72B          ; Ll #       LATIN SMALL LETTER TRESILLO
+A72D          ; Ll #       LATIN SMALL LETTER CUATRILLO
+A72F..A731    ; Ll #   [3] LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S
+A733          ; Ll #       LATIN SMALL LETTER AA
+A735          ; Ll #       LATIN SMALL LETTER AO
+A737          ; Ll #       LATIN SMALL LETTER AU
+A739          ; Ll #       LATIN SMALL LETTER AV
+A73B          ; Ll #       LATIN SMALL LETTER AV WITH HORIZONTAL BAR
+A73D          ; Ll #       LATIN SMALL LETTER AY
+A73F          ; Ll #       LATIN SMALL LETTER REVERSED C WITH DOT
+A741          ; Ll #       LATIN SMALL LETTER K WITH STROKE
+A743          ; Ll #       LATIN SMALL LETTER K WITH DIAGONAL STROKE
+A745          ; Ll #       LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
+A747          ; Ll #       LATIN SMALL LETTER BROKEN L
+A749          ; Ll #       LATIN SMALL LETTER L WITH HIGH STROKE
+A74B          ; Ll #       LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
+A74D          ; Ll #       LATIN SMALL LETTER O WITH LOOP
+A74F          ; Ll #       LATIN SMALL LETTER OO
+A751          ; Ll #       LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
+A753          ; Ll #       LATIN SMALL LETTER P WITH FLOURISH
+A755          ; Ll #       LATIN SMALL LETTER P WITH SQUIRREL TAIL
+A757          ; Ll #       LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
+A759          ; Ll #       LATIN SMALL LETTER Q WITH DIAGONAL STROKE
+A75B          ; Ll #       LATIN SMALL LETTER R ROTUNDA
+A75D          ; Ll #       LATIN SMALL LETTER RUM ROTUNDA
+A75F          ; Ll #       LATIN SMALL LETTER V WITH DIAGONAL STROKE
+A761          ; Ll #       LATIN SMALL LETTER VY
+A763          ; Ll #       LATIN SMALL LETTER VISIGOTHIC Z
+A765          ; Ll #       LATIN SMALL LETTER THORN WITH STROKE
+A767          ; Ll #       LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
+A769          ; Ll #       LATIN SMALL LETTER VEND
+A76B          ; Ll #       LATIN SMALL LETTER ET
+A76D          ; Ll #       LATIN SMALL LETTER IS
+A76F          ; Ll #       LATIN SMALL LETTER CON
+A771..A778    ; Ll #   [8] LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM
+A77A          ; Ll #       LATIN SMALL LETTER INSULAR D
+A77C          ; Ll #       LATIN SMALL LETTER INSULAR F
+A77F          ; Ll #       LATIN SMALL LETTER TURNED INSULAR G
+A781          ; Ll #       LATIN SMALL LETTER TURNED L
+A783          ; Ll #       LATIN SMALL LETTER INSULAR R
+A785          ; Ll #       LATIN SMALL LETTER INSULAR S
+A787          ; Ll #       LATIN SMALL LETTER INSULAR T
+A78C          ; Ll #       LATIN SMALL LETTER SALTILLO
+A78E          ; Ll #       LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
+A791          ; Ll #       LATIN SMALL LETTER N WITH DESCENDER
+A793..A795    ; Ll #   [3] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER H WITH PALATAL HOOK
+A797          ; Ll #       LATIN SMALL LETTER B WITH FLOURISH
+A799          ; Ll #       LATIN SMALL LETTER F WITH STROKE
+A79B          ; Ll #       LATIN SMALL LETTER VOLAPUK AE
+A79D          ; Ll #       LATIN SMALL LETTER VOLAPUK OE
+A79F          ; Ll #       LATIN SMALL LETTER VOLAPUK UE
+A7A1          ; Ll #       LATIN SMALL LETTER G WITH OBLIQUE STROKE
+A7A3          ; Ll #       LATIN SMALL LETTER K WITH OBLIQUE STROKE
+A7A5          ; Ll #       LATIN SMALL LETTER N WITH OBLIQUE STROKE
+A7A7          ; Ll #       LATIN SMALL LETTER R WITH OBLIQUE STROKE
+A7A9          ; Ll #       LATIN SMALL LETTER S WITH OBLIQUE STROKE
+A7AF          ; Ll #       LATIN LETTER SMALL CAPITAL Q
+A7B5          ; Ll #       LATIN SMALL LETTER BETA
+A7B7          ; Ll #       LATIN SMALL LETTER OMEGA
+A7B9          ; Ll #       LATIN SMALL LETTER U WITH STROKE
+A7BB          ; Ll #       LATIN SMALL LETTER GLOTTAL A
+A7BD          ; Ll #       LATIN SMALL LETTER GLOTTAL I
+A7BF          ; Ll #       LATIN SMALL LETTER GLOTTAL U
+A7C3          ; Ll #       LATIN SMALL LETTER ANGLICANA W
+A7C8          ; Ll #       LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
+A7CA          ; Ll #       LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
+A7F6          ; Ll #       LATIN SMALL LETTER REVERSED HALF H
+A7FA          ; Ll #       LATIN LETTER SMALL CAPITAL TURNED M
+AB30..AB5A    ; Ll #  [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
+AB60..AB68    ; Ll #   [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
+AB70..ABBF    ; Ll #  [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
+FB00..FB06    ; Ll #   [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+FB13..FB17    ; Ll #   [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+FF41..FF5A    ; Ll #  [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+10428..1044F  ; Ll #  [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW
+104D8..104FB  ; Ll #  [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
+10CC0..10CF2  ; Ll #  [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US
+118C0..118DF  ; Ll #  [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
+16E60..16E7F  ; Ll #  [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
+1D41A..1D433  ; Ll #  [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z
+1D44E..1D454  ; Ll #   [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G
+1D456..1D467  ; Ll #  [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z
+1D482..1D49B  ; Ll #  [26] MATHEMATICAL BOLD ITALIC SMALL A..MATHEMATICAL BOLD ITALIC SMALL Z
+1D4B6..1D4B9  ; Ll #   [4] MATHEMATICAL SCRIPT SMALL A..MATHEMATICAL SCRIPT SMALL D
+1D4BB         ; Ll #       MATHEMATICAL SCRIPT SMALL F
+1D4BD..1D4C3  ; Ll #   [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+1D4C5..1D4CF  ; Ll #  [11] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL SCRIPT SMALL Z
+1D4EA..1D503  ; Ll #  [26] MATHEMATICAL BOLD SCRIPT SMALL A..MATHEMATICAL BOLD SCRIPT SMALL Z
+1D51E..1D537  ; Ll #  [26] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL FRAKTUR SMALL Z
+1D552..1D56B  ; Ll #  [26] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL DOUBLE-STRUCK SMALL Z
+1D586..1D59F  ; Ll #  [26] MATHEMATICAL BOLD FRAKTUR SMALL A..MATHEMATICAL BOLD FRAKTUR SMALL Z
+1D5BA..1D5D3  ; Ll #  [26] MATHEMATICAL SANS-SERIF SMALL A..MATHEMATICAL SANS-SERIF SMALL Z
+1D5EE..1D607  ; Ll #  [26] MATHEMATICAL SANS-SERIF BOLD SMALL A..MATHEMATICAL SANS-SERIF BOLD SMALL Z
+1D622..1D63B  ; Ll #  [26] MATHEMATICAL SANS-SERIF ITALIC SMALL A..MATHEMATICAL SANS-SERIF ITALIC SMALL Z
+1D656..1D66F  ; Ll #  [26] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Z
+1D68A..1D6A5  ; Ll #  [28] MATHEMATICAL MONOSPACE SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+1D6C2..1D6DA  ; Ll #  [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+1D6DC..1D6E1  ; Ll #   [6] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL BOLD PI SYMBOL
+1D6FC..1D714  ; Ll #  [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+1D716..1D71B  ; Ll #   [6] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL ITALIC PI SYMBOL
+1D736..1D74E  ; Ll #  [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+1D750..1D755  ; Ll #   [6] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC PI SYMBOL
+1D770..1D788  ; Ll #  [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+1D78A..1D78F  ; Ll #   [6] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD PI SYMBOL
+1D7AA..1D7C2  ; Ll #  [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+1D7C4..1D7C9  ; Ll #   [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL
+1D7CB         ; Ll #       MATHEMATICAL BOLD SMALL DIGAMMA
+1E922..1E943  ; Ll #  [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
+
+# Total code points: 2155
+
+# ================================================
+
+# General_Category=Titlecase_Letter
+
+01C5          ; Lt #       LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C8          ; Lt #       LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CB          ; Lt #       LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01F2          ; Lt #       LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+1F88..1F8F    ; Lt #   [8] GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F98..1F9F    ; Lt #   [8] GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA8..1FAF    ; Lt #   [8] GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FBC          ; Lt #       GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FCC          ; Lt #       GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FFC          ; Lt #       GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+
+# Total code points: 31
+
+# ================================================
+
+# General_Category=Modifier_Letter
+
+02B0..02C1    ; Lm #  [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C6..02D1    ; Lm #  [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02E0..02E4    ; Lm #   [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+02EC          ; Lm #       MODIFIER LETTER VOICING
+02EE          ; Lm #       MODIFIER LETTER DOUBLE APOSTROPHE
+0374          ; Lm #       GREEK NUMERAL SIGN
+037A          ; Lm #       GREEK YPOGEGRAMMENI
+0559          ; Lm #       ARMENIAN MODIFIER LETTER LEFT HALF RING
+0640          ; Lm #       ARABIC TATWEEL
+06E5..06E6    ; Lm #   [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+07F4..07F5    ; Lm #   [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+07FA          ; Lm #       NKO LAJANYALAN
+081A          ; Lm #       SAMARITAN MODIFIER LETTER EPENTHETIC YUT
+0824          ; Lm #       SAMARITAN MODIFIER LETTER SHORT A
+0828          ; Lm #       SAMARITAN MODIFIER LETTER I
+0971          ; Lm #       DEVANAGARI SIGN HIGH SPACING DOT
+0E46          ; Lm #       THAI CHARACTER MAIYAMOK
+0EC6          ; Lm #       LAO KO LA
+10FC          ; Lm #       MODIFIER LETTER GEORGIAN NAR
+17D7          ; Lm #       KHMER SIGN LEK TOO
+1843          ; Lm #       MONGOLIAN LETTER TODO LONG VOWEL SIGN
+1AA7          ; Lm #       TAI THAM SIGN MAI YAMOK
+1C78..1C7D    ; Lm #   [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1D2C..1D6A    ; Lm #  [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
+1D78          ; Lm #       MODIFIER LETTER CYRILLIC EN
+1D9B..1DBF    ; Lm #  [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+2071          ; Lm #       SUPERSCRIPT LATIN SMALL LETTER I
+207F          ; Lm #       SUPERSCRIPT LATIN SMALL LETTER N
+2090..209C    ; Lm #  [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
+2C7C..2C7D    ; Lm #   [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
+2D6F          ; Lm #       TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+2E2F          ; Lm #       VERTICAL TILDE
+3005          ; Lm #       IDEOGRAPHIC ITERATION MARK
+3031..3035    ; Lm #   [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+303B          ; Lm #       VERTICAL IDEOGRAPHIC ITERATION MARK
+309D..309E    ; Lm #   [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+30FC..30FE    ; Lm #   [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
+A015          ; Lm #       YI SYLLABLE WU
+A4F8..A4FD    ; Lm #   [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
+A60C          ; Lm #       VAI SYLLABLE LENGTHENER
+A67F          ; Lm #       CYRILLIC PAYEROK
+A69C..A69D    ; Lm #   [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
+A717..A71F    ; Lm #   [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A770          ; Lm #       MODIFIER LETTER US
+A788          ; Lm #       MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A7F8..A7F9    ; Lm #   [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
+A9CF          ; Lm #       JAVANESE PANGRANGKEP
+A9E6          ; Lm #       MYANMAR MODIFIER LETTER SHAN REDUPLICATION
+AA70          ; Lm #       MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
+AADD          ; Lm #       TAI VIET SYMBOL SAM
+AAF3..AAF4    ; Lm #   [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
+AB5C..AB5F    ; Lm #   [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
+AB69          ; Lm #       MODIFIER LETTER SMALL TURNED W
+FF70          ; Lm #       HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F    ; Lm #   [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+16B40..16B43  ; Lm #   [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
+16F93..16F9F  ; Lm #  [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
+16FE0..16FE1  ; Lm #   [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK
+16FE3         ; Lm #       OLD CHINESE ITERATION MARK
+1E137..1E13D  ; Lm #   [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
+1E94B         ; Lm #       ADLAM NASALIZATION MARK
+
+# Total code points: 260
+
+# ================================================
+
+# General_Category=Other_Letter
+
+00AA          ; Lo #       FEMININE ORDINAL INDICATOR
+00BA          ; Lo #       MASCULINE ORDINAL INDICATOR
+01BB          ; Lo #       LATIN LETTER TWO WITH STROKE
+01C0..01C3    ; Lo #   [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
+0294          ; Lo #       LATIN LETTER GLOTTAL STOP
+05D0..05EA    ; Lo #  [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+05EF..05F2    ; Lo #   [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD
+0620..063F    ; Lo #  [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0641..064A    ; Lo #  [10] ARABIC LETTER FEH..ARABIC LETTER YEH
+066E..066F    ; Lo #   [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+0671..06D3    ; Lo #  [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D5          ; Lo #       ARABIC LETTER AE
+06EE..06EF    ; Lo #   [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+06FA..06FC    ; Lo #   [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+06FF          ; Lo #       ARABIC LETTER HEH WITH INVERTED V
+0710          ; Lo #       SYRIAC LETTER ALAPH
+0712..072F    ; Lo #  [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
+074D..07A5    ; Lo #  [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU
+07B1          ; Lo #       THAANA LETTER NAA
+07CA..07EA    ; Lo #  [33] NKO LETTER A..NKO LETTER JONA RA
+0800..0815    ; Lo #  [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
+0840..0858    ; Lo #  [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
+0860..086A    ; Lo #  [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
+08A0..08B4    ; Lo #  [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
+08B6..08C7    ; Lo #  [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
+0904..0939    ; Lo #  [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
+093D          ; Lo #       DEVANAGARI SIGN AVAGRAHA
+0950          ; Lo #       DEVANAGARI OM
+0958..0961    ; Lo #  [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
+0972..0980    ; Lo #  [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI
+0985..098C    ; Lo #   [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
+098F..0990    ; Lo #   [2] BENGALI LETTER E..BENGALI LETTER AI
+0993..09A8    ; Lo #  [22] BENGALI LETTER O..BENGALI LETTER NA
+09AA..09B0    ; Lo #   [7] BENGALI LETTER PA..BENGALI LETTER RA
+09B2          ; Lo #       BENGALI LETTER LA
+09B6..09B9    ; Lo #   [4] BENGALI LETTER SHA..BENGALI LETTER HA
+09BD          ; Lo #       BENGALI SIGN AVAGRAHA
+09CE          ; Lo #       BENGALI LETTER KHANDA TA
+09DC..09DD    ; Lo #   [2] BENGALI LETTER RRA..BENGALI LETTER RHA
+09DF..09E1    ; Lo #   [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
+09F0..09F1    ; Lo #   [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09FC          ; Lo #       BENGALI LETTER VEDIC ANUSVARA
+0A05..0A0A    ; Lo #   [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
+0A0F..0A10    ; Lo #   [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
+0A13..0A28    ; Lo #  [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
+0A2A..0A30    ; Lo #   [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
+0A32..0A33    ; Lo #   [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
+0A35..0A36    ; Lo #   [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
+0A38..0A39    ; Lo #   [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
+0A59..0A5C    ; Lo #   [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
+0A5E          ; Lo #       GURMUKHI LETTER FA
+0A72..0A74    ; Lo #   [3] GURMUKHI IRI..GURMUKHI EK ONKAR
+0A85..0A8D    ; Lo #   [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+0A8F..0A91    ; Lo #   [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+0A93..0AA8    ; Lo #  [22] GUJARATI LETTER O..GUJARATI LETTER NA
+0AAA..0AB0    ; Lo #   [7] GUJARATI LETTER PA..GUJARATI LETTER RA
+0AB2..0AB3    ; Lo #   [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
+0AB5..0AB9    ; Lo #   [5] GUJARATI LETTER VA..GUJARATI LETTER HA
+0ABD          ; Lo #       GUJARATI SIGN AVAGRAHA
+0AD0          ; Lo #       GUJARATI OM
+0AE0..0AE1    ; Lo #   [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
+0AF9          ; Lo #       GUJARATI LETTER ZHA
+0B05..0B0C    ; Lo #   [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
+0B0F..0B10    ; Lo #   [2] ORIYA LETTER E..ORIYA LETTER AI
+0B13..0B28    ; Lo #  [22] ORIYA LETTER O..ORIYA LETTER NA
+0B2A..0B30    ; Lo #   [7] ORIYA LETTER PA..ORIYA LETTER RA
+0B32..0B33    ; Lo #   [2] ORIYA LETTER LA..ORIYA LETTER LLA
+0B35..0B39    ; Lo #   [5] ORIYA LETTER VA..ORIYA LETTER HA
+0B3D          ; Lo #       ORIYA SIGN AVAGRAHA
+0B5C..0B5D    ; Lo #   [2] ORIYA LETTER RRA..ORIYA LETTER RHA
+0B5F..0B61    ; Lo #   [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
+0B71          ; Lo #       ORIYA LETTER WA
+0B83          ; Lo #       TAMIL SIGN VISARGA
+0B85..0B8A    ; Lo #   [6] TAMIL LETTER A..TAMIL LETTER UU
+0B8E..0B90    ; Lo #   [3] TAMIL LETTER E..TAMIL LETTER AI
+0B92..0B95    ; Lo #   [4] TAMIL LETTER O..TAMIL LETTER KA
+0B99..0B9A    ; Lo #   [2] TAMIL LETTER NGA..TAMIL LETTER CA
+0B9C          ; Lo #       TAMIL LETTER JA
+0B9E..0B9F    ; Lo #   [2] TAMIL LETTER NYA..TAMIL LETTER TTA
+0BA3..0BA4    ; Lo #   [2] TAMIL LETTER NNA..TAMIL LETTER TA
+0BA8..0BAA    ; Lo #   [3] TAMIL LETTER NA..TAMIL LETTER PA
+0BAE..0BB9    ; Lo #  [12] TAMIL LETTER MA..TAMIL LETTER HA
+0BD0          ; Lo #       TAMIL OM
+0C05..0C0C    ; Lo #   [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
+0C0E..0C10    ; Lo #   [3] TELUGU LETTER E..TELUGU LETTER AI
+0C12..0C28    ; Lo #  [23] TELUGU LETTER O..TELUGU LETTER NA
+0C2A..0C39    ; Lo #  [16] TELUGU LETTER PA..TELUGU LETTER HA
+0C3D          ; Lo #       TELUGU SIGN AVAGRAHA
+0C58..0C5A    ; Lo #   [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
+0C60..0C61    ; Lo #   [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C80          ; Lo #       KANNADA SIGN SPACING CANDRABINDU
+0C85..0C8C    ; Lo #   [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
+0C8E..0C90    ; Lo #   [3] KANNADA LETTER E..KANNADA LETTER AI
+0C92..0CA8    ; Lo #  [23] KANNADA LETTER O..KANNADA LETTER NA
+0CAA..0CB3    ; Lo #  [10] KANNADA LETTER PA..KANNADA LETTER LLA
+0CB5..0CB9    ; Lo #   [5] KANNADA LETTER VA..KANNADA LETTER HA
+0CBD          ; Lo #       KANNADA SIGN AVAGRAHA
+0CDE          ; Lo #       KANNADA LETTER FA
+0CE0..0CE1    ; Lo #   [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
+0CF1..0CF2    ; Lo #   [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+0D04..0D0C    ; Lo #   [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
+0D0E..0D10    ; Lo #   [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
+0D12..0D3A    ; Lo #  [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
+0D3D          ; Lo #       MALAYALAM SIGN AVAGRAHA
+0D4E          ; Lo #       MALAYALAM LETTER DOT REPH
+0D54..0D56    ; Lo #   [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
+0D5F..0D61    ; Lo #   [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
+0D7A..0D7F    ; Lo #   [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+0D85..0D96    ; Lo #  [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+0D9A..0DB1    ; Lo #  [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+0DB3..0DBB    ; Lo #   [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+0DBD          ; Lo #       SINHALA LETTER DANTAJA LAYANNA
+0DC0..0DC6    ; Lo #   [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+0E01..0E30    ; Lo #  [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
+0E32..0E33    ; Lo #   [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
+0E40..0E45    ; Lo #   [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
+0E81..0E82    ; Lo #   [2] LAO LETTER KO..LAO LETTER KHO SUNG
+0E84          ; Lo #       LAO LETTER KHO TAM
+0E86..0E8A    ; Lo #   [5] LAO LETTER PALI GHA..LAO LETTER SO TAM
+0E8C..0EA3    ; Lo #  [24] LAO LETTER PALI JHA..LAO LETTER LO LING
+0EA5          ; Lo #       LAO LETTER LO LOOT
+0EA7..0EB0    ; Lo #  [10] LAO LETTER WO..LAO VOWEL SIGN A
+0EB2..0EB3    ; Lo #   [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
+0EBD          ; Lo #       LAO SEMIVOWEL SIGN NYO
+0EC0..0EC4    ; Lo #   [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+0EDC..0EDF    ; Lo #   [4] LAO HO NO..LAO LETTER KHMU NYO
+0F00          ; Lo #       TIBETAN SYLLABLE OM
+0F40..0F47    ; Lo #   [8] TIBETAN LETTER KA..TIBETAN LETTER JA
+0F49..0F6C    ; Lo #  [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
+0F88..0F8C    ; Lo #   [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
+1000..102A    ; Lo #  [43] MYANMAR LETTER KA..MYANMAR LETTER AU
+103F          ; Lo #       MYANMAR LETTER GREAT SA
+1050..1055    ; Lo #   [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
+105A..105D    ; Lo #   [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
+1061          ; Lo #       MYANMAR LETTER SGAW KAREN SHA
+1065..1066    ; Lo #   [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
+106E..1070    ; Lo #   [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
+1075..1081    ; Lo #  [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
+108E          ; Lo #       MYANMAR LETTER RUMAI PALAUNG FA
+1100..1248    ; Lo # [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
+124A..124D    ; Lo #   [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+1250..1256    ; Lo #   [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+1258          ; Lo #       ETHIOPIC SYLLABLE QHWA
+125A..125D    ; Lo #   [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+1260..1288    ; Lo #  [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+128A..128D    ; Lo #   [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+1290..12B0    ; Lo #  [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+12B2..12B5    ; Lo #   [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+12B8..12BE    ; Lo #   [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+12C0          ; Lo #       ETHIOPIC SYLLABLE KXWA
+12C2..12C5    ; Lo #   [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+12C8..12D6    ; Lo #  [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+12D8..1310    ; Lo #  [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+1312..1315    ; Lo #   [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+1318..135A    ; Lo #  [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+1380..138F    ; Lo #  [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+1401..166C    ; Lo # [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+166F..167F    ; Lo #  [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+1681..169A    ; Lo #  [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
+16A0..16EA    ; Lo #  [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+16F1..16F8    ; Lo #   [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
+1700..170C    ; Lo #  [13] TAGALOG LETTER A..TAGALOG LETTER YA
+170E..1711    ; Lo #   [4] TAGALOG LETTER LA..TAGALOG LETTER HA
+1720..1731    ; Lo #  [18] HANUNOO LETTER A..HANUNOO LETTER HA
+1740..1751    ; Lo #  [18] BUHID LETTER A..BUHID LETTER HA
+1760..176C    ; Lo #  [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
+176E..1770    ; Lo #   [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
+1780..17B3    ; Lo #  [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+17DC          ; Lo #       KHMER SIGN AVAKRAHASANYA
+1820..1842    ; Lo #  [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
+1844..1878    ; Lo #  [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS
+1880..1884    ; Lo #   [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+1887..18A8    ; Lo #  [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
+18AA          ; Lo #       MONGOLIAN LETTER MANCHU ALI GALI LHA
+18B0..18F5    ; Lo #  [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+1900..191E    ; Lo #  [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
+1950..196D    ; Lo #  [30] TAI LE LETTER KA..TAI LE LETTER AI
+1970..1974    ; Lo #   [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+1980..19AB    ; Lo #  [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+19B0..19C9    ; Lo #  [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
+1A00..1A16    ; Lo #  [23] BUGINESE LETTER KA..BUGINESE LETTER HA
+1A20..1A54    ; Lo #  [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
+1B05..1B33    ; Lo #  [47] BALINESE LETTER AKARA..BALINESE LETTER HA
+1B45..1B4B    ; Lo #   [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
+1B83..1BA0    ; Lo #  [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
+1BAE..1BAF    ; Lo #   [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
+1BBA..1BE5    ; Lo #  [44] SUNDANESE AVAGRAHA..BATAK LETTER U
+1C00..1C23    ; Lo #  [36] LEPCHA LETTER KA..LEPCHA LETTER A
+1C4D..1C4F    ; Lo #   [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
+1C5A..1C77    ; Lo #  [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
+1CE9..1CEC    ; Lo #   [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CEE..1CF3    ; Lo #   [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
+1CF5..1CF6    ; Lo #   [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+1CFA          ; Lo #       VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
+2135..2138    ; Lo #   [4] ALEF SYMBOL..DALET SYMBOL
+2D30..2D67    ; Lo #  [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
+2D80..2D96    ; Lo #  [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
+2DA0..2DA6    ; Lo #   [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
+2DA8..2DAE    ; Lo #   [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
+2DB0..2DB6    ; Lo #   [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
+2DB8..2DBE    ; Lo #   [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
+2DC0..2DC6    ; Lo #   [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
+2DC8..2DCE    ; Lo #   [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
+2DD0..2DD6    ; Lo #   [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
+2DD8..2DDE    ; Lo #   [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
+3006          ; Lo #       IDEOGRAPHIC CLOSING MARK
+303C          ; Lo #       MASU MARK
+3041..3096    ; Lo #  [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
+309F          ; Lo #       HIRAGANA DIGRAPH YORI
+30A1..30FA    ; Lo #  [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
+30FF          ; Lo #       KATAKANA DIGRAPH KOTO
+3105..312F    ; Lo #  [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
+3131..318E    ; Lo #  [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+31A0..31BF    ; Lo #  [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
+31F0..31FF    ; Lo #  [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
+3400..4DBF    ; Lo # [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
+4E00..9FFC    ; Lo # [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
+A000..A014    ; Lo #  [21] YI SYLLABLE IT..YI SYLLABLE E
+A016..A48C    ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
+A4D0..A4F7    ; Lo #  [40] LISU LETTER BA..LISU LETTER OE
+A500..A60B    ; Lo # [268] VAI SYLLABLE EE..VAI SYLLABLE NG
+A610..A61F    ; Lo #  [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
+A62A..A62B    ; Lo #   [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
+A66E          ; Lo #       CYRILLIC LETTER MULTIOCULAR O
+A6A0..A6E5    ; Lo #  [70] BAMUM LETTER A..BAMUM LETTER KI
+A78F          ; Lo #       LATIN LETTER SINOLOGICAL DOT
+A7F7          ; Lo #       LATIN EPIGRAPHIC LETTER SIDEWAYS I
+A7FB..A801    ; Lo #   [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I
+A803..A805    ; Lo #   [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
+A807..A80A    ; Lo #   [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
+A80C..A822    ; Lo #  [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
+A840..A873    ; Lo #  [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
+A882..A8B3    ; Lo #  [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
+A8F2..A8F7    ; Lo #   [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
+A8FB          ; Lo #       DEVANAGARI HEADSTROKE
+A8FD..A8FE    ; Lo #   [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY
+A90A..A925    ; Lo #  [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
+A930..A946    ; Lo #  [23] REJANG LETTER KA..REJANG LETTER A
+A960..A97C    ; Lo #  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
+A984..A9B2    ; Lo #  [47] JAVANESE LETTER A..JAVANESE LETTER HA
+A9E0..A9E4    ; Lo #   [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA
+A9E7..A9EF    ; Lo #   [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA
+A9FA..A9FE    ; Lo #   [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA
+AA00..AA28    ; Lo #  [41] CHAM LETTER A..CHAM LETTER HA
+AA40..AA42    ; Lo #   [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
+AA44..AA4B    ; Lo #   [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
+AA60..AA6F    ; Lo #  [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
+AA71..AA76    ; Lo #   [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
+AA7A          ; Lo #       MYANMAR LETTER AITON RA
+AA7E..AAAF    ; Lo #  [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O
+AAB1          ; Lo #       TAI VIET VOWEL AA
+AAB5..AAB6    ; Lo #   [2] TAI VIET VOWEL E..TAI VIET VOWEL O
+AAB9..AABD    ; Lo #   [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
+AAC0          ; Lo #       TAI VIET TONE MAI NUENG
+AAC2          ; Lo #       TAI VIET TONE MAI SONG
+AADB..AADC    ; Lo #   [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
+AAE0..AAEA    ; Lo #  [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA
+AAF2          ; Lo #       MEETEI MAYEK ANJI
+AB01..AB06    ; Lo #   [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
+AB09..AB0E    ; Lo #   [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
+AB11..AB16    ; Lo #   [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
+AB20..AB26    ; Lo #   [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
+AB28..AB2E    ; Lo #   [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
+ABC0..ABE2    ; Lo #  [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
+AC00..D7A3    ; Lo # [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
+D7B0..D7C6    ; Lo #  [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
+D7CB..D7FB    ; Lo #  [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
+F900..FA6D    ; Lo # [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
+FA70..FAD9    ; Lo # [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+FB1D          ; Lo #       HEBREW LETTER YOD WITH HIRIQ
+FB1F..FB28    ; Lo #  [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+FB2A..FB36    ; Lo #  [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+FB38..FB3C    ; Lo #   [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+FB3E          ; Lo #       HEBREW LETTER MEM WITH DAGESH
+FB40..FB41    ; Lo #   [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+FB43..FB44    ; Lo #   [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+FB46..FBB1    ; Lo # [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+FBD3..FD3D    ; Lo # [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+FD50..FD8F    ; Lo #  [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+FD92..FDC7    ; Lo #  [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+FDF0..FDFB    ; Lo #  [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
+FE70..FE74    ; Lo #   [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
+FE76..FEFC    ; Lo # [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+FF66..FF6F    ; Lo #  [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
+FF71..FF9D    ; Lo #  [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+FFA0..FFBE    ; Lo #  [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+FFC2..FFC7    ; Lo #   [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+FFCA..FFCF    ; Lo #   [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+FFD2..FFD7    ; Lo #   [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+FFDA..FFDC    ; Lo #   [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+10000..1000B  ; Lo #  [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
+1000D..10026  ; Lo #  [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
+10028..1003A  ; Lo #  [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
+1003C..1003D  ; Lo #   [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
+1003F..1004D  ; Lo #  [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
+10050..1005D  ; Lo #  [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
+10080..100FA  ; Lo # [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
+10280..1029C  ; Lo #  [29] LYCIAN LETTER A..LYCIAN LETTER X
+102A0..102D0  ; Lo #  [49] CARIAN LETTER A..CARIAN LETTER UUU3
+10300..1031F  ; Lo #  [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
+1032D..10340  ; Lo #  [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA
+10342..10349  ; Lo #   [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
+10350..10375  ; Lo #  [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA
+10380..1039D  ; Lo #  [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
+103A0..103C3  ; Lo #  [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
+103C8..103CF  ; Lo #   [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
+10450..1049D  ; Lo #  [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
+10500..10527  ; Lo #  [40] ELBASAN LETTER A..ELBASAN LETTER KHE
+10530..10563  ; Lo #  [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
+10600..10736  ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
+10740..10755  ; Lo #  [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
+10760..10767  ; Lo #   [8] LINEAR A SIGN A800..LINEAR A SIGN A807
+10800..10805  ; Lo #   [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
+10808         ; Lo #       CYPRIOT SYLLABLE JO
+1080A..10835  ; Lo #  [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
+10837..10838  ; Lo #   [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
+1083C         ; Lo #       CYPRIOT SYLLABLE ZA
+1083F..10855  ; Lo #  [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
+10860..10876  ; Lo #  [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW
+10880..1089E  ; Lo #  [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW
+108E0..108F2  ; Lo #  [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH
+108F4..108F5  ; Lo #   [2] HATRAN LETTER SHIN..HATRAN LETTER TAW
+10900..10915  ; Lo #  [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
+10920..10939  ; Lo #  [26] LYDIAN LETTER A..LYDIAN LETTER C
+10980..109B7  ; Lo #  [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA
+109BE..109BF  ; Lo #   [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN
+10A00         ; Lo #       KHAROSHTHI LETTER A
+10A10..10A13  ; Lo #   [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
+10A15..10A17  ; Lo #   [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
+10A19..10A35  ; Lo #  [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA
+10A60..10A7C  ; Lo #  [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
+10A80..10A9C  ; Lo #  [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH
+10AC0..10AC7  ; Lo #   [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
+10AC9..10AE4  ; Lo #  [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
+10B00..10B35  ; Lo #  [54] AVESTAN LETTER A..AVESTAN LETTER HE
+10B40..10B55  ; Lo #  [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
+10B60..10B72  ; Lo #  [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
+10B80..10B91  ; Lo #  [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
+10C00..10C48  ; Lo #  [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
+10D00..10D23  ; Lo #  [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+10E80..10EA9  ; Lo #  [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
+10EB0..10EB1  ; Lo #   [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10F00..10F1C  ; Lo #  [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
+10F27         ; Lo #       OLD SOGDIAN LIGATURE AYIN-DALETH
+10F30..10F45  ; Lo #  [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
+10FB0..10FC4  ; Lo #  [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
+10FE0..10FF6  ; Lo #  [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
+11003..11037  ; Lo #  [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
+11083..110AF  ; Lo #  [45] KAITHI LETTER A..KAITHI LETTER HA
+110D0..110E8  ; Lo #  [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
+11103..11126  ; Lo #  [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
+11144         ; Lo #       CHAKMA LETTER LHAA
+11147         ; Lo #       CHAKMA LETTER VAA
+11150..11172  ; Lo #  [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
+11176         ; Lo #       MAHAJANI LIGATURE SHRI
+11183..111B2  ; Lo #  [48] SHARADA LETTER A..SHARADA LETTER HA
+111C1..111C4  ; Lo #   [4] SHARADA SIGN AVAGRAHA..SHARADA OM
+111DA         ; Lo #       SHARADA EKAM
+111DC         ; Lo #       SHARADA HEADSTROKE
+11200..11211  ; Lo #  [18] KHOJKI LETTER A..KHOJKI LETTER JJA
+11213..1122B  ; Lo #  [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA
+11280..11286  ; Lo #   [7] MULTANI LETTER A..MULTANI LETTER GA
+11288         ; Lo #       MULTANI LETTER GHA
+1128A..1128D  ; Lo #   [4] MULTANI LETTER CA..MULTANI LETTER JJA
+1128F..1129D  ; Lo #  [15] MULTANI LETTER NYA..MULTANI LETTER BA
+1129F..112A8  ; Lo #  [10] MULTANI LETTER BHA..MULTANI LETTER RHA
+112B0..112DE  ; Lo #  [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA
+11305..1130C  ; Lo #   [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L
+1130F..11310  ; Lo #   [2] GRANTHA LETTER EE..GRANTHA LETTER AI
+11313..11328  ; Lo #  [22] GRANTHA LETTER OO..GRANTHA LETTER NA
+1132A..11330  ; Lo #   [7] GRANTHA LETTER PA..GRANTHA LETTER RA
+11332..11333  ; Lo #   [2] GRANTHA LETTER LA..GRANTHA LETTER LLA
+11335..11339  ; Lo #   [5] GRANTHA LETTER VA..GRANTHA LETTER HA
+1133D         ; Lo #       GRANTHA SIGN AVAGRAHA
+11350         ; Lo #       GRANTHA OM
+1135D..11361  ; Lo #   [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
+11400..11434  ; Lo #  [53] NEWA LETTER A..NEWA LETTER HA
+11447..1144A  ; Lo #   [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
+1145F..11461  ; Lo #   [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
+11480..114AF  ; Lo #  [48] TIRHUTA ANJI..TIRHUTA LETTER HA
+114C4..114C5  ; Lo #   [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
+114C7         ; Lo #       TIRHUTA OM
+11580..115AE  ; Lo #  [47] SIDDHAM LETTER A..SIDDHAM LETTER HA
+115D8..115DB  ; Lo #   [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U
+11600..1162F  ; Lo #  [48] MODI LETTER A..MODI LETTER LLA
+11644         ; Lo #       MODI SIGN HUVA
+11680..116AA  ; Lo #  [43] TAKRI LETTER A..TAKRI LETTER RRA
+116B8         ; Lo #       TAKRI LETTER ARCHAIC KHA
+11700..1171A  ; Lo #  [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
+11800..1182B  ; Lo #  [44] DOGRA LETTER A..DOGRA LETTER RRA
+118FF..11906  ; Lo #   [8] WARANG CITI OM..DIVES AKURU LETTER E
+11909         ; Lo #       DIVES AKURU LETTER O
+1190C..11913  ; Lo #   [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
+11915..11916  ; Lo #   [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
+11918..1192F  ; Lo #  [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
+1193F         ; Lo #       DIVES AKURU PREFIXED NASAL SIGN
+11941         ; Lo #       DIVES AKURU INITIAL RA
+119A0..119A7  ; Lo #   [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
+119AA..119D0  ; Lo #  [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
+119E1         ; Lo #       NANDINAGARI SIGN AVAGRAHA
+119E3         ; Lo #       NANDINAGARI HEADSTROKE
+11A00         ; Lo #       ZANABAZAR SQUARE LETTER A
+11A0B..11A32  ; Lo #  [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA
+11A3A         ; Lo #       ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
+11A50         ; Lo #       SOYOMBO LETTER A
+11A5C..11A89  ; Lo #  [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
+11A9D         ; Lo #       SOYOMBO MARK PLUTA
+11AC0..11AF8  ; Lo #  [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
+11C00..11C08  ; Lo #   [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
+11C0A..11C2E  ; Lo #  [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
+11C40         ; Lo #       BHAIKSUKI SIGN AVAGRAHA
+11C72..11C8F  ; Lo #  [30] MARCHEN LETTER KA..MARCHEN LETTER A
+11D00..11D06  ; Lo #   [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E
+11D08..11D09  ; Lo #   [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O
+11D0B..11D30  ; Lo #  [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA
+11D46         ; Lo #       MASARAM GONDI REPHA
+11D60..11D65  ; Lo #   [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU
+11D67..11D68  ; Lo #   [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI
+11D6A..11D89  ; Lo #  [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
+11D98         ; Lo #       GUNJALA GONDI OM
+11EE0..11EF2  ; Lo #  [19] MAKASAR LETTER KA..MAKASAR ANGKA
+11FB0         ; Lo #       LISU LETTER YHA
+12000..12399  ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
+12480..12543  ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
+13000..1342E  ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
+14400..14646  ; Lo # [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
+16800..16A38  ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
+16A40..16A5E  ; Lo #  [31] MRO LETTER TA..MRO LETTER TEK
+16AD0..16AED  ; Lo #  [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I
+16B00..16B2F  ; Lo #  [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
+16B63..16B77  ; Lo #  [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
+16B7D..16B8F  ; Lo #  [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
+16F00..16F4A  ; Lo #  [75] MIAO LETTER PA..MIAO LETTER RTE
+16F50         ; Lo #       MIAO LETTER NASALIZATION
+17000..187F7  ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
+18800..18CD5  ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
+18D00..18D08  ; Lo #   [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
+1B000..1B11E  ; Lo # [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
+1B150..1B152  ; Lo #   [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
+1B164..1B167  ; Lo #   [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
+1B170..1B2FB  ; Lo # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
+1BC00..1BC6A  ; Lo # [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
+1BC70..1BC7C  ; Lo #  [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
+1BC80..1BC88  ; Lo #   [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
+1BC90..1BC99  ; Lo #  [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
+1E100..1E12C  ; Lo #  [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
+1E14E         ; Lo #       NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
+1E2C0..1E2EB  ; Lo #  [44] WANCHO LETTER AA..WANCHO LETTER YIH
+1E800..1E8C4  ; Lo # [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
+1EE00..1EE03  ; Lo #   [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
+1EE05..1EE1F  ; Lo #  [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
+1EE21..1EE22  ; Lo #   [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
+1EE24         ; Lo #       ARABIC MATHEMATICAL INITIAL HEH
+1EE27         ; Lo #       ARABIC MATHEMATICAL INITIAL HAH
+1EE29..1EE32  ; Lo #  [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF
+1EE34..1EE37  ; Lo #   [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH
+1EE39         ; Lo #       ARABIC MATHEMATICAL INITIAL DAD
+1EE3B         ; Lo #       ARABIC MATHEMATICAL INITIAL GHAIN
+1EE42         ; Lo #       ARABIC MATHEMATICAL TAILED JEEM
+1EE47         ; Lo #       ARABIC MATHEMATICAL TAILED HAH
+1EE49         ; Lo #       ARABIC MATHEMATICAL TAILED YEH
+1EE4B         ; Lo #       ARABIC MATHEMATICAL TAILED LAM
+1EE4D..1EE4F  ; Lo #   [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN
+1EE51..1EE52  ; Lo #   [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF
+1EE54         ; Lo #       ARABIC MATHEMATICAL TAILED SHEEN
+1EE57         ; Lo #       ARABIC MATHEMATICAL TAILED KHAH
+1EE59         ; Lo #       ARABIC MATHEMATICAL TAILED DAD
+1EE5B         ; Lo #       ARABIC MATHEMATICAL TAILED GHAIN
+1EE5D         ; Lo #       ARABIC MATHEMATICAL TAILED DOTLESS NOON
+1EE5F         ; Lo #       ARABIC MATHEMATICAL TAILED DOTLESS QAF
+1EE61..1EE62  ; Lo #   [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM
+1EE64         ; Lo #       ARABIC MATHEMATICAL STRETCHED HEH
+1EE67..1EE6A  ; Lo #   [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF
+1EE6C..1EE72  ; Lo #   [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF
+1EE74..1EE77  ; Lo #   [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH
+1EE79..1EE7C  ; Lo #   [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
+1EE7E         ; Lo #       ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
+1EE80..1EE89  ; Lo #  [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH
+1EE8B..1EE9B  ; Lo #  [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN
+1EEA1..1EEA3  ; Lo #   [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
+1EEA5..1EEA9  ; Lo #   [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
+1EEAB..1EEBB  ; Lo #  [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
+20000..2A6DD  ; Lo # [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
+2A700..2B734  ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+2B740..2B81D  ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
+2B820..2CEA1  ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+2CEB0..2EBE0  ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2F800..2FA1D  ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+30000..3134A  ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
+
+# Total code points: 127004
+
+# ================================================
+
+# General_Category=Nonspacing_Mark
+
+0300..036F    ; Mn # [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0483..0487    ; Mn #   [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0591..05BD    ; Mn #  [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BF          ; Mn #       HEBREW POINT RAFE
+05C1..05C2    ; Mn #   [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C4..05C5    ; Mn #   [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C7          ; Mn #       HEBREW POINT QAMATS QATAN
+0610..061A    ; Mn #  [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+064B..065F    ; Mn #  [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
+0670          ; Mn #       ARABIC LETTER SUPERSCRIPT ALEF
+06D6..06DC    ; Mn #   [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DF..06E4    ; Mn #   [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E7..06E8    ; Mn #   [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06EA..06ED    ; Mn #   [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+0711          ; Mn #       SYRIAC LETTER SUPERSCRIPT ALAPH
+0730..074A    ; Mn #  [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+07A6..07B0    ; Mn #  [11] THAANA ABAFILI..THAANA SUKUN
+07EB..07F3    ; Mn #   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+07FD          ; Mn #       NKO DANTAYALAN
+0816..0819    ; Mn #   [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+081B..0823    ; Mn #   [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0825..0827    ; Mn #   [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0829..082D    ; Mn #   [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+0859..085B    ; Mn #   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+08D3..08E1    ; Mn #  [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
+08E3..0902    ; Mn #  [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
+093A          ; Mn #       DEVANAGARI VOWEL SIGN OE
+093C          ; Mn #       DEVANAGARI SIGN NUKTA
+0941..0948    ; Mn #   [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+094D          ; Mn #       DEVANAGARI SIGN VIRAMA
+0951..0957    ; Mn #   [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
+0962..0963    ; Mn #   [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0981          ; Mn #       BENGALI SIGN CANDRABINDU
+09BC          ; Mn #       BENGALI SIGN NUKTA
+09C1..09C4    ; Mn #   [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09CD          ; Mn #       BENGALI SIGN VIRAMA
+09E2..09E3    ; Mn #   [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+09FE          ; Mn #       BENGALI SANDHI MARK
+0A01..0A02    ; Mn #   [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A3C          ; Mn #       GURMUKHI SIGN NUKTA
+0A41..0A42    ; Mn #   [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48    ; Mn #   [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D    ; Mn #   [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51          ; Mn #       GURMUKHI SIGN UDAAT
+0A70..0A71    ; Mn #   [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A75          ; Mn #       GURMUKHI SIGN YAKASH
+0A81..0A82    ; Mn #   [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0ABC          ; Mn #       GUJARATI SIGN NUKTA
+0AC1..0AC5    ; Mn #   [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8    ; Mn #   [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0ACD          ; Mn #       GUJARATI SIGN VIRAMA
+0AE2..0AE3    ; Mn #   [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AFA..0AFF    ; Mn #   [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
+0B01          ; Mn #       ORIYA SIGN CANDRABINDU
+0B3C          ; Mn #       ORIYA SIGN NUKTA
+0B3F          ; Mn #       ORIYA VOWEL SIGN I
+0B41..0B44    ; Mn #   [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B4D          ; Mn #       ORIYA SIGN VIRAMA
+0B55..0B56    ; Mn #   [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
+0B62..0B63    ; Mn #   [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B82          ; Mn #       TAMIL SIGN ANUSVARA
+0BC0          ; Mn #       TAMIL VOWEL SIGN II
+0BCD          ; Mn #       TAMIL SIGN VIRAMA
+0C00          ; Mn #       TELUGU SIGN COMBINING CANDRABINDU ABOVE
+0C04          ; Mn #       TELUGU SIGN COMBINING ANUSVARA ABOVE
+0C3E..0C40    ; Mn #   [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C46..0C48    ; Mn #   [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D    ; Mn #   [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56    ; Mn #   [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C62..0C63    ; Mn #   [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C81          ; Mn #       KANNADA SIGN CANDRABINDU
+0CBC          ; Mn #       KANNADA SIGN NUKTA
+0CBF          ; Mn #       KANNADA VOWEL SIGN I
+0CC6          ; Mn #       KANNADA VOWEL SIGN E
+0CCC..0CCD    ; Mn #   [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CE2..0CE3    ; Mn #   [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0D00..0D01    ; Mn #   [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+0D3B..0D3C    ; Mn #   [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
+0D41..0D44    ; Mn #   [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D4D          ; Mn #       MALAYALAM SIGN VIRAMA
+0D62..0D63    ; Mn #   [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D81          ; Mn #       SINHALA SIGN CANDRABINDU
+0DCA          ; Mn #       SINHALA SIGN AL-LAKUNA
+0DD2..0DD4    ; Mn #   [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6          ; Mn #       SINHALA VOWEL SIGN DIGA PAA-PILLA
+0E31          ; Mn #       THAI CHARACTER MAI HAN-AKAT
+0E34..0E3A    ; Mn #   [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E47..0E4E    ; Mn #   [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0EB1          ; Mn #       LAO VOWEL SIGN MAI KAN
+0EB4..0EBC    ; Mn #   [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
+0EC8..0ECD    ; Mn #   [6] LAO TONE MAI EK..LAO NIGGAHITA
+0F18..0F19    ; Mn #   [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F35          ; Mn #       TIBETAN MARK NGAS BZUNG NYI ZLA
+0F37          ; Mn #       TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F39          ; Mn #       TIBETAN MARK TSA -PHRU
+0F71..0F7E    ; Mn #  [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F80..0F84    ; Mn #   [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F86..0F87    ; Mn #   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F8D..0F97    ; Mn #  [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC    ; Mn #  [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FC6          ; Mn #       TIBETAN SYMBOL PADMA GDAN
+102D..1030    ; Mn #   [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1032..1037    ; Mn #   [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+1039..103A    ; Mn #   [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103D..103E    ; Mn #   [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+1058..1059    ; Mn #   [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105E..1060    ; Mn #   [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1071..1074    ; Mn #   [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1082          ; Mn #       MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1085..1086    ; Mn #   [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+108D          ; Mn #       MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+109D          ; Mn #       MYANMAR VOWEL SIGN AITON AI
+135D..135F    ; Mn #   [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
+1712..1714    ; Mn #   [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1732..1734    ; Mn #   [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
+1752..1753    ; Mn #   [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1772..1773    ; Mn #   [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+17B4..17B5    ; Mn #   [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B7..17BD    ; Mn #   [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17C6          ; Mn #       KHMER SIGN NIKAHIT
+17C9..17D3    ; Mn #  [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17DD          ; Mn #       KHMER SIGN ATTHACAN
+180B..180D    ; Mn #   [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+1885..1886    ; Mn #   [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
+18A9          ; Mn #       MONGOLIAN LETTER ALI GALI DAGALGA
+1920..1922    ; Mn #   [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1927..1928    ; Mn #   [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1932          ; Mn #       LIMBU SMALL LETTER ANUSVARA
+1939..193B    ; Mn #   [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1A17..1A18    ; Mn #   [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A1B          ; Mn #       BUGINESE VOWEL SIGN AE
+1A56          ; Mn #       TAI THAM CONSONANT SIGN MEDIAL LA
+1A58..1A5E    ; Mn #   [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A60          ; Mn #       TAI THAM SIGN SAKOT
+1A62          ; Mn #       TAI THAM VOWEL SIGN MAI SAT
+1A65..1A6C    ; Mn #   [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A73..1A7C    ; Mn #  [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F          ; Mn #       TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1AB0..1ABD    ; Mn #  [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+1ABF..1AC0    ; Mn #   [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
+1B00..1B03    ; Mn #   [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B34          ; Mn #       BALINESE SIGN REREKAN
+1B36..1B3A    ; Mn #   [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3C          ; Mn #       BALINESE VOWEL SIGN LA LENGA
+1B42          ; Mn #       BALINESE VOWEL SIGN PEPET
+1B6B..1B73    ; Mn #   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B80..1B81    ; Mn #   [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1BA2..1BA5    ; Mn #   [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA8..1BA9    ; Mn #   [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAB..1BAD    ; Mn #   [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
+1BE6          ; Mn #       BATAK SIGN TOMPI
+1BE8..1BE9    ; Mn #   [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+1BED          ; Mn #       BATAK VOWEL SIGN KARO O
+1BEF..1BF1    ; Mn #   [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1C2C..1C33    ; Mn #   [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C36..1C37    ; Mn #   [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1CD0..1CD2    ; Mn #   [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD4..1CE0    ; Mn #  [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE2..1CE8    ; Mn #   [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED          ; Mn #       VEDIC SIGN TIRYAK
+1CF4          ; Mn #       VEDIC TONE CANDRA ABOVE
+1CF8..1CF9    ; Mn #   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+1DC0..1DF9    ; Mn #  [58] COMBINING DOTTED GRAVE ACCENT..COMBINING WIDE INVERTED BRIDGE BELOW
+1DFB..1DFF    ; Mn #   [5] COMBINING DELETION MARK..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+20D0..20DC    ; Mn #  [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20E1          ; Mn #       COMBINING LEFT RIGHT ARROW ABOVE
+20E5..20F0    ; Mn #  [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+2CEF..2CF1    ; Mn #   [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2D7F          ; Mn #       TIFINAGH CONSONANT JOINER
+2DE0..2DFF    ; Mn #  [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+302A..302D    ; Mn #   [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+3099..309A    ; Mn #   [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+A66F          ; Mn #       COMBINING CYRILLIC VZMET
+A674..A67D    ; Mn #  [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
+A69E..A69F    ; Mn #   [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
+A6F0..A6F1    ; Mn #   [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A802          ; Mn #       SYLOTI NAGRI SIGN DVISVARA
+A806          ; Mn #       SYLOTI NAGRI SIGN HASANTA
+A80B          ; Mn #       SYLOTI NAGRI SIGN ANUSVARA
+A825..A826    ; Mn #   [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+A82C          ; Mn #       SYLOTI NAGRI SIGN ALTERNATE HASANTA
+A8C4..A8C5    ; Mn #   [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
+A8E0..A8F1    ; Mn #  [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A8FF          ; Mn #       DEVANAGARI VOWEL SIGN AY
+A926..A92D    ; Mn #   [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
+A947..A951    ; Mn #  [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A980..A982    ; Mn #   [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+A9B3          ; Mn #       JAVANESE SIGN CECAK TELU
+A9B6..A9B9    ; Mn #   [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BC..A9BD    ; Mn #   [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
+A9E5          ; Mn #       MYANMAR SIGN SHAN SAW
+AA29..AA2E    ; Mn #   [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA31..AA32    ; Mn #   [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA35..AA36    ; Mn #   [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA43          ; Mn #       CHAM CONSONANT SIGN FINAL NG
+AA4C          ; Mn #       CHAM CONSONANT SIGN FINAL M
+AA7C          ; Mn #       MYANMAR SIGN TAI LAING TONE-2
+AAB0          ; Mn #       TAI VIET MAI KANG
+AAB2..AAB4    ; Mn #   [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB7..AAB8    ; Mn #   [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AABE..AABF    ; Mn #   [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+AAC1          ; Mn #       TAI VIET TONE MAI THO
+AAEC..AAED    ; Mn #   [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
+AAF6          ; Mn #       MEETEI MAYEK VIRAMA
+ABE5          ; Mn #       MEETEI MAYEK VOWEL SIGN ANAP
+ABE8          ; Mn #       MEETEI MAYEK VOWEL SIGN UNAP
+ABED          ; Mn #       MEETEI MAYEK APUN IYEK
+FB1E          ; Mn #       HEBREW POINT JUDEO-SPANISH VARIKA
+FE00..FE0F    ; Mn #  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+FE20..FE2F    ; Mn #  [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+101FD         ; Mn #       PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+102E0         ; Mn #       COPTIC EPACT THOUSANDS MARK
+10376..1037A  ; Mn #   [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
+10A01..10A03  ; Mn #   [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06  ; Mn #   [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F  ; Mn #   [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+10A38..10A3A  ; Mn #   [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F         ; Mn #       KHAROSHTHI VIRAMA
+10AE5..10AE6  ; Mn #   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
+10D24..10D27  ; Mn #   [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10EAB..10EAC  ; Mn #   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
+10F46..10F50  ; Mn #  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
+11001         ; Mn #       BRAHMI SIGN ANUSVARA
+11038..11046  ; Mn #  [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
+1107F..11081  ; Mn #   [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
+110B3..110B6  ; Mn #   [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B9..110BA  ; Mn #   [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+11100..11102  ; Mn #   [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
+11127..1112B  ; Mn #   [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
+1112D..11134  ; Mn #   [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
+11173         ; Mn #       MAHAJANI SIGN NUKTA
+11180..11181  ; Mn #   [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
+111B6..111BE  ; Mn #   [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+111C9..111CC  ; Mn #   [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
+111CF         ; Mn #       SHARADA SIGN INVERTED CANDRABINDU
+1122F..11231  ; Mn #   [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
+11234         ; Mn #       KHOJKI SIGN ANUSVARA
+11236..11237  ; Mn #   [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
+1123E         ; Mn #       KHOJKI SIGN SUKUN
+112DF         ; Mn #       KHUDAWADI SIGN ANUSVARA
+112E3..112EA  ; Mn #   [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
+11300..11301  ; Mn #   [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
+1133B..1133C  ; Mn #   [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+11340         ; Mn #       GRANTHA VOWEL SIGN II
+11366..1136C  ; Mn #   [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
+11370..11374  ; Mn #   [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11438..1143F  ; Mn #   [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI
+11442..11444  ; Mn #   [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA
+11446         ; Mn #       NEWA SIGN NUKTA
+1145E         ; Mn #       NEWA SANDHI MARK
+114B3..114B8  ; Mn #   [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
+114BA         ; Mn #       TIRHUTA VOWEL SIGN SHORT E
+114BF..114C0  ; Mn #   [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
+114C2..114C3  ; Mn #   [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
+115B2..115B5  ; Mn #   [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
+115BC..115BD  ; Mn #   [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
+115BF..115C0  ; Mn #   [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
+115DC..115DD  ; Mn #   [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU
+11633..1163A  ; Mn #   [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
+1163D         ; Mn #       MODI SIGN ANUSVARA
+1163F..11640  ; Mn #   [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
+116AB         ; Mn #       TAKRI SIGN ANUSVARA
+116AD         ; Mn #       TAKRI VOWEL SIGN AA
+116B0..116B5  ; Mn #   [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+116B7         ; Mn #       TAKRI SIGN NUKTA
+1171D..1171F  ; Mn #   [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+11722..11725  ; Mn #   [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
+11727..1172B  ; Mn #   [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
+1182F..11837  ; Mn #   [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
+11839..1183A  ; Mn #   [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
+1193B..1193C  ; Mn #   [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
+1193E         ; Mn #       DIVES AKURU VIRAMA
+11943         ; Mn #       DIVES AKURU SIGN NUKTA
+119D4..119D7  ; Mn #   [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
+119DA..119DB  ; Mn #   [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
+119E0         ; Mn #       NANDINAGARI SIGN VIRAMA
+11A01..11A0A  ; Mn #  [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK
+11A33..11A38  ; Mn #   [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA
+11A3B..11A3E  ; Mn #   [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA
+11A47         ; Mn #       ZANABAZAR SQUARE SUBJOINER
+11A51..11A56  ; Mn #   [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE
+11A59..11A5B  ; Mn #   [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK
+11A8A..11A96  ; Mn #  [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA
+11A98..11A99  ; Mn #   [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
+11C30..11C36  ; Mn #   [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L
+11C38..11C3D  ; Mn #   [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA
+11C3F         ; Mn #       BHAIKSUKI SIGN VIRAMA
+11C92..11CA7  ; Mn #  [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA
+11CAA..11CB0  ; Mn #   [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA
+11CB2..11CB3  ; Mn #   [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E
+11CB5..11CB6  ; Mn #   [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU
+11D31..11D36  ; Mn #   [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R
+11D3A         ; Mn #       MASARAM GONDI VOWEL SIGN E
+11D3C..11D3D  ; Mn #   [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O
+11D3F..11D45  ; Mn #   [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA
+11D47         ; Mn #       MASARAM GONDI RA-KARA
+11D90..11D91  ; Mn #   [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI
+11D95         ; Mn #       GUNJALA GONDI SIGN ANUSVARA
+11D97         ; Mn #       GUNJALA GONDI VIRAMA
+11EF3..11EF4  ; Mn #   [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
+16AF0..16AF4  ; Mn #   [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
+16B30..16B36  ; Mn #   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
+16F4F         ; Mn #       MIAO SIGN CONSONANT MODIFIER BAR
+16F8F..16F92  ; Mn #   [4] MIAO TONE RIGHT..MIAO TONE BELOW
+16FE4         ; Mn #       KHITAN SMALL SCRIPT FILLER
+1BC9D..1BC9E  ; Mn #   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
+1D167..1D169  ; Mn #   [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D17B..1D182  ; Mn #   [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B  ; Mn #   [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD  ; Mn #   [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+1D242..1D244  ; Mn #   [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+1DA00..1DA36  ; Mn #  [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN
+1DA3B..1DA6C  ; Mn #  [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT
+1DA75         ; Mn #       SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS
+1DA84         ; Mn #       SIGNWRITING LOCATION HEAD NECK
+1DA9B..1DA9F  ; Mn #   [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6
+1DAA1..1DAAF  ; Mn #  [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16
+1E000..1E006  ; Mn #   [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018  ; Mn #  [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021  ; Mn #   [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024  ; Mn #   [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A  ; Mn #   [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
+1E130..1E136  ; Mn #   [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
+1E2EC..1E2EF  ; Mn #   [4] WANCHO TONE TUP..WANCHO TONE KOINI
+1E8D0..1E8D6  ; Mn #   [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E944..1E94A  ; Mn #   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+E0100..E01EF  ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+
+# Total code points: 1839
+
+# ================================================
+
+# General_Category=Enclosing_Mark
+
+0488..0489    ; Me #   [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+1ABE          ; Me #       COMBINING PARENTHESES OVERLAY
+20DD..20E0    ; Me #   [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E2..20E4    ; Me #   [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+A670..A672    ; Me #   [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
+
+# Total code points: 13
+
+# ================================================
+
+# General_Category=Spacing_Mark
+
+0903          ; Mc #       DEVANAGARI SIGN VISARGA
+093B          ; Mc #       DEVANAGARI VOWEL SIGN OOE
+093E..0940    ; Mc #   [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+0949..094C    ; Mc #   [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+094E..094F    ; Mc #   [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
+0982..0983    ; Mc #   [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+09BE..09C0    ; Mc #   [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+09C7..09C8    ; Mc #   [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09CB..09CC    ; Mc #   [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+09D7          ; Mc #       BENGALI AU LENGTH MARK
+0A03          ; Mc #       GURMUKHI SIGN VISARGA
+0A3E..0A40    ; Mc #   [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+0A83          ; Mc #       GUJARATI SIGN VISARGA
+0ABE..0AC0    ; Mc #   [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+0AC9          ; Mc #       GUJARATI VOWEL SIGN CANDRA O
+0ACB..0ACC    ; Mc #   [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+0B02..0B03    ; Mc #   [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+0B3E          ; Mc #       ORIYA VOWEL SIGN AA
+0B40          ; Mc #       ORIYA VOWEL SIGN II
+0B47..0B48    ; Mc #   [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B4B..0B4C    ; Mc #   [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+0B57          ; Mc #       ORIYA AU LENGTH MARK
+0BBE..0BBF    ; Mc #   [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+0BC1..0BC2    ; Mc #   [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+0BC6..0BC8    ; Mc #   [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BCA..0BCC    ; Mc #   [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+0BD7          ; Mc #       TAMIL AU LENGTH MARK
+0C01..0C03    ; Mc #   [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C41..0C44    ; Mc #   [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+0C82..0C83    ; Mc #   [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0CBE          ; Mc #       KANNADA VOWEL SIGN AA
+0CC0..0CC4    ; Mc #   [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+0CC7..0CC8    ; Mc #   [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB    ; Mc #   [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CD5..0CD6    ; Mc #   [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0D02..0D03    ; Mc #   [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D3E..0D40    ; Mc #   [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+0D46..0D48    ; Mc #   [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D4A..0D4C    ; Mc #   [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+0D57          ; Mc #       MALAYALAM AU LENGTH MARK
+0D82..0D83    ; Mc #   [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0DCF..0DD1    ; Mc #   [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+0DD8..0DDF    ; Mc #   [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DF2..0DF3    ; Mc #   [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0F3E..0F3F    ; Mc #   [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+0F7F          ; Mc #       TIBETAN SIGN RNAM BCAD
+102B..102C    ; Mc #   [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+1031          ; Mc #       MYANMAR VOWEL SIGN E
+1038          ; Mc #       MYANMAR SIGN VISARGA
+103B..103C    ; Mc #   [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+1056..1057    ; Mc #   [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+1062..1064    ; Mc #   [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
+1067..106D    ; Mc #   [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
+1083..1084    ; Mc #   [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+1087..108C    ; Mc #   [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+108F          ; Mc #       MYANMAR SIGN RUMAI PALAUNG TONE-5
+109A..109C    ; Mc #   [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+17B6          ; Mc #       KHMER VOWEL SIGN AA
+17BE..17C5    ; Mc #   [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+17C7..17C8    ; Mc #   [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+1923..1926    ; Mc #   [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+1929..192B    ; Mc #   [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+1930..1931    ; Mc #   [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+1933..1938    ; Mc #   [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+1A19..1A1A    ; Mc #   [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A55          ; Mc #       TAI THAM CONSONANT SIGN MEDIAL RA
+1A57          ; Mc #       TAI THAM CONSONANT SIGN LA TANG LAI
+1A61          ; Mc #       TAI THAM VOWEL SIGN A
+1A63..1A64    ; Mc #   [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+1A6D..1A72    ; Mc #   [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+1B04          ; Mc #       BALINESE SIGN BISAH
+1B35          ; Mc #       BALINESE VOWEL SIGN TEDUNG
+1B3B          ; Mc #       BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3D..1B41    ; Mc #   [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+1B43..1B44    ; Mc #   [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B82          ; Mc #       SUNDANESE SIGN PANGWISAD
+1BA1          ; Mc #       SUNDANESE CONSONANT SIGN PAMINGKAL
+1BA6..1BA7    ; Mc #   [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+1BAA          ; Mc #       SUNDANESE SIGN PAMAAEH
+1BE7          ; Mc #       BATAK VOWEL SIGN E
+1BEA..1BEC    ; Mc #   [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
+1BEE          ; Mc #       BATAK VOWEL SIGN U
+1BF2..1BF3    ; Mc #   [2] BATAK PANGOLAT..BATAK PANONGONAN
+1C24..1C2B    ; Mc #   [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+1C34..1C35    ; Mc #   [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+1CE1          ; Mc #       VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CF7          ; Mc #       VEDIC SIGN ATIKRAMA
+302E..302F    ; Mc #   [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+A823..A824    ; Mc #   [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+A827          ; Mc #       SYLOTI NAGRI VOWEL SIGN OO
+A880..A881    ; Mc #   [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+A8B4..A8C3    ; Mc #  [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+A952..A953    ; Mc #   [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
+A983          ; Mc #       JAVANESE SIGN WIGNYAN
+A9B4..A9B5    ; Mc #   [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+A9BA..A9BB    ; Mc #   [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+A9BE..A9C0    ; Mc #   [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
+AA2F..AA30    ; Mc #   [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+AA33..AA34    ; Mc #   [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+AA4D          ; Mc #       CHAM CONSONANT SIGN FINAL H
+AA7B          ; Mc #       MYANMAR SIGN PAO KAREN TONE
+AA7D          ; Mc #       MYANMAR SIGN TAI LAING TONE-5
+AAEB          ; Mc #       MEETEI MAYEK VOWEL SIGN II
+AAEE..AAEF    ; Mc #   [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU
+AAF5          ; Mc #       MEETEI MAYEK VOWEL SIGN VISARGA
+ABE3..ABE4    ; Mc #   [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ABE6..ABE7    ; Mc #   [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ABE9..ABEA    ; Mc #   [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+ABEC          ; Mc #       MEETEI MAYEK LUM IYEK
+11000         ; Mc #       BRAHMI SIGN CANDRABINDU
+11002         ; Mc #       BRAHMI SIGN VISARGA
+11082         ; Mc #       KAITHI SIGN VISARGA
+110B0..110B2  ; Mc #   [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
+110B7..110B8  ; Mc #   [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
+1112C         ; Mc #       CHAKMA VOWEL SIGN E
+11145..11146  ; Mc #   [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
+11182         ; Mc #       SHARADA SIGN VISARGA
+111B3..111B5  ; Mc #   [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
+111BF..111C0  ; Mc #   [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
+111CE         ; Mc #       SHARADA VOWEL SIGN PRISHTHAMATRA E
+1122C..1122E  ; Mc #   [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
+11232..11233  ; Mc #   [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
+11235         ; Mc #       KHOJKI SIGN VIRAMA
+112E0..112E2  ; Mc #   [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
+11302..11303  ; Mc #   [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
+1133E..1133F  ; Mc #   [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I
+11341..11344  ; Mc #   [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
+11347..11348  ; Mc #   [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
+1134B..1134D  ; Mc #   [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
+11357         ; Mc #       GRANTHA AU LENGTH MARK
+11362..11363  ; Mc #   [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+11435..11437  ; Mc #   [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II
+11440..11441  ; Mc #   [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU
+11445         ; Mc #       NEWA SIGN VISARGA
+114B0..114B2  ; Mc #   [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
+114B9         ; Mc #       TIRHUTA VOWEL SIGN E
+114BB..114BE  ; Mc #   [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU
+114C1         ; Mc #       TIRHUTA SIGN VISARGA
+115AF..115B1  ; Mc #   [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II
+115B8..115BB  ; Mc #   [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
+115BE         ; Mc #       SIDDHAM SIGN VISARGA
+11630..11632  ; Mc #   [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
+1163B..1163C  ; Mc #   [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
+1163E         ; Mc #       MODI SIGN VISARGA
+116AC         ; Mc #       TAKRI SIGN VISARGA
+116AE..116AF  ; Mc #   [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
+116B6         ; Mc #       TAKRI SIGN VIRAMA
+11720..11721  ; Mc #   [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
+11726         ; Mc #       AHOM VOWEL SIGN E
+1182C..1182E  ; Mc #   [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
+11838         ; Mc #       DOGRA SIGN VISARGA
+11930..11935  ; Mc #   [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
+11937..11938  ; Mc #   [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
+1193D         ; Mc #       DIVES AKURU SIGN HALANTA
+11940         ; Mc #       DIVES AKURU MEDIAL YA
+11942         ; Mc #       DIVES AKURU MEDIAL RA
+119D1..119D3  ; Mc #   [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
+119DC..119DF  ; Mc #   [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
+119E4         ; Mc #       NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
+11A39         ; Mc #       ZANABAZAR SQUARE SIGN VISARGA
+11A57..11A58  ; Mc #   [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU
+11A97         ; Mc #       SOYOMBO SIGN VISARGA
+11C2F         ; Mc #       BHAIKSUKI VOWEL SIGN AA
+11C3E         ; Mc #       BHAIKSUKI SIGN VISARGA
+11CA9         ; Mc #       MARCHEN SUBJOINED LETTER YA
+11CB1         ; Mc #       MARCHEN VOWEL SIGN I
+11CB4         ; Mc #       MARCHEN VOWEL SIGN O
+11D8A..11D8E  ; Mc #   [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU
+11D93..11D94  ; Mc #   [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU
+11D96         ; Mc #       GUNJALA GONDI SIGN VISARGA
+11EF5..11EF6  ; Mc #   [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
+16F51..16F87  ; Mc #  [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
+16FF0..16FF1  ; Mc #   [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
+1D165..1D166  ; Mc #   [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
+1D16D..1D172  ; Mc #   [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
+
+# Total code points: 443
+
+# ================================================
+
+# General_Category=Decimal_Number
+
+0030..0039    ; Nd #  [10] DIGIT ZERO..DIGIT NINE
+0660..0669    ; Nd #  [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+06F0..06F9    ; Nd #  [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+07C0..07C9    ; Nd #  [10] NKO DIGIT ZERO..NKO DIGIT NINE
+0966..096F    ; Nd #  [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+09E6..09EF    ; Nd #  [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+0A66..0A6F    ; Nd #  [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+0AE6..0AEF    ; Nd #  [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+0B66..0B6F    ; Nd #  [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+0BE6..0BEF    ; Nd #  [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0C66..0C6F    ; Nd #  [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+0CE6..0CEF    ; Nd #  [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+0D66..0D6F    ; Nd #  [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+0DE6..0DEF    ; Nd #  [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
+0E50..0E59    ; Nd #  [10] THAI DIGIT ZERO..THAI DIGIT NINE
+0ED0..0ED9    ; Nd #  [10] LAO DIGIT ZERO..LAO DIGIT NINE
+0F20..0F29    ; Nd #  [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+1040..1049    ; Nd #  [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+1090..1099    ; Nd #  [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
+17E0..17E9    ; Nd #  [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
+1810..1819    ; Nd #  [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+1946..194F    ; Nd #  [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
+19D0..19D9    ; Nd #  [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
+1A80..1A89    ; Nd #  [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
+1A90..1A99    ; Nd #  [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+1B50..1B59    ; Nd #  [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+1BB0..1BB9    ; Nd #  [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
+1C40..1C49    ; Nd #  [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+1C50..1C59    ; Nd #  [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
+A620..A629    ; Nd #  [10] VAI DIGIT ZERO..VAI DIGIT NINE
+A8D0..A8D9    ; Nd #  [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
+A900..A909    ; Nd #  [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
+A9D0..A9D9    ; Nd #  [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
+A9F0..A9F9    ; Nd #  [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
+AA50..AA59    ; Nd #  [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
+ABF0..ABF9    ; Nd #  [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
+FF10..FF19    ; Nd #  [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+104A0..104A9  ; Nd #  [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
+10D30..10D39  ; Nd #  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
+11066..1106F  ; Nd #  [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
+110F0..110F9  ; Nd #  [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE
+11136..1113F  ; Nd #  [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE
+111D0..111D9  ; Nd #  [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
+112F0..112F9  ; Nd #  [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
+11450..11459  ; Nd #  [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
+114D0..114D9  ; Nd #  [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
+11650..11659  ; Nd #  [10] MODI DIGIT ZERO..MODI DIGIT NINE
+116C0..116C9  ; Nd #  [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+11730..11739  ; Nd #  [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
+118E0..118E9  ; Nd #  [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
+11950..11959  ; Nd #  [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11C50..11C59  ; Nd #  [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
+11D50..11D59  ; Nd #  [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
+11DA0..11DA9  ; Nd #  [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
+16A60..16A69  ; Nd #  [10] MRO DIGIT ZERO..MRO DIGIT NINE
+16B50..16B59  ; Nd #  [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
+1D7CE..1D7FF  ; Nd #  [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1E140..1E149  ; Nd #  [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
+1E2F0..1E2F9  ; Nd #  [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
+1E950..1E959  ; Nd #  [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
+1FBF0..1FBF9  ; Nd #  [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
+
+# Total code points: 650
+
+# ================================================
+
+# General_Category=Letter_Number
+
+16EE..16F0    ; Nl #   [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
+2160..2182    ; Nl #  [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
+2185..2188    ; Nl #   [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
+3007          ; Nl #       IDEOGRAPHIC NUMBER ZERO
+3021..3029    ; Nl #   [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
+3038..303A    ; Nl #   [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+A6E6..A6EF    ; Nl #  [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
+10140..10174  ; Nl #  [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
+10341         ; Nl #       GOTHIC LETTER NINETY
+1034A         ; Nl #       GOTHIC LETTER NINE HUNDRED
+103D1..103D5  ; Nl #   [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
+12400..1246E  ; Nl # [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
+
+# Total code points: 236
+
+# ================================================
+
+# General_Category=Other_Number
+
+00B2..00B3    ; No #   [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
+00B9          ; No #       SUPERSCRIPT ONE
+00BC..00BE    ; No #   [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
+09F4..09F9    ; No #   [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
+0B72..0B77    ; No #   [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
+0BF0..0BF2    ; No #   [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0C78..0C7E    ; No #   [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
+0D58..0D5E    ; No #   [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
+0D70..0D78    ; No #   [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
+0F2A..0F33    ; No #  [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
+1369..137C    ; No #  [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
+17F0..17F9    ; No #  [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+19DA          ; No #       NEW TAI LUE THAM DIGIT ONE
+2070          ; No #       SUPERSCRIPT ZERO
+2074..2079    ; No #   [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
+2080..2089    ; No #  [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
+2150..215F    ; No #  [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
+2189          ; No #       VULGAR FRACTION ZERO THIRDS
+2460..249B    ; No #  [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+24EA..24FF    ; No #  [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
+2776..2793    ; No #  [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
+2CFD          ; No #       COPTIC FRACTION ONE HALF
+3192..3195    ; No #   [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+3220..3229    ; No #  [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+3248..324F    ; No #   [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
+3251..325F    ; No #  [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
+3280..3289    ; No #  [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+32B1..32BF    ; No #  [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
+A830..A835    ; No #   [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
+10107..10133  ; No #  [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
+10175..10178  ; No #   [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
+1018A..1018B  ; No #   [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
+102E1..102FB  ; No #  [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
+10320..10323  ; No #   [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
+10858..1085F  ; No #   [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
+10879..1087F  ; No #   [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
+108A7..108AF  ; No #   [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
+108FB..108FF  ; No #   [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED
+10916..1091B  ; No #   [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
+109BC..109BD  ; No #   [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF
+109C0..109CF  ; No #  [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY
+109D2..109FF  ; No #  [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS
+10A40..10A48  ; No #   [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF
+10A7D..10A7E  ; No #   [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY
+10A9D..10A9F  ; No #   [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
+10AEB..10AEF  ; No #   [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
+10B58..10B5F  ; No #   [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
+10B78..10B7F  ; No #   [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
+10BA9..10BAF  ; No #   [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
+10CFA..10CFF  ; No #   [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND
+10E60..10E7E  ; No #  [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
+10F1D..10F26  ; No #  [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
+10F51..10F54  ; No #   [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
+10FC5..10FCB  ; No #   [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
+11052..11065  ; No #  [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
+111E1..111F4  ; No #  [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
+1173A..1173B  ; No #   [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
+118EA..118F2  ; No #   [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
+11C5A..11C6C  ; No #  [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK
+11FC0..11FD4  ; No #  [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
+16B5B..16B61  ; No #   [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
+16E80..16E96  ; No #  [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM
+1D2E0..1D2F3  ; No #  [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
+1D360..1D378  ; No #  [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
+1E8C7..1E8CF  ; No #   [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
+1EC71..1ECAB  ; No #  [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE
+1ECAD..1ECAF  ; No #   [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS
+1ECB1..1ECB4  ; No #   [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK
+1ED01..1ED2D  ; No #  [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND
+1ED2F..1ED3D  ; No #  [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
+1F100..1F10C  ; No #  [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
+
+# Total code points: 895
+
+# ================================================
+
+# General_Category=Space_Separator
+
+0020          ; Zs #       SPACE
+00A0          ; Zs #       NO-BREAK SPACE
+1680          ; Zs #       OGHAM SPACE MARK
+2000..200A    ; Zs #  [11] EN QUAD..HAIR SPACE
+202F          ; Zs #       NARROW NO-BREAK SPACE
+205F          ; Zs #       MEDIUM MATHEMATICAL SPACE
+3000          ; Zs #       IDEOGRAPHIC SPACE
+
+# Total code points: 17
+
+# ================================================
+
+# General_Category=Line_Separator
+
+2028          ; Zl #       LINE SEPARATOR
+
+# Total code points: 1
+
+# ================================================
+
+# General_Category=Paragraph_Separator
+
+2029          ; Zp #       PARAGRAPH SEPARATOR
+
+# Total code points: 1
+
+# ================================================
+
+# General_Category=Control
+
+0000..001F    ; Cc #  [32] <control-0000>..<control-001F>
+007F..009F    ; Cc #  [33] <control-007F>..<control-009F>
+
+# Total code points: 65
+
+# ================================================
+
+# General_Category=Format
+
+00AD          ; Cf #       SOFT HYPHEN
+0600..0605    ; Cf #   [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
+061C          ; Cf #       ARABIC LETTER MARK
+06DD          ; Cf #       ARABIC END OF AYAH
+070F          ; Cf #       SYRIAC ABBREVIATION MARK
+08E2          ; Cf #       ARABIC DISPUTED END OF AYAH
+180E          ; Cf #       MONGOLIAN VOWEL SEPARATOR
+200B..200F    ; Cf #   [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
+202A..202E    ; Cf #   [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+2060..2064    ; Cf #   [5] WORD JOINER..INVISIBLE PLUS
+2066..206F    ; Cf #  [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
+FEFF          ; Cf #       ZERO WIDTH NO-BREAK SPACE
+FFF9..FFFB    ; Cf #   [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
+110BD         ; Cf #       KAITHI NUMBER SIGN
+110CD         ; Cf #       KAITHI NUMBER SIGN ABOVE
+13430..13438  ; Cf #   [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT
+1BCA0..1BCA3  ; Cf #   [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
+1D173..1D17A  ; Cf #   [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
+E0001         ; Cf #       LANGUAGE TAG
+E0020..E007F  ; Cf #  [96] TAG SPACE..CANCEL TAG
+
+# Total code points: 161
+
+# ================================================
+
+# General_Category=Private_Use
+
+E000..F8FF    ; Co # [6400] <private-use-E000>..<private-use-F8FF>
+F0000..FFFFD  ; Co # [65534] <private-use-F0000>..<private-use-FFFFD>
+100000..10FFFD; Co # [65534] <private-use-100000>..<private-use-10FFFD>
+
+# Total code points: 137468
+
+# ================================================
+
+# General_Category=Surrogate
+
+D800..DFFF    ; Cs # [2048] <surrogate-D800>..<surrogate-DFFF>
+
+# Total code points: 2048
+
+# ================================================
+
+# General_Category=Dash_Punctuation
+
+002D          ; Pd #       HYPHEN-MINUS
+058A          ; Pd #       ARMENIAN HYPHEN
+05BE          ; Pd #       HEBREW PUNCTUATION MAQAF
+1400          ; Pd #       CANADIAN SYLLABICS HYPHEN
+1806          ; Pd #       MONGOLIAN TODO SOFT HYPHEN
+2010..2015    ; Pd #   [6] HYPHEN..HORIZONTAL BAR
+2E17          ; Pd #       DOUBLE OBLIQUE HYPHEN
+2E1A          ; Pd #       HYPHEN WITH DIAERESIS
+2E3A..2E3B    ; Pd #   [2] TWO-EM DASH..THREE-EM DASH
+2E40          ; Pd #       DOUBLE HYPHEN
+301C          ; Pd #       WAVE DASH
+3030          ; Pd #       WAVY DASH
+30A0          ; Pd #       KATAKANA-HIRAGANA DOUBLE HYPHEN
+FE31..FE32    ; Pd #   [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+FE58          ; Pd #       SMALL EM DASH
+FE63          ; Pd #       SMALL HYPHEN-MINUS
+FF0D          ; Pd #       FULLWIDTH HYPHEN-MINUS
+10EAD         ; Pd #       YEZIDI HYPHENATION MARK
+
+# Total code points: 25
+
+# ================================================
+
+# General_Category=Open_Punctuation
+
+0028          ; Ps #       LEFT PARENTHESIS
+005B          ; Ps #       LEFT SQUARE BRACKET
+007B          ; Ps #       LEFT CURLY BRACKET
+0F3A          ; Ps #       TIBETAN MARK GUG RTAGS GYON
+0F3C          ; Ps #       TIBETAN MARK ANG KHANG GYON
+169B          ; Ps #       OGHAM FEATHER MARK
+201A          ; Ps #       SINGLE LOW-9 QUOTATION MARK
+201E          ; Ps #       DOUBLE LOW-9 QUOTATION MARK
+2045          ; Ps #       LEFT SQUARE BRACKET WITH QUILL
+207D          ; Ps #       SUPERSCRIPT LEFT PARENTHESIS
+208D          ; Ps #       SUBSCRIPT LEFT PARENTHESIS
+2308          ; Ps #       LEFT CEILING
+230A          ; Ps #       LEFT FLOOR
+2329          ; Ps #       LEFT-POINTING ANGLE BRACKET
+2768          ; Ps #       MEDIUM LEFT PARENTHESIS ORNAMENT
+276A          ; Ps #       MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276C          ; Ps #       MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276E          ; Ps #       HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770          ; Ps #       HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2772          ; Ps #       LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2774          ; Ps #       MEDIUM LEFT CURLY BRACKET ORNAMENT
+27C5          ; Ps #       LEFT S-SHAPED BAG DELIMITER
+27E6          ; Ps #       MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E8          ; Ps #       MATHEMATICAL LEFT ANGLE BRACKET
+27EA          ; Ps #       MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EC          ; Ps #       MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27EE          ; Ps #       MATHEMATICAL LEFT FLATTENED PARENTHESIS
+2983          ; Ps #       LEFT WHITE CURLY BRACKET
+2985          ; Ps #       LEFT WHITE PARENTHESIS
+2987          ; Ps #       Z NOTATION LEFT IMAGE BRACKET
+2989          ; Ps #       Z NOTATION LEFT BINDING BRACKET
+298B          ; Ps #       LEFT SQUARE BRACKET WITH UNDERBAR
+298D          ; Ps #       LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298F          ; Ps #       LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2991          ; Ps #       LEFT ANGLE BRACKET WITH DOT
+2993          ; Ps #       LEFT ARC LESS-THAN BRACKET
+2995          ; Ps #       DOUBLE LEFT ARC GREATER-THAN BRACKET
+2997          ; Ps #       LEFT BLACK TORTOISE SHELL BRACKET
+29D8          ; Ps #       LEFT WIGGLY FENCE
+29DA          ; Ps #       LEFT DOUBLE WIGGLY FENCE
+29FC          ; Ps #       LEFT-POINTING CURVED ANGLE BRACKET
+2E22          ; Ps #       TOP LEFT HALF BRACKET
+2E24          ; Ps #       BOTTOM LEFT HALF BRACKET
+2E26          ; Ps #       LEFT SIDEWAYS U BRACKET
+2E28          ; Ps #       LEFT DOUBLE PARENTHESIS
+2E42          ; Ps #       DOUBLE LOW-REVERSED-9 QUOTATION MARK
+3008          ; Ps #       LEFT ANGLE BRACKET
+300A          ; Ps #       LEFT DOUBLE ANGLE BRACKET
+300C          ; Ps #       LEFT CORNER BRACKET
+300E          ; Ps #       LEFT WHITE CORNER BRACKET
+3010          ; Ps #       LEFT BLACK LENTICULAR BRACKET
+3014          ; Ps #       LEFT TORTOISE SHELL BRACKET
+3016          ; Ps #       LEFT WHITE LENTICULAR BRACKET
+3018          ; Ps #       LEFT WHITE TORTOISE SHELL BRACKET
+301A          ; Ps #       LEFT WHITE SQUARE BRACKET
+301D          ; Ps #       REVERSED DOUBLE PRIME QUOTATION MARK
+FD3F          ; Ps #       ORNATE RIGHT PARENTHESIS
+FE17          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
+FE35          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+FE37          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+FE39          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
+FE3B          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
+FE3D          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
+FE3F          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
+FE41          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+FE43          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+FE47          ; Ps #       PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
+FE59          ; Ps #       SMALL LEFT PARENTHESIS
+FE5B          ; Ps #       SMALL LEFT CURLY BRACKET
+FE5D          ; Ps #       SMALL LEFT TORTOISE SHELL BRACKET
+FF08          ; Ps #       FULLWIDTH LEFT PARENTHESIS
+FF3B          ; Ps #       FULLWIDTH LEFT SQUARE BRACKET
+FF5B          ; Ps #       FULLWIDTH LEFT CURLY BRACKET
+FF5F          ; Ps #       FULLWIDTH LEFT WHITE PARENTHESIS
+FF62          ; Ps #       HALFWIDTH LEFT CORNER BRACKET
+
+# Total code points: 75
+
+# ================================================
+
+# General_Category=Close_Punctuation
+
+0029          ; Pe #       RIGHT PARENTHESIS
+005D          ; Pe #       RIGHT SQUARE BRACKET
+007D          ; Pe #       RIGHT CURLY BRACKET
+0F3B          ; Pe #       TIBETAN MARK GUG RTAGS GYAS
+0F3D          ; Pe #       TIBETAN MARK ANG KHANG GYAS
+169C          ; Pe #       OGHAM REVERSED FEATHER MARK
+2046          ; Pe #       RIGHT SQUARE BRACKET WITH QUILL
+207E          ; Pe #       SUPERSCRIPT RIGHT PARENTHESIS
+208E          ; Pe #       SUBSCRIPT RIGHT PARENTHESIS
+2309          ; Pe #       RIGHT CEILING
+230B          ; Pe #       RIGHT FLOOR
+232A          ; Pe #       RIGHT-POINTING ANGLE BRACKET
+2769          ; Pe #       MEDIUM RIGHT PARENTHESIS ORNAMENT
+276B          ; Pe #       MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276D          ; Pe #       MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276F          ; Pe #       HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2771          ; Pe #       HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2773          ; Pe #       LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2775          ; Pe #       MEDIUM RIGHT CURLY BRACKET ORNAMENT
+27C6          ; Pe #       RIGHT S-SHAPED BAG DELIMITER
+27E7          ; Pe #       MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E9          ; Pe #       MATHEMATICAL RIGHT ANGLE BRACKET
+27EB          ; Pe #       MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27ED          ; Pe #       MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EF          ; Pe #       MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+2984          ; Pe #       RIGHT WHITE CURLY BRACKET
+2986          ; Pe #       RIGHT WHITE PARENTHESIS
+2988          ; Pe #       Z NOTATION RIGHT IMAGE BRACKET
+298A          ; Pe #       Z NOTATION RIGHT BINDING BRACKET
+298C          ; Pe #       RIGHT SQUARE BRACKET WITH UNDERBAR
+298E          ; Pe #       RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990          ; Pe #       RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2992          ; Pe #       RIGHT ANGLE BRACKET WITH DOT
+2994          ; Pe #       RIGHT ARC GREATER-THAN BRACKET
+2996          ; Pe #       DOUBLE RIGHT ARC LESS-THAN BRACKET
+2998          ; Pe #       RIGHT BLACK TORTOISE SHELL BRACKET
+29D9          ; Pe #       RIGHT WIGGLY FENCE
+29DB          ; Pe #       RIGHT DOUBLE WIGGLY FENCE
+29FD          ; Pe #       RIGHT-POINTING CURVED ANGLE BRACKET
+2E23          ; Pe #       TOP RIGHT HALF BRACKET
+2E25          ; Pe #       BOTTOM RIGHT HALF BRACKET
+2E27          ; Pe #       RIGHT SIDEWAYS U BRACKET
+2E29          ; Pe #       RIGHT DOUBLE PARENTHESIS
+3009          ; Pe #       RIGHT ANGLE BRACKET
+300B          ; Pe #       RIGHT DOUBLE ANGLE BRACKET
+300D          ; Pe #       RIGHT CORNER BRACKET
+300F          ; Pe #       RIGHT WHITE CORNER BRACKET
+3011          ; Pe #       RIGHT BLACK LENTICULAR BRACKET
+3015          ; Pe #       RIGHT TORTOISE SHELL BRACKET
+3017          ; Pe #       RIGHT WHITE LENTICULAR BRACKET
+3019          ; Pe #       RIGHT WHITE TORTOISE SHELL BRACKET
+301B          ; Pe #       RIGHT WHITE SQUARE BRACKET
+301E..301F    ; Pe #   [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+FD3E          ; Pe #       ORNATE LEFT PARENTHESIS
+FE18          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
+FE36          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+FE38          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+FE3A          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
+FE3C          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
+FE3E          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
+FE40          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
+FE42          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+FE44          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+FE48          ; Pe #       PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
+FE5A          ; Pe #       SMALL RIGHT PARENTHESIS
+FE5C          ; Pe #       SMALL RIGHT CURLY BRACKET
+FE5E          ; Pe #       SMALL RIGHT TORTOISE SHELL BRACKET
+FF09          ; Pe #       FULLWIDTH RIGHT PARENTHESIS
+FF3D          ; Pe #       FULLWIDTH RIGHT SQUARE BRACKET
+FF5D          ; Pe #       FULLWIDTH RIGHT CURLY BRACKET
+FF60          ; Pe #       FULLWIDTH RIGHT WHITE PARENTHESIS
+FF63          ; Pe #       HALFWIDTH RIGHT CORNER BRACKET
+
+# Total code points: 73
+
+# ================================================
+
+# General_Category=Connector_Punctuation
+
+005F          ; Pc #       LOW LINE
+203F..2040    ; Pc #   [2] UNDERTIE..CHARACTER TIE
+2054          ; Pc #       INVERTED UNDERTIE
+FE33..FE34    ; Pc #   [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+FE4D..FE4F    ; Pc #   [3] DASHED LOW LINE..WAVY LOW LINE
+FF3F          ; Pc #       FULLWIDTH LOW LINE
+
+# Total code points: 10
+
+# ================================================
+
+# General_Category=Other_Punctuation
+
+0021..0023    ; Po #   [3] EXCLAMATION MARK..NUMBER SIGN
+0025..0027    ; Po #   [3] PERCENT SIGN..APOSTROPHE
+002A          ; Po #       ASTERISK
+002C          ; Po #       COMMA
+002E..002F    ; Po #   [2] FULL STOP..SOLIDUS
+003A..003B    ; Po #   [2] COLON..SEMICOLON
+003F..0040    ; Po #   [2] QUESTION MARK..COMMERCIAL AT
+005C          ; Po #       REVERSE SOLIDUS
+00A1          ; Po #       INVERTED EXCLAMATION MARK
+00A7          ; Po #       SECTION SIGN
+00B6..00B7    ; Po #   [2] PILCROW SIGN..MIDDLE DOT
+00BF          ; Po #       INVERTED QUESTION MARK
+037E          ; Po #       GREEK QUESTION MARK
+0387          ; Po #       GREEK ANO TELEIA
+055A..055F    ; Po #   [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+0589          ; Po #       ARMENIAN FULL STOP
+05C0          ; Po #       HEBREW PUNCTUATION PASEQ
+05C3          ; Po #       HEBREW PUNCTUATION SOF PASUQ
+05C6          ; Po #       HEBREW PUNCTUATION NUN HAFUKHA
+05F3..05F4    ; Po #   [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+0609..060A    ; Po #   [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
+060C..060D    ; Po #   [2] ARABIC COMMA..ARABIC DATE SEPARATOR
+061B          ; Po #       ARABIC SEMICOLON
+061E..061F    ; Po #   [2] ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
+066A..066D    ; Po #   [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+06D4          ; Po #       ARABIC FULL STOP
+0700..070D    ; Po #  [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+07F7..07F9    ; Po #   [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
+0830..083E    ; Po #  [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+085E          ; Po #       MANDAIC PUNCTUATION
+0964..0965    ; Po #   [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0970          ; Po #       DEVANAGARI ABBREVIATION SIGN
+09FD          ; Po #       BENGALI ABBREVIATION SIGN
+0A76          ; Po #       GURMUKHI ABBREVIATION SIGN
+0AF0          ; Po #       GUJARATI ABBREVIATION SIGN
+0C77          ; Po #       TELUGU SIGN SIDDHAM
+0C84          ; Po #       KANNADA SIGN SIDDHAM
+0DF4          ; Po #       SINHALA PUNCTUATION KUNDDALIYA
+0E4F          ; Po #       THAI CHARACTER FONGMAN
+0E5A..0E5B    ; Po #   [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+0F04..0F12    ; Po #  [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
+0F14          ; Po #       TIBETAN MARK GTER TSHEG
+0F85          ; Po #       TIBETAN MARK PALUTA
+0FD0..0FD4    ; Po #   [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
+0FD9..0FDA    ; Po #   [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
+104A..104F    ; Po #   [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+10FB          ; Po #       GEORGIAN PARAGRAPH SEPARATOR
+1360..1368    ; Po #   [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
+166E          ; Po #       CANADIAN SYLLABICS FULL STOP
+16EB..16ED    ; Po #   [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+1735..1736    ; Po #   [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+17D4..17D6    ; Po #   [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17D8..17DA    ; Po #   [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
+1800..1805    ; Po #   [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS
+1807..180A    ; Po #   [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
+1944..1945    ; Po #   [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1A1E..1A1F    ; Po #   [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
+1AA0..1AA6    ; Po #   [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+1AA8..1AAD    ; Po #   [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+1B5A..1B60    ; Po #   [7] BALINESE PANTI..BALINESE PAMENENG
+1BFC..1BFF    ; Po #   [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
+1C3B..1C3F    ; Po #   [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C7E..1C7F    ; Po #   [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+1CC0..1CC7    ; Po #   [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
+1CD3          ; Po #       VEDIC SIGN NIHSHVASA
+2016..2017    ; Po #   [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
+2020..2027    ; Po #   [8] DAGGER..HYPHENATION POINT
+2030..2038    ; Po #   [9] PER MILLE SIGN..CARET
+203B..203E    ; Po #   [4] REFERENCE MARK..OVERLINE
+2041..2043    ; Po #   [3] CARET INSERTION POINT..HYPHEN BULLET
+2047..2051    ; Po #  [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+2053          ; Po #       SWUNG DASH
+2055..205E    ; Po #  [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+2CF9..2CFC    ; Po #   [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
+2CFE..2CFF    ; Po #   [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
+2D70          ; Po #       TIFINAGH SEPARATOR MARK
+2E00..2E01    ; Po #   [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
+2E06..2E08    ; Po #   [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
+2E0B          ; Po #       RAISED SQUARE
+2E0E..2E16    ; Po #   [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
+2E18..2E19    ; Po #   [2] INVERTED INTERROBANG..PALM BRANCH
+2E1B          ; Po #       TILDE WITH RING ABOVE
+2E1E..2E1F    ; Po #   [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
+2E2A..2E2E    ; Po #   [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
+2E30..2E39    ; Po #  [10] RING POINT..TOP HALF SECTION SIGN
+2E3C..2E3F    ; Po #   [4] STENOGRAPHIC FULL STOP..CAPITULUM
+2E41          ; Po #       REVERSED COMMA
+2E43..2E4F    ; Po #  [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
+2E52          ; Po #       TIRONIAN SIGN CAPITAL ET
+3001..3003    ; Po #   [3] IDEOGRAPHIC COMMA..DITTO MARK
+303D          ; Po #       PART ALTERNATION MARK
+30FB          ; Po #       KATAKANA MIDDLE DOT
+A4FE..A4FF    ; Po #   [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+A60D..A60F    ; Po #   [3] VAI COMMA..VAI QUESTION MARK
+A673          ; Po #       SLAVONIC ASTERISK
+A67E          ; Po #       CYRILLIC KAVYKA
+A6F2..A6F7    ; Po #   [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
+A874..A877    ; Po #   [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
+A8CE..A8CF    ; Po #   [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A8F8..A8FA    ; Po #   [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
+A8FC          ; Po #       DEVANAGARI SIGN SIDDHAM
+A92E..A92F    ; Po #   [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
+A95F          ; Po #       REJANG SECTION MARK
+A9C1..A9CD    ; Po #  [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
+A9DE..A9DF    ; Po #   [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
+AA5C..AA5F    ; Po #   [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
+AADE..AADF    ; Po #   [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
+AAF0..AAF1    ; Po #   [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
+ABEB          ; Po #       MEETEI MAYEK CHEIKHEI
+FE10..FE16    ; Po #   [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
+FE19          ; Po #       PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+FE30          ; Po #       PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+FE45..FE46    ; Po #   [2] SESAME DOT..WHITE SESAME DOT
+FE49..FE4C    ; Po #   [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
+FE50..FE52    ; Po #   [3] SMALL COMMA..SMALL FULL STOP
+FE54..FE57    ; Po #   [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
+FE5F..FE61    ; Po #   [3] SMALL NUMBER SIGN..SMALL ASTERISK
+FE68          ; Po #       SMALL REVERSE SOLIDUS
+FE6A..FE6B    ; Po #   [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
+FF01..FF03    ; Po #   [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
+FF05..FF07    ; Po #   [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
+FF0A          ; Po #       FULLWIDTH ASTERISK
+FF0C          ; Po #       FULLWIDTH COMMA
+FF0E..FF0F    ; Po #   [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
+FF1A..FF1B    ; Po #   [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
+FF1F..FF20    ; Po #   [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
+FF3C          ; Po #       FULLWIDTH REVERSE SOLIDUS
+FF61          ; Po #       HALFWIDTH IDEOGRAPHIC FULL STOP
+FF64..FF65    ; Po #   [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+10100..10102  ; Po #   [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
+1039F         ; Po #       UGARITIC WORD DIVIDER
+103D0         ; Po #       OLD PERSIAN WORD DIVIDER
+1056F         ; Po #       CAUCASIAN ALBANIAN CITATION MARK
+10857         ; Po #       IMPERIAL ARAMAIC SECTION SIGN
+1091F         ; Po #       PHOENICIAN WORD SEPARATOR
+1093F         ; Po #       LYDIAN TRIANGULAR MARK
+10A50..10A58  ; Po #   [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
+10A7F         ; Po #       OLD SOUTH ARABIAN NUMERIC INDICATOR
+10AF0..10AF6  ; Po #   [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER
+10B39..10B3F  ; Po #   [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+10B99..10B9C  ; Po #   [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
+10F55..10F59  ; Po #   [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
+11047..1104D  ; Po #   [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
+110BB..110BC  ; Po #   [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN
+110BE..110C1  ; Po #   [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+11140..11143  ; Po #   [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
+11174..11175  ; Po #   [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK
+111C5..111C8  ; Po #   [4] SHARADA DANDA..SHARADA SEPARATOR
+111CD         ; Po #       SHARADA SUTRA MARK
+111DB         ; Po #       SHARADA SIGN SIDDHAM
+111DD..111DF  ; Po #   [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
+11238..1123D  ; Po #   [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
+112A9         ; Po #       MULTANI SECTION MARK
+1144B..1144F  ; Po #   [5] NEWA DANDA..NEWA ABBREVIATION SIGN
+1145A..1145B  ; Po #   [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
+1145D         ; Po #       NEWA INSERTION SIGN
+114C6         ; Po #       TIRHUTA ABBREVIATION SIGN
+115C1..115D7  ; Po #  [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
+11641..11643  ; Po #   [3] MODI DANDA..MODI ABBREVIATION SIGN
+11660..1166C  ; Po #  [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
+1173C..1173E  ; Po #   [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+1183B         ; Po #       DOGRA ABBREVIATION SIGN
+11944..11946  ; Po #   [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
+119E2         ; Po #       NANDINAGARI SIGN SIDDHAM
+11A3F..11A46  ; Po #   [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
+11A9A..11A9C  ; Po #   [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
+11A9E..11AA2  ; Po #   [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
+11C41..11C45  ; Po #   [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
+11C70..11C71  ; Po #   [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
+11EF7..11EF8  ; Po #   [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
+11FFF         ; Po #       TAMIL PUNCTUATION END OF TEXT
+12470..12474  ; Po #   [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
+16A6E..16A6F  ; Po #   [2] MRO DANDA..MRO DOUBLE DANDA
+16AF5         ; Po #       BASSA VAH FULL STOP
+16B37..16B3B  ; Po #   [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
+16B44         ; Po #       PAHAWH HMONG SIGN XAUS
+16E97..16E9A  ; Po #   [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH
+16FE2         ; Po #       OLD CHINESE HOOK MARK
+1BC9F         ; Po #       DUPLOYAN PUNCTUATION CHINOOK FULL STOP
+1DA87..1DA8B  ; Po #   [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
+1E95E..1E95F  ; Po #   [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
+
+# Total code points: 593
+
+# ================================================
+
+# General_Category=Math_Symbol
+
+002B          ; Sm #       PLUS SIGN
+003C..003E    ; Sm #   [3] LESS-THAN SIGN..GREATER-THAN SIGN
+007C          ; Sm #       VERTICAL LINE
+007E          ; Sm #       TILDE
+00AC          ; Sm #       NOT SIGN
+00B1          ; Sm #       PLUS-MINUS SIGN
+00D7          ; Sm #       MULTIPLICATION SIGN
+00F7          ; Sm #       DIVISION SIGN
+03F6          ; Sm #       GREEK REVERSED LUNATE EPSILON SYMBOL
+0606..0608    ; Sm #   [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
+2044          ; Sm #       FRACTION SLASH
+2052          ; Sm #       COMMERCIAL MINUS SIGN
+207A..207C    ; Sm #   [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+208A..208C    ; Sm #   [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+2118          ; Sm #       SCRIPT CAPITAL P
+2140..2144    ; Sm #   [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
+214B          ; Sm #       TURNED AMPERSAND
+2190..2194    ; Sm #   [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+219A..219B    ; Sm #   [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+21A0          ; Sm #       RIGHTWARDS TWO HEADED ARROW
+21A3          ; Sm #       RIGHTWARDS ARROW WITH TAIL
+21A6          ; Sm #       RIGHTWARDS ARROW FROM BAR
+21AE          ; Sm #       LEFT RIGHT ARROW WITH STROKE
+21CE..21CF    ; Sm #   [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+21D2          ; Sm #       RIGHTWARDS DOUBLE ARROW
+21D4          ; Sm #       LEFT RIGHT DOUBLE ARROW
+21F4..22FF    ; Sm # [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
+2320..2321    ; Sm #   [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+237C          ; Sm #       RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+239B..23B3    ; Sm #  [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+23DC..23E1    ; Sm #   [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+25B7          ; Sm #       WHITE RIGHT-POINTING TRIANGLE
+25C1          ; Sm #       WHITE LEFT-POINTING TRIANGLE
+25F8..25FF    ; Sm #   [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
+266F          ; Sm #       MUSIC SHARP SIGN
+27C0..27C4    ; Sm #   [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
+27C7..27E5    ; Sm #  [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK
+27F0..27FF    ; Sm #  [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
+2900..2982    ; Sm # [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
+2999..29D7    ; Sm #  [63] DOTTED FENCE..BLACK HOURGLASS
+29DC..29FB    ; Sm #  [32] INCOMPLETE INFINITY..TRIPLE PLUS
+29FE..2AFF    ; Sm # [258] TINY..N-ARY WHITE VERTICAL BAR
+2B30..2B44    ; Sm #  [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
+2B47..2B4C    ; Sm #   [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+FB29          ; Sm #       HEBREW LETTER ALTERNATIVE PLUS SIGN
+FE62          ; Sm #       SMALL PLUS SIGN
+FE64..FE66    ; Sm #   [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
+FF0B          ; Sm #       FULLWIDTH PLUS SIGN
+FF1C..FF1E    ; Sm #   [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
+FF5C          ; Sm #       FULLWIDTH VERTICAL LINE
+FF5E          ; Sm #       FULLWIDTH TILDE
+FFE2          ; Sm #       FULLWIDTH NOT SIGN
+FFE9..FFEC    ; Sm #   [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+1D6C1         ; Sm #       MATHEMATICAL BOLD NABLA
+1D6DB         ; Sm #       MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+1D6FB         ; Sm #       MATHEMATICAL ITALIC NABLA
+1D715         ; Sm #       MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+1D735         ; Sm #       MATHEMATICAL BOLD ITALIC NABLA
+1D74F         ; Sm #       MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+1D76F         ; Sm #       MATHEMATICAL SANS-SERIF BOLD NABLA
+1D789         ; Sm #       MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+1D7A9         ; Sm #       MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
+1D7C3         ; Sm #       MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+1EEF0..1EEF1  ; Sm #   [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
+
+# Total code points: 948
+
+# ================================================
+
+# General_Category=Currency_Symbol
+
+0024          ; Sc #       DOLLAR SIGN
+00A2..00A5    ; Sc #   [4] CENT SIGN..YEN SIGN
+058F          ; Sc #       ARMENIAN DRAM SIGN
+060B          ; Sc #       AFGHANI SIGN
+07FE..07FF    ; Sc #   [2] NKO DOROME SIGN..NKO TAMAN SIGN
+09F2..09F3    ; Sc #   [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
+09FB          ; Sc #       BENGALI GANDA MARK
+0AF1          ; Sc #       GUJARATI RUPEE SIGN
+0BF9          ; Sc #       TAMIL RUPEE SIGN
+0E3F          ; Sc #       THAI CURRENCY SYMBOL BAHT
+17DB          ; Sc #       KHMER CURRENCY SYMBOL RIEL
+20A0..20BF    ; Sc #  [32] EURO-CURRENCY SIGN..BITCOIN SIGN
+A838          ; Sc #       NORTH INDIC RUPEE MARK
+FDFC          ; Sc #       RIAL SIGN
+FE69          ; Sc #       SMALL DOLLAR SIGN
+FF04          ; Sc #       FULLWIDTH DOLLAR SIGN
+FFE0..FFE1    ; Sc #   [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
+FFE5..FFE6    ; Sc #   [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
+11FDD..11FE0  ; Sc #   [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN
+1E2FF         ; Sc #       WANCHO NGUN SIGN
+1ECB0         ; Sc #       INDIC SIYAQ RUPEE MARK
+
+# Total code points: 62
+
+# ================================================
+
+# General_Category=Modifier_Symbol
+
+005E          ; Sk #       CIRCUMFLEX ACCENT
+0060          ; Sk #       GRAVE ACCENT
+00A8          ; Sk #       DIAERESIS
+00AF          ; Sk #       MACRON
+00B4          ; Sk #       ACUTE ACCENT
+00B8          ; Sk #       CEDILLA
+02C2..02C5    ; Sk #   [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
+02D2..02DF    ; Sk #  [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
+02E5..02EB    ; Sk #   [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+02ED          ; Sk #       MODIFIER LETTER UNASPIRATED
+02EF..02FF    ; Sk #  [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0375          ; Sk #       GREEK LOWER NUMERAL SIGN
+0384..0385    ; Sk #   [2] GREEK TONOS..GREEK DIALYTIKA TONOS
+1FBD          ; Sk #       GREEK KORONIS
+1FBF..1FC1    ; Sk #   [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+1FCD..1FCF    ; Sk #   [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+1FDD..1FDF    ; Sk #   [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FED..1FEF    ; Sk #   [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+1FFD..1FFE    ; Sk #   [2] GREEK OXIA..GREEK DASIA
+309B..309C    ; Sk #   [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+A700..A716    ; Sk #  [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
+A720..A721    ; Sk #   [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
+A789..A78A    ; Sk #   [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
+AB5B          ; Sk #       MODIFIER BREVE WITH INVERTED BREVE
+AB6A..AB6B    ; Sk #   [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
+FBB2..FBC1    ; Sk #  [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
+FF3E          ; Sk #       FULLWIDTH CIRCUMFLEX ACCENT
+FF40          ; Sk #       FULLWIDTH GRAVE ACCENT
+FFE3          ; Sk #       FULLWIDTH MACRON
+1F3FB..1F3FF  ; Sk #   [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
+
+# Total code points: 123
+
+# ================================================
+
+# General_Category=Other_Symbol
+
+00A6          ; So #       BROKEN BAR
+00A9          ; So #       COPYRIGHT SIGN
+00AE          ; So #       REGISTERED SIGN
+00B0          ; So #       DEGREE SIGN
+0482          ; So #       CYRILLIC THOUSANDS SIGN
+058D..058E    ; So #   [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
+060E..060F    ; So #   [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
+06DE          ; So #       ARABIC START OF RUB EL HIZB
+06E9          ; So #       ARABIC PLACE OF SAJDAH
+06FD..06FE    ; So #   [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
+07F6          ; So #       NKO SYMBOL OO DENNEN
+09FA          ; So #       BENGALI ISSHAR
+0B70          ; So #       ORIYA ISSHAR
+0BF3..0BF8    ; So #   [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
+0BFA          ; So #       TAMIL NUMBER SIGN
+0C7F          ; So #       TELUGU SIGN TUUMU
+0D4F          ; So #       MALAYALAM SIGN PARA
+0D79          ; So #       MALAYALAM DATE MARK
+0F01..0F03    ; So #   [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
+0F13          ; So #       TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN
+0F15..0F17    ; So #   [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+0F1A..0F1F    ; So #   [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+0F34          ; So #       TIBETAN MARK BSDUS RTAGS
+0F36          ; So #       TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+0F38          ; So #       TIBETAN MARK CHE MGO
+0FBE..0FC5    ; So #   [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+0FC7..0FCC    ; So #   [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+0FCE..0FCF    ; So #   [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
+0FD5..0FD8    ; So #   [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
+109E..109F    ; So #   [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
+1390..1399    ; So #  [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+166D          ; So #       CANADIAN SYLLABICS CHI SIGN
+1940          ; So #       LIMBU SIGN LOO
+19DE..19FF    ; So #  [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC
+1B61..1B6A    ; So #  [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
+1B74..1B7C    ; So #   [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+2100..2101    ; So #   [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
+2103..2106    ; So #   [4] DEGREE CELSIUS..CADA UNA
+2108..2109    ; So #   [2] SCRUPLE..DEGREE FAHRENHEIT
+2114          ; So #       L B BAR SYMBOL
+2116..2117    ; So #   [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
+211E..2123    ; So #   [6] PRESCRIPTION TAKE..VERSICLE
+2125          ; So #       OUNCE SIGN
+2127          ; So #       INVERTED OHM SIGN
+2129          ; So #       TURNED GREEK SMALL LETTER IOTA
+212E          ; So #       ESTIMATED SYMBOL
+213A..213B    ; So #   [2] ROTATED CAPITAL Q..FACSIMILE SIGN
+214A          ; So #       PROPERTY LINE
+214C..214D    ; So #   [2] PER SIGN..AKTIESELSKAB
+214F          ; So #       SYMBOL FOR SAMARITAN SOURCE
+218A..218B    ; So #   [2] TURNED DIGIT TWO..TURNED DIGIT THREE
+2195..2199    ; So #   [5] UP DOWN ARROW..SOUTH WEST ARROW
+219C..219F    ; So #   [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A1..21A2    ; So #   [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A4..21A5    ; So #   [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A7..21AD    ; So #   [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+21AF..21CD    ; So #  [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1    ; So #   [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D3          ; So #       DOWNWARDS DOUBLE ARROW
+21D5..21F3    ; So #  [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
+2300..2307    ; So #   [8] DIAMETER SIGN..WAVY LINE
+230C..231F    ; So #  [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
+2322..2328    ; So #   [7] FROWN..KEYBOARD
+232B..237B    ; So #  [81] ERASE TO THE LEFT..NOT CHECK MARK
+237D..239A    ; So #  [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+23B4..23DB    ; So #  [40] TOP SQUARE BRACKET..FUSE
+23E2..2426    ; So #  [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO
+2440..244A    ; So #  [11] OCR HOOK..OCR DOUBLE BACKSLASH
+249C..24E9    ; So #  [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+2500..25B6    ; So # [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
+25B8..25C0    ; So #   [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
+25C2..25F7    ; So #  [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+2600..266E    ; So # [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
+2670..2767    ; So # [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET
+2794..27BF    ; So #  [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
+2800..28FF    ; So # [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
+2B00..2B2F    ; So #  [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
+2B45..2B46    ; So #   [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
+2B4D..2B73    ; So #  [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
+2B76..2B95    ; So #  [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
+2B97..2BFF    ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
+2CE5..2CEA    ; So #   [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
+2E50..2E51    ; So #   [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
+2E80..2E99    ; So #  [26] CJK RADICAL REPEAT..CJK RADICAL RAP
+2E9B..2EF3    ; So #  [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+2F00..2FD5    ; So # [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+2FF0..2FFB    ; So #  [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+3004          ; So #       JAPANESE INDUSTRIAL STANDARD SYMBOL
+3012..3013    ; So #   [2] POSTAL MARK..GETA MARK
+3020          ; So #       POSTAL MARK FACE
+3036..3037    ; So #   [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
+303E..303F    ; So #   [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
+3190..3191    ; So #   [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
+3196..319F    ; So #  [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+31C0..31E3    ; So #  [36] CJK STROKE T..CJK STROKE Q
+3200..321E    ; So #  [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+322A..3247    ; So #  [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
+3250          ; So #       PARTNERSHIP SIGN
+3260..327F    ; So #  [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL
+328A..32B0    ; So #  [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+32C0..33FF    ; So # [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL
+4DC0..4DFF    ; So #  [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
+A490..A4C6    ; So #  [55] YI RADICAL QOT..YI RADICAL KE
+A828..A82B    ; So #   [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
+A836..A837    ; So #   [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
+A839          ; So #       NORTH INDIC QUANTITY MARK
+AA77..AA79    ; So #   [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
+FDFD          ; So #       ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
+FFE4          ; So #       FULLWIDTH BROKEN BAR
+FFE8          ; So #       HALFWIDTH FORMS LIGHT VERTICAL
+FFED..FFEE    ; So #   [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
+FFFC..FFFD    ; So #   [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
+10137..1013F  ; So #   [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
+10179..10189  ; So #  [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
+1018C..1018E  ; So #   [3] GREEK SINUSOID SIGN..NOMISMA SIGN
+10190..1019C  ; So #  [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
+101A0         ; So #       GREEK SYMBOL TAU RHO
+101D0..101FC  ; So #  [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
+10877..10878  ; So #   [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
+10AC8         ; So #       MANICHAEAN SIGN UD
+1173F         ; So #       AHOM SYMBOL VI
+11FD5..11FDC  ; So #   [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
+11FE1..11FF1  ; So #  [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA
+16B3C..16B3F  ; So #   [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
+16B45         ; So #       PAHAWH HMONG SIGN CIM TSOV ROG
+1BC9C         ; So #       DUPLOYAN SIGN O WITH CROSS
+1D000..1D0F5  ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
+1D100..1D126  ; So #  [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
+1D129..1D164  ; So #  [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D16A..1D16C  ; So #   [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
+1D183..1D184  ; So #   [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
+1D18C..1D1A9  ; So #  [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
+1D1AE..1D1E8  ; So #  [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
+1D200..1D241  ; So #  [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
+1D245         ; So #       GREEK MUSICAL LEIMMA
+1D300..1D356  ; So #  [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
+1D800..1D9FF  ; So # [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD
+1DA37..1DA3A  ; So #   [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE
+1DA6D..1DA74  ; So #   [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING
+1DA76..1DA83  ; So #  [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH
+1DA85..1DA86  ; So #   [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS
+1E14F         ; So #       NYIAKENG PUACHUE HMONG CIRCLED CA
+1ECAC         ; So #       INDIC SIYAQ PLACEHOLDER
+1ED2E         ; So #       OTTOMAN SIYAQ MARRATAN
+1F000..1F02B  ; So #  [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
+1F030..1F093  ; So # [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
+1F0A0..1F0AE  ; So #  [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
+1F0B1..1F0BF  ; So #  [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
+1F0C1..1F0CF  ; So #  [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
+1F0D1..1F0F5  ; So #  [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
+1F10D..1F1AD  ; So # [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
+1F1E6..1F202  ; So #  [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA
+1F210..1F23B  ; So #  [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
+1F240..1F248  ; So #   [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+1F250..1F251  ; So #   [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
+1F260..1F265  ; So #   [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
+1F300..1F3FA  ; So # [251] CYCLONE..AMPHORA
+1F400..1F6D7  ; So # [728] RAT..ELEVATOR
+1F6E0..1F6EC  ; So #  [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
+1F6F0..1F6FC  ; So #  [13] SATELLITE..ROLLER SKATE
+1F700..1F773  ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
+1F780..1F7D8  ; So #  [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
+1F7E0..1F7EB  ; So #  [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
+1F800..1F80B  ; So #  [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
+1F810..1F847  ; So #  [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
+1F850..1F859  ; So #  [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
+1F860..1F887  ; So #  [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
+1F890..1F8AD  ; So #  [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
+1F8B0..1F8B1  ; So #   [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
+1F900..1F978  ; So # [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
+1F97A..1F9CB  ; So #  [82] FACE WITH PLEADING EYES..BUBBLE TEA
+1F9CD..1FA53  ; So # [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
+1FA60..1FA6D  ; So #  [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
+1FA70..1FA74  ; So #   [5] BALLET SHOES..THONG SANDAL
+1FA78..1FA7A  ; So #   [3] DROP OF BLOOD..STETHOSCOPE
+1FA80..1FA86  ; So #   [7] YO-YO..NESTING DOLLS
+1FA90..1FAA8  ; So #  [25] RINGED PLANET..ROCK
+1FAB0..1FAB6  ; So #   [7] FLY..FEATHER
+1FAC0..1FAC2  ; So #   [3] ANATOMICAL HEART..PEOPLE HUGGING
+1FAD0..1FAD6  ; So #   [7] BLUEBERRIES..TEAPOT
+1FB00..1FB92  ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
+1FB94..1FBCA  ; So #  [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
+
+# Total code points: 6431
+
+# ================================================
+
+# General_Category=Initial_Punctuation
+
+00AB          ; Pi #       LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+2018          ; Pi #       LEFT SINGLE QUOTATION MARK
+201B..201C    ; Pi #   [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
+201F          ; Pi #       DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2039          ; Pi #       SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+2E02          ; Pi #       LEFT SUBSTITUTION BRACKET
+2E04          ; Pi #       LEFT DOTTED SUBSTITUTION BRACKET
+2E09          ; Pi #       LEFT TRANSPOSITION BRACKET
+2E0C          ; Pi #       LEFT RAISED OMISSION BRACKET
+2E1C          ; Pi #       LEFT LOW PARAPHRASE BRACKET
+2E20          ; Pi #       LEFT VERTICAL BAR WITH QUILL
+
+# Total code points: 12
+
+# ================================================
+
+# General_Category=Final_Punctuation
+
+00BB          ; Pf #       RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+2019          ; Pf #       RIGHT SINGLE QUOTATION MARK
+201D          ; Pf #       RIGHT DOUBLE QUOTATION MARK
+203A          ; Pf #       SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2E03          ; Pf #       RIGHT SUBSTITUTION BRACKET
+2E05          ; Pf #       RIGHT DOTTED SUBSTITUTION BRACKET
+2E0A          ; Pf #       RIGHT TRANSPOSITION BRACKET
+2E0D          ; Pf #       RIGHT RAISED OMISSION BRACKET
+2E1D          ; Pf #       RIGHT LOW PARAPHRASE BRACKET
+2E21          ; Pf #       RIGHT VERTICAL BAR WITH QUILL
+
+# Total code points: 10
+
+# EOF

+ 56 - 0
markdown.mod/md4c/src/CMakeLists.txt

@@ -0,0 +1,56 @@
+
+# Ask CMake to export all symbols from Windows DLLs, so the C sources need no
+# explicit export annotations when built as shared libraries.
+set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1)
+# Define DEBUG when compiling C sources in the Debug configuration.
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DDEBUG")
+
+
+# Build rules for MD4C parser library
+
+# Generate md4c.pc from its template in the current binary directory;
+# @ONLY restricts substitution to @VAR@ references.
+configure_file(md4c.pc.in md4c.pc @ONLY)
+add_library(md4c md4c.c md4c.h)
+if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU")
+    target_compile_options(md4c PRIVATE -Wall -Wextra)
+endif()
+# Define MD4C_USE_UTF8 for the parser sources only. A proper compile
+# definition is used instead of a raw "-D" string in the superseded
+# COMPILE_FLAGS property, so CMake emits the right syntax for every compiler.
+target_compile_definitions(md4c PRIVATE MD4C_USE_UTF8)
+# MD_VERSION / MD_VERSION_MAJOR are not set in this file; presumably they are
+# provided by the parent CMakeLists.txt.
+set_target_properties(md4c PROPERTIES
+    VERSION ${MD_VERSION}
+    SOVERSION ${MD_VERSION_MAJOR}
+    PUBLIC_HEADER md4c.h
+)
+
+# Build rules for HTML renderer library
+
+# Generate md4c-html.pc the same way as md4c.pc above.
+configure_file(md4c-html.pc.in md4c-html.pc @ONLY)
+add_library(md4c-html md4c-html.c md4c-html.h entity.c entity.h)
+set_target_properties(md4c-html PROPERTIES
+    VERSION ${MD_VERSION}
+    SOVERSION ${MD_VERSION_MAJOR}
+    PUBLIC_HEADER md4c-html.h
+)
+# The renderer is linked against the parser library.
+target_link_libraries(md4c-html md4c)
+
+
+# Install rules
+#
+# The CMAKE_INSTALL_* destination variables are not set in this file;
+# presumably the parent CMakeLists.txt includes GNUInstallDirs.
+
+install(
+    TARGETS md4c
+    EXPORT md4cConfig
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+    INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+# configure_file() wrote md4c.pc into this directory's binary dir. Refer to it
+# through CMAKE_CURRENT_BINARY_DIR rather than "${CMAKE_BINARY_DIR}/src", which
+# only matches when md4c is the top-level project and breaks when it is pulled
+# in with add_subdirectory().
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/md4c.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+
+install(
+    TARGETS md4c-html
+    EXPORT md4cConfig
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+# Same reasoning as for md4c.pc: install the generated file from the current
+# binary directory.
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/md4c-html.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+
+# Install the export set so consumers can import the targets under the
+# md4c:: namespace.
+install(EXPORT md4cConfig DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/md4c/ NAMESPACE md4c::)
+

+ 2190 - 0
markdown.mod/md4c/src/entity.c

@@ -0,0 +1,2190 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2017 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "entity.h"
+#include <string.h>
+
+
+/* The table is generated from https://html.spec.whatwg.org/entities.json */
+static const struct entity entity_table[] = {
+    { "&AElig;", { 198, 0 } },
+    { "&AMP;", { 38, 0 } },
+    { "&Aacute;", { 193, 0 } },
+    { "&Abreve;", { 258, 0 } },
+    { "&Acirc;", { 194, 0 } },
+    { "&Acy;", { 1040, 0 } },
+    { "&Afr;", { 120068, 0 } },
+    { "&Agrave;", { 192, 0 } },
+    { "&Alpha;", { 913, 0 } },
+    { "&Amacr;", { 256, 0 } },
+    { "&And;", { 10835, 0 } },
+    { "&Aogon;", { 260, 0 } },
+    { "&Aopf;", { 120120, 0 } },
+    { "&ApplyFunction;", { 8289, 0 } },
+    { "&Aring;", { 197, 0 } },
+    { "&Ascr;", { 119964, 0 } },
+    { "&Assign;", { 8788, 0 } },
+    { "&Atilde;", { 195, 0 } },
+    { "&Auml;", { 196, 0 } },
+    { "&Backslash;", { 8726, 0 } },
+    { "&Barv;", { 10983, 0 } },
+    { "&Barwed;", { 8966, 0 } },
+    { "&Bcy;", { 1041, 0 } },
+    { "&Because;", { 8757, 0 } },
+    { "&Bernoullis;", { 8492, 0 } },
+    { "&Beta;", { 914, 0 } },
+    { "&Bfr;", { 120069, 0 } },
+    { "&Bopf;", { 120121, 0 } },
+    { "&Breve;", { 728, 0 } },
+    { "&Bscr;", { 8492, 0 } },
+    { "&Bumpeq;", { 8782, 0 } },
+    { "&CHcy;", { 1063, 0 } },
+    { "&COPY;", { 169, 0 } },
+    { "&Cacute;", { 262, 0 } },
+    { "&Cap;", { 8914, 0 } },
+    { "&CapitalDifferentialD;", { 8517, 0 } },
+    { "&Cayleys;", { 8493, 0 } },
+    { "&Ccaron;", { 268, 0 } },
+    { "&Ccedil;", { 199, 0 } },
+    { "&Ccirc;", { 264, 0 } },
+    { "&Cconint;", { 8752, 0 } },
+    { "&Cdot;", { 266, 0 } },
+    { "&Cedilla;", { 184, 0 } },
+    { "&CenterDot;", { 183, 0 } },
+    { "&Cfr;", { 8493, 0 } },
+    { "&Chi;", { 935, 0 } },
+    { "&CircleDot;", { 8857, 0 } },
+    { "&CircleMinus;", { 8854, 0 } },
+    { "&CirclePlus;", { 8853, 0 } },
+    { "&CircleTimes;", { 8855, 0 } },
+    { "&ClockwiseContourIntegral;", { 8754, 0 } },
+    { "&CloseCurlyDoubleQuote;", { 8221, 0 } },
+    { "&CloseCurlyQuote;", { 8217, 0 } },
+    { "&Colon;", { 8759, 0 } },
+    { "&Colone;", { 10868, 0 } },
+    { "&Congruent;", { 8801, 0 } },
+    { "&Conint;", { 8751, 0 } },
+    { "&ContourIntegral;", { 8750, 0 } },
+    { "&Copf;", { 8450, 0 } },
+    { "&Coproduct;", { 8720, 0 } },
+    { "&CounterClockwiseContourIntegral;", { 8755, 0 } },
+    { "&Cross;", { 10799, 0 } },
+    { "&Cscr;", { 119966, 0 } },
+    { "&Cup;", { 8915, 0 } },
+    { "&CupCap;", { 8781, 0 } },
+    { "&DD;", { 8517, 0 } },
+    { "&DDotrahd;", { 10513, 0 } },
+    { "&DJcy;", { 1026, 0 } },
+    { "&DScy;", { 1029, 0 } },
+    { "&DZcy;", { 1039, 0 } },
+    { "&Dagger;", { 8225, 0 } },
+    { "&Darr;", { 8609, 0 } },
+    { "&Dashv;", { 10980, 0 } },
+    { "&Dcaron;", { 270, 0 } },
+    { "&Dcy;", { 1044, 0 } },
+    { "&Del;", { 8711, 0 } },
+    { "&Delta;", { 916, 0 } },
+    { "&Dfr;", { 120071, 0 } },
+    { "&DiacriticalAcute;", { 180, 0 } },
+    { "&DiacriticalDot;", { 729, 0 } },
+    { "&DiacriticalDoubleAcute;", { 733, 0 } },
+    { "&DiacriticalGrave;", { 96, 0 } },
+    { "&DiacriticalTilde;", { 732, 0 } },
+    { "&Diamond;", { 8900, 0 } },
+    { "&DifferentialD;", { 8518, 0 } },
+    { "&Dopf;", { 120123, 0 } },
+    { "&Dot;", { 168, 0 } },
+    { "&DotDot;", { 8412, 0 } },
+    { "&DotEqual;", { 8784, 0 } },
+    { "&DoubleContourIntegral;", { 8751, 0 } },
+    { "&DoubleDot;", { 168, 0 } },
+    { "&DoubleDownArrow;", { 8659, 0 } },
+    { "&DoubleLeftArrow;", { 8656, 0 } },
+    { "&DoubleLeftRightArrow;", { 8660, 0 } },
+    { "&DoubleLeftTee;", { 10980, 0 } },
+    { "&DoubleLongLeftArrow;", { 10232, 0 } },
+    { "&DoubleLongLeftRightArrow;", { 10234, 0 } },
+    { "&DoubleLongRightArrow;", { 10233, 0 } },
+    { "&DoubleRightArrow;", { 8658, 0 } },
+    { "&DoubleRightTee;", { 8872, 0 } },
+    { "&DoubleUpArrow;", { 8657, 0 } },
+    { "&DoubleUpDownArrow;", { 8661, 0 } },
+    { "&DoubleVerticalBar;", { 8741, 0 } },
+    { "&DownArrow;", { 8595, 0 } },
+    { "&DownArrowBar;", { 10515, 0 } },
+    { "&DownArrowUpArrow;", { 8693, 0 } },
+    { "&DownBreve;", { 785, 0 } },
+    { "&DownLeftRightVector;", { 10576, 0 } },
+    { "&DownLeftTeeVector;", { 10590, 0 } },
+    { "&DownLeftVector;", { 8637, 0 } },
+    { "&DownLeftVectorBar;", { 10582, 0 } },
+    { "&DownRightTeeVector;", { 10591, 0 } },
+    { "&DownRightVector;", { 8641, 0 } },
+    { "&DownRightVectorBar;", { 10583, 0 } },
+    { "&DownTee;", { 8868, 0 } },
+    { "&DownTeeArrow;", { 8615, 0 } },
+    { "&Downarrow;", { 8659, 0 } },
+    { "&Dscr;", { 119967, 0 } },
+    { "&Dstrok;", { 272, 0 } },
+    { "&ENG;", { 330, 0 } },
+    { "&ETH;", { 208, 0 } },
+    { "&Eacute;", { 201, 0 } },
+    { "&Ecaron;", { 282, 0 } },
+    { "&Ecirc;", { 202, 0 } },
+    { "&Ecy;", { 1069, 0 } },
+    { "&Edot;", { 278, 0 } },
+    { "&Efr;", { 120072, 0 } },
+    { "&Egrave;", { 200, 0 } },
+    { "&Element;", { 8712, 0 } },
+    { "&Emacr;", { 274, 0 } },
+    { "&EmptySmallSquare;", { 9723, 0 } },
+    { "&EmptyVerySmallSquare;", { 9643, 0 } },
+    { "&Eogon;", { 280, 0 } },
+    { "&Eopf;", { 120124, 0 } },
+    { "&Epsilon;", { 917, 0 } },
+    { "&Equal;", { 10869, 0 } },
+    { "&EqualTilde;", { 8770, 0 } },
+    { "&Equilibrium;", { 8652, 0 } },
+    { "&Escr;", { 8496, 0 } },
+    { "&Esim;", { 10867, 0 } },
+    { "&Eta;", { 919, 0 } },
+    { "&Euml;", { 203, 0 } },
+    { "&Exists;", { 8707, 0 } },
+    { "&ExponentialE;", { 8519, 0 } },
+    { "&Fcy;", { 1060, 0 } },
+    { "&Ffr;", { 120073, 0 } },
+    { "&FilledSmallSquare;", { 9724, 0 } },
+    { "&FilledVerySmallSquare;", { 9642, 0 } },
+    { "&Fopf;", { 120125, 0 } },
+    { "&ForAll;", { 8704, 0 } },
+    { "&Fouriertrf;", { 8497, 0 } },
+    { "&Fscr;", { 8497, 0 } },
+    { "&GJcy;", { 1027, 0 } },
+    { "&GT;", { 62, 0 } },
+    { "&Gamma;", { 915, 0 } },
+    { "&Gammad;", { 988, 0 } },
+    { "&Gbreve;", { 286, 0 } },
+    { "&Gcedil;", { 290, 0 } },
+    { "&Gcirc;", { 284, 0 } },
+    { "&Gcy;", { 1043, 0 } },
+    { "&Gdot;", { 288, 0 } },
+    { "&Gfr;", { 120074, 0 } },
+    { "&Gg;", { 8921, 0 } },
+    { "&Gopf;", { 120126, 0 } },
+    { "&GreaterEqual;", { 8805, 0 } },
+    { "&GreaterEqualLess;", { 8923, 0 } },
+    { "&GreaterFullEqual;", { 8807, 0 } },
+    { "&GreaterGreater;", { 10914, 0 } },
+    { "&GreaterLess;", { 8823, 0 } },
+    { "&GreaterSlantEqual;", { 10878, 0 } },
+    { "&GreaterTilde;", { 8819, 0 } },
+    { "&Gscr;", { 119970, 0 } },
+    { "&Gt;", { 8811, 0 } },
+    { "&HARDcy;", { 1066, 0 } },
+    { "&Hacek;", { 711, 0 } },
+    { "&Hat;", { 94, 0 } },
+    { "&Hcirc;", { 292, 0 } },
+    { "&Hfr;", { 8460, 0 } },
+    { "&HilbertSpace;", { 8459, 0 } },
+    { "&Hopf;", { 8461, 0 } },
+    { "&HorizontalLine;", { 9472, 0 } },
+    { "&Hscr;", { 8459, 0 } },
+    { "&Hstrok;", { 294, 0 } },
+    { "&HumpDownHump;", { 8782, 0 } },
+    { "&HumpEqual;", { 8783, 0 } },
+    { "&IEcy;", { 1045, 0 } },
+    { "&IJlig;", { 306, 0 } },
+    { "&IOcy;", { 1025, 0 } },
+    { "&Iacute;", { 205, 0 } },
+    { "&Icirc;", { 206, 0 } },
+    { "&Icy;", { 1048, 0 } },
+    { "&Idot;", { 304, 0 } },
+    { "&Ifr;", { 8465, 0 } },
+    { "&Igrave;", { 204, 0 } },
+    { "&Im;", { 8465, 0 } },
+    { "&Imacr;", { 298, 0 } },
+    { "&ImaginaryI;", { 8520, 0 } },
+    { "&Implies;", { 8658, 0 } },
+    { "&Int;", { 8748, 0 } },
+    { "&Integral;", { 8747, 0 } },
+    { "&Intersection;", { 8898, 0 } },
+    { "&InvisibleComma;", { 8291, 0 } },
+    { "&InvisibleTimes;", { 8290, 0 } },
+    { "&Iogon;", { 302, 0 } },
+    { "&Iopf;", { 120128, 0 } },
+    { "&Iota;", { 921, 0 } },
+    { "&Iscr;", { 8464, 0 } },
+    { "&Itilde;", { 296, 0 } },
+    { "&Iukcy;", { 1030, 0 } },
+    { "&Iuml;", { 207, 0 } },
+    { "&Jcirc;", { 308, 0 } },
+    { "&Jcy;", { 1049, 0 } },
+    { "&Jfr;", { 120077, 0 } },
+    { "&Jopf;", { 120129, 0 } },
+    { "&Jscr;", { 119973, 0 } },
+    { "&Jsercy;", { 1032, 0 } },
+    { "&Jukcy;", { 1028, 0 } },
+    { "&KHcy;", { 1061, 0 } },
+    { "&KJcy;", { 1036, 0 } },
+    { "&Kappa;", { 922, 0 } },
+    { "&Kcedil;", { 310, 0 } },
+    { "&Kcy;", { 1050, 0 } },
+    { "&Kfr;", { 120078, 0 } },
+    { "&Kopf;", { 120130, 0 } },
+    { "&Kscr;", { 119974, 0 } },
+    { "&LJcy;", { 1033, 0 } },
+    { "&LT;", { 60, 0 } },
+    { "&Lacute;", { 313, 0 } },
+    { "&Lambda;", { 923, 0 } },
+    { "&Lang;", { 10218, 0 } },
+    { "&Laplacetrf;", { 8466, 0 } },
+    { "&Larr;", { 8606, 0 } },
+    { "&Lcaron;", { 317, 0 } },
+    { "&Lcedil;", { 315, 0 } },
+    { "&Lcy;", { 1051, 0 } },
+    { "&LeftAngleBracket;", { 10216, 0 } },
+    { "&LeftArrow;", { 8592, 0 } },
+    { "&LeftArrowBar;", { 8676, 0 } },
+    { "&LeftArrowRightArrow;", { 8646, 0 } },
+    { "&LeftCeiling;", { 8968, 0 } },
+    { "&LeftDoubleBracket;", { 10214, 0 } },
+    { "&LeftDownTeeVector;", { 10593, 0 } },
+    { "&LeftDownVector;", { 8643, 0 } },
+    { "&LeftDownVectorBar;", { 10585, 0 } },
+    { "&LeftFloor;", { 8970, 0 } },
+    { "&LeftRightArrow;", { 8596, 0 } },
+    { "&LeftRightVector;", { 10574, 0 } },
+    { "&LeftTee;", { 8867, 0 } },
+    { "&LeftTeeArrow;", { 8612, 0 } },
+    { "&LeftTeeVector;", { 10586, 0 } },
+    { "&LeftTriangle;", { 8882, 0 } },
+    { "&LeftTriangleBar;", { 10703, 0 } },
+    { "&LeftTriangleEqual;", { 8884, 0 } },
+    { "&LeftUpDownVector;", { 10577, 0 } },
+    { "&LeftUpTeeVector;", { 10592, 0 } },
+    { "&LeftUpVector;", { 8639, 0 } },
+    { "&LeftUpVectorBar;", { 10584, 0 } },
+    { "&LeftVector;", { 8636, 0 } },
+    { "&LeftVectorBar;", { 10578, 0 } },
+    { "&Leftarrow;", { 8656, 0 } },
+    { "&Leftrightarrow;", { 8660, 0 } },
+    { "&LessEqualGreater;", { 8922, 0 } },
+    { "&LessFullEqual;", { 8806, 0 } },
+    { "&LessGreater;", { 8822, 0 } },
+    { "&LessLess;", { 10913, 0 } },
+    { "&LessSlantEqual;", { 10877, 0 } },
+    { "&LessTilde;", { 8818, 0 } },
+    { "&Lfr;", { 120079, 0 } },
+    { "&Ll;", { 8920, 0 } },
+    { "&Lleftarrow;", { 8666, 0 } },
+    { "&Lmidot;", { 319, 0 } },
+    { "&LongLeftArrow;", { 10229, 0 } },
+    { "&LongLeftRightArrow;", { 10231, 0 } },
+    { "&LongRightArrow;", { 10230, 0 } },
+    { "&Longleftarrow;", { 10232, 0 } },
+    { "&Longleftrightarrow;", { 10234, 0 } },
+    { "&Longrightarrow;", { 10233, 0 } },
+    { "&Lopf;", { 120131, 0 } },
+    { "&LowerLeftArrow;", { 8601, 0 } },
+    { "&LowerRightArrow;", { 8600, 0 } },
+    { "&Lscr;", { 8466, 0 } },
+    { "&Lsh;", { 8624, 0 } },
+    { "&Lstrok;", { 321, 0 } },
+    { "&Lt;", { 8810, 0 } },
+    { "&Map;", { 10501, 0 } },
+    { "&Mcy;", { 1052, 0 } },
+    { "&MediumSpace;", { 8287, 0 } },
+    { "&Mellintrf;", { 8499, 0 } },
+    { "&Mfr;", { 120080, 0 } },
+    { "&MinusPlus;", { 8723, 0 } },
+    { "&Mopf;", { 120132, 0 } },
+    { "&Mscr;", { 8499, 0 } },
+    { "&Mu;", { 924, 0 } },
+    { "&NJcy;", { 1034, 0 } },
+    { "&Nacute;", { 323, 0 } },
+    { "&Ncaron;", { 327, 0 } },
+    { "&Ncedil;", { 325, 0 } },
+    { "&Ncy;", { 1053, 0 } },
+    { "&NegativeMediumSpace;", { 8203, 0 } },
+    { "&NegativeThickSpace;", { 8203, 0 } },
+    { "&NegativeThinSpace;", { 8203, 0 } },
+    { "&NegativeVeryThinSpace;", { 8203, 0 } },
+    { "&NestedGreaterGreater;", { 8811, 0 } },
+    { "&NestedLessLess;", { 8810, 0 } },
+    { "&NewLine;", { 10, 0 } },
+    { "&Nfr;", { 120081, 0 } },
+    { "&NoBreak;", { 8288, 0 } },
+    { "&NonBreakingSpace;", { 160, 0 } },
+    { "&Nopf;", { 8469, 0 } },
+    { "&Not;", { 10988, 0 } },
+    { "&NotCongruent;", { 8802, 0 } },
+    { "&NotCupCap;", { 8813, 0 } },
+    { "&NotDoubleVerticalBar;", { 8742, 0 } },
+    { "&NotElement;", { 8713, 0 } },
+    { "&NotEqual;", { 8800, 0 } },
+    { "&NotEqualTilde;", { 8770, 824 } },
+    { "&NotExists;", { 8708, 0 } },
+    { "&NotGreater;", { 8815, 0 } },
+    { "&NotGreaterEqual;", { 8817, 0 } },
+    { "&NotGreaterFullEqual;", { 8807, 824 } },
+    { "&NotGreaterGreater;", { 8811, 824 } },
+    { "&NotGreaterLess;", { 8825, 0 } },
+    { "&NotGreaterSlantEqual;", { 10878, 824 } },
+    { "&NotGreaterTilde;", { 8821, 0 } },
+    { "&NotHumpDownHump;", { 8782, 824 } },
+    { "&NotHumpEqual;", { 8783, 824 } },
+    { "&NotLeftTriangle;", { 8938, 0 } },
+    { "&NotLeftTriangleBar;", { 10703, 824 } },
+    { "&NotLeftTriangleEqual;", { 8940, 0 } },
+    { "&NotLess;", { 8814, 0 } },
+    { "&NotLessEqual;", { 8816, 0 } },
+    { "&NotLessGreater;", { 8824, 0 } },
+    { "&NotLessLess;", { 8810, 824 } },
+    { "&NotLessSlantEqual;", { 10877, 824 } },
+    { "&NotLessTilde;", { 8820, 0 } },
+    { "&NotNestedGreaterGreater;", { 10914, 824 } },
+    { "&NotNestedLessLess;", { 10913, 824 } },
+    { "&NotPrecedes;", { 8832, 0 } },
+    { "&NotPrecedesEqual;", { 10927, 824 } },
+    { "&NotPrecedesSlantEqual;", { 8928, 0 } },
+    { "&NotReverseElement;", { 8716, 0 } },
+    { "&NotRightTriangle;", { 8939, 0 } },
+    { "&NotRightTriangleBar;", { 10704, 824 } },
+    { "&NotRightTriangleEqual;", { 8941, 0 } },
+    { "&NotSquareSubset;", { 8847, 824 } },
+    { "&NotSquareSubsetEqual;", { 8930, 0 } },
+    { "&NotSquareSuperset;", { 8848, 824 } },
+    { "&NotSquareSupersetEqual;", { 8931, 0 } },
+    { "&NotSubset;", { 8834, 8402 } },
+    { "&NotSubsetEqual;", { 8840, 0 } },
+    { "&NotSucceeds;", { 8833, 0 } },
+    { "&NotSucceedsEqual;", { 10928, 824 } },
+    { "&NotSucceedsSlantEqual;", { 8929, 0 } },
+    { "&NotSucceedsTilde;", { 8831, 824 } },
+    { "&NotSuperset;", { 8835, 8402 } },
+    { "&NotSupersetEqual;", { 8841, 0 } },
+    { "&NotTilde;", { 8769, 0 } },
+    { "&NotTildeEqual;", { 8772, 0 } },
+    { "&NotTildeFullEqual;", { 8775, 0 } },
+    { "&NotTildeTilde;", { 8777, 0 } },
+    { "&NotVerticalBar;", { 8740, 0 } },
+    { "&Nscr;", { 119977, 0 } },
+    { "&Ntilde;", { 209, 0 } },
+    { "&Nu;", { 925, 0 } },
+    { "&OElig;", { 338, 0 } },
+    { "&Oacute;", { 211, 0 } },
+    { "&Ocirc;", { 212, 0 } },
+    { "&Ocy;", { 1054, 0 } },
+    { "&Odblac;", { 336, 0 } },
+    { "&Ofr;", { 120082, 0 } },
+    { "&Ograve;", { 210, 0 } },
+    { "&Omacr;", { 332, 0 } },
+    { "&Omega;", { 937, 0 } },
+    { "&Omicron;", { 927, 0 } },
+    { "&Oopf;", { 120134, 0 } },
+    { "&OpenCurlyDoubleQuote;", { 8220, 0 } },
+    { "&OpenCurlyQuote;", { 8216, 0 } },
+    { "&Or;", { 10836, 0 } },
+    { "&Oscr;", { 119978, 0 } },
+    { "&Oslash;", { 216, 0 } },
+    { "&Otilde;", { 213, 0 } },
+    { "&Otimes;", { 10807, 0 } },
+    { "&Ouml;", { 214, 0 } },
+    { "&OverBar;", { 8254, 0 } },
+    { "&OverBrace;", { 9182, 0 } },
+    { "&OverBracket;", { 9140, 0 } },
+    { "&OverParenthesis;", { 9180, 0 } },
+    { "&PartialD;", { 8706, 0 } },
+    { "&Pcy;", { 1055, 0 } },
+    { "&Pfr;", { 120083, 0 } },
+    { "&Phi;", { 934, 0 } },
+    { "&Pi;", { 928, 0 } },
+    { "&PlusMinus;", { 177, 0 } },
+    { "&Poincareplane;", { 8460, 0 } },
+    { "&Popf;", { 8473, 0 } },
+    { "&Pr;", { 10939, 0 } },
+    { "&Precedes;", { 8826, 0 } },
+    { "&PrecedesEqual;", { 10927, 0 } },
+    { "&PrecedesSlantEqual;", { 8828, 0 } },
+    { "&PrecedesTilde;", { 8830, 0 } },
+    { "&Prime;", { 8243, 0 } },
+    { "&Product;", { 8719, 0 } },
+    { "&Proportion;", { 8759, 0 } },
+    { "&Proportional;", { 8733, 0 } },
+    { "&Pscr;", { 119979, 0 } },
+    { "&Psi;", { 936, 0 } },
+    { "&QUOT;", { 34, 0 } },
+    { "&Qfr;", { 120084, 0 } },
+    { "&Qopf;", { 8474, 0 } },
+    { "&Qscr;", { 119980, 0 } },
+    { "&RBarr;", { 10512, 0 } },
+    { "&REG;", { 174, 0 } },
+    { "&Racute;", { 340, 0 } },
+    { "&Rang;", { 10219, 0 } },
+    { "&Rarr;", { 8608, 0 } },
+    { "&Rarrtl;", { 10518, 0 } },
+    { "&Rcaron;", { 344, 0 } },
+    { "&Rcedil;", { 342, 0 } },
+    { "&Rcy;", { 1056, 0 } },
+    { "&Re;", { 8476, 0 } },
+    { "&ReverseElement;", { 8715, 0 } },
+    { "&ReverseEquilibrium;", { 8651, 0 } },
+    { "&ReverseUpEquilibrium;", { 10607, 0 } },
+    { "&Rfr;", { 8476, 0 } },
+    { "&Rho;", { 929, 0 } },
+    { "&RightAngleBracket;", { 10217, 0 } },
+    { "&RightArrow;", { 8594, 0 } },
+    { "&RightArrowBar;", { 8677, 0 } },
+    { "&RightArrowLeftArrow;", { 8644, 0 } },
+    { "&RightCeiling;", { 8969, 0 } },
+    { "&RightDoubleBracket;", { 10215, 0 } },
+    { "&RightDownTeeVector;", { 10589, 0 } },
+    { "&RightDownVector;", { 8642, 0 } },
+    { "&RightDownVectorBar;", { 10581, 0 } },
+    { "&RightFloor;", { 8971, 0 } },
+    { "&RightTee;", { 8866, 0 } },
+    { "&RightTeeArrow;", { 8614, 0 } },
+    { "&RightTeeVector;", { 10587, 0 } },
+    { "&RightTriangle;", { 8883, 0 } },
+    { "&RightTriangleBar;", { 10704, 0 } },
+    { "&RightTriangleEqual;", { 8885, 0 } },
+    { "&RightUpDownVector;", { 10575, 0 } },
+    { "&RightUpTeeVector;", { 10588, 0 } },
+    { "&RightUpVector;", { 8638, 0 } },
+    { "&RightUpVectorBar;", { 10580, 0 } },
+    { "&RightVector;", { 8640, 0 } },
+    { "&RightVectorBar;", { 10579, 0 } },
+    { "&Rightarrow;", { 8658, 0 } },
+    { "&Ropf;", { 8477, 0 } },
+    { "&RoundImplies;", { 10608, 0 } },
+    { "&Rrightarrow;", { 8667, 0 } },
+    { "&Rscr;", { 8475, 0 } },
+    { "&Rsh;", { 8625, 0 } },
+    { "&RuleDelayed;", { 10740, 0 } },
+    { "&SHCHcy;", { 1065, 0 } },
+    { "&SHcy;", { 1064, 0 } },
+    { "&SOFTcy;", { 1068, 0 } },
+    { "&Sacute;", { 346, 0 } },
+    { "&Sc;", { 10940, 0 } },
+    { "&Scaron;", { 352, 0 } },
+    { "&Scedil;", { 350, 0 } },
+    { "&Scirc;", { 348, 0 } },
+    { "&Scy;", { 1057, 0 } },
+    { "&Sfr;", { 120086, 0 } },
+    { "&ShortDownArrow;", { 8595, 0 } },
+    { "&ShortLeftArrow;", { 8592, 0 } },
+    { "&ShortRightArrow;", { 8594, 0 } },
+    { "&ShortUpArrow;", { 8593, 0 } },
+    { "&Sigma;", { 931, 0 } },
+    { "&SmallCircle;", { 8728, 0 } },
+    { "&Sopf;", { 120138, 0 } },
+    { "&Sqrt;", { 8730, 0 } },
+    { "&Square;", { 9633, 0 } },
+    { "&SquareIntersection;", { 8851, 0 } },
+    { "&SquareSubset;", { 8847, 0 } },
+    { "&SquareSubsetEqual;", { 8849, 0 } },
+    { "&SquareSuperset;", { 8848, 0 } },
+    { "&SquareSupersetEqual;", { 8850, 0 } },
+    { "&SquareUnion;", { 8852, 0 } },
+    { "&Sscr;", { 119982, 0 } },
+    { "&Star;", { 8902, 0 } },
+    { "&Sub;", { 8912, 0 } },
+    { "&Subset;", { 8912, 0 } },
+    { "&SubsetEqual;", { 8838, 0 } },
+    { "&Succeeds;", { 8827, 0 } },
+    { "&SucceedsEqual;", { 10928, 0 } },
+    { "&SucceedsSlantEqual;", { 8829, 0 } },
+    { "&SucceedsTilde;", { 8831, 0 } },
+    { "&SuchThat;", { 8715, 0 } },
+    { "&Sum;", { 8721, 0 } },
+    { "&Sup;", { 8913, 0 } },
+    { "&Superset;", { 8835, 0 } },
+    { "&SupersetEqual;", { 8839, 0 } },
+    { "&Supset;", { 8913, 0 } },
+    { "&THORN;", { 222, 0 } },
+    { "&TRADE;", { 8482, 0 } },
+    { "&TSHcy;", { 1035, 0 } },
+    { "&TScy;", { 1062, 0 } },
+    { "&Tab;", { 9, 0 } },
+    { "&Tau;", { 932, 0 } },
+    { "&Tcaron;", { 356, 0 } },
+    { "&Tcedil;", { 354, 0 } },
+    { "&Tcy;", { 1058, 0 } },
+    { "&Tfr;", { 120087, 0 } },
+    { "&Therefore;", { 8756, 0 } },
+    { "&Theta;", { 920, 0 } },
+    { "&ThickSpace;", { 8287, 8202 } },
+    { "&ThinSpace;", { 8201, 0 } },
+    { "&Tilde;", { 8764, 0 } },
+    { "&TildeEqual;", { 8771, 0 } },
+    { "&TildeFullEqual;", { 8773, 0 } },
+    { "&TildeTilde;", { 8776, 0 } },
+    { "&Topf;", { 120139, 0 } },
+    { "&TripleDot;", { 8411, 0 } },
+    { "&Tscr;", { 119983, 0 } },
+    { "&Tstrok;", { 358, 0 } },
+    { "&Uacute;", { 218, 0 } },
+    { "&Uarr;", { 8607, 0 } },
+    { "&Uarrocir;", { 10569, 0 } },
+    { "&Ubrcy;", { 1038, 0 } },
+    { "&Ubreve;", { 364, 0 } },
+    { "&Ucirc;", { 219, 0 } },
+    { "&Ucy;", { 1059, 0 } },
+    { "&Udblac;", { 368, 0 } },
+    { "&Ufr;", { 120088, 0 } },
+    { "&Ugrave;", { 217, 0 } },
+    { "&Umacr;", { 362, 0 } },
+    { "&UnderBar;", { 95, 0 } },
+    { "&UnderBrace;", { 9183, 0 } },
+    { "&UnderBracket;", { 9141, 0 } },
+    { "&UnderParenthesis;", { 9181, 0 } },
+    { "&Union;", { 8899, 0 } },
+    { "&UnionPlus;", { 8846, 0 } },
+    { "&Uogon;", { 370, 0 } },
+    { "&Uopf;", { 120140, 0 } },
+    { "&UpArrow;", { 8593, 0 } },
+    { "&UpArrowBar;", { 10514, 0 } },
+    { "&UpArrowDownArrow;", { 8645, 0 } },
+    { "&UpDownArrow;", { 8597, 0 } },
+    { "&UpEquilibrium;", { 10606, 0 } },
+    { "&UpTee;", { 8869, 0 } },
+    { "&UpTeeArrow;", { 8613, 0 } },
+    { "&Uparrow;", { 8657, 0 } },
+    { "&Updownarrow;", { 8661, 0 } },
+    { "&UpperLeftArrow;", { 8598, 0 } },
+    { "&UpperRightArrow;", { 8599, 0 } },
+    { "&Upsi;", { 978, 0 } },
+    { "&Upsilon;", { 933, 0 } },
+    { "&Uring;", { 366, 0 } },
+    { "&Uscr;", { 119984, 0 } },
+    { "&Utilde;", { 360, 0 } },
+    { "&Uuml;", { 220, 0 } },
+    { "&VDash;", { 8875, 0 } },
+    { "&Vbar;", { 10987, 0 } },
+    { "&Vcy;", { 1042, 0 } },
+    { "&Vdash;", { 8873, 0 } },
+    { "&Vdashl;", { 10982, 0 } },
+    { "&Vee;", { 8897, 0 } },
+    { "&Verbar;", { 8214, 0 } },
+    { "&Vert;", { 8214, 0 } },
+    { "&VerticalBar;", { 8739, 0 } },
+    { "&VerticalLine;", { 124, 0 } },
+    { "&VerticalSeparator;", { 10072, 0 } },
+    { "&VerticalTilde;", { 8768, 0 } },
+    { "&VeryThinSpace;", { 8202, 0 } },
+    { "&Vfr;", { 120089, 0 } },
+    { "&Vopf;", { 120141, 0 } },
+    { "&Vscr;", { 119985, 0 } },
+    { "&Vvdash;", { 8874, 0 } },
+    { "&Wcirc;", { 372, 0 } },
+    { "&Wedge;", { 8896, 0 } },
+    { "&Wfr;", { 120090, 0 } },
+    { "&Wopf;", { 120142, 0 } },
+    { "&Wscr;", { 119986, 0 } },
+    { "&Xfr;", { 120091, 0 } },
+    { "&Xi;", { 926, 0 } },
+    { "&Xopf;", { 120143, 0 } },
+    { "&Xscr;", { 119987, 0 } },
+    { "&YAcy;", { 1071, 0 } },
+    { "&YIcy;", { 1031, 0 } },
+    { "&YUcy;", { 1070, 0 } },
+    { "&Yacute;", { 221, 0 } },
+    { "&Ycirc;", { 374, 0 } },
+    { "&Ycy;", { 1067, 0 } },
+    { "&Yfr;", { 120092, 0 } },
+    { "&Yopf;", { 120144, 0 } },
+    { "&Yscr;", { 119988, 0 } },
+    { "&Yuml;", { 376, 0 } },
+    { "&ZHcy;", { 1046, 0 } },
+    { "&Zacute;", { 377, 0 } },
+    { "&Zcaron;", { 381, 0 } },
+    { "&Zcy;", { 1047, 0 } },
+    { "&Zdot;", { 379, 0 } },
+    { "&ZeroWidthSpace;", { 8203, 0 } },
+    { "&Zeta;", { 918, 0 } },
+    { "&Zfr;", { 8488, 0 } },
+    { "&Zopf;", { 8484, 0 } },
+    { "&Zscr;", { 119989, 0 } },
+    { "&aacute;", { 225, 0 } },
+    { "&abreve;", { 259, 0 } },
+    { "&ac;", { 8766, 0 } },
+    { "&acE;", { 8766, 819 } },
+    { "&acd;", { 8767, 0 } },
+    { "&acirc;", { 226, 0 } },
+    { "&acute;", { 180, 0 } },
+    { "&acy;", { 1072, 0 } },
+    { "&aelig;", { 230, 0 } },
+    { "&af;", { 8289, 0 } },
+    { "&afr;", { 120094, 0 } },
+    { "&agrave;", { 224, 0 } },
+    { "&alefsym;", { 8501, 0 } },
+    { "&aleph;", { 8501, 0 } },
+    { "&alpha;", { 945, 0 } },
+    { "&amacr;", { 257, 0 } },
+    { "&amalg;", { 10815, 0 } },
+    { "&amp;", { 38, 0 } },
+    { "&and;", { 8743, 0 } },
+    { "&andand;", { 10837, 0 } },
+    { "&andd;", { 10844, 0 } },
+    { "&andslope;", { 10840, 0 } },
+    { "&andv;", { 10842, 0 } },
+    { "&ang;", { 8736, 0 } },
+    { "&ange;", { 10660, 0 } },
+    { "&angle;", { 8736, 0 } },
+    { "&angmsd;", { 8737, 0 } },
+    { "&angmsdaa;", { 10664, 0 } },
+    { "&angmsdab;", { 10665, 0 } },
+    { "&angmsdac;", { 10666, 0 } },
+    { "&angmsdad;", { 10667, 0 } },
+    { "&angmsdae;", { 10668, 0 } },
+    { "&angmsdaf;", { 10669, 0 } },
+    { "&angmsdag;", { 10670, 0 } },
+    { "&angmsdah;", { 10671, 0 } },
+    { "&angrt;", { 8735, 0 } },
+    { "&angrtvb;", { 8894, 0 } },
+    { "&angrtvbd;", { 10653, 0 } },
+    { "&angsph;", { 8738, 0 } },
+    { "&angst;", { 197, 0 } },
+    { "&angzarr;", { 9084, 0 } },
+    { "&aogon;", { 261, 0 } },
+    { "&aopf;", { 120146, 0 } },
+    { "&ap;", { 8776, 0 } },
+    { "&apE;", { 10864, 0 } },
+    { "&apacir;", { 10863, 0 } },
+    { "&ape;", { 8778, 0 } },
+    { "&apid;", { 8779, 0 } },
+    { "&apos;", { 39, 0 } },
+    { "&approx;", { 8776, 0 } },
+    { "&approxeq;", { 8778, 0 } },
+    { "&aring;", { 229, 0 } },
+    { "&ascr;", { 119990, 0 } },
+    { "&ast;", { 42, 0 } },
+    { "&asymp;", { 8776, 0 } },
+    { "&asympeq;", { 8781, 0 } },
+    { "&atilde;", { 227, 0 } },
+    { "&auml;", { 228, 0 } },
+    { "&awconint;", { 8755, 0 } },
+    { "&awint;", { 10769, 0 } },
+    { "&bNot;", { 10989, 0 } },
+    { "&backcong;", { 8780, 0 } },
+    { "&backepsilon;", { 1014, 0 } },
+    { "&backprime;", { 8245, 0 } },
+    { "&backsim;", { 8765, 0 } },
+    { "&backsimeq;", { 8909, 0 } },
+    { "&barvee;", { 8893, 0 } },
+    { "&barwed;", { 8965, 0 } },
+    { "&barwedge;", { 8965, 0 } },
+    { "&bbrk;", { 9141, 0 } },
+    { "&bbrktbrk;", { 9142, 0 } },
+    { "&bcong;", { 8780, 0 } },
+    { "&bcy;", { 1073, 0 } },
+    { "&bdquo;", { 8222, 0 } },
+    { "&becaus;", { 8757, 0 } },
+    { "&because;", { 8757, 0 } },
+    { "&bemptyv;", { 10672, 0 } },
+    { "&bepsi;", { 1014, 0 } },
+    { "&bernou;", { 8492, 0 } },
+    { "&beta;", { 946, 0 } },
+    { "&beth;", { 8502, 0 } },
+    { "&between;", { 8812, 0 } },
+    { "&bfr;", { 120095, 0 } },
+    { "&bigcap;", { 8898, 0 } },
+    { "&bigcirc;", { 9711, 0 } },
+    { "&bigcup;", { 8899, 0 } },
+    { "&bigodot;", { 10752, 0 } },
+    { "&bigoplus;", { 10753, 0 } },
+    { "&bigotimes;", { 10754, 0 } },
+    { "&bigsqcup;", { 10758, 0 } },
+    { "&bigstar;", { 9733, 0 } },
+    { "&bigtriangledown;", { 9661, 0 } },
+    { "&bigtriangleup;", { 9651, 0 } },
+    { "&biguplus;", { 10756, 0 } },
+    { "&bigvee;", { 8897, 0 } },
+    { "&bigwedge;", { 8896, 0 } },
+    { "&bkarow;", { 10509, 0 } },
+    { "&blacklozenge;", { 10731, 0 } },
+    { "&blacksquare;", { 9642, 0 } },
+    { "&blacktriangle;", { 9652, 0 } },
+    { "&blacktriangledown;", { 9662, 0 } },
+    { "&blacktriangleleft;", { 9666, 0 } },
+    { "&blacktriangleright;", { 9656, 0 } },
+    { "&blank;", { 9251, 0 } },
+    { "&blk12;", { 9618, 0 } },
+    { "&blk14;", { 9617, 0 } },
+    { "&blk34;", { 9619, 0 } },
+    { "&block;", { 9608, 0 } },
+    { "&bne;", { 61, 8421 } },
+    { "&bnequiv;", { 8801, 8421 } },
+    { "&bnot;", { 8976, 0 } },
+    { "&bopf;", { 120147, 0 } },
+    { "&bot;", { 8869, 0 } },
+    { "&bottom;", { 8869, 0 } },
+    { "&bowtie;", { 8904, 0 } },
+    { "&boxDL;", { 9559, 0 } },
+    { "&boxDR;", { 9556, 0 } },
+    { "&boxDl;", { 9558, 0 } },
+    { "&boxDr;", { 9555, 0 } },
+    { "&boxH;", { 9552, 0 } },
+    { "&boxHD;", { 9574, 0 } },
+    { "&boxHU;", { 9577, 0 } },
+    { "&boxHd;", { 9572, 0 } },
+    { "&boxHu;", { 9575, 0 } },
+    { "&boxUL;", { 9565, 0 } },
+    { "&boxUR;", { 9562, 0 } },
+    { "&boxUl;", { 9564, 0 } },
+    { "&boxUr;", { 9561, 0 } },
+    { "&boxV;", { 9553, 0 } },
+    { "&boxVH;", { 9580, 0 } },
+    { "&boxVL;", { 9571, 0 } },
+    { "&boxVR;", { 9568, 0 } },
+    { "&boxVh;", { 9579, 0 } },
+    { "&boxVl;", { 9570, 0 } },
+    { "&boxVr;", { 9567, 0 } },
+    { "&boxbox;", { 10697, 0 } },
+    { "&boxdL;", { 9557, 0 } },
+    { "&boxdR;", { 9554, 0 } },
+    { "&boxdl;", { 9488, 0 } },
+    { "&boxdr;", { 9484, 0 } },
+    { "&boxh;", { 9472, 0 } },
+    { "&boxhD;", { 9573, 0 } },
+    { "&boxhU;", { 9576, 0 } },
+    { "&boxhd;", { 9516, 0 } },
+    { "&boxhu;", { 9524, 0 } },
+    { "&boxminus;", { 8863, 0 } },
+    { "&boxplus;", { 8862, 0 } },
+    { "&boxtimes;", { 8864, 0 } },
+    { "&boxuL;", { 9563, 0 } },
+    { "&boxuR;", { 9560, 0 } },
+    { "&boxul;", { 9496, 0 } },
+    { "&boxur;", { 9492, 0 } },
+    { "&boxv;", { 9474, 0 } },
+    { "&boxvH;", { 9578, 0 } },
+    { "&boxvL;", { 9569, 0 } },
+    { "&boxvR;", { 9566, 0 } },
+    { "&boxvh;", { 9532, 0 } },
+    { "&boxvl;", { 9508, 0 } },
+    { "&boxvr;", { 9500, 0 } },
+    { "&bprime;", { 8245, 0 } },
+    { "&breve;", { 728, 0 } },
+    { "&brvbar;", { 166, 0 } },
+    { "&bscr;", { 119991, 0 } },
+    { "&bsemi;", { 8271, 0 } },
+    { "&bsim;", { 8765, 0 } },
+    { "&bsime;", { 8909, 0 } },
+    { "&bsol;", { 92, 0 } },
+    { "&bsolb;", { 10693, 0 } },
+    { "&bsolhsub;", { 10184, 0 } },
+    { "&bull;", { 8226, 0 } },
+    { "&bullet;", { 8226, 0 } },
+    { "&bump;", { 8782, 0 } },
+    { "&bumpE;", { 10926, 0 } },
+    { "&bumpe;", { 8783, 0 } },
+    { "&bumpeq;", { 8783, 0 } },
+    { "&cacute;", { 263, 0 } },
+    { "&cap;", { 8745, 0 } },
+    { "&capand;", { 10820, 0 } },
+    { "&capbrcup;", { 10825, 0 } },
+    { "&capcap;", { 10827, 0 } },
+    { "&capcup;", { 10823, 0 } },
+    { "&capdot;", { 10816, 0 } },
+    { "&caps;", { 8745, 65024 } },
+    { "&caret;", { 8257, 0 } },
+    { "&caron;", { 711, 0 } },
+    { "&ccaps;", { 10829, 0 } },
+    { "&ccaron;", { 269, 0 } },
+    { "&ccedil;", { 231, 0 } },
+    { "&ccirc;", { 265, 0 } },
+    { "&ccups;", { 10828, 0 } },
+    { "&ccupssm;", { 10832, 0 } },
+    { "&cdot;", { 267, 0 } },
+    { "&cedil;", { 184, 0 } },
+    { "&cemptyv;", { 10674, 0 } },
+    { "&cent;", { 162, 0 } },
+    { "&centerdot;", { 183, 0 } },
+    { "&cfr;", { 120096, 0 } },
+    { "&chcy;", { 1095, 0 } },
+    { "&check;", { 10003, 0 } },
+    { "&checkmark;", { 10003, 0 } },
+    { "&chi;", { 967, 0 } },
+    { "&cir;", { 9675, 0 } },
+    { "&cirE;", { 10691, 0 } },
+    { "&circ;", { 710, 0 } },
+    { "&circeq;", { 8791, 0 } },
+    { "&circlearrowleft;", { 8634, 0 } },
+    { "&circlearrowright;", { 8635, 0 } },
+    { "&circledR;", { 174, 0 } },
+    { "&circledS;", { 9416, 0 } },
+    { "&circledast;", { 8859, 0 } },
+    { "&circledcirc;", { 8858, 0 } },
+    { "&circleddash;", { 8861, 0 } },
+    { "&cire;", { 8791, 0 } },
+    { "&cirfnint;", { 10768, 0 } },
+    { "&cirmid;", { 10991, 0 } },
+    { "&cirscir;", { 10690, 0 } },
+    { "&clubs;", { 9827, 0 } },
+    { "&clubsuit;", { 9827, 0 } },
+    { "&colon;", { 58, 0 } },
+    { "&colone;", { 8788, 0 } },
+    { "&coloneq;", { 8788, 0 } },
+    { "&comma;", { 44, 0 } },
+    { "&commat;", { 64, 0 } },
+    { "&comp;", { 8705, 0 } },
+    { "&compfn;", { 8728, 0 } },
+    { "&complement;", { 8705, 0 } },
+    { "&complexes;", { 8450, 0 } },
+    { "&cong;", { 8773, 0 } },
+    { "&congdot;", { 10861, 0 } },
+    { "&conint;", { 8750, 0 } },
+    { "&copf;", { 120148, 0 } },
+    { "&coprod;", { 8720, 0 } },
+    { "&copy;", { 169, 0 } },
+    { "&copysr;", { 8471, 0 } },
+    { "&crarr;", { 8629, 0 } },
+    { "&cross;", { 10007, 0 } },
+    { "&cscr;", { 119992, 0 } },
+    { "&csub;", { 10959, 0 } },
+    { "&csube;", { 10961, 0 } },
+    { "&csup;", { 10960, 0 } },
+    { "&csupe;", { 10962, 0 } },
+    { "&ctdot;", { 8943, 0 } },
+    { "&cudarrl;", { 10552, 0 } },
+    { "&cudarrr;", { 10549, 0 } },
+    { "&cuepr;", { 8926, 0 } },
+    { "&cuesc;", { 8927, 0 } },
+    { "&cularr;", { 8630, 0 } },
+    { "&cularrp;", { 10557, 0 } },
+    { "&cup;", { 8746, 0 } },
+    { "&cupbrcap;", { 10824, 0 } },
+    { "&cupcap;", { 10822, 0 } },
+    { "&cupcup;", { 10826, 0 } },
+    { "&cupdot;", { 8845, 0 } },
+    { "&cupor;", { 10821, 0 } },
+    { "&cups;", { 8746, 65024 } },
+    { "&curarr;", { 8631, 0 } },
+    { "&curarrm;", { 10556, 0 } },
+    { "&curlyeqprec;", { 8926, 0 } },
+    { "&curlyeqsucc;", { 8927, 0 } },
+    { "&curlyvee;", { 8910, 0 } },
+    { "&curlywedge;", { 8911, 0 } },
+    { "&curren;", { 164, 0 } },
+    { "&curvearrowleft;", { 8630, 0 } },
+    { "&curvearrowright;", { 8631, 0 } },
+    { "&cuvee;", { 8910, 0 } },
+    { "&cuwed;", { 8911, 0 } },
+    { "&cwconint;", { 8754, 0 } },
+    { "&cwint;", { 8753, 0 } },
+    { "&cylcty;", { 9005, 0 } },
+    { "&dArr;", { 8659, 0 } },
+    { "&dHar;", { 10597, 0 } },
+    { "&dagger;", { 8224, 0 } },
+    { "&daleth;", { 8504, 0 } },
+    { "&darr;", { 8595, 0 } },
+    { "&dash;", { 8208, 0 } },
+    { "&dashv;", { 8867, 0 } },
+    { "&dbkarow;", { 10511, 0 } },
+    { "&dblac;", { 733, 0 } },
+    { "&dcaron;", { 271, 0 } },
+    { "&dcy;", { 1076, 0 } },
+    { "&dd;", { 8518, 0 } },
+    { "&ddagger;", { 8225, 0 } },
+    { "&ddarr;", { 8650, 0 } },
+    { "&ddotseq;", { 10871, 0 } },
+    { "&deg;", { 176, 0 } },
+    { "&delta;", { 948, 0 } },
+    { "&demptyv;", { 10673, 0 } },
+    { "&dfisht;", { 10623, 0 } },
+    { "&dfr;", { 120097, 0 } },
+    { "&dharl;", { 8643, 0 } },
+    { "&dharr;", { 8642, 0 } },
+    { "&diam;", { 8900, 0 } },
+    { "&diamond;", { 8900, 0 } },
+    { "&diamondsuit;", { 9830, 0 } },
+    { "&diams;", { 9830, 0 } },
+    { "&die;", { 168, 0 } },
+    { "&digamma;", { 989, 0 } },
+    { "&disin;", { 8946, 0 } },
+    { "&div;", { 247, 0 } },
+    { "&divide;", { 247, 0 } },
+    { "&divideontimes;", { 8903, 0 } },
+    { "&divonx;", { 8903, 0 } },
+    { "&djcy;", { 1106, 0 } },
+    { "&dlcorn;", { 8990, 0 } },
+    { "&dlcrop;", { 8973, 0 } },
+    { "&dollar;", { 36, 0 } },
+    { "&dopf;", { 120149, 0 } },
+    { "&dot;", { 729, 0 } },
+    { "&doteq;", { 8784, 0 } },
+    { "&doteqdot;", { 8785, 0 } },
+    { "&dotminus;", { 8760, 0 } },
+    { "&dotplus;", { 8724, 0 } },
+    { "&dotsquare;", { 8865, 0 } },
+    { "&doublebarwedge;", { 8966, 0 } },
+    { "&downarrow;", { 8595, 0 } },
+    { "&downdownarrows;", { 8650, 0 } },
+    { "&downharpoonleft;", { 8643, 0 } },
+    { "&downharpoonright;", { 8642, 0 } },
+    { "&drbkarow;", { 10512, 0 } },
+    { "&drcorn;", { 8991, 0 } },
+    { "&drcrop;", { 8972, 0 } },
+    { "&dscr;", { 119993, 0 } },
+    { "&dscy;", { 1109, 0 } },
+    { "&dsol;", { 10742, 0 } },
+    { "&dstrok;", { 273, 0 } },
+    { "&dtdot;", { 8945, 0 } },
+    { "&dtri;", { 9663, 0 } },
+    { "&dtrif;", { 9662, 0 } },
+    { "&duarr;", { 8693, 0 } },
+    { "&duhar;", { 10607, 0 } },
+    { "&dwangle;", { 10662, 0 } },
+    { "&dzcy;", { 1119, 0 } },
+    { "&dzigrarr;", { 10239, 0 } },
+    { "&eDDot;", { 10871, 0 } },
+    { "&eDot;", { 8785, 0 } },
+    { "&eacute;", { 233, 0 } },
+    { "&easter;", { 10862, 0 } },
+    { "&ecaron;", { 283, 0 } },
+    { "&ecir;", { 8790, 0 } },
+    { "&ecirc;", { 234, 0 } },
+    { "&ecolon;", { 8789, 0 } },
+    { "&ecy;", { 1101, 0 } },
+    { "&edot;", { 279, 0 } },
+    { "&ee;", { 8519, 0 } },
+    { "&efDot;", { 8786, 0 } },
+    { "&efr;", { 120098, 0 } },
+    { "&eg;", { 10906, 0 } },
+    { "&egrave;", { 232, 0 } },
+    { "&egs;", { 10902, 0 } },
+    { "&egsdot;", { 10904, 0 } },
+    { "&el;", { 10905, 0 } },
+    { "&elinters;", { 9191, 0 } },
+    { "&ell;", { 8467, 0 } },
+    { "&els;", { 10901, 0 } },
+    { "&elsdot;", { 10903, 0 } },
+    { "&emacr;", { 275, 0 } },
+    { "&empty;", { 8709, 0 } },
+    { "&emptyset;", { 8709, 0 } },
+    { "&emptyv;", { 8709, 0 } },
+    { "&emsp13;", { 8196, 0 } },
+    { "&emsp14;", { 8197, 0 } },
+    { "&emsp;", { 8195, 0 } },
+    { "&eng;", { 331, 0 } },
+    { "&ensp;", { 8194, 0 } },
+    { "&eogon;", { 281, 0 } },
+    { "&eopf;", { 120150, 0 } },
+    { "&epar;", { 8917, 0 } },
+    { "&eparsl;", { 10723, 0 } },
+    { "&eplus;", { 10865, 0 } },
+    { "&epsi;", { 949, 0 } },
+    { "&epsilon;", { 949, 0 } },
+    { "&epsiv;", { 1013, 0 } },
+    { "&eqcirc;", { 8790, 0 } },
+    { "&eqcolon;", { 8789, 0 } },
+    { "&eqsim;", { 8770, 0 } },
+    { "&eqslantgtr;", { 10902, 0 } },
+    { "&eqslantless;", { 10901, 0 } },
+    { "&equals;", { 61, 0 } },
+    { "&equest;", { 8799, 0 } },
+    { "&equiv;", { 8801, 0 } },
+    { "&equivDD;", { 10872, 0 } },
+    { "&eqvparsl;", { 10725, 0 } },
+    { "&erDot;", { 8787, 0 } },
+    { "&erarr;", { 10609, 0 } },
+    { "&escr;", { 8495, 0 } },
+    { "&esdot;", { 8784, 0 } },
+    { "&esim;", { 8770, 0 } },
+    { "&eta;", { 951, 0 } },
+    { "&eth;", { 240, 0 } },
+    { "&euml;", { 235, 0 } },
+    { "&euro;", { 8364, 0 } },
+    { "&excl;", { 33, 0 } },
+    { "&exist;", { 8707, 0 } },
+    { "&expectation;", { 8496, 0 } },
+    { "&exponentiale;", { 8519, 0 } },
+    { "&fallingdotseq;", { 8786, 0 } },
+    { "&fcy;", { 1092, 0 } },
+    { "&female;", { 9792, 0 } },
+    { "&ffilig;", { 64259, 0 } },
+    { "&fflig;", { 64256, 0 } },
+    { "&ffllig;", { 64260, 0 } },
+    { "&ffr;", { 120099, 0 } },
+    { "&filig;", { 64257, 0 } },
+    { "&fjlig;", { 102, 106 } },
+    { "&flat;", { 9837, 0 } },
+    { "&fllig;", { 64258, 0 } },
+    { "&fltns;", { 9649, 0 } },
+    { "&fnof;", { 402, 0 } },
+    { "&fopf;", { 120151, 0 } },
+    { "&forall;", { 8704, 0 } },
+    { "&fork;", { 8916, 0 } },
+    { "&forkv;", { 10969, 0 } },
+    { "&fpartint;", { 10765, 0 } },
+    { "&frac12", { 189, 0 } },
+    { "&frac12;", { 189, 0 } },
+    { "&frac13;", { 8531, 0 } },
+    { "&frac14", { 188, 0 } },
+    { "&frac14;", { 188, 0 } },
+    { "&frac15;", { 8533, 0 } },
+    { "&frac16;", { 8537, 0 } },
+    { "&frac18;", { 8539, 0 } },
+    { "&frac23;", { 8532, 0 } },
+    { "&frac25;", { 8534, 0 } },
+    { "&frac34", { 190, 0 } },
+    { "&frac34;", { 190, 0 } },
+    { "&frac35;", { 8535, 0 } },
+    { "&frac38;", { 8540, 0 } },
+    { "&frac45;", { 8536, 0 } },
+    { "&frac56;", { 8538, 0 } },
+    { "&frac58;", { 8541, 0 } },
+    { "&frac78;", { 8542, 0 } },
+    { "&frasl;", { 8260, 0 } },
+    { "&frown;", { 8994, 0 } },
+    { "&fscr;", { 119995, 0 } },
+    { "&gE;", { 8807, 0 } },
+    { "&gEl;", { 10892, 0 } },
+    { "&gacute;", { 501, 0 } },
+    { "&gamma;", { 947, 0 } },
+    { "&gammad;", { 989, 0 } },
+    { "&gap;", { 10886, 0 } },
+    { "&gbreve;", { 287, 0 } },
+    { "&gcirc;", { 285, 0 } },
+    { "&gcy;", { 1075, 0 } },
+    { "&gdot;", { 289, 0 } },
+    { "&ge;", { 8805, 0 } },
+    { "&gel;", { 8923, 0 } },
+    { "&geq;", { 8805, 0 } },
+    { "&geqq;", { 8807, 0 } },
+    { "&geqslant;", { 10878, 0 } },
+    { "&ges;", { 10878, 0 } },
+    { "&gescc;", { 10921, 0 } },
+    { "&gesdot;", { 10880, 0 } },
+    { "&gesdoto;", { 10882, 0 } },
+    { "&gesdotol;", { 10884, 0 } },
+    { "&gesl;", { 8923, 65024 } },
+    { "&gesles;", { 10900, 0 } },
+    { "&gfr;", { 120100, 0 } },
+    { "&gg;", { 8811, 0 } },
+    { "&ggg;", { 8921, 0 } },
+    { "&gimel;", { 8503, 0 } },
+    { "&gjcy;", { 1107, 0 } },
+    { "&gl;", { 8823, 0 } },
+    { "&glE;", { 10898, 0 } },
+    { "&gla;", { 10917, 0 } },
+    { "&glj;", { 10916, 0 } },
+    { "&gnE;", { 8809, 0 } },
+    { "&gnap;", { 10890, 0 } },
+    { "&gnapprox;", { 10890, 0 } },
+    { "&gne;", { 10888, 0 } },
+    { "&gneq;", { 10888, 0 } },
+    { "&gneqq;", { 8809, 0 } },
+    { "&gnsim;", { 8935, 0 } },
+    { "&gopf;", { 120152, 0 } },
+    { "&grave;", { 96, 0 } },
+    { "&gscr;", { 8458, 0 } },
+    { "&gsim;", { 8819, 0 } },
+    { "&gsime;", { 10894, 0 } },
+    { "&gsiml;", { 10896, 0 } },
+    { "&gt;", { 62, 0 } },
+    { "&gtcc;", { 10919, 0 } },
+    { "&gtcir;", { 10874, 0 } },
+    { "&gtdot;", { 8919, 0 } },
+    { "&gtlPar;", { 10645, 0 } },
+    { "&gtquest;", { 10876, 0 } },
+    { "&gtrapprox;", { 10886, 0 } },
+    { "&gtrarr;", { 10616, 0 } },
+    { "&gtrdot;", { 8919, 0 } },
+    { "&gtreqless;", { 8923, 0 } },
+    { "&gtreqqless;", { 10892, 0 } },
+    { "&gtrless;", { 8823, 0 } },
+    { "&gtrsim;", { 8819, 0 } },
+    { "&gvertneqq;", { 8809, 65024 } },
+    { "&gvnE;", { 8809, 65024 } },
+    { "&hArr;", { 8660, 0 } },
+    { "&hairsp;", { 8202, 0 } },
+    { "&half;", { 189, 0 } },
+    { "&hamilt;", { 8459, 0 } },
+    { "&hardcy;", { 1098, 0 } },
+    { "&harr;", { 8596, 0 } },
+    { "&harrcir;", { 10568, 0 } },
+    { "&harrw;", { 8621, 0 } },
+    { "&hbar;", { 8463, 0 } },
+    { "&hcirc;", { 293, 0 } },
+    { "&hearts;", { 9829, 0 } },
+    { "&heartsuit;", { 9829, 0 } },
+    { "&hellip;", { 8230, 0 } },
+    { "&hercon;", { 8889, 0 } },
+    { "&hfr;", { 120101, 0 } },
+    { "&hksearow;", { 10533, 0 } },
+    { "&hkswarow;", { 10534, 0 } },
+    { "&hoarr;", { 8703, 0 } },
+    { "&homtht;", { 8763, 0 } },
+    { "&hookleftarrow;", { 8617, 0 } },
+    { "&hookrightarrow;", { 8618, 0 } },
+    { "&hopf;", { 120153, 0 } },
+    { "&horbar;", { 8213, 0 } },
+    { "&hscr;", { 119997, 0 } },
+    { "&hslash;", { 8463, 0 } },
+    { "&hstrok;", { 295, 0 } },
+    { "&hybull;", { 8259, 0 } },
+    { "&hyphen;", { 8208, 0 } },
+    { "&iacute;", { 237, 0 } },
+    { "&ic;", { 8291, 0 } },
+    { "&icirc;", { 238, 0 } },
+    { "&icy;", { 1080, 0 } },
+    { "&iecy;", { 1077, 0 } },
+    { "&iexcl;", { 161, 0 } },
+    { "&iff;", { 8660, 0 } },
+    { "&ifr;", { 120102, 0 } },
+    { "&igrave;", { 236, 0 } },
+    { "&ii;", { 8520, 0 } },
+    { "&iiiint;", { 10764, 0 } },
+    { "&iiint;", { 8749, 0 } },
+    { "&iinfin;", { 10716, 0 } },
+    { "&iiota;", { 8489, 0 } },
+    { "&ijlig;", { 307, 0 } },
+    { "&imacr;", { 299, 0 } },
+    { "&image;", { 8465, 0 } },
+    { "&imagline;", { 8464, 0 } },
+    { "&imagpart;", { 8465, 0 } },
+    { "&imath;", { 305, 0 } },
+    { "&imof;", { 8887, 0 } },
+    { "&imped;", { 437, 0 } },
+    { "&in;", { 8712, 0 } },
+    { "&incare;", { 8453, 0 } },
+    { "&infin;", { 8734, 0 } },
+    { "&infintie;", { 10717, 0 } },
+    { "&inodot;", { 305, 0 } },
+    { "&int;", { 8747, 0 } },
+    { "&intcal;", { 8890, 0 } },
+    { "&integers;", { 8484, 0 } },
+    { "&intercal;", { 8890, 0 } },
+    { "&intlarhk;", { 10775, 0 } },
+    { "&intprod;", { 10812, 0 } },
+    { "&iocy;", { 1105, 0 } },
+    { "&iogon;", { 303, 0 } },
+    { "&iopf;", { 120154, 0 } },
+    { "&iota;", { 953, 0 } },
+    { "&iprod;", { 10812, 0 } },
+    { "&iquest;", { 191, 0 } },
+    { "&iscr;", { 119998, 0 } },
+    { "&isin;", { 8712, 0 } },
+    { "&isinE;", { 8953, 0 } },
+    { "&isindot;", { 8949, 0 } },
+    { "&isins;", { 8948, 0 } },
+    { "&isinsv;", { 8947, 0 } },
+    { "&isinv;", { 8712, 0 } },
+    { "&it;", { 8290, 0 } },
+    { "&itilde;", { 297, 0 } },
+    { "&iukcy;", { 1110, 0 } },
+    { "&iuml;", { 239, 0 } },
+    { "&jcirc;", { 309, 0 } },
+    { "&jcy;", { 1081, 0 } },
+    { "&jfr;", { 120103, 0 } },
+    { "&jmath;", { 567, 0 } },
+    { "&jopf;", { 120155, 0 } },
+    { "&jscr;", { 119999, 0 } },
+    { "&jsercy;", { 1112, 0 } },
+    { "&jukcy;", { 1108, 0 } },
+    { "&kappa;", { 954, 0 } },
+    { "&kappav;", { 1008, 0 } },
+    { "&kcedil;", { 311, 0 } },
+    { "&kcy;", { 1082, 0 } },
+    { "&kfr;", { 120104, 0 } },
+    { "&kgreen;", { 312, 0 } },
+    { "&khcy;", { 1093, 0 } },
+    { "&kjcy;", { 1116, 0 } },
+    { "&kopf;", { 120156, 0 } },
+    { "&kscr;", { 120000, 0 } },
+    { "&lAarr;", { 8666, 0 } },
+    { "&lArr;", { 8656, 0 } },
+    { "&lAtail;", { 10523, 0 } },
+    { "&lBarr;", { 10510, 0 } },
+    { "&lE;", { 8806, 0 } },
+    { "&lEg;", { 10891, 0 } },
+    { "&lHar;", { 10594, 0 } },
+    { "&lacute;", { 314, 0 } },
+    { "&laemptyv;", { 10676, 0 } },
+    { "&lagran;", { 8466, 0 } },
+    { "&lambda;", { 955, 0 } },
+    { "&lang;", { 10216, 0 } },
+    { "&langd;", { 10641, 0 } },
+    { "&langle;", { 10216, 0 } },
+    { "&lap;", { 10885, 0 } },
+    { "&laquo;", { 171, 0 } },
+    { "&larr;", { 8592, 0 } },
+    { "&larrb;", { 8676, 0 } },
+    { "&larrbfs;", { 10527, 0 } },
+    { "&larrfs;", { 10525, 0 } },
+    { "&larrhk;", { 8617, 0 } },
+    { "&larrlp;", { 8619, 0 } },
+    { "&larrpl;", { 10553, 0 } },
+    { "&larrsim;", { 10611, 0 } },
+    { "&larrtl;", { 8610, 0 } },
+    { "&lat;", { 10923, 0 } },
+    { "&latail;", { 10521, 0 } },
+    { "&late;", { 10925, 0 } },
+    { "&lates;", { 10925, 65024 } },
+    { "&lbarr;", { 10508, 0 } },
+    { "&lbbrk;", { 10098, 0 } },
+    { "&lbrace;", { 123, 0 } },
+    { "&lbrack;", { 91, 0 } },
+    { "&lbrke;", { 10635, 0 } },
+    { "&lbrksld;", { 10639, 0 } },
+    { "&lbrkslu;", { 10637, 0 } },
+    { "&lcaron;", { 318, 0 } },
+    { "&lcedil;", { 316, 0 } },
+    { "&lceil;", { 8968, 0 } },
+    { "&lcub;", { 123, 0 } },
+    { "&lcy;", { 1083, 0 } },
+    { "&ldca;", { 10550, 0 } },
+    { "&ldquo;", { 8220, 0 } },
+    { "&ldquor;", { 8222, 0 } },
+    { "&ldrdhar;", { 10599, 0 } },
+    { "&ldrushar;", { 10571, 0 } },
+    { "&ldsh;", { 8626, 0 } },
+    { "&le;", { 8804, 0 } },
+    { "&leftarrow;", { 8592, 0 } },
+    { "&leftarrowtail;", { 8610, 0 } },
+    { "&leftharpoondown;", { 8637, 0 } },
+    { "&leftharpoonup;", { 8636, 0 } },
+    { "&leftleftarrows;", { 8647, 0 } },
+    { "&leftrightarrow;", { 8596, 0 } },
+    { "&leftrightarrows;", { 8646, 0 } },
+    { "&leftrightharpoons;", { 8651, 0 } },
+    { "&leftrightsquigarrow;", { 8621, 0 } },
+    { "&leftthreetimes;", { 8907, 0 } },
+    { "&leg;", { 8922, 0 } },
+    { "&leq;", { 8804, 0 } },
+    { "&leqq;", { 8806, 0 } },
+    { "&leqslant;", { 10877, 0 } },
+    { "&les;", { 10877, 0 } },
+    { "&lescc;", { 10920, 0 } },
+    { "&lesdot;", { 10879, 0 } },
+    { "&lesdoto;", { 10881, 0 } },
+    { "&lesdotor;", { 10883, 0 } },
+    { "&lesg;", { 8922, 65024 } },
+    { "&lesges;", { 10899, 0 } },
+    { "&lessapprox;", { 10885, 0 } },
+    { "&lessdot;", { 8918, 0 } },
+    { "&lesseqgtr;", { 8922, 0 } },
+    { "&lesseqqgtr;", { 10891, 0 } },
+    { "&lessgtr;", { 8822, 0 } },
+    { "&lesssim;", { 8818, 0 } },
+    { "&lfisht;", { 10620, 0 } },
+    { "&lfloor;", { 8970, 0 } },
+    { "&lfr;", { 120105, 0 } },
+    { "&lg;", { 8822, 0 } },
+    { "&lgE;", { 10897, 0 } },
+    { "&lhard;", { 8637, 0 } },
+    { "&lharu;", { 8636, 0 } },
+    { "&lharul;", { 10602, 0 } },
+    { "&lhblk;", { 9604, 0 } },
+    { "&ljcy;", { 1113, 0 } },
+    { "&ll;", { 8810, 0 } },
+    { "&llarr;", { 8647, 0 } },
+    { "&llcorner;", { 8990, 0 } },
+    { "&llhard;", { 10603, 0 } },
+    { "&lltri;", { 9722, 0 } },
+    { "&lmidot;", { 320, 0 } },
+    { "&lmoust;", { 9136, 0 } },
+    { "&lmoustache;", { 9136, 0 } },
+    { "&lnE;", { 8808, 0 } },
+    { "&lnap;", { 10889, 0 } },
+    { "&lnapprox;", { 10889, 0 } },
+    { "&lne;", { 10887, 0 } },
+    { "&lneq;", { 10887, 0 } },
+    { "&lneqq;", { 8808, 0 } },
+    { "&lnsim;", { 8934, 0 } },
+    { "&loang;", { 10220, 0 } },
+    { "&loarr;", { 8701, 0 } },
+    { "&lobrk;", { 10214, 0 } },
+    { "&longleftarrow;", { 10229, 0 } },
+    { "&longleftrightarrow;", { 10231, 0 } },
+    { "&longmapsto;", { 10236, 0 } },
+    { "&longrightarrow;", { 10230, 0 } },
+    { "&looparrowleft;", { 8619, 0 } },
+    { "&looparrowright;", { 8620, 0 } },
+    { "&lopar;", { 10629, 0 } },
+    { "&lopf;", { 120157, 0 } },
+    { "&loplus;", { 10797, 0 } },
+    { "&lotimes;", { 10804, 0 } },
+    { "&lowast;", { 8727, 0 } },
+    { "&lowbar;", { 95, 0 } },
+    { "&loz;", { 9674, 0 } },
+    { "&lozenge;", { 9674, 0 } },
+    { "&lozf;", { 10731, 0 } },
+    { "&lpar;", { 40, 0 } },
+    { "&lparlt;", { 10643, 0 } },
+    { "&lrarr;", { 8646, 0 } },
+    { "&lrcorner;", { 8991, 0 } },
+    { "&lrhar;", { 8651, 0 } },
+    { "&lrhard;", { 10605, 0 } },
+    { "&lrm;", { 8206, 0 } },
+    { "&lrtri;", { 8895, 0 } },
+    { "&lsaquo;", { 8249, 0 } },
+    { "&lscr;", { 120001, 0 } },
+    { "&lsh;", { 8624, 0 } },
+    { "&lsim;", { 8818, 0 } },
+    { "&lsime;", { 10893, 0 } },
+    { "&lsimg;", { 10895, 0 } },
+    { "&lsqb;", { 91, 0 } },
+    { "&lsquo;", { 8216, 0 } },
+    { "&lsquor;", { 8218, 0 } },
+    { "&lstrok;", { 322, 0 } },
+    { "&lt;", { 60, 0 } },
+    { "&ltcc;", { 10918, 0 } },
+    { "&ltcir;", { 10873, 0 } },
+    { "&ltdot;", { 8918, 0 } },
+    { "&lthree;", { 8907, 0 } },
+    { "&ltimes;", { 8905, 0 } },
+    { "&ltlarr;", { 10614, 0 } },
+    { "&ltquest;", { 10875, 0 } },
+    { "&ltrPar;", { 10646, 0 } },
+    { "&ltri;", { 9667, 0 } },
+    { "&ltrie;", { 8884, 0 } },
+    { "&ltrif;", { 9666, 0 } },
+    { "&lurdshar;", { 10570, 0 } },
+    { "&luruhar;", { 10598, 0 } },
+    { "&lvertneqq;", { 8808, 65024 } },
+    { "&lvnE;", { 8808, 65024 } },
+    { "&mDDot;", { 8762, 0 } },
+    { "&macr;", { 175, 0 } },
+    { "&male;", { 9794, 0 } },
+    { "&malt;", { 10016, 0 } },
+    { "&maltese;", { 10016, 0 } },
+    { "&map;", { 8614, 0 } },
+    { "&mapsto;", { 8614, 0 } },
+    { "&mapstodown;", { 8615, 0 } },
+    { "&mapstoleft;", { 8612, 0 } },
+    { "&mapstoup;", { 8613, 0 } },
+    { "&marker;", { 9646, 0 } },
+    { "&mcomma;", { 10793, 0 } },
+    { "&mcy;", { 1084, 0 } },
+    { "&mdash;", { 8212, 0 } },
+    { "&measuredangle;", { 8737, 0 } },
+    { "&mfr;", { 120106, 0 } },
+    { "&mho;", { 8487, 0 } },
+    { "&micro;", { 181, 0 } },
+    { "&mid;", { 8739, 0 } },
+    { "&midast;", { 42, 0 } },
+    { "&midcir;", { 10992, 0 } },
+    { "&middot;", { 183, 0 } },
+    { "&minus;", { 8722, 0 } },
+    { "&minusb;", { 8863, 0 } },
+    { "&minusd;", { 8760, 0 } },
+    { "&minusdu;", { 10794, 0 } },
+    { "&mlcp;", { 10971, 0 } },
+    { "&mldr;", { 8230, 0 } },
+    { "&mnplus;", { 8723, 0 } },
+    { "&models;", { 8871, 0 } },
+    { "&mopf;", { 120158, 0 } },
+    { "&mp;", { 8723, 0 } },
+    { "&mscr;", { 120002, 0 } },
+    { "&mstpos;", { 8766, 0 } },
+    { "&mu;", { 956, 0 } },
+    { "&multimap;", { 8888, 0 } },
+    { "&mumap;", { 8888, 0 } },
+    { "&nGg;", { 8921, 824 } },
+    { "&nGt;", { 8811, 8402 } },
+    { "&nGtv;", { 8811, 824 } },
+    { "&nLeftarrow;", { 8653, 0 } },
+    { "&nLeftrightarrow;", { 8654, 0 } },
+    { "&nLl;", { 8920, 824 } },
+    { "&nLt;", { 8810, 8402 } },
+    { "&nLtv;", { 8810, 824 } },
+    { "&nRightarrow;", { 8655, 0 } },
+    { "&nVDash;", { 8879, 0 } },
+    { "&nVdash;", { 8878, 0 } },
+    { "&nabla;", { 8711, 0 } },
+    { "&nacute;", { 324, 0 } },
+    { "&nang;", { 8736, 8402 } },
+    { "&nap;", { 8777, 0 } },
+    { "&napE;", { 10864, 824 } },
+    { "&napid;", { 8779, 824 } },
+    { "&napos;", { 329, 0 } },
+    { "&napprox;", { 8777, 0 } },
+    { "&natur;", { 9838, 0 } },
+    { "&natural;", { 9838, 0 } },
+    { "&naturals;", { 8469, 0 } },
+    { "&nbsp;", { 160, 0 } },
+    { "&nbump;", { 8782, 824 } },
+    { "&nbumpe;", { 8783, 824 } },
+    { "&ncap;", { 10819, 0 } },
+    { "&ncaron;", { 328, 0 } },
+    { "&ncedil;", { 326, 0 } },
+    { "&ncong;", { 8775, 0 } },
+    { "&ncongdot;", { 10861, 824 } },
+    { "&ncup;", { 10818, 0 } },
+    { "&ncy;", { 1085, 0 } },
+    { "&ndash;", { 8211, 0 } },
+    { "&ne;", { 8800, 0 } },
+    { "&neArr;", { 8663, 0 } },
+    { "&nearhk;", { 10532, 0 } },
+    { "&nearr;", { 8599, 0 } },
+    { "&nearrow;", { 8599, 0 } },
+    { "&nedot;", { 8784, 824 } },
+    { "&nequiv;", { 8802, 0 } },
+    { "&nesear;", { 10536, 0 } },
+    { "&nesim;", { 8770, 824 } },
+    { "&nexist;", { 8708, 0 } },
+    { "&nexists;", { 8708, 0 } },
+    { "&nfr;", { 120107, 0 } },
+    { "&ngE;", { 8807, 824 } },
+    { "&nge;", { 8817, 0 } },
+    { "&ngeq;", { 8817, 0 } },
+    { "&ngeqq;", { 8807, 824 } },
+    { "&ngeqslant;", { 10878, 824 } },
+    { "&nges;", { 10878, 824 } },
+    { "&ngsim;", { 8821, 0 } },
+    { "&ngt;", { 8815, 0 } },
+    { "&ngtr;", { 8815, 0 } },
+    { "&nhArr;", { 8654, 0 } },
+    { "&nharr;", { 8622, 0 } },
+    { "&nhpar;", { 10994, 0 } },
+    { "&ni;", { 8715, 0 } },
+    { "&nis;", { 8956, 0 } },
+    { "&nisd;", { 8954, 0 } },
+    { "&niv;", { 8715, 0 } },
+    { "&njcy;", { 1114, 0 } },
+    { "&nlArr;", { 8653, 0 } },
+    { "&nlE;", { 8806, 824 } },
+    { "&nlarr;", { 8602, 0 } },
+    { "&nldr;", { 8229, 0 } },
+    { "&nle;", { 8816, 0 } },
+    { "&nleftarrow;", { 8602, 0 } },
+    { "&nleftrightarrow;", { 8622, 0 } },
+    { "&nleq;", { 8816, 0 } },
+    { "&nleqq;", { 8806, 824 } },
+    { "&nleqslant;", { 10877, 824 } },
+    { "&nles;", { 10877, 824 } },
+    { "&nless;", { 8814, 0 } },
+    { "&nlsim;", { 8820, 0 } },
+    { "&nlt;", { 8814, 0 } },
+    { "&nltri;", { 8938, 0 } },
+    { "&nltrie;", { 8940, 0 } },
+    { "&nmid;", { 8740, 0 } },
+    { "&nopf;", { 120159, 0 } },
+    { "&not;", { 172, 0 } },
+    { "&notin;", { 8713, 0 } },
+    { "&notinE;", { 8953, 824 } },
+    { "&notindot;", { 8949, 824 } },
+    { "&notinva;", { 8713, 0 } },
+    { "&notinvb;", { 8951, 0 } },
+    { "&notinvc;", { 8950, 0 } },
+    { "&notni;", { 8716, 0 } },
+    { "&notniva;", { 8716, 0 } },
+    { "&notnivb;", { 8958, 0 } },
+    { "&notnivc;", { 8957, 0 } },
+    { "&npar;", { 8742, 0 } },
+    { "&nparallel;", { 8742, 0 } },
+    { "&nparsl;", { 11005, 8421 } },
+    { "&npart;", { 8706, 824 } },
+    { "&npolint;", { 10772, 0 } },
+    { "&npr;", { 8832, 0 } },
+    { "&nprcue;", { 8928, 0 } },
+    { "&npre;", { 10927, 824 } },
+    { "&nprec;", { 8832, 0 } },
+    { "&npreceq;", { 10927, 824 } },
+    { "&nrArr;", { 8655, 0 } },
+    { "&nrarr;", { 8603, 0 } },
+    { "&nrarrc;", { 10547, 824 } },
+    { "&nrarrw;", { 8605, 824 } },
+    { "&nrightarrow;", { 8603, 0 } },
+    { "&nrtri;", { 8939, 0 } },
+    { "&nrtrie;", { 8941, 0 } },
+    { "&nsc;", { 8833, 0 } },
+    { "&nsccue;", { 8929, 0 } },
+    { "&nsce;", { 10928, 824 } },
+    { "&nscr;", { 120003, 0 } },
+    { "&nshortmid;", { 8740, 0 } },
+    { "&nshortparallel;", { 8742, 0 } },
+    { "&nsim;", { 8769, 0 } },
+    { "&nsime;", { 8772, 0 } },
+    { "&nsimeq;", { 8772, 0 } },
+    { "&nsmid;", { 8740, 0 } },
+    { "&nspar;", { 8742, 0 } },
+    { "&nsqsube;", { 8930, 0 } },
+    { "&nsqsupe;", { 8931, 0 } },
+    { "&nsub;", { 8836, 0 } },
+    { "&nsubE;", { 10949, 824 } },
+    { "&nsube;", { 8840, 0 } },
+    { "&nsubset;", { 8834, 8402 } },
+    { "&nsubseteq;", { 8840, 0 } },
+    { "&nsubseteqq;", { 10949, 824 } },
+    { "&nsucc;", { 8833, 0 } },
+    { "&nsucceq;", { 10928, 824 } },
+    { "&nsup;", { 8837, 0 } },
+    { "&nsupE;", { 10950, 824 } },
+    { "&nsupe;", { 8841, 0 } },
+    { "&nsupset;", { 8835, 8402 } },
+    { "&nsupseteq;", { 8841, 0 } },
+    { "&nsupseteqq;", { 10950, 824 } },
+    { "&ntgl;", { 8825, 0 } },
+    { "&ntilde;", { 241, 0 } },
+    { "&ntlg;", { 8824, 0 } },
+    { "&ntriangleleft;", { 8938, 0 } },
+    { "&ntrianglelefteq;", { 8940, 0 } },
+    { "&ntriangleright;", { 8939, 0 } },
+    { "&ntrianglerighteq;", { 8941, 0 } },
+    { "&nu;", { 957, 0 } },
+    { "&num;", { 35, 0 } },
+    { "&numero;", { 8470, 0 } },
+    { "&numsp;", { 8199, 0 } },
+    { "&nvDash;", { 8877, 0 } },
+    { "&nvHarr;", { 10500, 0 } },
+    { "&nvap;", { 8781, 8402 } },
+    { "&nvdash;", { 8876, 0 } },
+    { "&nvge;", { 8805, 8402 } },
+    { "&nvgt;", { 62, 8402 } },
+    { "&nvinfin;", { 10718, 0 } },
+    { "&nvlArr;", { 10498, 0 } },
+    { "&nvle;", { 8804, 8402 } },
+    { "&nvlt;", { 60, 8402 } },
+    { "&nvltrie;", { 8884, 8402 } },
+    { "&nvrArr;", { 10499, 0 } },
+    { "&nvrtrie;", { 8885, 8402 } },
+    { "&nvsim;", { 8764, 8402 } },
+    { "&nwArr;", { 8662, 0 } },
+    { "&nwarhk;", { 10531, 0 } },
+    { "&nwarr;", { 8598, 0 } },
+    { "&nwarrow;", { 8598, 0 } },
+    { "&nwnear;", { 10535, 0 } },
+    { "&oS;", { 9416, 0 } },
+    { "&oacute;", { 243, 0 } },
+    { "&oast;", { 8859, 0 } },
+    { "&ocir;", { 8858, 0 } },
+    { "&ocirc;", { 244, 0 } },
+    { "&ocy;", { 1086, 0 } },
+    { "&odash;", { 8861, 0 } },
+    { "&odblac;", { 337, 0 } },
+    { "&odiv;", { 10808, 0 } },
+    { "&odot;", { 8857, 0 } },
+    { "&odsold;", { 10684, 0 } },
+    { "&oelig;", { 339, 0 } },
+    { "&ofcir;", { 10687, 0 } },
+    { "&ofr;", { 120108, 0 } },
+    { "&ogon;", { 731, 0 } },
+    { "&ograve;", { 242, 0 } },
+    { "&ogt;", { 10689, 0 } },
+    { "&ohbar;", { 10677, 0 } },
+    { "&ohm;", { 937, 0 } },
+    { "&oint;", { 8750, 0 } },
+    { "&olarr;", { 8634, 0 } },
+    { "&olcir;", { 10686, 0 } },
+    { "&olcross;", { 10683, 0 } },
+    { "&oline;", { 8254, 0 } },
+    { "&olt;", { 10688, 0 } },
+    { "&omacr;", { 333, 0 } },
+    { "&omega;", { 969, 0 } },
+    { "&omicron;", { 959, 0 } },
+    { "&omid;", { 10678, 0 } },
+    { "&ominus;", { 8854, 0 } },
+    { "&oopf;", { 120160, 0 } },
+    { "&opar;", { 10679, 0 } },
+    { "&operp;", { 10681, 0 } },
+    { "&oplus;", { 8853, 0 } },
+    { "&or;", { 8744, 0 } },
+    { "&orarr;", { 8635, 0 } },
+    { "&ord;", { 10845, 0 } },
+    { "&order;", { 8500, 0 } },
+    { "&orderof;", { 8500, 0 } },
+    { "&ordf;", { 170, 0 } },
+    { "&ordm;", { 186, 0 } },
+    { "&origof;", { 8886, 0 } },
+    { "&oror;", { 10838, 0 } },
+    { "&orslope;", { 10839, 0 } },
+    { "&orv;", { 10843, 0 } },
+    { "&oscr;", { 8500, 0 } },
+    { "&oslash;", { 248, 0 } },
+    { "&osol;", { 8856, 0 } },
+    { "&otilde;", { 245, 0 } },
+    { "&otimes;", { 8855, 0 } },
+    { "&otimesas;", { 10806, 0 } },
+    { "&ouml;", { 246, 0 } },
+    { "&ovbar;", { 9021, 0 } },
+    { "&par;", { 8741, 0 } },
+    { "&para;", { 182, 0 } },
+    { "&parallel;", { 8741, 0 } },
+    { "&parsim;", { 10995, 0 } },
+    { "&parsl;", { 11005, 0 } },
+    { "&part;", { 8706, 0 } },
+    { "&pcy;", { 1087, 0 } },
+    { "&percnt;", { 37, 0 } },
+    { "&period;", { 46, 0 } },
+    { "&permil;", { 8240, 0 } },
+    { "&perp;", { 8869, 0 } },
+    { "&pertenk;", { 8241, 0 } },
+    { "&pfr;", { 120109, 0 } },
+    { "&phi;", { 966, 0 } },
+    { "&phiv;", { 981, 0 } },
+    { "&phmmat;", { 8499, 0 } },
+    { "&phone;", { 9742, 0 } },
+    { "&pi;", { 960, 0 } },
+    { "&pitchfork;", { 8916, 0 } },
+    { "&piv;", { 982, 0 } },
+    { "&planck;", { 8463, 0 } },
+    { "&planckh;", { 8462, 0 } },
+    { "&plankv;", { 8463, 0 } },
+    { "&plus;", { 43, 0 } },
+    { "&plusacir;", { 10787, 0 } },
+    { "&plusb;", { 8862, 0 } },
+    { "&pluscir;", { 10786, 0 } },
+    { "&plusdo;", { 8724, 0 } },
+    { "&plusdu;", { 10789, 0 } },
+    { "&pluse;", { 10866, 0 } },
+    { "&plusmn;", { 177, 0 } },
+    { "&plussim;", { 10790, 0 } },
+    { "&plustwo;", { 10791, 0 } },
+    { "&pm;", { 177, 0 } },
+    { "&pointint;", { 10773, 0 } },
+    { "&popf;", { 120161, 0 } },
+    { "&pound;", { 163, 0 } },
+    { "&pr;", { 8826, 0 } },
+    { "&prE;", { 10931, 0 } },
+    { "&prap;", { 10935, 0 } },
+    { "&prcue;", { 8828, 0 } },
+    { "&pre;", { 10927, 0 } },
+    { "&prec;", { 8826, 0 } },
+    { "&precapprox;", { 10935, 0 } },
+    { "&preccurlyeq;", { 8828, 0 } },
+    { "&preceq;", { 10927, 0 } },
+    { "&precnapprox;", { 10937, 0 } },
+    { "&precneqq;", { 10933, 0 } },
+    { "&precnsim;", { 8936, 0 } },
+    { "&precsim;", { 8830, 0 } },
+    { "&prime;", { 8242, 0 } },
+    { "&primes;", { 8473, 0 } },
+    { "&prnE;", { 10933, 0 } },
+    { "&prnap;", { 10937, 0 } },
+    { "&prnsim;", { 8936, 0 } },
+    { "&prod;", { 8719, 0 } },
+    { "&profalar;", { 9006, 0 } },
+    { "&profline;", { 8978, 0 } },
+    { "&profsurf;", { 8979, 0 } },
+    { "&prop;", { 8733, 0 } },
+    { "&propto;", { 8733, 0 } },
+    { "&prsim;", { 8830, 0 } },
+    { "&prurel;", { 8880, 0 } },
+    { "&pscr;", { 120005, 0 } },
+    { "&psi;", { 968, 0 } },
+    { "&puncsp;", { 8200, 0 } },
+    { "&qfr;", { 120110, 0 } },
+    { "&qint;", { 10764, 0 } },
+    { "&qopf;", { 120162, 0 } },
+    { "&qprime;", { 8279, 0 } },
+    { "&qscr;", { 120006, 0 } },
+    { "&quaternions;", { 8461, 0 } },
+    { "&quatint;", { 10774, 0 } },
+    { "&quest;", { 63, 0 } },
+    { "&questeq;", { 8799, 0 } },
+    { "&quot;", { 34, 0 } },
+    { "&rAarr;", { 8667, 0 } },
+    { "&rArr;", { 8658, 0 } },
+    { "&rAtail;", { 10524, 0 } },
+    { "&rBarr;", { 10511, 0 } },
+    { "&rHar;", { 10596, 0 } },
+    { "&race;", { 8765, 817 } },
+    { "&racute;", { 341, 0 } },
+    { "&radic;", { 8730, 0 } },
+    { "&raemptyv;", { 10675, 0 } },
+    { "&rang;", { 10217, 0 } },
+    { "&rangd;", { 10642, 0 } },
+    { "&range;", { 10661, 0 } },
+    { "&rangle;", { 10217, 0 } },
+    { "&raquo;", { 187, 0 } },
+    { "&rarr;", { 8594, 0 } },
+    { "&rarrap;", { 10613, 0 } },
+    { "&rarrb;", { 8677, 0 } },
+    { "&rarrbfs;", { 10528, 0 } },
+    { "&rarrc;", { 10547, 0 } },
+    { "&rarrfs;", { 10526, 0 } },
+    { "&rarrhk;", { 8618, 0 } },
+    { "&rarrlp;", { 8620, 0 } },
+    { "&rarrpl;", { 10565, 0 } },
+    { "&rarrsim;", { 10612, 0 } },
+    { "&rarrtl;", { 8611, 0 } },
+    { "&rarrw;", { 8605, 0 } },
+    { "&ratail;", { 10522, 0 } },
+    { "&ratio;", { 8758, 0 } },
+    { "&rationals;", { 8474, 0 } },
+    { "&rbarr;", { 10509, 0 } },
+    { "&rbbrk;", { 10099, 0 } },
+    { "&rbrace;", { 125, 0 } },
+    { "&rbrack;", { 93, 0 } },
+    { "&rbrke;", { 10636, 0 } },
+    { "&rbrksld;", { 10638, 0 } },
+    { "&rbrkslu;", { 10640, 0 } },
+    { "&rcaron;", { 345, 0 } },
+    { "&rcedil;", { 343, 0 } },
+    { "&rceil;", { 8969, 0 } },
+    { "&rcub;", { 125, 0 } },
+    { "&rcy;", { 1088, 0 } },
+    { "&rdca;", { 10551, 0 } },
+    { "&rdldhar;", { 10601, 0 } },
+    { "&rdquo;", { 8221, 0 } },
+    { "&rdquor;", { 8221, 0 } },
+    { "&rdsh;", { 8627, 0 } },
+    { "&real;", { 8476, 0 } },
+    { "&realine;", { 8475, 0 } },
+    { "&realpart;", { 8476, 0 } },
+    { "&reals;", { 8477, 0 } },
+    { "&rect;", { 9645, 0 } },
+    { "&reg;", { 174, 0 } },
+    { "&rfisht;", { 10621, 0 } },
+    { "&rfloor;", { 8971, 0 } },
+    { "&rfr;", { 120111, 0 } },
+    { "&rhard;", { 8641, 0 } },
+    { "&rharu;", { 8640, 0 } },
+    { "&rharul;", { 10604, 0 } },
+    { "&rho;", { 961, 0 } },
+    { "&rhov;", { 1009, 0 } },
+    { "&rightarrow;", { 8594, 0 } },
+    { "&rightarrowtail;", { 8611, 0 } },
+    { "&rightharpoondown;", { 8641, 0 } },
+    { "&rightharpoonup;", { 8640, 0 } },
+    { "&rightleftarrows;", { 8644, 0 } },
+    { "&rightleftharpoons;", { 8652, 0 } },
+    { "&rightrightarrows;", { 8649, 0 } },
+    { "&rightsquigarrow;", { 8605, 0 } },
+    { "&rightthreetimes;", { 8908, 0 } },
+    { "&ring;", { 730, 0 } },
+    { "&risingdotseq;", { 8787, 0 } },
+    { "&rlarr;", { 8644, 0 } },
+    { "&rlhar;", { 8652, 0 } },
+    { "&rlm;", { 8207, 0 } },
+    { "&rmoust;", { 9137, 0 } },
+    { "&rmoustache;", { 9137, 0 } },
+    { "&rnmid;", { 10990, 0 } },
+    { "&roang;", { 10221, 0 } },
+    { "&roarr;", { 8702, 0 } },
+    { "&robrk;", { 10215, 0 } },
+    { "&ropar;", { 10630, 0 } },
+    { "&ropf;", { 120163, 0 } },
+    { "&roplus;", { 10798, 0 } },
+    { "&rotimes;", { 10805, 0 } },
+    { "&rpar;", { 41, 0 } },
+    { "&rpargt;", { 10644, 0 } },
+    { "&rppolint;", { 10770, 0 } },
+    { "&rrarr;", { 8649, 0 } },
+    { "&rsaquo;", { 8250, 0 } },
+    { "&rscr;", { 120007, 0 } },
+    { "&rsh;", { 8625, 0 } },
+    { "&rsqb;", { 93, 0 } },
+    { "&rsquo;", { 8217, 0 } },
+    { "&rsquor;", { 8217, 0 } },
+    { "&rthree;", { 8908, 0 } },
+    { "&rtimes;", { 8906, 0 } },
+    { "&rtri;", { 9657, 0 } },
+    { "&rtrie;", { 8885, 0 } },
+    { "&rtrif;", { 9656, 0 } },
+    { "&rtriltri;", { 10702, 0 } },
+    { "&ruluhar;", { 10600, 0 } },
+    { "&rx;", { 8478, 0 } },
+    { "&sacute;", { 347, 0 } },
+    { "&sbquo;", { 8218, 0 } },
+    { "&sc;", { 8827, 0 } },
+    { "&scE;", { 10932, 0 } },
+    { "&scap;", { 10936, 0 } },
+    { "&scaron;", { 353, 0 } },
+    { "&sccue;", { 8829, 0 } },
+    { "&sce;", { 10928, 0 } },
+    { "&scedil;", { 351, 0 } },
+    { "&scirc;", { 349, 0 } },
+    { "&scnE;", { 10934, 0 } },
+    { "&scnap;", { 10938, 0 } },
+    { "&scnsim;", { 8937, 0 } },
+    { "&scpolint;", { 10771, 0 } },
+    { "&scsim;", { 8831, 0 } },
+    { "&scy;", { 1089, 0 } },
+    { "&sdot;", { 8901, 0 } },
+    { "&sdotb;", { 8865, 0 } },
+    { "&sdote;", { 10854, 0 } },
+    { "&seArr;", { 8664, 0 } },
+    { "&searhk;", { 10533, 0 } },
+    { "&searr;", { 8600, 0 } },
+    { "&searrow;", { 8600, 0 } },
+    { "&sect;", { 167, 0 } },
+    { "&semi;", { 59, 0 } },
+    { "&seswar;", { 10537, 0 } },
+    { "&setminus;", { 8726, 0 } },
+    { "&setmn;", { 8726, 0 } },
+    { "&sext;", { 10038, 0 } },
+    { "&sfr;", { 120112, 0 } },
+    { "&sfrown;", { 8994, 0 } },
+    { "&sharp;", { 9839, 0 } },
+    { "&shchcy;", { 1097, 0 } },
+    { "&shcy;", { 1096, 0 } },
+    { "&shortmid;", { 8739, 0 } },
+    { "&shortparallel;", { 8741, 0 } },
+    { "&shy;", { 173, 0 } },
+    { "&sigma;", { 963, 0 } },
+    { "&sigmaf;", { 962, 0 } },
+    { "&sigmav;", { 962, 0 } },
+    { "&sim;", { 8764, 0 } },
+    { "&simdot;", { 10858, 0 } },
+    { "&sime;", { 8771, 0 } },
+    { "&simeq;", { 8771, 0 } },
+    { "&simg;", { 10910, 0 } },
+    { "&simgE;", { 10912, 0 } },
+    { "&siml;", { 10909, 0 } },
+    { "&simlE;", { 10911, 0 } },
+    { "&simne;", { 8774, 0 } },
+    { "&simplus;", { 10788, 0 } },
+    { "&simrarr;", { 10610, 0 } },
+    { "&slarr;", { 8592, 0 } },
+    { "&smallsetminus;", { 8726, 0 } },
+    { "&smashp;", { 10803, 0 } },
+    { "&smeparsl;", { 10724, 0 } },
+    { "&smid;", { 8739, 0 } },
+    { "&smile;", { 8995, 0 } },
+    { "&smt;", { 10922, 0 } },
+    { "&smte;", { 10924, 0 } },
+    { "&smtes;", { 10924, 65024 } },
+    { "&softcy;", { 1100, 0 } },
+    { "&sol;", { 47, 0 } },
+    { "&solb;", { 10692, 0 } },
+    { "&solbar;", { 9023, 0 } },
+    { "&sopf;", { 120164, 0 } },
+    { "&spades;", { 9824, 0 } },
+    { "&spadesuit;", { 9824, 0 } },
+    { "&spar;", { 8741, 0 } },
+    { "&sqcap;", { 8851, 0 } },
+    { "&sqcaps;", { 8851, 65024 } },
+    { "&sqcup;", { 8852, 0 } },
+    { "&sqcups;", { 8852, 65024 } },
+    { "&sqsub;", { 8847, 0 } },
+    { "&sqsube;", { 8849, 0 } },
+    { "&sqsubset;", { 8847, 0 } },
+    { "&sqsubseteq;", { 8849, 0 } },
+    { "&sqsup;", { 8848, 0 } },
+    { "&sqsupe;", { 8850, 0 } },
+    { "&sqsupset;", { 8848, 0 } },
+    { "&sqsupseteq;", { 8850, 0 } },
+    { "&squ;", { 9633, 0 } },
+    { "&square;", { 9633, 0 } },
+    { "&squarf;", { 9642, 0 } },
+    { "&squf;", { 9642, 0 } },
+    { "&srarr;", { 8594, 0 } },
+    { "&sscr;", { 120008, 0 } },
+    { "&ssetmn;", { 8726, 0 } },
+    { "&ssmile;", { 8995, 0 } },
+    { "&sstarf;", { 8902, 0 } },
+    { "&star;", { 9734, 0 } },
+    { "&starf;", { 9733, 0 } },
+    { "&straightepsilon;", { 1013, 0 } },
+    { "&straightphi;", { 981, 0 } },
+    { "&strns;", { 175, 0 } },
+    { "&sub;", { 8834, 0 } },
+    { "&subE;", { 10949, 0 } },
+    { "&subdot;", { 10941, 0 } },
+    { "&sube;", { 8838, 0 } },
+    { "&subedot;", { 10947, 0 } },
+    { "&submult;", { 10945, 0 } },
+    { "&subnE;", { 10955, 0 } },
+    { "&subne;", { 8842, 0 } },
+    { "&subplus;", { 10943, 0 } },
+    { "&subrarr;", { 10617, 0 } },
+    { "&subset;", { 8834, 0 } },
+    { "&subseteq;", { 8838, 0 } },
+    { "&subseteqq;", { 10949, 0 } },
+    { "&subsetneq;", { 8842, 0 } },
+    { "&subsetneqq;", { 10955, 0 } },
+    { "&subsim;", { 10951, 0 } },
+    { "&subsub;", { 10965, 0 } },
+    { "&subsup;", { 10963, 0 } },
+    { "&succ;", { 8827, 0 } },
+    { "&succapprox;", { 10936, 0 } },
+    { "&succcurlyeq;", { 8829, 0 } },
+    { "&succeq;", { 10928, 0 } },
+    { "&succnapprox;", { 10938, 0 } },
+    { "&succneqq;", { 10934, 0 } },
+    { "&succnsim;", { 8937, 0 } },
+    { "&succsim;", { 8831, 0 } },
+    { "&sum;", { 8721, 0 } },
+    { "&sung;", { 9834, 0 } },
+    { "&sup1", { 185, 0 } },
+    { "&sup1;", { 185, 0 } },
+    { "&sup2", { 178, 0 } },
+    { "&sup2;", { 178, 0 } },
+    { "&sup3", { 179, 0 } },
+    { "&sup3;", { 179, 0 } },
+    { "&sup;", { 8835, 0 } },
+    { "&supE;", { 10950, 0 } },
+    { "&supdot;", { 10942, 0 } },
+    { "&supdsub;", { 10968, 0 } },
+    { "&supe;", { 8839, 0 } },
+    { "&supedot;", { 10948, 0 } },
+    { "&suphsol;", { 10185, 0 } },
+    { "&suphsub;", { 10967, 0 } },
+    { "&suplarr;", { 10619, 0 } },
+    { "&supmult;", { 10946, 0 } },
+    { "&supnE;", { 10956, 0 } },
+    { "&supne;", { 8843, 0 } },
+    { "&supplus;", { 10944, 0 } },
+    { "&supset;", { 8835, 0 } },
+    { "&supseteq;", { 8839, 0 } },
+    { "&supseteqq;", { 10950, 0 } },
+    { "&supsetneq;", { 8843, 0 } },
+    { "&supsetneqq;", { 10956, 0 } },
+    { "&supsim;", { 10952, 0 } },
+    { "&supsub;", { 10964, 0 } },
+    { "&supsup;", { 10966, 0 } },
+    { "&swArr;", { 8665, 0 } },
+    { "&swarhk;", { 10534, 0 } },
+    { "&swarr;", { 8601, 0 } },
+    { "&swarrow;", { 8601, 0 } },
+    { "&swnwar;", { 10538, 0 } },
+    { "&szlig;", { 223, 0 } },
+    { "&target;", { 8982, 0 } },
+    { "&tau;", { 964, 0 } },
+    { "&tbrk;", { 9140, 0 } },
+    { "&tcaron;", { 357, 0 } },
+    { "&tcedil;", { 355, 0 } },
+    { "&tcy;", { 1090, 0 } },
+    { "&tdot;", { 8411, 0 } },
+    { "&telrec;", { 8981, 0 } },
+    { "&tfr;", { 120113, 0 } },
+    { "&there4;", { 8756, 0 } },
+    { "&therefore;", { 8756, 0 } },
+    { "&theta;", { 952, 0 } },
+    { "&thetasym;", { 977, 0 } },
+    { "&thetav;", { 977, 0 } },
+    { "&thickapprox;", { 8776, 0 } },
+    { "&thicksim;", { 8764, 0 } },
+    { "&thinsp;", { 8201, 0 } },
+    { "&thkap;", { 8776, 0 } },
+    { "&thksim;", { 8764, 0 } },
+    { "&thorn;", { 254, 0 } },
+    { "&tilde;", { 732, 0 } },
+    { "&times;", { 215, 0 } },
+    { "&timesb;", { 8864, 0 } },
+    { "&timesbar;", { 10801, 0 } },
+    { "&timesd;", { 10800, 0 } },
+    { "&tint;", { 8749, 0 } },
+    { "&toea;", { 10536, 0 } },
+    { "&top;", { 8868, 0 } },
+    { "&topbot;", { 9014, 0 } },
+    { "&topcir;", { 10993, 0 } },
+    { "&topf;", { 120165, 0 } },
+    { "&topfork;", { 10970, 0 } },
+    { "&tosa;", { 10537, 0 } },
+    { "&tprime;", { 8244, 0 } },
+    { "&trade;", { 8482, 0 } },
+    { "&triangle;", { 9653, 0 } },
+    { "&triangledown;", { 9663, 0 } },
+    { "&triangleleft;", { 9667, 0 } },
+    { "&trianglelefteq;", { 8884, 0 } },
+    { "&triangleq;", { 8796, 0 } },
+    { "&triangleright;", { 9657, 0 } },
+    { "&trianglerighteq;", { 8885, 0 } },
+    { "&tridot;", { 9708, 0 } },
+    { "&trie;", { 8796, 0 } },
+    { "&triminus;", { 10810, 0 } },
+    { "&triplus;", { 10809, 0 } },
+    { "&trisb;", { 10701, 0 } },
+    { "&tritime;", { 10811, 0 } },
+    { "&trpezium;", { 9186, 0 } },
+    { "&tscr;", { 120009, 0 } },
+    { "&tscy;", { 1094, 0 } },
+    { "&tshcy;", { 1115, 0 } },
+    { "&tstrok;", { 359, 0 } },
+    { "&twixt;", { 8812, 0 } },
+    { "&twoheadleftarrow;", { 8606, 0 } },
+    { "&twoheadrightarrow;", { 8608, 0 } },
+    { "&uArr;", { 8657, 0 } },
+    { "&uHar;", { 10595, 0 } },
+    { "&uacute;", { 250, 0 } },
+    { "&uarr;", { 8593, 0 } },
+    { "&ubrcy;", { 1118, 0 } },
+    { "&ubreve;", { 365, 0 } },
+    { "&ucirc;", { 251, 0 } },
+    { "&ucy;", { 1091, 0 } },
+    { "&udarr;", { 8645, 0 } },
+    { "&udblac;", { 369, 0 } },
+    { "&udhar;", { 10606, 0 } },
+    { "&ufisht;", { 10622, 0 } },
+    { "&ufr;", { 120114, 0 } },
+    { "&ugrave;", { 249, 0 } },
+    { "&uharl;", { 8639, 0 } },
+    { "&uharr;", { 8638, 0 } },
+    { "&uhblk;", { 9600, 0 } },
+    { "&ulcorn;", { 8988, 0 } },
+    { "&ulcorner;", { 8988, 0 } },
+    { "&ulcrop;", { 8975, 0 } },
+    { "&ultri;", { 9720, 0 } },
+    { "&umacr;", { 363, 0 } },
+    { "&uml;", { 168, 0 } },
+    { "&uogon;", { 371, 0 } },
+    { "&uopf;", { 120166, 0 } },
+    { "&uparrow;", { 8593, 0 } },
+    { "&updownarrow;", { 8597, 0 } },
+    { "&upharpoonleft;", { 8639, 0 } },
+    { "&upharpoonright;", { 8638, 0 } },
+    { "&uplus;", { 8846, 0 } },
+    { "&upsi;", { 965, 0 } },
+    { "&upsih;", { 978, 0 } },
+    { "&upsilon;", { 965, 0 } },
+    { "&upuparrows;", { 8648, 0 } },
+    { "&urcorn;", { 8989, 0 } },
+    { "&urcorner;", { 8989, 0 } },
+    { "&urcrop;", { 8974, 0 } },
+    { "&uring;", { 367, 0 } },
+    { "&urtri;", { 9721, 0 } },
+    { "&uscr;", { 120010, 0 } },
+    { "&utdot;", { 8944, 0 } },
+    { "&utilde;", { 361, 0 } },
+    { "&utri;", { 9653, 0 } },
+    { "&utrif;", { 9652, 0 } },
+    { "&uuarr;", { 8648, 0 } },
+    { "&uuml;", { 252, 0 } },
+    { "&uwangle;", { 10663, 0 } },
+    { "&vArr;", { 8661, 0 } },
+    { "&vBar;", { 10984, 0 } },
+    { "&vBarv;", { 10985, 0 } },
+    { "&vDash;", { 8872, 0 } },
+    { "&vangrt;", { 10652, 0 } },
+    { "&varepsilon;", { 1013, 0 } },
+    { "&varkappa;", { 1008, 0 } },
+    { "&varnothing;", { 8709, 0 } },
+    { "&varphi;", { 981, 0 } },
+    { "&varpi;", { 982, 0 } },
+    { "&varpropto;", { 8733, 0 } },
+    { "&varr;", { 8597, 0 } },
+    { "&varrho;", { 1009, 0 } },
+    { "&varsigma;", { 962, 0 } },
+    { "&varsubsetneq;", { 8842, 65024 } },
+    { "&varsubsetneqq;", { 10955, 65024 } },
+    { "&varsupsetneq;", { 8843, 65024 } },
+    { "&varsupsetneqq;", { 10956, 65024 } },
+    { "&vartheta;", { 977, 0 } },
+    { "&vartriangleleft;", { 8882, 0 } },
+    { "&vartriangleright;", { 8883, 0 } },
+    { "&vcy;", { 1074, 0 } },
+    { "&vdash;", { 8866, 0 } },
+    { "&vee;", { 8744, 0 } },
+    { "&veebar;", { 8891, 0 } },
+    { "&veeeq;", { 8794, 0 } },
+    { "&vellip;", { 8942, 0 } },
+    { "&verbar;", { 124, 0 } },
+    { "&vert;", { 124, 0 } },
+    { "&vfr;", { 120115, 0 } },
+    { "&vltri;", { 8882, 0 } },
+    { "&vnsub;", { 8834, 8402 } },
+    { "&vnsup;", { 8835, 8402 } },
+    { "&vopf;", { 120167, 0 } },
+    { "&vprop;", { 8733, 0 } },
+    { "&vrtri;", { 8883, 0 } },
+    { "&vscr;", { 120011, 0 } },
+    { "&vsubnE;", { 10955, 65024 } },
+    { "&vsubne;", { 8842, 65024 } },
+    { "&vsupnE;", { 10956, 65024 } },
+    { "&vsupne;", { 8843, 65024 } },
+    { "&vzigzag;", { 10650, 0 } },
+    { "&wcirc;", { 373, 0 } },
+    { "&wedbar;", { 10847, 0 } },
+    { "&wedge;", { 8743, 0 } },
+    { "&wedgeq;", { 8793, 0 } },
+    { "&weierp;", { 8472, 0 } },
+    { "&wfr;", { 120116, 0 } },
+    { "&wopf;", { 120168, 0 } },
+    { "&wp;", { 8472, 0 } },
+    { "&wr;", { 8768, 0 } },
+    { "&wreath;", { 8768, 0 } },
+    { "&wscr;", { 120012, 0 } },
+    { "&xcap;", { 8898, 0 } },
+    { "&xcirc;", { 9711, 0 } },
+    { "&xcup;", { 8899, 0 } },
+    { "&xdtri;", { 9661, 0 } },
+    { "&xfr;", { 120117, 0 } },
+    { "&xhArr;", { 10234, 0 } },
+    { "&xharr;", { 10231, 0 } },
+    { "&xi;", { 958, 0 } },
+    { "&xlArr;", { 10232, 0 } },
+    { "&xlarr;", { 10229, 0 } },
+    { "&xmap;", { 10236, 0 } },
+    { "&xnis;", { 8955, 0 } },
+    { "&xodot;", { 10752, 0 } },
+    { "&xopf;", { 120169, 0 } },
+    { "&xoplus;", { 10753, 0 } },
+    { "&xotime;", { 10754, 0 } },
+    { "&xrArr;", { 10233, 0 } },
+    { "&xrarr;", { 10230, 0 } },
+    { "&xscr;", { 120013, 0 } },
+    { "&xsqcup;", { 10758, 0 } },
+    { "&xuplus;", { 10756, 0 } },
+    { "&xutri;", { 9651, 0 } },
+    { "&xvee;", { 8897, 0 } },
+    { "&xwedge;", { 8896, 0 } },
+    { "&yacute;", { 253, 0 } },
+    { "&yacy;", { 1103, 0 } },
+    { "&ycirc;", { 375, 0 } },
+    { "&ycy;", { 1099, 0 } },
+    { "&yen;", { 165, 0 } },
+    { "&yfr;", { 120118, 0 } },
+    { "&yicy;", { 1111, 0 } },
+    { "&yopf;", { 120170, 0 } },
+    { "&yscr;", { 120014, 0 } },
+    { "&yucy;", { 1102, 0 } },
+    { "&yuml;", { 255, 0 } },
+    { "&zacute;", { 378, 0 } },
+    { "&zcaron;", { 382, 0 } },
+    { "&zcy;", { 1079, 0 } },
+    { "&zdot;", { 380, 0 } },
+    { "&zeetrf;", { 8488, 0 } },
+    { "&zeta;", { 950, 0 } },
+    { "&zfr;", { 120119, 0 } },
+    { "&zhcy;", { 1078, 0 } },
+    { "&zigrarr;", { 8669, 0 } },
+    { "&zopf;", { 120171, 0 } },
+    { "&zscr;", { 120015, 0 } },
+    { "&zwj;", { 8205, 0 } },
+    { "&zwnj;", { 8204, 0 } }
+};
+
+
+struct entity_key {  /* Search key that entity_lookup() hands to bsearch(). */
+    const char* name;  /* Start of the entity text being looked up. */
+    size_t name_size;  /* Number of characters of name that entity_cmp() compares. */
+};
+
+/* bsearch() comparator: orders a lookup key against one entity_table row.
+ *
+ * p_key points to a struct entity_key, p_entity to a struct entity. The
+ * first key->name_size characters are compared with strncmp(). When they
+ * tie but the table name is longer than the key, the key is only a proper
+ * prefix of that name (e.g. "&su" against "&sub;"); it then sorts before
+ * the row instead of being reported as a match.
+ *
+ * Returns a negative, zero or positive value, as bsearch() expects. */
+static int
+entity_cmp(const void* p_key, const void* p_entity)
+{
+    const struct entity_key* key = (const struct entity_key*) p_key;
+    const struct entity* ent = (const struct entity*) p_entity;
+    int cmp;
+
+    cmp = strncmp(key->name, ent->name, key->name_size);
+    if(cmp != 0)
+        return cmp;
+
+    /* Use strlen() rather than peeking at ent->name[key->name_size]:
+     * strncmp() also reports a tie when both strings end early, and then
+     * that index would lie past the terminator. */
+    return (strlen(ent->name) > key->name_size) ? -1 : 0;
+}
+
+/* Binary-search entity_table for the name_size characters starting at name.
+ * Returns the matching table row, or NULL when bsearch() finds none. */
+const struct entity*
+entity_lookup(const char* name, size_t name_size)
+{
+    const size_t n_entities = sizeof(entity_table) / sizeof(entity_table[0]);
+    struct entity_key key;
+
+    key.name = name;
+    key.name_size = name_size;
+
+    return bsearch(&key, entity_table, n_entities, sizeof(struct entity), entity_cmp);
+}

+ 42 - 0
markdown.mod/md4c/src/entity.h

@@ -0,0 +1,42 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2019 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef MD4C_ENTITY_H
+#define MD4C_ENTITY_H
+
+#include <stdlib.h>
+
+
+/* Most entities expand to a single Unicode codepoint; a few expand to two.
+ * Single-codepoint entities have codepoints[1] set to zero. */
+struct entity {
+    const char* name;
+    unsigned codepoints[2];
+};
+
+const struct entity* entity_lookup(const char* name, size_t name_size);
+
+
+#endif  /* MD4C_ENTITY_H */

+ 590 - 0
markdown.mod/md4c/src/md4c-html.c

@@ -0,0 +1,590 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2019 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "md4c-html.h"
+#include "entity.h"
+
+
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
+    /* "inline" became a keyword only in C99 (__STDC_VERSION__ == 199901L).
+     * C89/90, C94 (199409L) or old compilers in general may not understand
+     * it, so map it to a compiler extension or drop it altogether. */
+    #if defined __GNUC__
+        #define inline __inline__
+    #elif defined _MSC_VER
+        #define inline __inline
+    #else
+        #define inline
+    #endif
+#endif
+
+#ifdef _WIN32
+    #define snprintf _snprintf
+#endif
+
+
+
+typedef struct MD_HTML_tag MD_HTML;  /* Renderer state; first argument of the render_*() helpers. */
+struct MD_HTML_tag {
+    void (*process_output)(const MD_CHAR*, MD_SIZE, void*);  /* Output sink; render_verbatim() calls it with (text, size, userdata). */
+    void* userdata;  /* Opaque pointer passed back as the last process_output argument. */
+    unsigned flags;  /* Bit flags; render_entity() tests MD_HTML_FLAG_VERBATIM_ENTITIES. */
+    int image_nesting_level;  /* NOTE(review): not used in this part of the file; presumably counts open images - confirm. */
+    char escape_map[256];  /* Per-byte NEED_HTML_ESC_FLAG / NEED_URL_ESC_FLAG bits, indexed by (unsigned char). */
+};
+
+#define NEED_HTML_ESC_FLAG   0x1
+#define NEED_URL_ESC_FLAG    0x2
+
+
+/*****************************************
+ ***  HTML rendering helper functions  ***
+ *****************************************/
+
+#define ISDIGIT(ch)     ('0' <= (ch) && (ch) <= '9')
+#define ISLOWER(ch)     ('a' <= (ch) && (ch) <= 'z')
+#define ISUPPER(ch)     ('A' <= (ch) && (ch) <= 'Z')
+#define ISALNUM(ch)     (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
+
+
+/* Pass size characters starting at text to the output callback unchanged. */
+static inline void
+render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
+{
+    (*r->process_output)(text, size, r->userdata);
+}
+
+/* Keep this as a macro. Most compilers should then be smart enough to replace
+ * the strlen() call with a compile-time constant if the string is a C literal. */
+#define RENDER_VERBATIM(r, verbatim)                                    \
+        render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
+
+
+/* Emit data[0..size) with the characters flagged NEED_HTML_ESC_FLAG in
+ * r->escape_map replaced by HTML entities ('&', '<', '>' and '"' have
+ * replacements here). Runs of unflagged characters are forwarded in a
+ * single render_verbatim() call each. */
+static void
+render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
+{
+    MD_OFFSET run_start = 0;    /* First not-yet-emitted character. */
+    MD_OFFSET pos = 0;          /* Current scan position. */
+
+    /* Some characters need to be escaped in normal HTML text. */
+    #define NEED_HTML_ESC(ch)   (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
+
+    for(;;) {
+        /* Fast path: skip four harmless characters per iteration. */
+        while(pos + 3 < size  &&  !(NEED_HTML_ESC(data[pos+0])  ||  NEED_HTML_ESC(data[pos+1])  ||
+                                    NEED_HTML_ESC(data[pos+2])  ||  NEED_HTML_ESC(data[pos+3])))
+            pos += 4;
+        /* Then finish the run one character at a time. */
+        while(pos < size  &&  !NEED_HTML_ESC(data[pos]))
+            pos++;
+
+        /* Flush the harmless run collected so far. */
+        if(pos > run_start)
+            render_verbatim(r, data + run_start, pos - run_start);
+
+        if(pos >= size)
+            break;
+
+        /* data[pos] is flagged; a flagged character without a replacement
+         * below is silently dropped. */
+        if(data[pos] == '&')
+            RENDER_VERBATIM(r, "&amp;");
+        else if(data[pos] == '<')
+            RENDER_VERBATIM(r, "&lt;");
+        else if(data[pos] == '>')
+            RENDER_VERBATIM(r, "&gt;");
+        else if(data[pos] == '"')
+            RENDER_VERBATIM(r, "&quot;");
+
+        pos++;
+        run_start = pos;
+    }
+}
+
+/* Emit data[0..size) for use inside a URL attribute: characters flagged
+ * NEED_URL_ESC_FLAG in r->escape_map become "&amp;" (for '&') or a
+ * percent-encoded "%XX" triplet (everything else); the rest is forwarded
+ * unchanged. */
+static void
+render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
+{
+    static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
+    MD_OFFSET run_start = 0;    /* First not-yet-emitted character. */
+    MD_OFFSET pos = 0;          /* Current scan position. */
+
+    /* Some characters need to be escaped in URL attributes. */
+    #define NEED_URL_ESC(ch)    (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
+
+    for(;;) {
+        while(pos < size  &&  !NEED_URL_ESC(data[pos]))
+            pos++;
+
+        /* Flush the run of characters that need no escaping. */
+        if(pos > run_start)
+            render_verbatim(r, data + run_start, pos - run_start);
+
+        if(pos >= size)
+            break;
+
+        if(data[pos] == '&') {
+            RENDER_VERBATIM(r, "&amp;");
+        } else {
+            char hex[3];
+            unsigned byte = (unsigned) data[pos];
+
+            /* Only the low eight bits survive the masks below. */
+            hex[0] = '%';
+            hex[1] = hex_chars[(byte >> 4) & 0xf];
+            hex[2] = hex_chars[(byte >> 0) & 0xf];
+            render_verbatim(r, hex, 3);
+        }
+
+        pos++;
+        run_start = pos;
+    }
+}
+
+/* Numeric value of one hexadecimal digit character.
+ *
+ * '0'..'9' map to 0..9, uppercase letters to 10 + (ch - 'A'), and anything
+ * else is treated as a lowercase letter, 10 + (ch - 'a'). The input is not
+ * validated: the caller must pass only real hex digits. */
+static unsigned
+hex_val(char ch)
+{
+    int base;
+
+    if(ch >= '0'  &&  ch <= '9')
+        base = '0';
+    else if(ch >= 'A'  &&  ch <= 'Z')
+        base = 'A' - 10;
+    else
+        base = 'a' - 10;
+
+    return ch - base;
+}
+
+/* Encode one Unicode codepoint as UTF-8 and hand the bytes to fn_append.
+ *
+ * Zero, UTF-16 surrogates (U+D800..U+DFFF) and values above U+10FFFF are
+ * not emitted as-is; U+FFFD REPLACEMENT CHARACTER is appended instead.
+ * Encoding a surrogate would produce ill-formed UTF-8. */
+static void
+render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
+                      void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
+{
+    /* U+FFFD encoded in UTF-8. The casts make the conversion explicit in
+     * case MD_CHAR is a signed character type, where 0xef etc. do not fit. */
+    static const MD_CHAR utf8_replacement_char[] = {
+        (MD_CHAR) 0xef, (MD_CHAR) 0xbf, (MD_CHAR) 0xbd
+    };
+
+    unsigned char utf8[4];
+    size_t n;
+
+    if(codepoint == 0  ||  codepoint > 0x10ffff  ||
+       (0xd800 <= codepoint  &&  codepoint <= 0xdfff)) {
+        fn_append(r, utf8_replacement_char, 3);
+        return;
+    }
+
+    if(codepoint <= 0x7f) {
+        n = 1;
+        utf8[0] = (unsigned char) codepoint;
+    } else if(codepoint <= 0x7ff) {
+        n = 2;
+        utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
+        utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
+    } else if(codepoint <= 0xffff) {
+        n = 3;
+        utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
+        utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
+        utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
+    } else {
+        n = 4;
+        utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
+        utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
+        utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
+        utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
+    }
+
+    fn_append(r, (char*)utf8, (MD_SIZE)n);
+}
+
+/* Translate an entity to its UTF-8 equivalent and pass it to fn_append.
+ *
+ * text[0..size) is the whole entity text. With MD_HTML_FLAG_VERBATIM_ENTITIES
+ * set, the entity text goes straight to render_verbatim() (not fn_append).
+ * A named entity that entity_lookup() does not know is passed to fn_append
+ * untranslated. */
+static void
+render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
+              void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
+{
+    const struct entity* ent;
+
+    if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
+        render_verbatim(r, text, size);
+        return;
+    }
+
+    /* We assume UTF-8 output is what is desired. */
+    if(size > 3 && text[1] == '#') {
+        /* Numeric entity: accumulate the digits up to, but excluding, the
+         * last character of the entity text. */
+        const MD_SIZE end = size - 1;
+        unsigned codepoint = 0;
+        MD_SIZE i;
+
+        if(text[2] == 'x' || text[2] == 'X') {
+            /* Hexadecimal entity (e.g. "&#x1234abcd;"). */
+            i = 3;
+            while(i < end) {
+                codepoint = 16 * codepoint + hex_val(text[i]);
+                i++;
+            }
+        } else {
+            /* Decimal entity (e.g. "&#1234;"). */
+            i = 2;
+            while(i < end) {
+                codepoint = 10 * codepoint + (text[i] - '0');
+                i++;
+            }
+        }
+
+        render_utf8_codepoint(r, codepoint, fn_append);
+        return;
+    }
+
+    /* Named entity (e.g. "&nbsp;"). */
+    ent = entity_lookup(text, size);
+    if(ent == NULL) {
+        /* Unknown name: output the entity text as it stands. */
+        fn_append(r, text, size);
+        return;
+    }
+
+    render_utf8_codepoint(r, ent->codepoints[0], fn_append);
+    if(ent->codepoints[1])
+        render_utf8_codepoint(r, ent->codepoints[1], fn_append);
+}
+
+/* Render an attribute value substring by substring.
+ *
+ * Substring i spans attr->substr_offsets[i] .. attr->substr_offsets[i+1] and
+ * has type attr->substr_types[i]; iteration stops at the first offset that
+ * is not below attr->size. Entities are translated by render_entity(), a
+ * NULLCHAR substring becomes the replacement character (always via
+ * render_verbatim), and every other substring goes to fn_append as is. */
+static void
+render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
+                 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
+{
+    int i = 0;
+
+    while(attr->substr_offsets[i] < attr->size) {
+        const MD_TEXTTYPE type = attr->substr_types[i];
+        const MD_OFFSET start = attr->substr_offsets[i];
+        const MD_SIZE len = attr->substr_offsets[i+1] - start;
+        const MD_CHAR* piece = attr->text + start;
+
+        if(type == MD_TEXT_NULLCHAR)
+            render_utf8_codepoint(r, 0x0000, render_verbatim);
+        else if(type == MD_TEXT_ENTITY)
+            render_entity(r, piece, len, fn_append);
+        else
+            fn_append(r, piece, len);
+
+        i++;
+    }
+}
+
+
+/* Emit the opening <ol> tag. A start="N" attribute is added only when the
+ * list does not start at 1. */
+static void
+render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
+{
+    char buf[64];
+
+    if(det->start != 1) {
+        snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
+        RENDER_VERBATIM(r, buf);
+    } else {
+        RENDER_VERBATIM(r, "<ol>\n");
+    }
+}
+
+/* Emit the opening tag of a list item. Task list items additionally get
+ * a disabled checkbox, which is checked when the task mark is 'x' or 'X'. */
+static void
+render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
+{
+    /* Ordinary list item: nothing but the bare tag. */
+    if(!det->is_task) {
+        RENDER_VERBATIM(r, "<li>");
+        return;
+    }
+
+    RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
+                      "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
+
+    switch(det->task_mark) {
+        case 'x':
+        case 'X':
+            RENDER_VERBATIM(r, " checked");
+            break;
+        default:
+            break;
+    }
+
+    RENDER_VERBATIM(r, ">");
+}
+
+/* Emit "<pre><code ...>" for a code block. When the language is known,
+ * the HTML 5 attribute class="language-LANGNAME" is added. */
+static void
+render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
+{
+    const int has_lang = (det->lang.text != NULL);
+
+    RENDER_VERBATIM(r, "<pre><code");
+
+    if(has_lang) {
+        RENDER_VERBATIM(r, " class=\"language-");
+        render_attribute(r, &det->lang, render_html_escaped);
+        RENDER_VERBATIM(r, "\"");
+    }
+
+    RENDER_VERBATIM(r, ">");
+}
+
+/* Emit the opening tag of a heading (<h1> .. <h6>), with an id attribute
+ * when the heading carries an identifier.
+ *
+ * det->level is used as a 1-based index into a six-entry table without
+ * any range check. */
+static void
+render_header_block(MD_HTML* r, const MD_BLOCK_H_DETAIL* det)
+{
+    static const MD_CHAR* open_tags[6] = { "<h1", "<h2", "<h3", "<h4", "<h5", "<h6" };
+    const MD_CHAR* tag = open_tags[det->level - 1];
+    const int has_id = (det->identifier.text != NULL);
+
+    RENDER_VERBATIM(r, tag);
+
+    if(has_id) {
+        RENDER_VERBATIM(r, " id=\"");
+        render_attribute(r, &det->identifier, render_html_escaped);
+        RENDER_VERBATIM(r, "\"");
+    }
+
+    RENDER_VERBATIM(r, ">");
+}
+
+/* Emit the opening tag of a table cell. cell_type is the element name
+ * ("th" or "td" at the call sites); the align attribute is written only for
+ * left, center and right alignment. */
+static void
+render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
+{
+    const MD_CHAR* tail;
+
+    RENDER_VERBATIM(r, "<");
+    RENDER_VERBATIM(r, cell_type);
+
+    /* Pick the rest of the tag according to the column alignment. */
+    if(det->align == MD_ALIGN_LEFT)
+        tail = " align=\"left\">";
+    else if(det->align == MD_ALIGN_CENTER)
+        tail = " align=\"center\">";
+    else if(det->align == MD_ALIGN_RIGHT)
+        tail = " align=\"right\">";
+    else
+        tail = ">";
+
+    RENDER_VERBATIM(r, tail);
+}
+
+/* Emit the opening <a> tag: href is URL-escaped, the optional title is
+ * HTML-escaped. */
+static void
+render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
+{
+    const int has_title = (det->title.text != NULL);
+
+    RENDER_VERBATIM(r, "<a href=\"");
+    render_attribute(r, &det->href, render_url_escaped);
+
+    if(has_title) {
+        /* Close the href value and start the title value. */
+        RENDER_VERBATIM(r, "\" title=\"");
+        render_attribute(r, &det->title, render_html_escaped);
+    }
+
+    /* Closes whichever attribute value is still open. */
+    RENDER_VERBATIM(r, "\">");
+}
+
+/* Start an <img> tag: write the URL-escaped src and leave the alt attribute
+ * value open. The tag is finished by render_close_img_span(); until then
+ * the raised image_nesting_level marks that we are inside the alt text. */
+static void
+render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
+{
+    RENDER_VERBATIM(r, "<img src=\"");
+    render_attribute(r, &det->src, render_url_escaped);
+    RENDER_VERBATIM(r, "\" alt=\"");
+
+    r->image_nesting_level += 1;
+}
+
+/* Finish the <img> tag started by render_open_img_span(): close the alt
+ * value, add the optional HTML-escaped title, and terminate the tag
+ * (self-closing form when MD_HTML_FLAG_XHTML is set). */
+static void
+render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
+{
+    const int has_title = (det->title.text != NULL);
+
+    if(has_title) {
+        RENDER_VERBATIM(r, "\" title=\"");
+        render_attribute(r, &det->title, render_html_escaped);
+    }
+
+    if(r->flags & MD_HTML_FLAG_XHTML) {
+        RENDER_VERBATIM(r, "\" />");
+    } else {
+        RENDER_VERBATIM(r, "\">");
+    }
+
+    r->image_nesting_level -= 1;
+}
+
+/* Emit the opening <x-wikilink> tag with the HTML-escaped link target in
+ * its data-target attribute. */
+static void
+render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
+{
+    const MD_ATTRIBUTE* target = &det->target;
+
+    RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
+    render_attribute(r, target, render_html_escaped);
+    RENDER_VERBATIM(r, "\">");
+}
+
+
+/**************************************
+ ***  HTML renderer implementation  ***
+ **************************************/
+
+/* Parser callback invoked when a block is entered: emits the opening HTML
+ * for the block. userdata is the MD_HTML renderer state; detail points to
+ * the block-type specific detail structure (used only by some block types).
+ * Always returns 0. */
+static int
+enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    MD_HTML* r = (MD_HTML*) userdata;
+
+    switch(type) {
+        case MD_BLOCK_DOC:
+            /* noop */
+            break;
+
+        case MD_BLOCK_QUOTE:
+            RENDER_VERBATIM(r, "<blockquote>\n");
+            break;
+
+        case MD_BLOCK_UL:
+            RENDER_VERBATIM(r, "<ul>\n");
+            break;
+
+        case MD_BLOCK_OL:
+            render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail);
+            break;
+
+        case MD_BLOCK_LI:
+            render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail);
+            break;
+
+        case MD_BLOCK_HR:
+            if(r->flags & MD_HTML_FLAG_XHTML) {
+                RENDER_VERBATIM(r, "<hr />\n");
+            } else {
+                RENDER_VERBATIM(r, "<hr>\n");
+            }
+            break;
+
+        case MD_BLOCK_H:
+            render_header_block(r, (const MD_BLOCK_H_DETAIL*)detail);
+            break;
+
+        case MD_BLOCK_CODE:
+            render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail);
+            break;
+
+        case MD_BLOCK_HTML:
+            /* noop */
+            break;
+
+        case MD_BLOCK_P:
+            RENDER_VERBATIM(r, "<p>");
+            break;
+
+        case MD_BLOCK_TABLE:
+            RENDER_VERBATIM(r, "<table>\n");
+            break;
+
+        case MD_BLOCK_THEAD:
+            RENDER_VERBATIM(r, "<thead>\n");
+            break;
+
+        case MD_BLOCK_TBODY:
+            RENDER_VERBATIM(r, "<tbody>\n");
+            break;
+
+        case MD_BLOCK_TR:
+            RENDER_VERBATIM(r, "<tr>\n");
+            break;
+
+        case MD_BLOCK_TH:
+            render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail);
+            break;
+
+        case MD_BLOCK_TD:
+            render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail);
+            break;
+
+        case MD_BLOCK_NAV:
+            RENDER_VERBATIM(r, "<nav id=\"TOC\" role=\"doc-toc\">\n");
+            break;
+    }
+
+    return 0;
+}
+
+/* Parser callback invoked when a block is left: emits the closing HTML for
+ * the block. userdata is the MD_HTML renderer state; detail is consulted
+ * only for headings (to pick the closing tag by level). Always returns 0. */
+static int
+leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    static const MD_CHAR* close_tags[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
+    MD_HTML* r = (MD_HTML*) userdata;
+
+    switch(type) {
+        case MD_BLOCK_DOC:
+            /* noop */
+            break;
+
+        case MD_BLOCK_QUOTE:
+            RENDER_VERBATIM(r, "</blockquote>\n");
+            break;
+
+        case MD_BLOCK_UL:
+            RENDER_VERBATIM(r, "</ul>\n");
+            break;
+
+        case MD_BLOCK_OL:
+            RENDER_VERBATIM(r, "</ol>\n");
+            break;
+
+        case MD_BLOCK_LI:
+            RENDER_VERBATIM(r, "</li>\n");
+            break;
+
+        case MD_BLOCK_HR:
+            /* noop */
+            break;
+
+        case MD_BLOCK_H:
+            /* The heading level is a 1-based index into close_tags. */
+            RENDER_VERBATIM(r, close_tags[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
+            break;
+
+        case MD_BLOCK_CODE:
+            RENDER_VERBATIM(r, "</code></pre>\n");
+            break;
+
+        case MD_BLOCK_HTML:
+            /* noop */
+            break;
+
+        case MD_BLOCK_P:
+            RENDER_VERBATIM(r, "</p>\n");
+            break;
+
+        case MD_BLOCK_TABLE:
+            RENDER_VERBATIM(r, "</table>\n");
+            break;
+
+        case MD_BLOCK_THEAD:
+            RENDER_VERBATIM(r, "</thead>\n");
+            break;
+
+        case MD_BLOCK_TBODY:
+            RENDER_VERBATIM(r, "</tbody>\n");
+            break;
+
+        case MD_BLOCK_TR:
+            RENDER_VERBATIM(r, "</tr>\n");
+            break;
+
+        case MD_BLOCK_TH:
+            RENDER_VERBATIM(r, "</th>\n");
+            break;
+
+        case MD_BLOCK_TD:
+            RENDER_VERBATIM(r, "</td>\n");
+            break;
+
+        case MD_BLOCK_NAV:
+            RENDER_VERBATIM(r, "</nav>\n");
+            break;
+    }
+
+    return 0;
+}
+
+/* Parser callback invoked when an inline span is entered: emits the opening
+ * HTML tag of the span, unless we are inside an image label.
+ * Always returns 0. */
+static int
+enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+{
+    MD_HTML* r = (MD_HTML*) userdata;
+
+    /* Inside a Markdown image label everything ends up in the alt attribute
+     * (<img alt="...">). Markdown permits emphasis and other rich contents
+     * there, just as in a link label, but an attribute value cannot hold
+     * nested HTML tags. So the tags are suppressed and only the plain text
+     * delivered through the text() callback is output. This is the practice
+     * recommended by the CommonMark specification for HTML output. */
+    if(r->image_nesting_level > 0)
+        return 0;
+
+    switch(type) {
+        case MD_SPAN_EM:
+            RENDER_VERBATIM(r, "<em>");
+            break;
+
+        case MD_SPAN_STRONG:
+            RENDER_VERBATIM(r, "<strong>");
+            break;
+
+        case MD_SPAN_U:
+            RENDER_VERBATIM(r, "<u>");
+            break;
+
+        case MD_SPAN_A:
+            render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail);
+            break;
+
+        case MD_SPAN_IMG:
+            render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
+            break;
+
+        case MD_SPAN_CODE:
+            RENDER_VERBATIM(r, "<code>");
+            break;
+
+        case MD_SPAN_DEL:
+            RENDER_VERBATIM(r, "<del>");
+            break;
+
+        case MD_SPAN_LATEXMATH:
+            RENDER_VERBATIM(r, "<x-equation>");
+            break;
+
+        case MD_SPAN_LATEXMATH_DISPLAY:
+            RENDER_VERBATIM(r, "<x-equation type=\"display\">");
+            break;
+
+        case MD_SPAN_WIKILINK:
+            render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail);
+            break;
+    }
+
+    return 0;
+}
+
+/* Parser callback invoked when an inline span is left: emits the closing
+ * HTML tag of the span. Always returns 0. */
+static int
+leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+{
+    MD_HTML* r = (MD_HTML*) userdata;
+
+    if(r->image_nesting_level > 0) {
+        /* Inside an image label all tags are suppressed, the same way as in
+         * enter_span_callback(). The only thing which may be output is the
+         * end of the outermost <img> tag itself. */
+        const int closes_outer_img = (r->image_nesting_level == 1  &&  type == MD_SPAN_IMG);
+
+        if(closes_outer_img)
+            render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
+        return 0;
+    }
+
+    switch(type) {
+        case MD_SPAN_EM:
+            RENDER_VERBATIM(r, "</em>");
+            break;
+
+        case MD_SPAN_STRONG:
+            RENDER_VERBATIM(r, "</strong>");
+            break;
+
+        case MD_SPAN_U:
+            RENDER_VERBATIM(r, "</u>");
+            break;
+
+        case MD_SPAN_A:
+            RENDER_VERBATIM(r, "</a>");
+            break;
+
+        case MD_SPAN_IMG:
+            /* noop, handled above */
+            break;
+
+        case MD_SPAN_CODE:
+            RENDER_VERBATIM(r, "</code>");
+            break;
+
+        case MD_SPAN_DEL:
+            RENDER_VERBATIM(r, "</del>");
+            break;
+
+        /* Both math flavours share the same closing tag. */
+        case MD_SPAN_LATEXMATH:
+        case MD_SPAN_LATEXMATH_DISPLAY:
+            RENDER_VERBATIM(r, "</x-equation>");
+            break;
+
+        case MD_SPAN_WIKILINK:
+            RENDER_VERBATIM(r, "</x-wikilink>");
+            break;
+    }
+
+    return 0;
+}
+
+/* Parser callback delivering a piece of text. Raw HTML is passed through
+ * verbatim, entities go through render_entity(), and any other text is
+ * HTML-escaped. Inside an image label (alt attribute) both hard and soft
+ * line breaks degrade to a single space. Always returns 0. */
+static int
+text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
+{
+    MD_HTML* r = (MD_HTML*) userdata;
+
+    switch(type) {
+        case MD_TEXT_NULLCHAR:
+            render_utf8_codepoint(r, 0x0000, render_verbatim);
+            break;
+
+        case MD_TEXT_BR:
+            if(r->image_nesting_level != 0) {
+                RENDER_VERBATIM(r, " ");
+            } else if(r->flags & MD_HTML_FLAG_XHTML) {
+                RENDER_VERBATIM(r, "<br />\n");
+            } else {
+                RENDER_VERBATIM(r, "<br>\n");
+            }
+            break;
+
+        case MD_TEXT_SOFTBR:
+            if(r->image_nesting_level != 0) {
+                RENDER_VERBATIM(r, " ");
+            } else {
+                RENDER_VERBATIM(r, "\n");
+            }
+            break;
+
+        case MD_TEXT_HTML:
+            render_verbatim(r, text, size);
+            break;
+
+        case MD_TEXT_ENTITY:
+            render_entity(r, text, size, render_html_escaped);
+            break;
+
+        default:
+            render_html_escaped(r, text, size);
+            break;
+    }
+
+    return 0;
+}
+
+/* Parser debug callback: forwards the message to stderr, but only when the
+ * renderer was created with MD_HTML_FLAG_DEBUG. */
+static void
+debug_log_callback(const char* msg, void* userdata)
+{
+    const MD_HTML* r = (const MD_HTML*) userdata;
+
+    if(!(r->flags & MD_HTML_FLAG_DEBUG))
+        return;
+
+    fprintf(stderr, "MD4C: %s\n", msg);
+}
+
+/* Render Markdown into HTML.
+ *
+ * input, input_size: the Markdown document.
+ * process_output:    callback receiving the chunks of generated HTML.
+ * userdata:          propagated back to process_output().
+ * parser_flags:      flags propagated to md_parse().
+ * renderer_flags:    bitmask of MD_HTML_FLAG_xxxx.
+ * toc_options:       TOC options copied into the parser structure; must not
+ *                    be NULL.
+ *
+ * Returns the result of md_parse(), or -1 when toc_options is NULL.
+ */
+int
+md_html(const MD_CHAR* input, MD_SIZE input_size,
+        void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+        void* userdata, unsigned parser_flags, unsigned renderer_flags,
+        MD_TOC_OPTIONS* toc_options)
+{
+    /* The parser structure below takes a copy of *toc_options, so a NULL
+     * pointer has to be rejected before the initializer is evaluated;
+     * otherwise it would be dereferenced and crash. */
+    if(toc_options == NULL) {
+        if(renderer_flags & MD_HTML_FLAG_DEBUG)
+            fprintf(stderr, "MD4C: %s\n", "md_html() called with NULL toc_options.");
+        return -1;
+    }
+
+    {
+        MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
+        int i;
+
+        MD_PARSER parser = {
+            1,
+            parser_flags,
+            enter_block_callback,
+            leave_block_callback,
+            enter_span_callback,
+            leave_span_callback,
+            text_callback,
+            debug_log_callback,
+            *toc_options,
+            NULL
+        };
+
+        /* Build map of characters which need escaping. */
+        for(i = 0; i < 256; i++) {
+            unsigned char ch = (unsigned char) i;
+
+            if(strchr("\"&<>", ch) != NULL)
+                render.escape_map[i] |= NEED_HTML_ESC_FLAG;
+
+            if(!ISALNUM(ch)  &&  strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
+                render.escape_map[i] |= NEED_URL_ESC_FLAG;
+        }
+
+        /* Consider skipping UTF-8 byte order mark (BOM). This only makes
+         * sense when MD_CHAR is a single byte. */
+        if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM  &&  sizeof(MD_CHAR) == 1) {
+            static const MD_CHAR bom[3] = { 0xef, 0xbb, 0xbf };
+            if(input_size >= sizeof(bom)  &&  memcmp(input, bom, sizeof(bom)) == 0) {
+                input += sizeof(bom);
+                input_size -= sizeof(bom);
+            }
+        }
+
+        return md_parse(input, input_size, &parser, (void*) &render);
+    }
+}
+

+ 71 - 0
markdown.mod/md4c/src/md4c-html.h

@@ -0,0 +1,71 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2017 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef MD4C_HTML_H
+#define MD4C_HTML_H
+
+#include "md4c.h"
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+
+/* If set, debug output from md_parse() is sent to stderr. */
+#define MD_HTML_FLAG_DEBUG                  0x0001
+/* If set, entities are copied to the output verbatim instead of being
+ * translated to the corresponding UTF-8 sequence. */
+#define MD_HTML_FLAG_VERBATIM_ENTITIES      0x0002
+/* If set (and MD_CHAR is one byte wide), a UTF-8 byte order mark at the very
+ * beginning of the input is skipped. */
+#define MD_HTML_FLAG_SKIP_UTF8_BOM          0x0004
+/* If set, empty elements are written in the XHTML form, e.g. "<br />" and
+ * "<hr />" instead of "<br>" and "<hr>". */
+#define MD_HTML_FLAG_XHTML                  0x0008
+
+
+/* Render Markdown into HTML.
+ *
+ * Note only contents of <body> tag is generated. Caller must generate
+ * HTML header/footer manually before/after calling md_html().
+ *
+ * Params input and input_size specify the Markdown input.
+ * Callback process_output() gets called with chunks of HTML output.
+ * (Typical implementation may just output the bytes to a file or append to
+ * some buffer).
+ * Param userdata is just propagated back to process_output() callback.
+ * Param parser_flags are flags from md4c.h propagated to md_parse().
+ * Param renderer_flags is bitmask of MD_HTML_FLAG_xxxx.
+ * Param toc_options is a pointer to toc options from md4c.h propagated to
+ * md_parse(). It must not be NULL.
+ *
+ * Returns -1 on error (if md_parse() fails.)
+ * Returns 0 on success.
+ */
+int md_html(const MD_CHAR* input, MD_SIZE input_size,
+            void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
+            void* userdata, unsigned parser_flags, unsigned renderer_flags,
+            MD_TOC_OPTIONS* toc_options 
+            );
+
+
+#ifdef __cplusplus
+    }  /* extern "C" { */
+#endif
+
+#endif  /* MD4C_HTML_H */

+ 13 - 0
markdown.mod/md4c/src/md4c-html.pc.in

@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: @PROJECT_NAME@ HTML renderer
+Description: Markdown to HTML converter library.
+Version: @PROJECT_VERSION@
+URL: @PROJECT_URL@
+
+Requires: md4c = @PROJECT_VERSION@
+Libs: -L${libdir} -lmd4c-html
+Cflags: -I${includedir}

+ 7240 - 0
markdown.mod/md4c/src/md4c.c

@@ -0,0 +1,7240 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2020 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "md4c.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/*****************************
+ ***  Miscellaneous Stuff  ***
+ *****************************/
+
+/* The "inline" keyword was introduced by C99 (__STDC_VERSION__ 199901L).
+ * Anything older -- C89/90, C95 (199409L) or old compilers in general -- may
+ * not understand it, so map it to a compiler-specific spelling or to
+ * nothing. */
+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
+    #if defined __GNUC__
+        #define inline __inline__
+    #elif defined _MSC_VER
+        #define inline __inline
+    #else
+        #define inline
+    #endif
+#endif
+
+/* Make the UTF-8 support the default: it is selected whenever the build does
+ * not request one of the encodings explicitly. */
+#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
+    #define MD4C_USE_UTF8
+#endif
+
+/* Magic for making wide literals with MD4C_USE_UTF16.
+ * _T(x) turns a character or string literal into a wide one (L prefix) in
+ * the UTF-16 build and leaves it untouched otherwise. Any previous
+ * definition of _T (e.g. from a system header) is discarded first. */
+#ifdef _T
+    #undef _T
+#endif
+#if defined MD4C_USE_UTF16
+    #define _T(x)           L##x
+#else
+    #define _T(x)           x
+#endif
+
+/* Misc. macros. */
+
+/* Count of elements of a true array (not usable with a pointer). */
+#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof(a[0]))
+
+/* Two-level stringification, so that a macro argument is expanded before it
+ * is turned into a string literal (e.g. STRINGIZE(__LINE__)). */
+#define STRINGIZE_(x)       #x
+#define STRINGIZE(x)        STRINGIZE_(x)
+
+/* Boolean constants, unless some header has provided them already. */
+#ifndef TRUE
+    #define TRUE            1
+    #define FALSE           0
+#endif
+
+/* Send a message to the application's debug_log() callback, if one was
+ * provided. Expects a variable named ctx (the parsing context) in scope. */
+#define MD_LOG(msg)                                                     \
+    do {                                                                \
+        if(ctx->parser.debug_log != NULL)                               \
+            ctx->parser.debug_log((msg), ctx->userdata);                \
+    } while(0)
+
+/* MD_ASSERT() and MD_UNREACHABLE():
+ *  -- In DEBUG builds a failed assertion is logged through MD_LOG() (hence
+ *     ctx has to be in scope) and the process exits with status 1.
+ *  -- Otherwise the condition only serves as an optimization hint for
+ *     compilers which support that (GCC-compatible ones and MSVC); on other
+ *     compilers both macros expand to nothing. Note that with such a hint a
+ *     violated assertion is not diagnosed at run time. */
+#ifdef DEBUG
+    #define MD_ASSERT(cond)                                             \
+            do {                                                        \
+                if(!(cond)) {                                           \
+                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
+                           "Assertion '" STRINGIZE(cond) "' failed.");  \
+                    exit(1);                                            \
+                }                                                       \
+            } while(0)
+
+    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
+#else
+    #ifdef __GNUC__
+        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
+        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
+    #elif defined _MSC_VER  &&  _MSC_VER > 120
+        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
+        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
+    #else
+        #define MD_ASSERT(cond)     do {} while(0)
+        #define MD_UNREACHABLE()    do {} while(0)
+    #endif
+#endif
+
+/* For falling through case labels in switch statements.
+ * Expands to the fallthrough attribute on compilers known to support it
+ * (clang 12+, GCC 7+) and to a harmless no-op expression elsewhere. */
+#if defined __clang__ && __clang_major__ >= 12
+    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
+#elif defined __GNUC__ && __GNUC__ >= 7
+    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
+#else
+    #define MD_FALLTHROUGH()        ((void)0)
+#endif
+
+/* Suppress "unused parameter" warnings.
+ * The argument is parenthesized so that the cast applies to the whole
+ * expression, whatever is passed in. */
+#define MD_UNUSED(x)                ((void)(x))
+
+
+/************************
+ ***  Internal Types  ***
+ ************************/
+
+/* These are omnipresent so lets save some typing.
+ * They are plain textual aliases of the public MD_CHAR, MD_SIZE and
+ * MD_OFFSET types. */
+#define CHAR    MD_CHAR
+#define SZ      MD_SIZE
+#define OFF     MD_OFFSET
+
+/* Forward declarations of the internal structures; they are referenced by
+ * MD_CTX below through pointers only. */
+typedef struct MD_MARK_tag MD_MARK;
+typedef struct MD_BLOCK_tag MD_BLOCK;
+typedef struct MD_CONTAINER_tag MD_CONTAINER;
+typedef struct MD_REF_DEF_tag MD_REF_DEF;
+typedef struct MD_HEADING_DEF_tag MD_HEADING_DEF;
+
+/* During the analysis of inline marks, we need to manage some "mark chains"
+ * of (yet unresolved) openers. This structure holds the start and the end of
+ * such a chain. The chain internals are then realized through MD_MARK::prev
+ * and ::next.
+ */
+typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
+struct MD_MARKCHAIN_tag {
+    int head;   /* Index of first mark in the chain, or -1 if empty. */
+    int tail;   /* Index of last mark in the chain, or -1 if empty. */
+};
+
+/* Context propagated through all the parsing.
+ *
+ * Most helper macros in this file (CH(), STR(), MD_LOG(), the mark chain
+ * accessors below, ...) expect a pointer to this structure to be available
+ * under the name ctx. */
+typedef struct MD_CTX_tag MD_CTX;
+struct MD_CTX_tag {
+    /* Immutable stuff (parameters of md_parse()). */
+    const CHAR* text;
+    SZ size;
+    MD_PARSER parser;
+    void* userdata;
+
+    /* When this is true, it allows some optimizations. */
+    int doc_ends_with_newline;
+
+    /* Helper temporary growing buffer (grown on demand by MD_TEMP_BUFFER();
+     * alloc_buffer is its current capacity). */
+    CHAR* buffer;
+    unsigned alloc_buffer;
+
+    /* Reference definitions. */
+    MD_REF_DEF* ref_defs;
+    int n_ref_defs;
+    int alloc_ref_defs;
+    void** ref_def_hashtable;
+    int ref_def_hashtable_size;
+
+    /* Heading definitions. */
+    MD_HEADING_DEF* heading_defs;
+    int n_heading_defs;
+    int alloc_heading_defs;
+    void** heading_def_hashtable;
+    int heading_def_hashtable_size;
+    /* Auto-generated identifiers for headings.
+     * NOTE(review): presumably a single growing buffer with its used and
+     * allocated sizes -- confirm against the code which fills it. */
+    CHAR* identifiers;
+    SZ identifiers_size;
+    SZ alloc_identifiers;
+
+    /* Table of contents (TOC) information.
+     * NOTE(review): presumably non-zero once a TOC placeholder has been
+     * seen in the document -- confirm against its users. */
+    int toc_found;
+
+    /* Stack of inline/span markers.
+     * This is only used for parsing a single block contents but by storing it
+     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
+     * (re)allocations. */
+    MD_MARK* marks;
+    int n_marks;
+    int alloc_marks;
+
+    /* Map indexed by character code; sized for ASCII only (128 entries) in
+     * the UTF-16 build and for any byte value (256 entries) otherwise. */
+#if defined MD4C_USE_UTF16
+    char mark_char_map[128];
+#else
+    char mark_char_map[256];
+#endif
+
+    /* For resolving of inline spans.
+     * The macros below give each chain a name. Chains with indexes
+     * OPENERS_CHAIN_FIRST .. OPENERS_CHAIN_LAST form one contiguous range;
+     * PTR_CHAIN (index 0) lies outside of it. */
+    MD_MARKCHAIN mark_chains[13];
+#define PTR_CHAIN                               (ctx->mark_chains[0])
+#define TABLECELLBOUNDARIES                     (ctx->mark_chains[1])
+#define ASTERISK_OPENERS_extraword_mod3_0       (ctx->mark_chains[2])
+#define ASTERISK_OPENERS_extraword_mod3_1       (ctx->mark_chains[3])
+#define ASTERISK_OPENERS_extraword_mod3_2       (ctx->mark_chains[4])
+#define ASTERISK_OPENERS_intraword_mod3_0       (ctx->mark_chains[5])
+#define ASTERISK_OPENERS_intraword_mod3_1       (ctx->mark_chains[6])
+#define ASTERISK_OPENERS_intraword_mod3_2       (ctx->mark_chains[7])
+#define UNDERSCORE_OPENERS                      (ctx->mark_chains[8])
+#define TILDE_OPENERS_1                         (ctx->mark_chains[9])
+#define TILDE_OPENERS_2                         (ctx->mark_chains[10])
+#define BRACKET_OPENERS                         (ctx->mark_chains[11])
+#define DOLLAR_OPENERS                          (ctx->mark_chains[12])
+#define OPENERS_CHAIN_FIRST                     1
+#define OPENERS_CHAIN_LAST                      12
+
+    int n_table_cell_boundaries;
+
+    /* For resolving links. */
+    int unresolved_link_head;
+    int unresolved_link_tail;
+
+    /* For resolving raw HTML. */
+    OFF html_comment_horizon;
+    OFF html_proc_instr_horizon;
+    OFF html_decl_horizon;
+    OFF html_cdata_horizon;
+
+    /* For block analysis.
+     * Notes:
+     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
+     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
+     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
+     *      instead of MD_LINE(s).
+     */
+    void* block_bytes;
+    MD_BLOCK* current_block;
+    int n_block_bytes;
+    int alloc_block_bytes;
+
+    /* For container block analysis. */
+    MD_CONTAINER* containers;
+    int n_containers;
+    int alloc_containers;
+
+    /* Minimal indentation to call the block "indented code block". */
+    unsigned code_indent_offset;
+
+    /* Contextual info for line analysis. */
+    SZ code_fence_length;   /* For checking closing fence length. */
+    int html_block_type;    /* For checking closing raw HTML condition. */
+    int last_line_has_list_loosening_effect;
+    int last_list_item_starts_with_two_blank_lines;
+};
+
+/* Classification of a single input line, as stored in
+ * MD_LINE_ANALYSIS::type. */
+enum MD_LINETYPE_tag {
+    MD_LINE_BLANK,
+    MD_LINE_HR,
+    MD_LINE_ATXHEADER,
+    MD_LINE_SETEXTHEADER,
+    MD_LINE_SETEXTUNDERLINE,
+    MD_LINE_INDENTEDCODE,
+    MD_LINE_FENCEDCODE,
+    MD_LINE_HTML,
+    MD_LINE_TEXT,
+    MD_LINE_TABLE,
+    MD_LINE_TABLEUNDERLINE,
+    MD_LINE_TOC
+};
+typedef enum MD_LINETYPE_tag MD_LINETYPE;
+
+/* Result of the analysis of one line. The line type and a 16-bit piece of
+ * type-specific data are packed into bit-fields; beg and end are offsets
+ * into the document text. */
+typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
+struct MD_LINE_ANALYSIS_tag {
+    MD_LINETYPE type    : 16;
+    unsigned data       : 16;
+    OFF beg;
+    OFF end;
+    unsigned indent;        /* Indentation level. */
+};
+
+/* A line of a block, described by a pair of offsets. These records follow
+ * their MD_BLOCK inside MD_CTX::block_bytes. */
+typedef struct MD_LINE_tag MD_LINE;
+struct MD_LINE_tag {
+    OFF beg;
+    OFF end;
+};
+
+/* Variant of MD_LINE used for MD_BLOCK_HTML and MD_BLOCK_CODE blocks; it
+ * additionally records the indentation of the line. */
+typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
+struct MD_VERBATIMLINE_tag {
+    OFF beg;
+    OFF end;
+    OFF indent;
+};
+
+
+/*****************
+ ***  Helpers  ***
+ *****************/
+
+/* Character accessors.
+ * Both take an offset into the document text and expect ctx in scope. */
+#define CH(off)                 (ctx->text[(off)])
+#define STR(off)                (ctx->text + (off))
+
+/* Character classification.
+ * Note we assume ASCII compatibility of code points < 128 here.
+ *
+ * The macros with the trailing underscore take a character; their
+ * counterparts without it (further below) take an offset into the document
+ * and classify CH(off). Beware that most of them evaluate the argument more
+ * than once, so it must not have side effects.
+ *
+ * ISANYOF_() rejects '\0' explicitly because strchr()/wcschr() would
+ * otherwise match the terminator of the palette string. */
+#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
+#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
+#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
+#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
+#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
+#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
+#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
+#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
+#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
+#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
+#define ISSYMBOL_(ch)                   (ISANYOF3_(ch, _T('+'), _T('|'), _T('~')) || ISIN_(ch, 60, 62))
+#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
+#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
+#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
+#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
+#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
+#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))
+
+/* The same classification for the character at the given document offset. */
+#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
+#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
+#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
+#define ISASCII(off)                    ISASCII_(CH(off))
+#define ISBLANK(off)                    ISBLANK_(CH(off))
+#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
+#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
+#define ISCNTRL(off)                    ISCNTRL_(CH(off))
+#define ISPUNCT(off)                    ISPUNCT_(CH(off))
+#define ISSYMBOL(off)                   ISSYMBOL_(CH(off))
+#define ISUPPER(off)                    ISUPPER_(CH(off))
+#define ISLOWER(off)                    ISLOWER_(CH(off))
+#define ISALPHA(off)                    ISALPHA_(CH(off))
+#define ISDIGIT(off)                    ISDIGIT_(CH(off))
+#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
+#define ISALNUM(off)                    ISALNUM_(CH(off))
+
+
+/* strchr() flavour matching the width of CHAR. */
+#if defined MD4C_USE_UTF16
+    #define md_strchr wcschr
+#else
+    #define md_strchr strchr
+#endif
+
+
+/* Compare the first n characters of s1 and s2, ignoring the case of ASCII
+ * letters (a-z are folded to A-Z; any other character has to match exactly).
+ * Returns TRUE when they are equal, FALSE otherwise. */
+static inline int
+md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
+{
+    const CHAR* const s1_end = s1 + n;
+
+    while(s1 < s1_end) {
+        CHAR a = *s1++;
+        CHAR b = *s2++;
+
+        /* Fold lower case ASCII letters to upper case. */
+        if(ISLOWER_(a))
+            a += ('A'-'a');
+        if(ISLOWER_(b))
+            b += ('A'-'a');
+
+        if(a != b)
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+/* Case sensitive check of string equality: non-zero when the first n
+ * characters of s1 and s2 are identical. */
+static inline int
+md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
+{
+    const size_t n_bytes = n * sizeof(CHAR);
+
+    return memcmp(s1, s2, n_bytes) == 0;
+}
+
+/* Pass text to the text() callback, replacing every NUL character.
+ *
+ * The string is split at NUL characters: each run of ordinary characters is
+ * delivered with the requested type, and each NUL is delivered as a separate
+ * MD_TEXT_NULLCHAR callback instead, so the application never receives a raw
+ * NUL inside an ordinary text chunk.
+ *
+ * Returns 0 on success, or the first non-zero value returned by the
+ * callback (in which case processing stops immediately).
+ */
+static int
+md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
+{
+    int ret = 0;
+
+    while(size > 0) {
+        OFF off = 0;
+
+        /* Find the end of the run of ordinary characters. */
+        while(off < size  &&  str[off] != _T('\0'))
+            off++;
+
+        if(off > 0) {
+            ret = ctx->parser.text(type, str, off, ctx->userdata);
+            if(ret != 0)
+                return ret;
+
+            str += off;
+            size -= off;
+        }
+
+        if(size == 0)
+            break;
+
+        /* Here str[0] is a NUL character. */
+        ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
+        if(ret != 0)
+            return ret;
+
+        /* Step over the NUL itself. It has to be consumed from the string;
+         * otherwise the next text chunk would start with it and the raw NUL
+         * would leak to the callback right after its replacement. */
+        str++;
+        size--;
+    }
+
+    return 0;
+}
+
+
+/* Evaluate an expression returning a status code, store it in ret and bail
+ * out when it is negative. The enclosing function has to provide a variable
+ * named ret and a label named abort. */
+#define MD_CHECK(func)                                                      \
+    do {                                                                    \
+        ret = (func);                                                       \
+        if(ret < 0)                                                         \
+            goto abort;                                                     \
+    } while(0)
+
+
+/* Make sure the temporary buffer ctx->buffer has capacity of at least sz.
+ *
+ * When the buffer has to grow, the new capacity is sz plus one half, bumped
+ * by 128 and rounded down to a multiple of 128. Previous contents are
+ * preserved by realloc().
+ *
+ * On allocation failure the problem is logged, ret is set to -1 and control
+ * jumps to the label abort; the enclosing function has to provide ctx, ret
+ * and that label. Note that sz is evaluated several times.
+ */
+#define MD_TEMP_BUFFER(sz)                                                  \
+    do {                                                                    \
+        if((sz) > ctx->alloc_buffer) {                                      \
+            CHAR* new_buffer;                                               \
+            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
+                                                                            \
+            new_buffer = realloc(ctx->buffer, new_size);                    \
+            if(new_buffer == NULL) {                                        \
+                MD_LOG("realloc() failed.");                                \
+                ret = -1;                                                   \
+                goto abort;                                                 \
+            }                                                               \
+                                                                            \
+            ctx->buffer = new_buffer;                                       \
+            ctx->alloc_buffer = new_size;                                   \
+        }                                                                   \
+    } while(0)
+
+
+#define MD_ENTER_BLOCK(type, arg)                                           \
+    do {                                                                    \
+        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
+        if(ret != 0) {                                                      \
+            MD_LOG("Aborted from enter_block() callback.");                 \
+            goto abort;                                                     \
+        }                                                                   \
+    } while(0)
+
+/* Call the parser's leave_block() callback. A non-zero result is kept in
+ * 'ret' and control jumps to the 'abort' label of the function using the
+ * macro (both 'ret' and 'ctx' must be in scope there). */
+#define MD_LEAVE_BLOCK(type, arg)                                           \
+    do {                                                                    \
+        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
+        if(ret == 0)                                                        \
+            break;                                                          \
+        MD_LOG("Aborted from leave_block() callback.");                     \
+        goto abort;                                                         \
+    } while(0)
+
+/* Call the parser's enter_span() callback. A non-zero result is kept in
+ * 'ret' and control jumps to the 'abort' label of the function using the
+ * macro (both 'ret' and 'ctx' must be in scope there). */
+#define MD_ENTER_SPAN(type, arg)                                            \
+    do {                                                                    \
+        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
+        if(ret == 0)                                                        \
+            break;                                                          \
+        MD_LOG("Aborted from enter_span() callback.");                      \
+        goto abort;                                                         \
+    } while(0)
+
+/* Call the parser's leave_span() callback. A non-zero result is kept in
+ * 'ret' and control jumps to the 'abort' label of the function using the
+ * macro (both 'ret' and 'ctx' must be in scope there). */
+#define MD_LEAVE_SPAN(type, arg)                                            \
+    do {                                                                    \
+        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
+        if(ret == 0)                                                        \
+            break;                                                          \
+        MD_LOG("Aborted from leave_span() callback.");                      \
+        goto abort;                                                         \
+    } while(0)
+
+/* Pass a non-empty run of text to the parser's text() callback. Nothing is
+ * called when 'size' is not greater than zero. A non-zero callback result is
+ * kept in 'ret' and control jumps to the 'abort' label of the function using
+ * the macro (both 'ret' and 'ctx' must be in scope there).
+ *
+ * 'size' is parenthesized in the comparison so that an argument expression
+ * containing lower-precedence operators (e.g. a conditional expression) is
+ * compared as a whole. */
+#define MD_TEXT(type, str, size)                                            \
+    do {                                                                    \
+        if((size) > 0) {                                                    \
+            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
+            if(ret != 0) {                                                  \
+                MD_LOG("Aborted from text() callback.");                    \
+                goto abort;                                                 \
+            }                                                               \
+        }                                                                   \
+    } while(0)
+
+/* Like MD_TEXT(), but the text is routed through
+ * md_text_with_null_replacement() instead of calling the text() callback
+ * directly. Nothing is called when 'size' is not greater than zero. A
+ * non-zero result is kept in 'ret' and control jumps to the 'abort' label of
+ * the function using the macro.
+ *
+ * All macro parameters are parenthesized so that argument expressions are
+ * evaluated as a whole, matching the other callback macros. */
+#define MD_TEXT_INSECURE(type, str, size)                                   \
+    do {                                                                    \
+        if((size) > 0) {                                                    \
+            ret = md_text_with_null_replacement(ctx, (type), (str), (size));\
+            if(ret != 0) {                                                  \
+                MD_LOG("Aborted from text() callback.");                    \
+                goto abort;                                                 \
+            }                                                               \
+        }                                                                   \
+    } while(0)
+
+
+/* Binary search for the line containing the offset 'off' among the
+ * 'n_lines' entries of 'lines' (searched by their 'beg' and 'end' offsets).
+ *
+ * If the offset falls into a gap between two lines, or before the first
+ * line, the following line is returned. Returns NULL if no such line is
+ * found (e.g. the offset lies beyond the end of the last line). */
+static const MD_LINE*
+md_lookup_line(OFF off, const MD_LINE* lines, int n_lines)
+{
+    int first = 0;
+    int last = n_lines - 1;
+
+    while(first <= last) {
+        int mid = (first + last) / 2;
+        const MD_LINE* candidate = &lines[mid];
+
+        if(off < candidate->beg) {
+            /* The offset precedes this line; it is the answer if there is
+             * no earlier line or the earlier line ends at or before 'off'. */
+            last = mid - 1;
+            if(last < 0  ||  lines[last].end <= off)
+                return candidate;
+            continue;
+        }
+
+        if(off <= candidate->end)
+            return candidate;
+
+        first = mid + 1;
+    }
+
+    return NULL;
+}
+
+
+/*************************
+ ***  Unicode Support  ***
+ *************************/
+
+/* Result of case folding a single codepoint, as filled in by
+ * md_get_unicode_fold_info(). */
+typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
+struct MD_UNICODE_FOLD_INFO_tag {
+    unsigned codepoints[3];     /* The folded codepoint(s); only the first n_codepoints are set. */
+    unsigned n_codepoints;      /* Count of valid entries in codepoints[] (1 to 3). */
+};
+
+
+#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
+    /* Binary search over sorted "map" of codepoints. Consecutive sequences
+     * of codepoints may be encoded in the map by just using the
+     * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
+     *
+     * Returns index of the found record in the map (in the case of ranges,
+     * the minimal value is used); or -1 on failure. */
+    /* Binary search in a sorted codepoint map.
+     *
+     * A map entry is either a single codepoint, or one half of a range: the
+     * range start carries the flag 0x40000000 and is immediately followed by
+     * the range end carrying the flag 0x80000000.
+     *
+     * Returns the map index of the matching record (for a range, the index
+     * of its start entry), or -1 if the codepoint is not covered. */
+    static int
+    md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
+    {
+        int lo = 0;
+        int hi = (int) map_size-1;
+
+        while(lo <= hi) {
+            int mid = (lo + hi) / 2;
+            int rec_first = mid;
+            int rec_last = mid;
+
+            /* Widen the probed record to the whole range if 'mid' landed on
+             * either end of one. */
+            if(map[mid] & 0x40000000)
+                rec_last = mid + 1;
+            if(map[mid] & 0x80000000)
+                rec_first = mid - 1;
+
+            if(codepoint < (map[rec_first] & 0x00ffffff)) {
+                hi = rec_first - 1;
+                continue;
+            }
+            if(codepoint > (map[rec_last] & 0x00ffffff)) {
+                lo = rec_last + 1;
+                continue;
+            }
+
+            return rec_first;
+        }
+
+        return -1;
+    }
+
+    /* Tell whether the codepoint is a whitespace character: ASCII codepoints
+     * are classified by ISWHITESPACE_(), anything above by membership in the
+     * Unicode "Zs" table below. Returns non-zero if it is whitespace. */
+    static int
+    md_is_unicode_whitespace__(unsigned codepoint)
+    {
+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
+#define S(cp)               (cp)
+        /* Unicode "Zs" category.
+         * (generated by scripts/build_whitespace_map.py) */
+        static const unsigned WHITESPACE_MAP[] = {
+            S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
+        };
+#undef R
+#undef S
+
+        if(codepoint > 0x7f)
+            return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);
+
+        /* ASCII is by far the most common case; CommonMark also asks for a
+         * few extra whitespace characters in this range, so it does not go
+         * through the table. */
+        return ISWHITESPACE_(codepoint);
+    }
+
+    /* Tell whether the codepoint is a punctuation character: ASCII
+     * codepoints are classified by ISPUNCT_(), anything above by membership
+     * in the PUNCT_MAP table below. Returns non-zero if it is punctuation. */
+    static int
+    md_is_unicode_punct__(unsigned codepoint)
+    {
+        /* R() emits a flagged (range start, range end) pair and S() a single
+         * codepoint, in the encoding md_unicode_bsearch__() expects. */
+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
+#define S(cp)               (cp)
+        /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
+         * (generated by scripts/build_punct_map.py) */
+        static const unsigned PUNCT_MAP[] = {
+            R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
+            R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
+            S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
+            S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
+            R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
+            R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
+            R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
+            R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
+            R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
+            R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
+            R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
+            R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
+            R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
+            R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
+            R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
+            S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
+            R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
+            S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
+            S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
+            R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
+            R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
+            S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
+            R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
+            R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
+            R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
+            R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
+            R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
+            R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
+            S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
+            R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
+        };
+#undef R
+#undef S
+
+        /* The ASCII ones are the most frequently used ones, also CommonMark
+         * specification requests few more in this range. */
+        if(codepoint <= 0x7f)
+            return ISPUNCT_(codepoint);
+
+        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
+    }
+
+    /* Tell whether the codepoint is a symbol character: ASCII codepoints are
+     * classified by ISSYMBOL_(), anything above by membership in the
+     * SYMBOL_MAP table below. Returns non-zero if it is a symbol. */
+    static int
+    md_is_unicode_symbol__(unsigned codepoint)
+    {
+        /* R() emits a flagged (range start, range end) pair and S() a single
+         * codepoint, in the encoding md_unicode_bsearch__() expects. */
+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
+#define S(cp)               (cp)
+        /* Unicode "Sm", "Sc", "Sk", "So" categories.
+         * (generated by scripts/build_symbol_map.py) */
+        static const unsigned SYMBOL_MAP[] = {
+            S(0x0024), S(0x002b), R(0x003c,0x003e), S(0x005e), S(0x0060), S(0x007c), S(0x007e), R(0x00a2,0x00a6),
+            R(0x00a8,0x00a9), S(0x00ac), R(0x00ae,0x00b1), S(0x00b4), S(0x00b8), S(0x00d7), S(0x00f7),
+            R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375),
+            R(0x0384,0x0385), S(0x03f6), S(0x0482), R(0x058d,0x058f), R(0x0606,0x0608), S(0x060b), R(0x060e,0x060f),
+            S(0x06de), S(0x06e9), R(0x06fd,0x06fe), S(0x07f6), R(0x07fe,0x07ff), R(0x09f2,0x09f3), R(0x09fa,0x09fb),
+            S(0x0af1), S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c7f), S(0x0d4f), S(0x0d79), S(0x0e3f), R(0x0f01,0x0f03),
+            S(0x0f13), R(0x0f15,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38), R(0x0fbe,0x0fc5),
+            R(0x0fc7,0x0fcc), R(0x0fce,0x0fcf), R(0x0fd5,0x0fd8), R(0x109e,0x109f), R(0x1390,0x1399), S(0x166d),
+            S(0x17db), S(0x1940), R(0x19de,0x19ff), R(0x1b61,0x1b6a), R(0x1b74,0x1b7c), S(0x1fbd), R(0x1fbf,0x1fc1),
+            R(0x1fcd,0x1fcf), R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), S(0x2044), S(0x2052),
+            R(0x207a,0x207c), R(0x208a,0x208c), R(0x20a0,0x20bf), R(0x2100,0x2101), R(0x2103,0x2106),
+            R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129),
+            S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b),
+            R(0x2190,0x2307), R(0x230c,0x2328), R(0x232b,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9),
+            R(0x2500,0x2767), R(0x2794,0x27c4), R(0x27c7,0x27e5), R(0x27f0,0x2982), R(0x2999,0x29d7),
+            R(0x29dc,0x29fb), R(0x29fe,0x2b73), R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea),
+            R(0x2e50,0x2e51), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5), R(0x2ff0,0x2ffb), S(0x3004),
+            R(0x3012,0x3013), S(0x3020), R(0x3036,0x3037), R(0x303e,0x303f), R(0x309b,0x309c), R(0x3190,0x3191),
+            R(0x3196,0x319f), R(0x31c0,0x31e3), R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f),
+            R(0x328a,0x32b0), R(0x32c0,0x33ff), R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa700,0xa716),
+            R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b), R(0xa836,0xa839), R(0xaa77,0xaa79), S(0xab5b),
+            R(0xab6a,0xab6b), S(0xfb29), R(0xfbb2,0xfbc1), R(0xfdfc,0xfdfd), S(0xfe62), R(0xfe64,0xfe66), S(0xfe69),
+            S(0xff04), S(0xff0b), R(0xff1c,0xff1e), S(0xff3e), S(0xff40), S(0xff5c), S(0xff5e), R(0xffe0,0xffe6),
+            R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10137,0x1013f), R(0x10179,0x10189), R(0x1018c,0x1018e),
+            R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), R(0x10877,0x10878), S(0x10ac8), S(0x1173f),
+            R(0x11fd5,0x11ff1), R(0x16b3c,0x16b3f), S(0x16b45), S(0x1bc9c), R(0x1d000,0x1d0f5), R(0x1d100,0x1d126),
+            R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184), R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1e8),
+            R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1), S(0x1d6db), S(0x1d6fb), S(0x1d715),
+            S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9), S(0x1d7c3), R(0x1d800,0x1d9ff),
+            R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83), R(0x1da85,0x1da86), S(0x1e14f), S(0x1e2ff),
+            S(0x1ecac), S(0x1ecb0), S(0x1ed2e), R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093),
+            R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf), R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad),
+            R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b), R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265),
+            R(0x1f300,0x1f6d7), R(0x1f6e0,0x1f6ec), R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f773), R(0x1f780,0x1f7d8),
+            R(0x1f7e0,0x1f7eb), R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887),
+            R(0x1f890,0x1f8ad), R(0x1f8b0,0x1f8b1), R(0x1f900,0x1f978), R(0x1f97a,0x1f9cb), R(0x1f9cd,0x1fa53),
+            R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa74), R(0x1fa78,0x1fa7a), R(0x1fa80,0x1fa86), R(0x1fa90,0x1faa8),
+            R(0x1fab0,0x1fab6), R(0x1fac0,0x1fac2), R(0x1fad0,0x1fad6), R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca)
+        };
+
+#undef R
+#undef S
+
+        /* The ASCII ones are the most frequently used ones. */
+        if(codepoint <= 0x7f)
+            return ISSYMBOL_(codepoint);
+
+        return (md_unicode_bsearch__(codepoint, SYMBOL_MAP, SIZEOF_ARRAY(SYMBOL_MAP)) >= 0);
+    }
+
+    /* Compute the case folding of a codepoint.
+     *
+     * On return, info->codepoints[0 .. info->n_codepoints-1] hold the folded
+     * sequence (1 to 3 codepoints). ASCII upper-case letters are folded to
+     * lower case directly; other codepoints are looked up in the maps below.
+     * A codepoint without any mapping folds to itself. */
+    static void
+    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
+    {
+        /* R() emits a flagged (range start, range end) pair and S() a single
+         * codepoint, in the encoding md_unicode_bsearch__() expects. */
+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
+#define S(cp)               (cp)
+        /* Case folding maps. FOLD_MAP_<n> lists the codepoints whose folding
+         * consists of <n> codepoints; FOLD_MAP_<n>_DATA holds, for each map
+         * entry, the <n> replacement codepoints at the same entry index.
+         * (generated by scripts/build_folding_map.py) */
+        static const unsigned FOLD_MAP_1[] = {
+            R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
+            R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
+            S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
+            S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
+            R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
+            S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
+            S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
+            R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
+            S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
+            S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
+            S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
+            S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
+            R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
+            R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
+            S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
+            R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
+            R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
+            R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
+            S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
+            S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
+            R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
+            S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
+            S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
+            S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
+            R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
+            S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
+            R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
+            R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
+        };
+        static const unsigned FOLD_MAP_1_DATA[] = {
+            0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
+            0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
+            0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
+            0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
+            0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
+            0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
+            0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
+            0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
+            0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
+            0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
+            0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
+            0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
+            0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
+            0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
+            0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
+            0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
+            0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
+            0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
+            0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
+            0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
+            0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
+            0x1e943
+        };
+        static const unsigned FOLD_MAP_2[] = {
+            S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
+            S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
+            R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
+            S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
+            S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
+            S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
+        };
+        static const unsigned FOLD_MAP_2_DATA[] = {
+            0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
+            0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
+            0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
+            0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
+            0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
+            0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
+            0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
+            0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
+        };
+        static const unsigned FOLD_MAP_3[] = {
+            S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
+            S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
+        };
+        static const unsigned FOLD_MAP_3_DATA[] = {
+            0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
+            0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
+            0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
+            0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
+        };
+#undef R
+#undef S
+        /* The maps are tried in this order; n_codepoints is the length of
+         * the folded sequence that each entry of the map produces. */
+        static const struct {
+            const unsigned* map;
+            const unsigned* data;
+            size_t map_size;
+            unsigned n_codepoints;
+        } FOLD_MAP_LIST[] = {
+            { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
+            { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
+            { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
+        };
+
+        int i;
+
+        /* Fast path for ASCII characters. */
+        if(codepoint <= 0x7f) {
+            info->codepoints[0] = codepoint;
+            if(ISUPPER_(codepoint))
+                info->codepoints[0] += 'a' - 'A';
+            info->n_codepoints = 1;
+            return;
+        }
+
+        /* Try to locate the codepoint in any of the maps. */
+        for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
+            int index;
+
+            index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
+            if(index >= 0) {
+                /* Found the mapping. */
+                unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
+                const unsigned* map = FOLD_MAP_LIST[i].map;
+                /* The data array is indexed by map entry index; each entry
+                 * occupies n_codepoints consecutive values. */
+                const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
+
+                memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
+                info->n_codepoints = n_codepoints;
+
+                /* A plain single-codepoint entry compares equal to the
+                 * codepoint; a range start entry never does because of its
+                 * 0x40000000 flag. */
+                if(FOLD_MAP_LIST[i].map[index] != codepoint) {
+                    /* The found mapping maps whole range of codepoints,
+                     * i.e. we have to offset info->codepoints[0] accordingly. */
+                    if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
+                        /* Alternating type of the range: the range start
+                         * folds to its immediate successor, so codepoints
+                         * with the parity of the range start fold to
+                         * codepoint+1 and the others to themselves. */
+                        info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
+                    } else {
+                        /* Range to range kind of mapping: keep the distance
+                         * from the range start. */
+                        info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
+                    }
+                }
+
+                return;
+            }
+        }
+
+        /* No mapping found. Map the codepoint to itself. */
+        info->codepoints[0] = codepoint;
+        info->n_codepoints = 1;
+    }
+#endif
+
+
+#if defined MD4C_USE_UTF16
+    /* UTF-16 surrogate helpers. A code unit is a high surrogate when its top
+     * six bits are 0xd800 (i.e. 0xd800-0xdbff) and a low surrogate when they
+     * are 0xdc00 (i.e. 0xdc00-0xdfff). UTF16_DECODE_SURROGATE() combines the
+     * low ten bits of each half and adds 0x10000 to form the codepoint. */
+    #define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
+    #define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
+    #define UTF16_DECODE_SURROGATE(hi, lo)  (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
+
+    /* Decode the codepoint starting at str[0], where str_size is the count
+     * of code units available.
+     *
+     * A high surrogate immediately followed by a low surrogate is decoded
+     * as one codepoint of two units; anything else (including an unpaired
+     * surrogate) is returned as the single unit str[0]. If p_size is not
+     * NULL, it receives the count of code units consumed (1 or 2). */
+    static unsigned
+    md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
+    {
+        int is_pair = (IS_UTF16_SURROGATE_HI(str[0])  &&
+                       1 < str_size  &&
+                       IS_UTF16_SURROGATE_LO(str[1]));
+
+        if(p_size != NULL)
+            *p_size = (is_pair ? 2 : 1);
+
+        if(is_pair)
+            return UTF16_DECODE_SURROGATE(str[0], str[1]);
+
+        return str[0];
+    }
+
+    /* Decode the codepoint which ends just before the offset 'off', i.e.
+     * the one whose last code unit is CH(off-1).
+     *
+     * If the two preceding code units form a surrogate pair, the combined
+     * codepoint is returned; otherwise the single preceding code unit is
+     * returned as is. The caller must ensure off > 0. */
+    static unsigned
+    md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
+    {
+        /* off > 1 is enough for both units of the pair to exist; this also
+         * covers a pair sitting at the very beginning of the input. */
+        if(off > 1 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
+            return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
+
+        /* The unit before the offset, not the one at the offset (consistent
+         * with md_decode_utf8_before__() and ISUNICODEWHITESPACEBEFORE()). */
+        return CH(off-1);
+    }
+
+    /* Unicode classification macros for the UTF-16 build. The variants with
+     * a trailing underscore take an already decoded codepoint; the others
+     * take an offset into the document and rely on 'ctx' being in scope.
+     *
+     * No whitespace uses surrogates, so no decoding needed here. */
+    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
+    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
+    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))
+
+    /* ISUNICODEPUNCT_() and ISUNICODESYMBOL_() are provided here as well so
+     * that this branch offers the same set of macros as the UTF-8 and the
+     * plain ASCII branches. */
+    #define ISUNICODEPUNCT_(codepoint)      md_is_unicode_punct__(codepoint)
+    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
+    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
+
+    #define ISUNICODESYMBOL_(codepoint)     md_is_unicode_symbol__(codepoint)
+
+    /* Decode the codepoint at str[off] in the UTF-16 build; a thin wrapper
+     * over md_decode_utf16le__(). str_size is the size of the whole string;
+     * *p_char_size (if not NULL) receives the count of code units used.
+     *
+     * NOTE(review): this variant returns int while the UTF-8 and ASCII
+     * variants of md_decode_unicode() return unsigned -- confirm whether the
+     * difference is intended. */
+    static inline int
+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
+    {
+        return md_decode_utf16le__(str+off, str_size-off, p_char_size);
+    }
+#elif defined MD4C_USE_UTF8
+    /* UTF-8 byte classification. IS_UTF8_LEAD<n>() matches the first byte of
+     * an <n>-byte sequence (0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx) and
+     * IS_UTF8_TAIL() matches a continuation byte (10xxxxxx). */
+    #define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
+    #define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
+    #define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
+    #define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
+    #define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
+
+    /* Decode the codepoint starting at str[0], where str_size is the count
+     * of bytes available.
+     *
+     * A lead byte followed by the right number of continuation bytes is
+     * decoded as a 2, 3 or 4 byte sequence. Anything else (ASCII, a stray
+     * continuation byte, or a truncated/malformed sequence) is returned as
+     * the single byte str[0]. If p_size is not NULL, it receives the count
+     * of bytes consumed (1 to 4).
+     *
+     * NOTE(review): the single-byte fallback converts str[0] directly to
+     * unsigned; if CHAR is a signed type, a byte >= 0x80 would yield a very
+     * large value -- confirm this is intended. */
+    static unsigned
+    md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
+    {
+        SZ n_bytes = 1;
+        unsigned codepoint = (unsigned) str[0];
+
+        /* An ASCII byte matches none of the multi-byte lead patterns below,
+         * so it falls straight through to the single-byte result. */
+        if(IS_UTF8_LEAD2(str[0])) {
+            if(1 < str_size && IS_UTF8_TAIL(str[1])) {
+                n_bytes = 2;
+                codepoint = (((unsigned int)str[0] & 0x1f) << 6) |
+                            (((unsigned int)str[1] & 0x3f) << 0);
+            }
+        } else if(IS_UTF8_LEAD3(str[0])) {
+            if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
+                n_bytes = 3;
+                codepoint = (((unsigned int)str[0] & 0x0f) << 12) |
+                            (((unsigned int)str[1] & 0x3f) << 6) |
+                            (((unsigned int)str[2] & 0x3f) << 0);
+            }
+        } else if(IS_UTF8_LEAD4(str[0])) {
+            if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
+                n_bytes = 4;
+                codepoint = (((unsigned int)str[0] & 0x07) << 18) |
+                            (((unsigned int)str[1] & 0x3f) << 12) |
+                            (((unsigned int)str[2] & 0x3f) << 6) |
+                            (((unsigned int)str[3] & 0x3f) << 0);
+            }
+        }
+
+        if(p_size != NULL)
+            *p_size = n_bytes;
+        return codepoint;
+    }
+
+    /* Encode a codepoint into the corresponding UTF-8 byte sequence.
+     *
+     * The buffer 'str' must be large enough for the result (up to 4 bytes).
+     * Returns the number of bytes written to the buffer, or 0 (writing
+     * nothing) if the codepoint is above 0x10FFFF. */
+    static unsigned
+    md_encode_utf8__(unsigned codepoint, CHAR* str )
+    {
+        if(codepoint <= 0x7f) {
+            str[0] = (char)codepoint;
+            return 1;
+        }
+
+        if(codepoint <= 0x7FF) {
+            str[0] = 0xc0 |  (codepoint >> 6);
+            str[1] = 0x80 | ((codepoint >> 0) & 0x3f);
+            return 2;
+        }
+
+        if(codepoint <= 0xFFFF) {
+            str[0] = 0xe0 |  (codepoint >> 12);
+            str[1] = 0x80 | ((codepoint >> 6 ) & 0x3f);
+            str[2] = 0x80 | ((codepoint >> 0 ) & 0x3f);
+            return 3;
+        }
+
+        if(codepoint <= 0x10FFFF) {
+            str[0] = 0xf0 |  (codepoint >> 18);
+            str[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+            str[2] = 0x80 | ((codepoint >> 6 ) & 0x3f);
+            str[3] = 0x80 | ((codepoint >> 0 ) & 0x3f);
+            return 4;
+        }
+
+        return 0;
+    }
+
+    /* Decode the codepoint which ends just before the offset 'off', i.e.
+     * the one whose last byte is CH(off-1).
+     *
+     * The preceding bytes are matched against a 2, 3 and 4 byte sequence (in
+     * this order); if none fits, the single byte CH(off-1) is returned. The
+     * byte CH(off-1) is read unconditionally. */
+    static unsigned
+    md_decode_utf8_before__(MD_CTX* ctx, OFF off)
+    {
+        /* ASCII byte: nothing to decode. */
+        if(IS_UTF8_LEAD1(CH(off-1)))
+            return (unsigned) CH(off-1);
+
+        if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) {
+            return (((unsigned int)CH(off-2) & 0x1f) << 6) |
+                   (((unsigned int)CH(off-1) & 0x3f) << 0);
+        }
+
+        if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) {
+            return (((unsigned int)CH(off-3) & 0x0f) << 12) |
+                   (((unsigned int)CH(off-2) & 0x3f) << 6) |
+                   (((unsigned int)CH(off-1) & 0x3f) << 0);
+        }
+
+        if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) {
+            return (((unsigned int)CH(off-4) & 0x07) << 18) |
+                   (((unsigned int)CH(off-3) & 0x3f) << 12) |
+                   (((unsigned int)CH(off-2) & 0x3f) << 6) |
+                   (((unsigned int)CH(off-1) & 0x3f) << 0);
+        }
+
+        /* Not a well-formed sequence: fall back to the raw byte. */
+        return (unsigned) CH(off-1);
+    }
+
+    /* Unicode classification macros for the UTF-8 build. The variants with
+     * a trailing underscore take an already decoded codepoint; the others
+     * take an offset into the document, decode the codepoint at (or just
+     * before) that offset and rely on 'ctx' being in scope. */
+    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
+    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
+    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
+
+    #define ISUNICODEPUNCT_(codepoint)      md_is_unicode_punct__(codepoint)
+    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
+    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
+
+    #define ISUNICODESYMBOL_(codepoint)     md_is_unicode_symbol__(codepoint)
+
+    /* Decode the codepoint at str[off] in the UTF-8 build; a thin wrapper
+     * over md_decode_utf8__(). str_size is the size of the whole string;
+     * *p_char_size (if not NULL) receives the count of bytes used. */
+    static inline unsigned
+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
+    {
+        return md_decode_utf8__(str+off, str_size-off, p_char_size);
+    }
+
+    /* Encoding-neutral encoding entry point. In this branch it forwards to
+     * md_encode_utf8__() and returns whatever that function returns. */
+    static inline unsigned
+    md_encode_unicode(unsigned codepoint, CHAR* str )
+    {
+        unsigned n_units = md_encode_utf8__(codepoint, str);
+
+        return n_units;
+    }
+#else
+    #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)  /* Fallback branch: plain per-unit classification. */
+    #define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
+    #define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)  /* "Before" is simply the previous unit. */
+
+    #define ISUNICODEPUNCT_(codepoint)      ISPUNCT_(codepoint)  /* Fallback branch: plain per-unit classification. */
+    #define ISUNICODEPUNCT(off)             ISPUNCT(off)
+    #define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)  /* "Before" is simply the previous unit. */
+
+    #define ISUNICODESYMBOL_(codepoint)     ISSYMBOL_(codepoint)  /* Fallback branch: plain per-unit classification. */
+
+    /* Fallback case folding: the result is always exactly one code point,
+     * which is 'codepoint' itself, shifted by ('a' - 'A') when ISUPPER_()
+     * reports it as an upper-case letter. */
+    static inline void
+    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
+    {
+        unsigned folded = codepoint;
+
+        if(ISUPPER_(codepoint))
+            folded += 'a' - 'A';
+
+        info->n_codepoints = 1;
+        info->codepoints[0] = folded;
+    }
+
+    /* Fallback encoder: stores 'codepoint' into the single unit str[0] and
+     * reports that one unit has been written. */
+    static unsigned
+    md_encode_unicode(unsigned codepoint, CHAR* str )
+    {
+        str[0] = codepoint;
+        return 1;
+    }
+
+    /* Fallback decoder: every character occupies exactly one unit, so the
+     * unit at 'off' is returned and *p_size is set to 1. 'str_size' is not
+     * consulted in this branch. */
+    static inline unsigned
+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
+    {
+        const CHAR* unit = str + off;
+
+        *p_size = 1;
+        return (unsigned) *unit;
+    }
+#endif
+
+
+/*************************************
+ ***  Helper string manipulations  ***
+ *************************************/
+
+/* Copy the text between offsets 'beg' and 'end' into 'buffer', walking over
+ * lines[] and writing one 'line_break_replacement_char' for every transition
+ * from one line to the next. The number of CHARs written is stored to *p_size.
+ *
+ * 'beg' is expected to lie on lines[0]. 'n_lines' is not consulted.
+ *
+ * NOTE: Caller is responsible to make sure the buffer is large enough.
+ * (The original author's guidance: given the output is always shorter than
+ * the input, (end - beg) is a good idea of what the caller should allocate.)
+ */
+static void
+md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+               CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
+{
+    const MD_LINE* cur_line = lines;
+    CHAR* out = buffer;
+    OFF off = beg;
+
+    MD_UNUSED(n_lines);
+
+    for(;;) {
+        /* Copy up to the end of this line or up to 'end', whichever is first. */
+        OFF stop = (end < cur_line->end) ? end : cur_line->end;
+
+        for(; off < stop; off++)
+            *out++ = CH(off);
+
+        if(off >= end)
+            break;
+
+        /* Line transition: emit the replacement and jump to the next line. */
+        *out++ = line_break_replacement_char;
+        cur_line++;
+        off = cur_line->beg;
+    }
+
+    *p_size = (MD_SIZE)(out - buffer);
+}
+
+/* Allocating front-end of md_merge_lines(): a buffer of (end - beg) CHARs is
+ * malloc()ed, filled by md_merge_lines() and handed to the caller through
+ * *p_str, together with the merged size in *p_size.
+ *
+ * Returns 0 on success, -1 (after logging) when malloc() fails; in the latter
+ * case neither *p_str nor *p_size is written.
+ */
+static int
+md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+                    CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
+{
+    CHAR* merged = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
+
+    if(merged != NULL) {
+        md_merge_lines(ctx, beg, end, lines, n_lines,
+                    line_break_replacement_char, merged, p_size);
+        *p_str = merged;
+        return 0;
+    }
+
+    MD_LOG("malloc() failed.");
+    return -1;
+}
+
+/* Starting at 'off', advance over every character of 'label' which is either
+ * Unicode whitespace or a newline, and return the offset of the first
+ * character which is neither (or 'size' if there is none). */
+static OFF
+md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
+{
+    while(off < size) {
+        SZ char_size;
+        unsigned codepoint = md_decode_unicode(label, off, size, &char_size);
+
+        if(ISUNICODEWHITESPACE_(codepoint)  ||  ISNEWLINE_(label[off]))
+            off += char_size;
+        else
+            break;
+    }
+
+    return off;
+}
+
+
+/******************************
+ ***  Recognizing raw HTML  ***
+ ******************************/
+
+/* md_is_html_tag() may be called when processing inlines (inline raw HTML)
+ * or when breaking document to blocks (checking for start of HTML block type 7).
+ *
+ * When breaking document to blocks, we do not yet know line boundaries, but
+ * in that case the whole tag has to live on a single line. We distinguish this
+ * by n_lines == 0.
+ *
+ * 'beg' is the offset of the opening '<'. On success, TRUE is returned and
+ * *p_end is set to the offset just behind the closing '>'. FALSE is returned
+ * when no complete tag is recognized or when its closing '>' lies at or
+ * beyond 'max_end'; *p_end is not written in that case.
+ */
+static int
+md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    int attr_state;
+    OFF off = beg;
+    OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;  /* No line info: bounded by input size; a newline stops the scan below. */
+    int i = 0;  /* Index of the line currently being scanned. */
+
+    MD_ASSERT(CH(beg) == _T('<'));
+
+    if(off + 1 >= line_end)  /* Nothing follows the '<' on this line. */
+        return FALSE;
+    off++;
+
+    /* For parsing attributes, we need a little state automaton below.
+     * State -1: no attributes are allowed.
+     * State 0: attribute could follow after some whitespace.
+     * State 1: after a whitespace (attribute name may follow).
+     * State 2: after attribute name ('=' MAY follow).
+     * State 3: after '=' (value specification MUST follow).
+     * State 41: in middle of unquoted attribute value.
+     * State 42: in middle of single-quoted attribute value.
+     * State 43: in middle of double-quoted attribute value.
+     */
+    attr_state = 0;
+
+    if(CH(off) == _T('/')) {
+        /* Closer tag "</ ... >". No attributes may be present. */
+        attr_state = -1;
+        off++;
+    }
+
+    /* Tag name */
+    if(off >= line_end  ||  !ISALPHA(off))
+        return FALSE;
+    off++;
+    while(off < line_end  &&  (ISALNUM(off)  ||  CH(off) == _T('-')))
+        off++;
+
+    /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
+     * and final '>'. */
+    while(1) {
+        while(off < line_end  &&  !ISNEWLINE(off)) {
+            if(attr_state > 40) {
+                if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
+                    attr_state = 0;
+                    off--;  /* Put the char back for re-inspection in the new state. */
+                } else if(attr_state == 42 && CH(off) == _T('\'')) {
+                    attr_state = 0;
+                } else if(attr_state == 43 && CH(off) == _T('"')) {
+                    attr_state = 0;
+                }
+                off++;
+            } else if(ISWHITESPACE(off)) {
+                if(attr_state == 0)
+                    attr_state = 1;
+                off++;
+            } else if(attr_state <= 2 && CH(off) == _T('>')) {
+                /* End. */
+                goto done;
+            } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
+                /* End with digraph '/>' */
+                off++;
+                goto done;
+            } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
+                off++;
+                /* Attribute name */
+                while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
+                    off++;
+                attr_state = 2;
+            } else if(attr_state == 2 && CH(off) == _T('=')) {
+                /* Attribute assignment sign */
+                off++;
+                attr_state = 3;
+            } else if(attr_state == 3) {
+                /* Expecting start of attribute value. */
+                if(CH(off) == _T('"'))
+                    attr_state = 43;
+                else if(CH(off) == _T('\''))
+                    attr_state = 42;
+                else if(!ISANYOF(off, _T("\"'=<>`"))  &&  !ISNEWLINE(off))
+                    attr_state = 41;
+                else
+                    return FALSE;
+                off++;
+            } else {
+                /* Anything unexpected. */
+                return FALSE;
+            }
+        }
+
+        /* We have to be on a single line. See definition of start condition
+         * of HTML block, type 7. */
+        if(n_lines == 0)
+            return FALSE;
+
+        i++;  /* The tag continues on the next line (inline case only). */
+        if(i >= n_lines)
+            return FALSE;
+
+        off = lines[i].beg;
+        line_end = lines[i].end;
+
+        if(attr_state == 0  ||  attr_state == 41)  /* The line break acts as whitespace: it ends an unquoted value and lets a name follow. */
+            attr_state = 1;
+
+        if(off >= max_end)
+            return FALSE;
+    }
+
+done:
+    if(off >= max_end)
+        return FALSE;
+
+    *p_end = off+1;  /* 'off' sits on the final '>' here. */
+    return TRUE;
+}
+
+/* Search for the closing sequence 'str' (of 'len' CHARs) starting at 'beg',
+ * line by line, never matching across a line end nor beyond 'max_end'.
+ *
+ * On success returns TRUE and sets *p_end just behind the match. On failure
+ * returns FALSE and records in *p_scan_horizon how far the scan got, which
+ * allows a later call for the same kind of closer to give up immediately
+ * when the range is already known to contain no match.
+ */
+static int
+md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
+                        const MD_LINE* lines, int n_lines,
+                        OFF beg, OFF max_end, OFF* p_end,
+                        OFF* p_scan_horizon)
+{
+    OFF pos = beg;
+    int line_index = 0;
+
+    /* We have already scanned the range up to the max_end so we know
+     * there is nothing to see. */
+    if(pos < *p_scan_horizon  &&  *p_scan_horizon >= max_end - len)
+        return FALSE;
+
+    for(;;) {
+        /* Try every position where the whole closer still fits. */
+        for(; pos + len <= lines[line_index].end  &&  pos + len <= max_end; pos++) {
+            if(md_ascii_eq(STR(pos), str, len)) {
+                *p_end = pos + len;
+                return TRUE;
+            }
+        }
+
+        line_index++;
+        if(pos >= max_end  ||  line_index >= n_lines)
+            break;
+
+        pos = lines[line_index].beg;
+    }
+
+    /* Failure: remember where we stopped. */
+    *p_scan_horizon = pos;
+    return FALSE;
+}
+
+/* Recognize an HTML comment starting with the '<' at 'beg'.
+ *
+ * The opener "<!--" must be on the first line and must not be directly
+ * followed by ">" or "->". On success returns TRUE with *p_end set just
+ * behind the closing "-->"; otherwise returns FALSE.
+ */
+static int
+md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    const OFF first_line_end = lines[0].end;
+    OFF off = beg;
+
+    MD_ASSERT(CH(beg) == _T('<'));
+
+    if(off + 4 >= first_line_end)
+        return FALSE;
+    if(!(CH(off+1) == _T('!')  &&  CH(off+2) == _T('-')  &&  CH(off+3) == _T('-')))
+        return FALSE;
+    off += 4;
+
+    /* ">" and "->" must not follow the opening. */
+    if(off < first_line_end  &&  CH(off) == _T('>'))
+        return FALSE;
+    if(off+1 < first_line_end  &&  CH(off) == _T('-')  &&  CH(off+1) == _T('>'))
+        return FALSE;
+
+    /* HTML comment must not contain "--", so we scan just for "--" instead
+     * of "-->" and verify manually that '>' follows. */
+    if(!md_scan_for_html_closer(ctx, _T("--"), 2,
+                lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
+        return FALSE;
+
+    if(*p_end >= max_end  ||  CH(*p_end) != _T('>'))
+        return FALSE;
+
+    *p_end = *p_end + 1;
+    return TRUE;
+}
+
+/* Recognize an HTML processing instruction "<? ... ?>" starting at 'beg'.
+ * The "<?" opener must be followed by at least one more character on the
+ * first line; the rest is a search for the closing "?>". */
+static int
+md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    if(beg + 2 >= lines[0].end  ||  CH(beg+1) != _T('?'))
+        return FALSE;
+
+    return md_scan_for_html_closer(ctx, _T("?>"), 2,
+                lines, n_lines, beg + 2, max_end, p_end, &ctx->html_proc_instr_horizon);
+}
+
+/* Recognize an HTML declaration "<!NAME ... >" starting at 'beg'.
+ *
+ * NAME is a non-empty run of ASCII letters on the first line which, unless
+ * it reaches the end of that line, must be followed by whitespace. The
+ * remainder is a search for the closing '>'.
+ */
+static int
+md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    const OFF first_line_end = lines[0].end;
+    OFF off = beg + 2;
+
+    if(beg + 2 >= first_line_end)
+        return FALSE;
+    if(CH(beg+1) != _T('!'))
+        return FALSE;
+
+    /* Declaration name. */
+    if(off >= first_line_end  ||  !ISALPHA(off))
+        return FALSE;
+    do {
+        off++;
+    } while(off < first_line_end  &&  ISALPHA(off));
+
+    if(off < first_line_end  &&  !ISWHITESPACE(off))
+        return FALSE;
+
+    return md_scan_for_html_closer(ctx, _T(">"), 1,
+                lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
+}
+
+/* Recognize a CDATA section "<![CDATA[ ... ]]>" starting at 'beg'.
+ *
+ * The opener must be on the first line and be followed by at least one more
+ * character there. On success returns TRUE with *p_end set just behind the
+ * closing "]]>"; otherwise returns FALSE.
+ */
+static int
+md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    static const CHAR open_str[] = _T("<![CDATA[");
+    static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
+
+    OFF off = beg;
+
+    if(off + open_size >= lines[0].end)
+        return FALSE;
+    /* memcmp() counts bytes, not CHARs: scale the length so that the whole
+     * opener is compared even when CHAR is wider than one byte. */
+    if(memcmp(STR(off), open_str, open_size * sizeof(CHAR)) != 0)
+        return FALSE;
+    off += open_size;
+
+    /* NOTE(review): the "- 2" adjustment below is kept exactly as it was; its
+     * intent is not evident from this function alone. */
+    if(lines[n_lines-1].end < max_end)
+        max_end = lines[n_lines-1].end - 2;
+
+    return md_scan_for_html_closer(ctx, _T("]]>"), 3,
+                lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
+}
+
+/* Try all raw HTML recognizers, in a fixed order (tag, comment, processing
+ * instruction, declaration, CDATA), for the '<' at 'beg'. The first one which
+ * succeeds determines *p_end. Returns 1 if any of them matched, 0 otherwise. */
+static int
+md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
+{
+    int matched;
+
+    MD_ASSERT(CH(beg) == _T('<'));
+
+    matched = md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end);
+    if(!matched)
+        matched = md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end);
+    if(!matched)
+        matched = md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end);
+    if(!matched)
+        matched = md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end);
+    if(!matched)
+        matched = md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end);
+
+    /* Normalize to 0/1 exactly like a chain of '||' would. */
+    return (matched != 0);
+}
+
+
+/****************************
+ ***  Recognizing Entity  ***
+ ****************************/
+
+/* Check the digits of a hexadecimal character reference (the part behind
+ * "&#x"). Accepts 1 to 6 hexadecimal digits starting at 'beg'; on success
+ * *p_end is set behind the last digit and TRUE is returned. */
+static int
+md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF off;
+    OFF n_digits;
+
+    MD_UNUSED(ctx);
+
+    for(off = beg; off < max_end  &&  ISXDIGIT_(text[off])  &&  off - beg <= 8; off++)
+        ;
+
+    n_digits = off - beg;
+    if(n_digits < 1  ||  n_digits > 6)
+        return FALSE;
+
+    *p_end = off;
+    return TRUE;
+}
+
+/* Check the digits of a decimal character reference (the part behind "&#").
+ * Accepts 1 to 7 decimal digits starting at 'beg'; on success *p_end is set
+ * behind the last digit and TRUE is returned. */
+static int
+md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF off;
+    OFF n_digits;
+
+    MD_UNUSED(ctx);
+
+    for(off = beg; off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8; off++)
+        ;
+
+    n_digits = off - beg;
+    if(n_digits < 1  ||  n_digits > 7)
+        return FALSE;
+
+    *p_end = off;
+    return TRUE;
+}
+
+/* Check the name of a named entity (the part behind '&'). The name has to
+ * start with a letter, continue with letters or digits and be 2 to 48
+ * characters long; on success *p_end is set behind the name and TRUE is
+ * returned. */
+static int
+md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF off = beg;
+    OFF name_len;
+
+    MD_UNUSED(ctx);
+
+    if(off >= max_end  ||  !ISALPHA_(text[off]))
+        return FALSE;
+    off++;
+
+    while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
+        off++;
+
+    name_len = off - beg;
+    if(name_len < 2  ||  name_len > 48)
+        return FALSE;
+
+    *p_end = off;
+    return TRUE;
+}
+
+/* Recognize an entity or character reference in 'text' starting with the '&'
+ * at 'beg': "&#x...;" (hexadecimal), "&#...;" (decimal) or "&name;". On
+ * success returns TRUE and sets *p_end behind the terminating ';'. */
+static int
+md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF off = beg + 1;      /* First character behind the '&'. */
+    int contents_ok;
+
+    MD_ASSERT(text[beg] == _T('&'));
+
+    if(off+2 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X'))) {
+        contents_ok = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
+    } else if(off+1 < max_end  &&  text[off] == _T('#')) {
+        contents_ok = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
+    } else {
+        contents_ok = md_is_named_entity_contents(ctx, text, off, max_end, &off);
+    }
+
+    /* The contents must be directly followed by the terminating ';'. */
+    if(!contents_ok  ||  off >= max_end  ||  text[off] != _T(';'))
+        return FALSE;
+
+    *p_end = off+1;
+    return TRUE;
+}
+
+/* Convenience form of md_is_entity_str() operating on the document text
+ * (ctx->text). */
+static inline int
+md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
+{
+    const CHAR* doc_text = ctx->text;
+
+    return md_is_entity_str(ctx, doc_text, beg, max_end, p_end);
+}
+
+
+/******************************
+ ***  Attribute Management  ***
+ ******************************/
+
+typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
+struct MD_ATTRIBUTE_BUILD_tag {  /* Working storage behind an MD_ATTRIBUTE; released by md_free_attribute(). */
+    CHAR* text;                     /* Attribute text; heap-allocated only on the non-trivial build paths. */
+    MD_TEXTTYPE* substr_types;      /* Type of each sub-string; points to trivial_types[] or to the heap. */
+    OFF* substr_offsets;            /* Start offset of each sub-string plus one final end offset. */
+    int substr_count;               /* Number of sub-string records in use. */
+    int substr_alloc;               /* Record capacity; md_free_attribute() frees only when this is > 0. */
+    MD_TEXTTYPE trivial_types[1];   /* In-place storage for the single-sub-string case. */
+    OFF trivial_offsets[2];         /* In-place storage: start offset and end offset. */
+};
+
+
+#define MD_BUILD_ATTR_NO_ESCAPES    0x0001  /* md_build_attribute() flag: keep backslash escapes verbatim. */
+
+/* Append one sub-string record (its type and its start offset) to 'build',
+ * growing substr_types[] and substr_offsets[] when they are full.
+ *
+ * Returns 0 on success, -1 when realloc() fails. Even in the failure case
+ * every pointer stored in 'build' stays valid and owned by 'build', so the
+ * caller can safely release everything with md_free_attribute().
+ */
+static int
+md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
+                            MD_TEXTTYPE type, OFF off)
+{
+    if(build->substr_count >= build->substr_alloc) {
+        MD_TEXTTYPE* new_substr_types;
+        OFF* new_substr_offsets;
+
+        /* Grow by 50 %, starting at 8 records. The capacity is bumped before
+         * the allocations on purpose: md_free_attribute() releases the build
+         * only when substr_alloc > 0, and that has to hold on failure too. */
+        build->substr_alloc = (build->substr_alloc > 0
+                ? build->substr_alloc + build->substr_alloc / 2
+                : 8);
+        new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
+                                    build->substr_alloc * sizeof(MD_TEXTTYPE));
+        if(new_substr_types == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+        /* Commit right away. After a successful realloc() the old pointer
+         * must not be used anymore, so 'build' may never keep it, not even
+         * when the second allocation below fails. */
+        build->substr_types = new_substr_types;
+
+        /* Note +1 to reserve space for final offset (== raw_size). */
+        new_substr_offsets = (OFF*) realloc(build->substr_offsets,
+                                    (build->substr_alloc+1) * sizeof(OFF));
+        if(new_substr_offsets == NULL) {
+            MD_LOG("realloc() failed.");
+            /* The (already grown) types array stays owned by 'build'; freeing
+             * it here would make md_free_attribute() free it a second time. */
+            return -1;
+        }
+        build->substr_offsets = new_substr_offsets;
+    }
+
+    build->substr_types[build->substr_count] = type;
+    build->substr_offsets[build->substr_count] = off;
+    build->substr_count++;
+    return 0;
+}
+
+/* Release the heap memory held by 'build'. Nothing is freed when
+ * substr_alloc is not positive; the arrays are freed only if they do not
+ * point to the in-place trivial_types[] / trivial_offsets[] storage. */
+static void
+md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
+{
+    MD_UNUSED(ctx);
+
+    if(build->substr_alloc <= 0)
+        return;
+
+    free(build->text);
+
+    if(build->substr_types != build->trivial_types)
+        free(build->substr_types);
+
+    if(build->substr_offsets != build->trivial_offsets)
+        free(build->substr_offsets);
+}
+
+/* Build an attribute consisting of a single MD_TEXT_NORMAL sub-string which
+ * refers directly to 'raw_text' (no copy, no allocation). For an empty text
+ * attr->text is set to NULL. Always returns 0. */
+static int
+md_build_trivial_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                           MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    MD_UNUSED(ctx);
+
+    /* Start from a clean slate; substr_alloc stays 0, i.e. nothing to free. */
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+
+    /* The only sub-string covers the whole text: [0, raw_size). */
+    build->trivial_types[0] = MD_TEXT_NORMAL;
+    build->trivial_offsets[0] = 0;
+    build->trivial_offsets[1] = raw_size;
+
+    build->substr_types = build->trivial_types;
+    build->substr_offsets = build->trivial_offsets;
+    build->substr_count = 1;
+    build->substr_alloc = 0;
+
+    attr->text = (CHAR*) (raw_size ? raw_text : NULL);
+    attr->size = raw_size;
+    attr->substr_types = build->substr_types;
+    attr->substr_offsets = build->substr_offsets;
+    return 0;
+}
+
+/* Write the decimal representation of the 16-bit unsigned value 'n' to
+ * 'dest'.
+ *
+ * The destination buffer must have room for at least 5 characters. The
+ * output is NOT nul-terminated. Returns the number of characters written.
+ */
+static int
+md_int16_to_str(unsigned short n, CHAR* dest){
+    static const CHAR numbers[] = _T("0123456789");
+    int n_digits;
+    CHAR* tail;
+
+    /* Single digit: direct table lookup. */
+    if(n < 10) {
+        dest[0] = numbers[n];
+        return 1;
+    }
+
+    if(n < 100)
+        n_digits = 2;
+    else if(n < 1000)
+        n_digits = 3;
+    else if(n < 10000)
+        n_digits = 4;
+    else
+        n_digits = 5;
+
+    /* Emit the digits from the least significant one, back to front. */
+    for(tail = dest + n_digits; n != 0; n /= 10)
+        *--tail = '0' + ( n % 10);
+
+    return n_digits;
+}
+
+/* Build an attribute whose text is 'raw_text' followed by '-' and by the
+ * decimal representation of 'postfix' (values above 0xffff are clamped to
+ * 0xffff). The result is a single MD_TEXT_NORMAL sub-string.
+ *
+ * The text lives in a newly malloc()ed buffer owned by 'build'; release it
+ * with md_free_attribute(). Returns 0 on success, -1 on allocation failure.
+ */
+static int
+md_build_attribute_postfix(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                   unsigned postfix, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    OFF off;
+    const SZ MAX_POSTFIX_SIZE = 5; // but also add 1 for the '-'
+
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+    build->substr_types = build->trivial_types;
+    build->substr_offsets = build->trivial_offsets;
+    build->substr_count = 1;
+    /* Non-zero so that md_free_attribute() releases build->text. */
+    build->substr_alloc = 1;
+    build->trivial_types[0] = MD_TEXT_NORMAL;
+    build->trivial_offsets[0] = 0;
+    off = raw_size;
+    if (postfix > 0xffff) {
+        // postfix is not allowed to be bigger than 65535 (2^16 - 1), so maximum 5 char
+        postfix  = 0xffff;
+    }
+
+    build->text = (CHAR*) malloc((raw_size + MAX_POSTFIX_SIZE+1) * sizeof(CHAR));
+    if(build->text == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+
+    // copy original text; memcpy() counts bytes, so scale by the CHAR size
+    // (and do not touch raw_text at all when there is nothing to copy)
+    if(raw_size > 0)
+        memcpy(build->text, raw_text, raw_size * sizeof(CHAR));
+    // append postfix
+    build->text[off++] = _T('-');
+    off+= md_int16_to_str(postfix, &build->text[off]);
+
+    attr->text = build->text;
+    build->trivial_offsets[1] = off;
+    attr->size = off;
+    attr->substr_offsets = build->substr_offsets;
+    attr->substr_types = build->substr_types;
+    return 0;
+
+abort:
+    md_free_attribute(ctx, build);
+    return -1;
+}
+
+/* Build an attribute from 'raw_text', splitting it into sub-strings of type
+ * MD_TEXT_NORMAL, MD_TEXT_ENTITY and MD_TEXT_NULLCHAR.
+ *
+ * If the raw text contains no backslash, no ampersand and no NUL character,
+ * the attribute refers directly to 'raw_text' and nothing is allocated.
+ * Otherwise the text is copied into a malloc()ed buffer, with the backslash
+ * of an escape (backslash followed by punctuation or a newline) dropped
+ * unless MD_BUILD_ATTR_NO_ESCAPES is set in 'flags'.
+ *
+ * Returns 0 on success. On failure 'build' is released and -1 is returned.
+ * After success the caller releases 'build' with md_free_attribute().
+ */
+static int
+md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                   unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
+{
+    OFF raw_off, off;
+    int is_trivial;
+    int ret = 0;
+
+    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
+
+    /* If there is no backslash and no ampersand, build trivial attribute
+     * without any malloc(). */
+    is_trivial = TRUE;
+    for(raw_off = 0; raw_off < raw_size; raw_off++) {
+        if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
+            is_trivial = FALSE;
+            break;
+        }
+    }
+
+    if(is_trivial) {
+        build->text = (CHAR*) (raw_size ? raw_text : NULL);
+        build->substr_types = build->trivial_types;
+        build->substr_offsets = build->trivial_offsets;
+        build->substr_count = 1;
+        build->substr_alloc = 0;
+        build->trivial_types[0] = MD_TEXT_NORMAL;
+        build->trivial_offsets[0] = 0;
+        build->trivial_offsets[1] = raw_size;
+        off = raw_size;
+    } else {
+        /* The output never gets longer than the input: characters are only
+         * copied or (for escapes) dropped. */
+        build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
+        if(build->text == NULL) {
+            MD_LOG("malloc() failed.");
+            goto abort;
+        }
+
+        raw_off = 0;
+        off = 0;
+
+        while(raw_off < raw_size) {
+            if(raw_text[raw_off] == _T('\0')) {
+                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
+                /* memcpy() counts bytes: copy one whole CHAR. */
+                memcpy(build->text + off, raw_text + raw_off, sizeof(CHAR));
+                off++;
+                raw_off++;
+                continue;
+            }
+
+            if(raw_text[raw_off] == _T('&')) {
+                OFF ent_end;
+
+                if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
+                    MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
+                    /* memcpy() counts bytes: scale the entity length by the
+                     * CHAR size so that the whole entity is copied. */
+                    memcpy(build->text + off, raw_text + raw_off,
+                           (ent_end - raw_off) * sizeof(CHAR));
+                    off += ent_end - raw_off;
+                    raw_off = ent_end;
+                    continue;
+                }
+            }
+
+            /* Open a new normal sub-string unless we are already in one. */
+            if(build->substr_count == 0  ||  build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
+                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
+
+            /* Skip the backslash of an escape; the escaped char is copied below. */
+            if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
+               raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
+               (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
+                raw_off++;
+
+            build->text[off++] = raw_text[raw_off++];
+        }
+        /* Final end offset; md_build_attr_append_substr() reserves room for it. */
+        build->substr_offsets[build->substr_count] = off;
+    }
+
+    attr->text = build->text;
+    attr->size = off;
+    attr->substr_offsets = build->substr_offsets;
+    attr->substr_types = build->substr_types;
+    return 0;
+
+abort:
+    md_free_attribute(ctx, build);
+    return -1;
+}
+
+/*********************************************
+ ***  Dictionary of Reference Definitions  ***
+ *********************************************/
+
+#define MD_FNV1A_BASE       2166136261U  /* Initial hash value (the 32-bit FNV offset basis). */
+#define MD_FNV1A_PRIME      16777619U  /* Per-byte multiplier (the 32-bit FNV prime). */
+
+/* Fold 'n' bytes of 'data' into the hash 'base': for every byte, the hash is
+ * XORed with the byte and then multiplied by MD_FNV1A_PRIME. Passing the
+ * returned value as 'base' of another call continues the same hash. */
+static inline unsigned
+md_fnv1a(unsigned base, const void* data, size_t n)
+{
+    const unsigned char* cur = (const unsigned char*) data;
+    const unsigned char* const stop = cur + n;
+    unsigned hash = base;
+
+    for(; cur != stop; cur++)
+        hash = (hash ^ *cur) * MD_FNV1A_PRIME;
+
+    return hash;
+}
+
+
+struct MD_REF_DEF_tag {  /* One reference definition: label, destination and title. */
+    CHAR* label;
+    CHAR* title;
+    CHAR* dest;
+    unsigned hash;      /* Label hash; filled in by md_build_ref_def_hashtable(). */
+    SZ label_size;
+    SZ title_size;
+    SZ dest_size;
+    unsigned char label_needs_free : 1;  /* NOTE(review): presumably set when 'label' is a separate heap copy - confirm. */
+    unsigned char title_needs_free : 1;  /* NOTE(review): presumably set when 'title' is a separate heap copy - confirm. */
+};
+
+/* Label equivalence is quite complicated with regards to whitespace and case
+ * folding. This complicates computing a hash of it as well as direct comparison
+ * of two labels. */
+
+/* Compute a hash of a link label which ignores leading whitespace, hashes
+ * every run of whitespace/newlines as a single ' ' and hashes all other
+ * characters through their case-folded code points. */
+static unsigned
+md_link_label_hash(const CHAR* label, SZ size)
+{
+    unsigned hash = MD_FNV1A_BASE;
+    OFF off = md_skip_unicode_whitespace(label, 0, size);
+
+    while(off < size) {
+        SZ char_size;
+        unsigned codepoint = md_decode_unicode(label, off, size, &char_size);
+
+        if(ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off])) {
+            /* The whole whitespace run contributes one space. */
+            codepoint = ' ';
+            hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
+            off = md_skip_unicode_whitespace(label, off, size);
+        } else {
+            MD_UNICODE_FOLD_INFO fold_info;
+
+            md_get_unicode_fold_info(codepoint, &fold_info);
+            hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
+            off += char_size;
+        }
+    }
+
+    return hash;
+}
+
+/* Load the folded code point(s) of the next label character at 'off' into
+ * 'fold_info' and return the offset where the following character starts.
+ *
+ * Whitespace, as well as the end of the label, is reported as a single ' '
+ * and any whitespace which follows is skipped.
+ */
+static OFF
+md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
+                                 MD_UNICODE_FOLD_INFO* fold_info)
+{
+    unsigned codepoint = 0;
+    SZ char_size;
+    /* Treat end of a link label as a whitespace. */
+    int as_whitespace = (off >= size);
+
+    if(!as_whitespace) {
+        codepoint = md_decode_unicode(label, off, size, &char_size);
+        off += char_size;
+        /* Treat all whitespace as equivalent */
+        as_whitespace = ISUNICODEWHITESPACE_(codepoint);
+    }
+
+    if(as_whitespace) {
+        fold_info->codepoints[0] = _T(' ');
+        fold_info->n_codepoints = 1;
+        return md_skip_unicode_whitespace(label, off, size);
+    }
+
+    /* Get real folding info. */
+    md_get_unicode_fold_info(codepoint, fold_info);
+    return off;
+}
+
+static int
+md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
+{   /* Compare two link labels through their folded code points. Leading
+     * whitespace is skipped; whitespace and the end of a label are normalized
+     * by md_link_label_cmp_load_fold_info(). Returns 0 when the labels are
+     * equivalent, otherwise the difference (b - a) of the first pair of
+     * folded code points which differ.
+     */
+    OFF a_off;
+    OFF b_off;
+    MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
+    MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
+    OFF a_fi_off = 0;  /* Index of the next not yet compared code point in a_fi. */
+    OFF b_fi_off = 0;  /* Index of the next not yet compared code point in b_fi. */
+    int cmp;
+
+    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
+    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
+    while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
+          b_off < b_size || b_fi_off < b_fi.n_codepoints)
+    {
+        /* If needed, load fold info for next char. */
+        if(a_fi_off >= a_fi.n_codepoints) {
+            a_fi_off = 0;
+            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
+        }
+        if(b_fi_off >= b_fi.n_codepoints) {
+            b_fi_off = 0;
+            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
+        }
+
+        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];  /* Note the operand order: b minus a. */
+        if(cmp != 0)
+            return cmp;
+
+        a_fi_off++;
+        b_fi_off++;
+    }
+
+    return 0;
+}
+
+typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
+struct MD_REF_DEF_LIST_tag {  /* "Complex" hash table bucket holding several ref. defs. */
+    int n_ref_defs;      /* Number of used items in ref_defs[]. */
+    int alloc_ref_defs;  /* Allocated capacity of ref_defs[]. */
+    MD_REF_DEF* ref_defs[];  /* Valid items always  point into ctx->ref_defs[] */
+};
+
+/* Comparison callback over pointers to (MD_REF_DEF*): orders primarily by
+ * the label hash and, for equal hashes, by md_link_label_cmp(). */
+static int
+md_ref_def_cmp(const void* a, const void* b)
+{
+    const MD_REF_DEF* lhs = *(const MD_REF_DEF**)a;
+    const MD_REF_DEF* rhs = *(const MD_REF_DEF**)b;
+
+    if(lhs->hash != rhs->hash)
+        return (lhs->hash < rhs->hash) ? -1 : +1;
+
+    return md_link_label_cmp(lhs->label, lhs->label_size, rhs->label, rhs->label_size);
+}
+
+/* Sorting callback: same ordering as md_ref_def_cmp(), but records which
+ * compare as equal are further ordered by the address of the MD_REF_DEF they
+ * point to, which makes the outcome of the sort stable. */
+static int
+md_ref_def_cmp_for_sort(const void* a, const void* b)
+{
+    const MD_REF_DEF* a_ref;
+    const MD_REF_DEF* b_ref;
+    int cmp = md_ref_def_cmp(a, b);
+
+    if(cmp != 0)
+        return cmp;
+
+    /* Ensure stability of the sorting. */
+    a_ref = *(const MD_REF_DEF**)a;
+    b_ref = *(const MD_REF_DEF**)b;
+
+    if(a_ref < b_ref)
+        return -1;
+    if(a_ref > b_ref)
+        return +1;
+    return 0;
+}
+
+static int
+md_build_ref_def_hashtable(MD_CTX* ctx)
+{   /* Build ctx->ref_def_hashtable from ctx->ref_defs[0 .. n_ref_defs-1] and
+     * store the label hash of every definition in its 'hash' member.
+     *
+     * Returns 0 on success (also when there are no definitions at all, in
+     * which case nothing is allocated) and -1 when an allocation fails.
+     * NOTE(review): on failure the partially built table is left in 'ctx';
+     * presumably it is released by md_free_ref_def_hashtable() - confirm
+     * with the caller.
+     */
+    int i, j;
+
+    if(ctx->n_ref_defs == 0)
+        return 0;
+
+    ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;  /* 1.25 buckets per definition. */
+    ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
+    if(ctx->ref_def_hashtable == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+    memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
+
+    /* Each member of ctx->ref_def_hashtable[] can be:
+     *  -- NULL,
+     *  -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
+     *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
+     *     such MD_REF_DEFs.
+     */
+    for(i = 0; i < ctx->n_ref_defs; i++) {
+        MD_REF_DEF* def = &ctx->ref_defs[i];
+        void* bucket;
+        MD_REF_DEF_LIST* list;
+
+        def->hash = md_link_label_hash(def->label, def->label_size);
+        bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
+
+        if(bucket == NULL) {
+            /* The bucket is empty. Make it just point to the def. */
+            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
+            continue;
+        }
+
+        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {  /* Points into ctx->ref_defs[] => simple bucket. */
+            /* The bucket already contains one ref. def. Lets see whether it
+             * is the same label (ref. def. duplicate) or different one
+             * (hash conflict). */
+            MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
+
+            if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
+                /* Duplicate label: Ignore this ref. def. */
+                continue;
+            }
+
+            /* Make the bucket complex, i.e. able to hold more ref. defs. */
+            list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
+            if(list == NULL) {
+                MD_LOG("malloc() failed.");
+                goto abort;
+            }
+            list->ref_defs[0] = old_def;
+            list->ref_defs[1] = def;
+            list->n_ref_defs = 2;
+            list->alloc_ref_defs = 2;
+            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
+            continue;
+        }
+
+        /* Append the def to the complex bucket list.
+         *
+         * Note in this case we ignore potential duplicates to avoid expensive
+         * iterating over the complex bucket. Below, we revisit all the complex
+         * buckets and handle it more cheaply after the complex bucket contents
+         * is sorted. */
+        list = (MD_REF_DEF_LIST*) bucket;
+        if(list->n_ref_defs >= list->alloc_ref_defs) {
+            int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;  /* Grow the capacity by 50 %. */
+            MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
+                        sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
+            if(list_tmp == NULL) {
+                MD_LOG("realloc() failed.");
+                goto abort;
+            }
+            list = list_tmp;
+            list->alloc_ref_defs = alloc_ref_defs;
+            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;  /* realloc() may have moved the list. */
+        }
+
+        list->ref_defs[list->n_ref_defs] = def;
+        list->n_ref_defs++;
+    }
+
+    /* Sort the complex buckets so we can use bsearch() with them. */
+    for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
+        void* bucket = ctx->ref_def_hashtable[i];
+        MD_REF_DEF_LIST* list;
+
+        if(bucket == NULL)
+            continue;
+        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)  /* Simple bucket: nothing to sort. */
+            continue;
+
+        list = (MD_REF_DEF_LIST*) bucket;
+        qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
+
+        /* Disable all duplicates in the complex bucket by forcing all such
+         * records to point to the 1st such ref. def. I.e. no matter which
+         * record is found during the lookup, it will always point to the right
+         * ref. def. in ctx->ref_defs[]. */
+        for(j = 1; j < list->n_ref_defs; j++) {
+            if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
+                list->ref_defs[j] = list->ref_defs[j-1];
+        }
+    }
+
+    return 0;
+
+abort:
+    return -1;
+}
+
+/* Release the reference definition hash table: every complex bucket (i.e.
+ * every non-NULL bucket which does not point into ctx->ref_defs[]) is freed,
+ * followed by the table itself. Does nothing if the table was never
+ * allocated. */
+static void
+md_free_ref_def_hashtable(MD_CTX* ctx)
+{
+    int i;
+
+    if(ctx->ref_def_hashtable == NULL)
+        return;
+
+    for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
+        void* bucket = ctx->ref_def_hashtable[i];
+        int is_simple;
+
+        if(bucket == NULL)
+            continue;
+
+        /* Simple buckets point directly into ctx->ref_defs[]; not ours to free. */
+        is_simple = (ctx->ref_defs <= (MD_REF_DEF*) bucket  &&
+                     (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs);
+        if(!is_simple)
+            free(bucket);
+    }
+
+    free(ctx->ref_def_hashtable);
+}
+
+/* Find the reference definition whose label matches the given one.
+ *
+ * The label is hashed with md_link_label_hash() to select a bucket of
+ * ctx->ref_def_hashtable[]. The bucket is either empty, a direct pointer to
+ * a single record in ctx->ref_defs[], or a sorted MD_REF_DEF_LIST which is
+ * searched with bsearch().
+ *
+ * Returns the matching definition, or NULL if there is none (or if the
+ * hashtable is empty).
+ */
+static const MD_REF_DEF*
+md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
+{
+    unsigned hash;
+    void* bucket;
+
+    if(ctx->ref_def_hashtable_size == 0)
+        return NULL;
+
+    hash = md_link_label_hash(label, label_size);
+    bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
+
+    if(bucket == NULL) {
+        return NULL;
+    } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
+        /* Simple bucket: it points directly to the only candidate. */
+        const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
+
+        if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
+            return def;
+        else
+            return NULL;
+    } else {
+        /* Complex bucket: binary search in the sorted list. */
+        MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
+        MD_REF_DEF key_buf;
+        const MD_REF_DEF* key = &key_buf;
+        const MD_REF_DEF** ret;
+
+        /* Only the fields needed for the comparison are filled in. The hash
+         * was already computed above for the very same label, so reuse it
+         * instead of hashing the label a second time. */
+        key_buf.label = (CHAR*) label;
+        key_buf.label_size = label_size;
+        key_buf.hash = hash;
+
+        ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
+                    list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
+        if(ret != NULL)
+            return *ret;
+        else
+            return NULL;
+    }
+}
+
+
+/***************************
+ ***  Recognizing Links  ***
+ ***************************/
+
+/* Note this code is partially shared between processing inlines and blocks
+ * as reference definitions and links share some helper parser functions.
+ */
+
+/* Attributes of a recognized link: its destination and (optional) title.
+ * Both strings are described by a pointer and an explicit size. */
+typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
+struct MD_LINK_ATTR_tag {
+    CHAR* dest;             /* Link destination; its length is dest_size. */
+    SZ dest_size;
+
+    CHAR* title;            /* Link title, or NULL when there is none. */
+    SZ title_size;
+    int title_needs_free;   /* TRUE when title was allocated by
+                             * md_merge_lines_alloc() (multi-line title);
+                             * FALSE otherwise. */
+};
+
+
+/* Check whether a link label ('[' ... ']') starts at the offset beg.
+ *
+ * The label may span multiple of the given lines. On success TRUE is
+ * returned and the outputs are set:
+ *  -- *p_end: Offset just after the closing ']'.
+ *  -- *p_beg_line_index: Index of the line where the label contents start.
+ *  -- *p_end_line_index: Index of the line holding the closing ']'.
+ *  -- *p_contents_beg, *p_contents_end: The label contents with leading and
+ *     trailing whitespace left out.
+ *
+ * FALSE is returned if there is no '[' at beg, if an unescaped '[' is found
+ * inside, if the label has no non-whitespace contents, if it is too long
+ * (see the len counter below), or if no closing ']' is found in the lines.
+ */
+static int
+md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
+                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
+                 OFF* p_contents_beg, OFF* p_contents_end)
+{
+    OFF off = beg;
+    OFF contents_beg = 0;
+    OFF contents_end = 0;   /* Zero also means "no contents seen yet". */
+    int line_index = 0;
+    int len = 0;            /* Counts processed characters and line breaks. */
+
+    if(CH(off) != _T('['))
+        return FALSE;
+    off++;
+
+    while(1) {
+        OFF line_end = lines[line_index].end;
+
+        while(off < line_end) {
+            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
+                /* Backslash escape: both characters belong to the contents. */
+                if(contents_end == 0) {
+                    contents_beg = off;
+                    *p_beg_line_index = line_index;
+                }
+                contents_end = off + 2;
+                off += 2;
+            } else if(CH(off) == _T('[')) {
+                /* Unescaped '[' is not allowed inside a link label. */
+                return FALSE;
+            } else if(CH(off) == _T(']')) {
+                if(contents_beg < contents_end) {
+                    /* Success. */
+                    *p_contents_beg = contents_beg;
+                    *p_contents_end = contents_end;
+                    *p_end = off+1;
+                    *p_end_line_index = line_index;
+                    return TRUE;
+                } else {
+                    /* Link label must have some non-whitespace contents. */
+                    return FALSE;
+                }
+            } else {
+                unsigned codepoint;
+                SZ char_size;
+
+                /* Only non-whitespace characters extend the contents range,
+                 * so surrounding whitespace gets trimmed. */
+                codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
+                if(!ISUNICODEWHITESPACE_(codepoint)) {
+                    if(contents_end == 0) {
+                        contents_beg = off;
+                        *p_beg_line_index = line_index;
+                    }
+                    contents_end = off + char_size;
+                }
+
+                off += char_size;
+            }
+
+            /* Give up on labels longer than 999 counted units. */
+            len++;
+            if(len > 999)
+                return FALSE;
+        }
+
+        /* Continue on the next line; the line break counts into the length. */
+        line_index++;
+        len++;
+        if(line_index < n_lines)
+            off = lines[line_index].beg;
+        else
+            break;
+    }
+
+    return FALSE;
+}
+
+/* Recognize a link destination of the form '<' ... '>' starting at beg and
+ * not reaching beyond max_end.
+ *
+ * On success, TRUE is returned, *p_contents and *p_contents_size describe the
+ * text between the angle brackets and *p_end is the offset just after '>'.
+ * A new line or an unescaped '<' inside makes the destination invalid.
+ */
+static int
+md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
+                         CHAR** p_contents, SZ* p_contents_size)
+{
+    OFF pos;
+
+    if(beg >= max_end)
+        return FALSE;
+    if(CH(beg) != _T('<'))
+        return FALSE;
+
+    for(pos = beg + 1; pos < max_end; pos++) {
+        /* Escaped punctuation: skip the backslash here, the loop increment
+         * then skips the escaped character. */
+        if(CH(pos) == _T('\\')  &&  pos+1 < max_end  &&  ISPUNCT(pos+1)) {
+            pos++;
+            continue;
+        }
+
+        if(CH(pos) == _T('>')) {
+            /* Closing bracket found. */
+            *p_contents = (CHAR*)STR(beg+1);
+            *p_contents_size = pos - (beg+1);
+            *p_end = pos+1;
+            return TRUE;
+        }
+
+        if(ISNEWLINE(pos)  ||  CH(pos) == _T('<'))
+            return FALSE;
+    }
+
+    /* The closing '>' is missing. */
+    return FALSE;
+}
+
+/* Recognize a link destination which is not enclosed in '<' and '>'.
+ *
+ * The destination starts at beg and ends at the first whitespace or control
+ * character, at an unbalanced ')' or at max_end. It must not be empty and
+ * all unescaped parentheses in it must be balanced.
+ *
+ * On success, TRUE is returned, *p_contents and *p_contents_size describe the
+ * destination and *p_end is the offset just after it.
+ */
+static int
+md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
+                         CHAR** p_contents, SZ* p_contents_size)
+{
+    OFF pos;
+    int depth = 0;      /* Current nesting level of '(' ... ')'. */
+
+    for(pos = beg; pos < max_end; pos++) {
+        /* Escaped punctuation: skip the backslash here, the loop increment
+         * then skips the escaped character. */
+        if(CH(pos) == _T('\\')  &&  pos+1 < max_end  &&  ISPUNCT(pos+1)) {
+            pos++;
+            continue;
+        }
+
+        if(ISWHITESPACE(pos) || ISCNTRL(pos))
+            break;
+
+        /* Balanced pairs of unescaped parentheses may be part of the
+         * destination. The nesting is limited to 32 levels to protect us from
+         * https://github.com/jgm/cmark/issues/214 */
+        if(CH(pos) == _T('(')) {
+            depth++;
+            if(depth > 32)
+                return FALSE;
+        } else if(CH(pos) == _T(')')) {
+            if(depth == 0)
+                break;
+            depth--;
+        }
+    }
+
+    if(pos == beg  ||  depth != 0)
+        return FALSE;
+
+    /* Success. */
+    *p_contents = (CHAR*)STR(beg);
+    *p_contents_size = pos - beg;
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Recognize a link destination starting at beg and not reaching beyond
+ * max_end. Destinations starting with '<' are handled by
+ * md_is_link_destination_A(), all others by md_is_link_destination_B().
+ *
+ * On success, TRUE is returned and *p_end, *p_contents and *p_contents_size
+ * are set by the respective helper.
+ */
+static inline int
+md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
+                       CHAR** p_contents, SZ* p_contents_size)
+{
+    /* An empty range cannot hold any destination. Both helpers return FALSE
+     * in this case anyway, but checking it here avoids reading the character
+     * at beg when it lies outside of the allowed range. */
+    if(beg >= max_end)
+        return FALSE;
+
+    if(CH(beg) == _T('<'))
+        return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents, p_contents_size);
+    else
+        return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents, p_contents_size);
+}
+
+/* Check whether a link title follows at the offset beg.
+ *
+ * The title has to be separated from beg by some whitespace and/or one line
+ * break, and it is enclosed in "...", '...' or (...). It may span multiple
+ * lines.
+ *
+ * On success TRUE is returned and the outputs are set:
+ *  -- *p_end: Offset just after the closing character.
+ *  -- *p_beg_line_index: Index of the line where the title starts.
+ *  -- *p_end_line_index: Index of the line where the title ends.
+ *  -- *p_contents_beg, *p_contents_end: The text between the delimiters.
+ *
+ * Note *p_beg_line_index and *p_contents_beg may be written even when FALSE
+ * is returned.
+ */
+static int
+md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
+                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
+                 OFF* p_contents_beg, OFF* p_contents_end)
+{
+    OFF off = beg;
+    CHAR closer_char;
+    int line_index = 0;
+
+    /* White space with up to one line break. */
+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
+        off++;
+    if(off >= lines[line_index].end) {
+        line_index++;
+        if(line_index >= n_lines)
+            return FALSE;
+        off = lines[line_index].beg;
+    }
+    /* Nothing was skipped, i.e. there is no separator before the title. */
+    if(off == beg)
+        return FALSE;
+
+    *p_beg_line_index = line_index;
+
+    /* First char determines how to detect end of it. */
+    switch(CH(off)) {
+        case _T('"'):   closer_char = _T('"'); break;
+        case _T('\''):  closer_char = _T('\''); break;
+        case _T('('):   closer_char = _T(')'); break;
+        default:        return FALSE;
+    }
+    off++;
+
+    *p_contents_beg = off;
+
+    while(line_index < n_lines) {
+        OFF line_end = lines[line_index].end;
+
+        while(off < line_end) {
+            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
+                /* Backslash escape: together with the off++ below, the
+                 * escaped character is skipped too. */
+                off++;
+            } else if(CH(off) == closer_char) {
+                /* Success. */
+                *p_contents_end = off;
+                *p_end = off+1;
+                *p_end_line_index = line_index;
+                return TRUE;
+            } else if(closer_char == _T(')')  &&  CH(off) == _T('(')) {
+                /* ()-style title cannot contain (unescaped '(')) */
+                return FALSE;
+            }
+
+            off++;
+        }
+
+        /* NOTE(review): off is not reset to lines[line_index].beg when moving
+         * to the next line, so scanning simply continues from the end of the
+         * previous one -- confirm this is intended. */
+        line_index++;
+    }
+
+    return FALSE;
+}
+
+/* Make sure ctx->ref_defs[] has room for at least one more record, i.e. that
+ * ctx->ref_defs[ctx->n_ref_defs] can be written to. Note ctx->n_ref_defs
+ * itself is not changed here.
+ *
+ * Returns 0 on success, -1 on failure (out of memory). On failure, the
+ * array and its recorded capacity are left untouched.
+ */
+static int
+md_push_ref_def(MD_CTX* ctx)
+{
+    if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
+        MD_REF_DEF* new_defs;
+        size_t new_alloc;
+
+        /* Grow by one half; start with 16 records. */
+        new_alloc = (ctx->alloc_ref_defs > 0
+                ? (size_t) ctx->alloc_ref_defs + (size_t) ctx->alloc_ref_defs / 2
+                : 16);
+
+        /* Guard the size computation below against overflow. */
+        if(new_alloc > ((size_t) -1) / sizeof(MD_REF_DEF)) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, new_alloc * sizeof(MD_REF_DEF));
+        if(new_defs == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        /* Commit the new capacity only now, when the buffer really has it. */
+        ctx->ref_defs = new_defs;
+        ctx->alloc_ref_defs = new_alloc;
+    }
+    return 0;
+}
+
+/* Check whether the given lines start with a link reference definition,
+ * i.e. a link label followed by ':', a link destination and an optional
+ * link title.
+ *
+ * Returns 0 if it is not a reference definition.
+ *
+ * Returns N > 0 if it is a reference definition. N then corresponds to the
+ * number of lines forming it. In this case the definition is stored for
+ * resolving any links referring to it.
+ *
+ * Returns -1 in case of an error (out of memory).
+ */
+static int
+md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    OFF label_contents_beg;
+    OFF label_contents_end;
+    int label_contents_line_index = -1;
+    int label_is_multiline = FALSE;
+    CHAR* dest_contents;
+    SZ dest_contents_size;
+    OFF title_contents_beg;
+    OFF title_contents_end;
+    int title_contents_line_index;
+    int title_is_multiline = FALSE;
+    OFF off;
+    int line_index = 0;
+    int tmp_line_index;
+    MD_REF_DEF* def = NULL;
+    int ret = 0;
+
+    /* Link label. */
+    if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
+                &off, &label_contents_line_index, &line_index,
+                &label_contents_beg, &label_contents_end))
+        return FALSE;
+    label_is_multiline = (label_contents_line_index != line_index);
+
+    /* Colon. */
+    if(off >= lines[line_index].end  ||  CH(off) != _T(':'))
+        return FALSE;
+    off++;
+
+    /* Optional white space with up to one line break. */
+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
+        off++;
+    if(off >= lines[line_index].end) {
+        line_index++;
+        if(line_index >= n_lines)
+            return FALSE;
+        off = lines[line_index].beg;
+    }
+
+    /* Link destination. */
+    if(!md_is_link_destination(ctx, off, lines[line_index].end,
+                &off, &dest_contents, &dest_contents_size))
+        return FALSE;
+
+    /* (Optional) title. Note we interpret it as an title only if nothing
+     * more follows on its last line. */
+    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
+                &off, &title_contents_line_index, &tmp_line_index,
+                &title_contents_beg, &title_contents_end)
+        &&  off >= lines[line_index + tmp_line_index].end)
+    {
+        /* The line indexes reported by md_is_link_title() are relative to
+         * lines + line_index; convert them to be relative to lines. */
+        title_is_multiline = (tmp_line_index != title_contents_line_index);
+        title_contents_line_index += line_index;
+        line_index += tmp_line_index;
+    } else {
+        /* Not a title. */
+        title_is_multiline = FALSE;
+        title_contents_beg = off;
+        title_contents_end = off;
+        title_contents_line_index = 0;
+    }
+
+    /* Nothing more can follow on the last line. */
+    if(off < lines[line_index].end)
+        return FALSE;
+
+    /* So, it _is_ a reference definition. Remember it. */
+    MD_CHECK(md_push_ref_def(ctx));
+    def = &ctx->ref_defs[ctx->n_ref_defs];
+    memset(def, 0, sizeof(MD_REF_DEF));
+
+    /* Multi-line label is merged into a newly allocated buffer (with the
+     * lines joined by a space); single-line one refers to its original
+     * location. */
+    if(label_is_multiline) {
+        MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
+                    lines + label_contents_line_index, n_lines - label_contents_line_index,
+                    _T(' '), &def->label, &def->label_size));
+        def->label_needs_free = TRUE;
+    } else {
+        def->label = (CHAR*) STR(label_contents_beg);
+        def->label_size = label_contents_end - label_contents_beg;
+    }
+
+    /* Ditto for the title, but its lines are joined by a new line. */
+    if(title_is_multiline) {
+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
+                    lines + title_contents_line_index, n_lines - title_contents_line_index,
+                    _T('\n'), &def->title, &def->title_size));
+        def->title_needs_free = TRUE;
+    } else {
+        def->title = (CHAR*) STR(title_contents_beg);
+        def->title_size = title_contents_end - title_contents_beg;
+    }
+
+    def->dest = dest_contents;
+    def->dest_size = dest_contents_size;
+
+    /* Success. */
+    ctx->n_ref_defs++;
+    return line_index + 1;
+
+abort:
+    /* Failure. The record was not counted into ctx->n_ref_defs yet, so any
+     * buffers allocated for it have to be released here. */
+    if(def != NULL  &&  def->label_needs_free)
+        free(def->label);
+    if(def != NULL  &&  def->title_needs_free)
+        free(def->title);
+    return ret;
+}
+
+/* Check whether the brackets [beg, end) form a reference to a known
+ * reference definition.
+ *
+ * The text between the brackets ('[' or '![' at the beginning, ']' at the
+ * end) is used as the label for the lookup. If a definition is found, its
+ * destination and title are stored into *attr and non-zero is returned.
+ * Zero is returned if there is no such definition. If merging of
+ * a multi-line label fails, the error code is propagated via MD_CHECK().
+ */
+static int
+md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                     OFF beg, OFF end, MD_LINK_ATTR* attr)
+{
+    const MD_REF_DEF* def;
+    const MD_LINE* beg_line;
+    int is_multiline;
+    CHAR* label;
+    SZ label_size;
+    int ret;
+
+    MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
+    MD_ASSERT(CH(end-1) == _T(']'));
+
+    /* Strip the enclosing brackets ("![" in the case of an image). */
+    if(CH(beg) == _T('!'))
+        beg += 2;
+    else
+        beg += 1;
+    end--;
+
+    /* Does the label fit into the line where it begins? */
+    beg_line = md_lookup_line(beg, lines, n_lines);
+    is_multiline = (end > beg_line->end);
+
+    if(!is_multiline) {
+        label = (CHAR*) STR(beg);
+        label_size = end - beg;
+    } else {
+        MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
+                 (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
+    }
+
+    def = md_lookup_ref_def(ctx, label, label_size);
+    ret = (def != NULL);
+    if(ret) {
+        attr->dest = def->dest;
+        attr->dest_size = def->dest_size;
+        attr->title = def->title;
+        attr->title_size = def->title_size;
+        attr->title_needs_free = FALSE;
+    }
+
+    /* The merged label was only needed for the lookup. */
+    if(is_multiline)
+        free(label);
+
+abort:
+    return ret;
+}
+
+/* Check whether an inline link specification, i.e. '(' followed by an
+ * optional link destination, an optional link title and ')', starts at the
+ * offset beg.
+ *
+ * On success, TRUE is returned, *attr is filled and *p_end is set to the
+ * offset just after the closing ')'. FALSE is returned if it is not an
+ * inline link specification. If merging of a multi-line title fails, the
+ * function ends through the abort label (presumably with the error code
+ * stored in ret by MD_CHECK() -- the macro is not defined in this part of
+ * the file).
+ */
+static int
+md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                       OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
+{
+    int line_index = 0;
+    int tmp_line_index;
+    OFF title_contents_beg;
+    OFF title_contents_end;
+    int title_contents_line_index;
+    int title_is_multiline;
+    OFF off = beg;
+    int ret = FALSE;
+
+    /* Find the line where beg lives. (Note the loop relies on beg lying
+     * inside of some of the lines.) */
+    while(off >= lines[line_index].end)
+        line_index++;
+
+    MD_ASSERT(CH(off) == _T('('));
+    off++;
+
+    /* Optional white space with up to one line break. */
+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
+        off++;
+    if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
+        line_index++;
+        if(line_index >= n_lines)
+            return FALSE;
+        off = lines[line_index].beg;
+    }
+
+    /* Link destination may be omitted, but only when not also having a title. */
+    if(off < ctx->size  &&  CH(off) == _T(')')) {
+        attr->dest = (CHAR*)STR(off);
+        attr->dest_size = 0;
+        attr->title = NULL;
+        attr->title_size = 0;
+        attr->title_needs_free = FALSE;
+        off++;
+        *p_end = off;
+        return TRUE;
+    }
+
+    /* Link destination. */
+    if(!md_is_link_destination(ctx, off, lines[line_index].end,
+                        &off, &attr->dest, &attr->dest_size))
+        return FALSE;
+
+    /* (Optional) title. */
+    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
+                &off, &title_contents_line_index, &tmp_line_index,
+                &title_contents_beg, &title_contents_end))
+    {
+        /* The line indexes reported by md_is_link_title() are relative to
+         * lines + line_index; convert them to be relative to lines. */
+        title_is_multiline = (tmp_line_index != title_contents_line_index);
+        title_contents_line_index += line_index;
+        line_index += tmp_line_index;
+    } else {
+        /* Not a title. */
+        title_is_multiline = FALSE;
+        title_contents_beg = off;
+        title_contents_end = off;
+        title_contents_line_index = 0;
+    }
+
+    /* Optional whitespace followed with final ')'. */
+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
+        off++;
+    if (off >= lines[line_index].end  &&  (off >= ctx->size || ISNEWLINE(off))) {
+        line_index++;
+        if(line_index >= n_lines)
+            return FALSE;
+        off = lines[line_index].beg;
+    }
+    if(CH(off) != _T(')'))
+        goto abort;
+    off++;
+
+    /* Store the title: none, a single-line one referring to its original
+     * location, or a multi-line one merged into a newly allocated buffer. */
+    if(title_contents_beg >= title_contents_end) {
+        attr->title = NULL;
+        attr->title_size = 0;
+        attr->title_needs_free = FALSE;
+    } else if(!title_is_multiline) {
+        attr->title = (CHAR*) STR(title_contents_beg);
+        attr->title_size = title_contents_end - title_contents_beg;
+        attr->title_needs_free = FALSE;
+    } else {
+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
+                    lines + title_contents_line_index, n_lines - title_contents_line_index,
+                    _T('\n'), &attr->title, &attr->title_size));
+        attr->title_needs_free = TRUE;
+    }
+
+    *p_end = off;
+    ret = TRUE;
+
+abort:
+    return ret;
+}
+
+/* Release all stored reference definitions, including any label and title
+ * buffers which were allocated for them, and the ctx->ref_defs[] array
+ * itself. */
+static void
+md_free_ref_defs(MD_CTX* ctx)
+{
+    int remaining = ctx->n_ref_defs;
+
+    while(remaining-- > 0) {
+        MD_REF_DEF* def = ctx->ref_defs + remaining;
+
+        /* Only buffers flagged as owned by the record are released. */
+        if(def->title_needs_free)
+            free(def->title);
+        if(def->label_needs_free)
+            free(def->label);
+    }
+
+    free(ctx->ref_defs);
+}
+
+/*********************************************
+ ***  Dictionary of Heading Definitions  ***
+ *********************************************/
+
+/* Record describing one heading and the identifier assigned to it. */
+struct MD_HEADING_DEF_tag {
+    CHAR* heading;
+    SZ heading_size;
+    CHAR* identifier;   /* Points into ctx->identifiers[]; it is set by
+                         * md_build_heading_def_hashtable(), i.e. it is only
+                         * valid after all headings are known. */
+    unsigned hash;      /* Hash of the identifier (md_link_label_hash()). */
+    OFF ident_beg;      /* Offset of the identifier in ctx->identifiers[]. */
+    SZ ident_size;      /* Size of the identifier. */
+    unsigned postfix;   /* Set for headings with a duplicate identifier to
+                         * the postfix of the preceding duplicate plus one. */
+    unsigned level:8;   /* Presumably the heading level -- TODO confirm. */
+};
+
+/* Make sure ctx->heading_defs[] has room for at least one more record, i.e.
+ * that ctx->heading_defs[ctx->n_heading_defs] can be written to. Note
+ * ctx->n_heading_defs itself is not changed here.
+ *
+ * Returns 0 on success, -1 on failure (out of memory). On failure, the
+ * array and its recorded capacity are left untouched.
+ */
+static int
+md_push_heading_def(MD_CTX* ctx)
+{
+    if(ctx->n_heading_defs >= ctx->alloc_heading_defs) {
+        MD_HEADING_DEF* new_defs;
+        size_t new_alloc;
+
+        /* Grow by one half; start with 16 records. */
+        new_alloc = (ctx->alloc_heading_defs > 0
+                ? (size_t) ctx->alloc_heading_defs + (size_t) ctx->alloc_heading_defs / 2
+                : 16);
+
+        /* Guard the size computation below against overflow. */
+        if(new_alloc > ((size_t) -1) / sizeof(MD_HEADING_DEF)) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        new_defs = (MD_HEADING_DEF*) realloc(ctx->heading_defs, new_alloc * sizeof(MD_HEADING_DEF));
+        if(new_defs == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        /* Commit the new capacity only now, when the buffer really has it. */
+        ctx->heading_defs = new_defs;
+        ctx->alloc_heading_defs = new_alloc;
+    }
+    return 0;
+}
+
+/* Make sure the buffer ctx->identifiers[] is large enough to also hold the
+ * identifier of the given heading definition (def->ident_size characters),
+ * and remember in def->ident_beg where the identifier is going to start.
+ *
+ * If the buffer gets moved by realloc(), all reference definitions whose
+ * destination points into the old buffer are updated to point into the new
+ * one.
+ *
+ * Returns 0 on success, -1 on failure (out of memory). On failure, the
+ * buffer and its recorded capacity are left untouched.
+ */
+static int
+md_alloc_identifiers(MD_CTX *ctx, MD_HEADING_DEF* def)
+{
+    size_t needed = (size_t) ctx->identifiers_size + (size_t) def->ident_size;
+
+    if(needed >= (size_t) ctx->alloc_identifiers) {
+        CHAR* new_identifiers;
+        size_t new_alloc = (size_t) ctx->alloc_identifiers;
+
+        /* Grow by one half (starting with 512), and repeat it until the
+         * identifier really fits: a single step may not be enough for
+         * a long identifier. */
+        do {
+            size_t grown = (new_alloc > 0 ? new_alloc + new_alloc / 2 : 512);
+
+            /* No progress or overflow; give up rather than loop forever. */
+            if(grown <= new_alloc  ||  grown > ((size_t) -1) / sizeof(CHAR)) {
+                MD_LOG("realloc() failed.");
+                return -1;
+            }
+            new_alloc = grown;
+        } while(needed >= new_alloc);
+
+        new_identifiers = (CHAR*) realloc(ctx->identifiers, sizeof(CHAR) * new_alloc);
+        if(new_identifiers == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        if(ctx->identifiers != new_identifiers) {
+            /* The buffer has moved: rebuild all ref. defs pointing into it.
+             *
+             * NOTE(review): The range test excludes a destination pointing
+             * exactly to the beginning of the buffer and includes the one
+             * just past its used part; it is kept as it was -- confirm
+             * whether that is intended. */
+            int i;
+
+            for(i = 0; i < ctx->n_ref_defs; i++) {
+                MD_REF_DEF* ref = &ctx->ref_defs[i];
+
+                if(ref->dest > ctx->identifiers
+                && ref->dest <= ctx->identifiers + ctx->identifiers_size) {
+                    ref->dest = new_identifiers + (ref->dest - ctx->identifiers);
+                }
+            }
+        }
+
+        /* Commit the new buffer and its capacity. */
+        ctx->identifiers = new_identifiers;
+        ctx->alloc_identifiers = new_alloc;
+    }
+
+    def->ident_beg = ctx->identifiers_size;
+    return 0;
+}
+
+/* Forward declaration. (The function is static, so it has to be defined
+ * elsewhere in this translation unit.) */
+static int
+md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level);
+
+/* A "complex" bucket of ctx->heading_def_hashtable[]: A growable list of
+ * pointers to all the heading definitions which fall into the same bucket.
+ * It is allocated as one block together with its flexible array member. */
+typedef struct MD_HEADING_DEF_LIST_tag MD_HEADING_DEF_LIST;
+struct MD_HEADING_DEF_LIST_tag {
+    int n_heading_defs;                 /* Count of used items in heading_defs[]. */
+    int alloc_heading_defs;             /* Count of allocated items in heading_defs[]. */
+    MD_HEADING_DEF* heading_defs[];  /* Valid items always point into ctx->heading_defs[] */
+};
+
+/* Comparator of two pointers to MD_HEADING_DEF pointers: The records are
+ * ordered primarily by the hash; records of the same hash are ordered by
+ * comparing their identifiers with md_link_label_cmp(). */
+static int
+md_heading_def_cmp(const void* a, const void* b)
+{
+    const MD_HEADING_DEF* lhs = *(const MD_HEADING_DEF**)a;
+    const MD_HEADING_DEF* rhs = *(const MD_HEADING_DEF**)b;
+
+    if(lhs->hash != rhs->hash)
+        return (lhs->hash < rhs->hash) ? -1 : +1;
+
+    return md_link_label_cmp(lhs->identifier, lhs->ident_size,
+                             rhs->identifier, rhs->ident_size);
+}
+
+/* Comparator for qsort(): The same as md_heading_def_cmp(), but records
+ * which compare as equal are further ordered by their addresses, so that
+ * the result of the sorting is stable. */
+static int
+md_heading_def_cmp_for_sort(const void* a, const void* b)
+{
+    const MD_HEADING_DEF* lhs;
+    const MD_HEADING_DEF* rhs;
+    int cmp = md_heading_def_cmp(a, b);
+
+    if(cmp != 0)
+        return cmp;
+
+    /* Tie: fall back to the order of the records in memory. */
+    lhs = *(const MD_HEADING_DEF**)a;
+    rhs = *(const MD_HEADING_DEF**)b;
+
+    if(lhs == rhs)
+        return 0;
+    return (lhs < rhs) ? -1 : +1;
+}
+
+/* Build ctx->heading_def_hashtable[] from all the records stored in
+ * ctx->heading_defs[].
+ *
+ * For every record, its identifier pointer and hash are computed and the
+ * record is inserted into the hashtable. Finally, records sharing the same
+ * identifier get their postfix numbered.
+ *
+ * Returns 0 on success (also when there are no heading definitions at all),
+ * -1 on failure (out of memory). On failure, the partially built table is
+ * not released here.
+ */
+static int
+md_build_heading_def_hashtable(MD_CTX* ctx)
+{
+    int i, j;
+
+    if(ctx->n_heading_defs == 0)
+        return 0;
+
+    ctx->heading_def_hashtable_size = (ctx->n_heading_defs * 5) / 4;
+    ctx->heading_def_hashtable = malloc(ctx->heading_def_hashtable_size * sizeof(void*));
+    if(ctx->heading_def_hashtable == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+    memset(ctx->heading_def_hashtable, 0, ctx->heading_def_hashtable_size * sizeof(void*));
+
+    /* Each member of ctx->heading_def_hashtable[] can be:
+     *  -- NULL,
+     *  -- pointer to the MD_HEADING_DEF in ctx->heading_defs[], or
+     *  -- pointer to a MD_HEADING_DEF_LIST, which holds multiple pointers to
+     *     such MD_HEADING_DEFs.
+     */
+    for(i = 0; i < ctx->n_heading_defs; i++) {
+        MD_HEADING_DEF* def = &ctx->heading_defs[i];
+        void* bucket;
+        MD_HEADING_DEF_LIST* list;
+
+        /* Compute the identifier hash, reusing the link label hash function. */
+        def->identifier = &ctx->identifiers[def->ident_beg];
+        def->hash = md_link_label_hash(def->identifier, def->ident_size);
+        bucket = ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size];
+
+        if(bucket == NULL) {
+            /* The bucket is empty. Make it just point to the def. */
+            ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = def;
+            continue;
+        }
+
+        /* A pointer into ctx->heading_defs[] means a simple bucket. */
+        if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket  &&  (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs) {
+            /* The bucket already contains one heading def.*/
+            MD_HEADING_DEF* old_def = (MD_HEADING_DEF*) bucket;
+
+            /* Make the bucket complex, i.e. able to hold more heading defs. */
+            list = (MD_HEADING_DEF_LIST*) malloc(sizeof(MD_HEADING_DEF_LIST) + 2 * sizeof(MD_HEADING_DEF*));
+            if(list == NULL) {
+                MD_LOG("malloc() failed.");
+                goto abort;
+            }
+            list->heading_defs[0] = old_def;
+            list->heading_defs[1] = def;
+            list->n_heading_defs = 2;
+            list->alloc_heading_defs = 2;
+            ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list;
+            continue;
+        }
+
+        /* Append the def to the complex bucket list. */
+        list = (MD_HEADING_DEF_LIST*) bucket;
+        if(list->n_heading_defs >= list->alloc_heading_defs) {
+            /* The list is full; grow it by one half. */
+            int alloc_heading_defs = list->alloc_heading_defs + list->alloc_heading_defs / 2;
+            MD_HEADING_DEF_LIST* list_tmp = (MD_HEADING_DEF_LIST*) realloc(list,
+                        sizeof(MD_HEADING_DEF_LIST) + alloc_heading_defs * sizeof(MD_HEADING_DEF*));
+            if(list_tmp == NULL) {
+                MD_LOG("realloc() failed.");
+                goto abort;
+            }
+            list = list_tmp;
+            list->alloc_heading_defs = alloc_heading_defs;
+            /* The list might have been moved by realloc(). */
+            ctx->heading_def_hashtable[def->hash % ctx->heading_def_hashtable_size] = list;
+        }
+
+        list->heading_defs[list->n_heading_defs] = def;
+        list->n_heading_defs++;
+    }
+
+    /* Sort the complex buckets so we can use bsearch() with them. */
+    for(i = 0; i < ctx->heading_def_hashtable_size; i++) {
+        void* bucket = ctx->heading_def_hashtable[i];
+        MD_HEADING_DEF_LIST* list;
+
+        if(bucket == NULL)
+            continue;
+        if(ctx->heading_defs <= (MD_HEADING_DEF*) bucket  &&  (MD_HEADING_DEF*) bucket < ctx->heading_defs + ctx->n_heading_defs)
+            continue;
+
+        list = (MD_HEADING_DEF_LIST*) bucket;
+        qsort(list->heading_defs, list->n_heading_defs, sizeof(MD_HEADING_DEF*), md_heading_def_cmp_for_sort);
+
+        /* Records with the same identifier are adjacent after the sorting.
+         * Give each such duplicate a postfix one higher than the postfix of
+         * the preceding record of the same identifier. */
+        for(j = 1; j < list->n_heading_defs; j++) {
+            if(md_heading_def_cmp(&list->heading_defs[j-1], &list->heading_defs[j]) == 0)
+                list->heading_defs[j]->postfix = list->heading_defs[j-1]->postfix + 1;
+        }
+    }
+
+    return 0;
+
+abort:
+    return -1;
+}
+
+/* Release the heading definition hashtable.
+ *
+ * Only the "complex" buckets (separately allocated lists) are freed here.
+ * Buckets which point directly into ctx->heading_defs[] are not owned by
+ * the table and are left alone.
+ */
+static void
+md_free_heading_def_hashtable(MD_CTX* ctx)
+{
+    int slot;
+
+    /* The table was never built; nothing to do. */
+    if(ctx->heading_def_hashtable == NULL)
+        return;
+
+    for(slot = 0; slot < ctx->heading_def_hashtable_size; slot++) {
+        MD_HEADING_DEF* entry = (MD_HEADING_DEF*) ctx->heading_def_hashtable[slot];
+        int points_into_heading_defs;
+
+        if(entry == NULL)
+            continue;
+
+        points_into_heading_defs = (entry >= ctx->heading_defs  &&
+                                    entry < ctx->heading_defs + ctx->n_heading_defs);
+        if(!points_into_heading_defs)
+            free(entry);
+    }
+
+    free(ctx->heading_def_hashtable);
+}
+
+/* Release the array ctx->heading_defs[]. Only the array itself is freed;
+ * no members of the records are released here. */
+static void
+md_free_heading_defs(MD_CTX* ctx)
+{
+    free(ctx->heading_defs);
+}
+
+/******************************************
+ ***  Processing Inlines (a.k.a Spans)  ***
+ ******************************************/
+
+/* We process inlines in few phases:
+ *
+ * (1) We go through the block text and collect all significant characters
+ *     which may start/end a span or some other significant position into
+ *     ctx->marks[]. Core of this is what md_collect_marks() does.
+ *
+ *     We also do some very brief preliminary context-less analysis, whether
+ *     it might be opener or closer (e.g. of an emphasis span).
+ *
+ *     This speeds the other steps as we do not need to re-iterate over all
+ *     characters anymore.
+ *
+ * (2) We analyze each potential mark type, in order of precedence.
+ *
+ *     In each md_analyze_XXX() function, we re-iterate list of the marks,
+ *     skipping already resolved regions (in preceding precedences) and try to
+ *     resolve them.
+ *
+ * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
+ *       them as resolved.
+ *
+ * (2.2) For range-type marks, we analyze whether the mark could be closer
+ *       and, if yes, whether there is some preceding opener it could satisfy.
+ *
+ *       If not we check whether it could be really an opener and if yes, we
+ *       remember it so subsequent closers may resolve it.
+ *
+ * (3) Finally, when all marks were analyzed, we render the block contents
+ *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
+ *     or ::close_span() whenever we reach a resolved mark.
+ */
+
+
+/* The mark structure.
+ *
+ * '\\': Maybe escape sequence.
+ * '\0': NULL char.
+ *  '*': Maybe (strong) emphasis start/end.
+ *  '_': Maybe (strong) emphasis start/end.
+ *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
+ *  '`': Maybe code span start/end.
+ *  '&': Maybe start of entity.
+ *  ';': Maybe end of entity.
+ *  '<': Maybe start of raw HTML or autolink.
+ *  '>': Maybe end of raw HTML or autolink.
+ *  '[': Maybe start of link label or link text.
+ *  '!': Equivalent of '[' for image.
+ *  ']': Maybe end of link label or link text.
+ *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
+ *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
+ *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
+ *  '|': Maybe table cell boundary (see md_mark_chain()).
+ *  'D': Dummy mark, it reserves a space for splitting a previous mark
+ *       (e.g. emphasis) or to make more space for storing some special data
+ *       related to the preceding mark (e.g. link).
+ *
+ * Note that not all instances of these chars in the text imply creation of the
+ * structure. Only those which have (or may have, after we see more context)
+ * the special meaning.
+ *
+ * (Keep this struct as small as possible to fit as many of them as possible
+ * into a CPU cache line.)
+ */
+struct MD_MARK_tag {
+    OFF beg;    /* Offset where the mark begins. */
+    OFF end;    /* Offset just after the mark. */
+
+    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
+     * of given type 'ch'.
+     *
+     * During resolving, we disconnect from the chain and point to the
+     * corresponding counterpart so opener points to its closer and vice versa.
+     *
+     * Both are indexes into ctx->marks[]; -1 means there is no such mark.
+     */
+    int prev;
+    int next;
+    CHAR ch;                /* The type of the mark; see the list above. */
+    unsigned char flags;    /* Combination of the MD_MARK_xxxx flags. */
+};
+
+/* Mark flags (these apply to ALL mark types).
+ *
+ * Note all the flags have to fit into MD_MARK::flags, which is an unsigned
+ * char; the values below go up to 0x80. */
+#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
+#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
+#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
+#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
+#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */
+
+/* Mark flags specific for various mark types (so they can share bits).
+ *
+ * The MD_MARK_EMPH_MOD3_x values are not independent bits but three distinct
+ * values of the two-bit field MD_MARK_EMPH_MOD3_MASK. (Presumably they encode
+ * the length of the emphasis mark modulo 3 -- TODO confirm.) */
+#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
+#define MD_MARK_EMPH_MOD3_0                 0x40
+#define MD_MARK_EMPH_MOD3_1                 0x80
+#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
+#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
+#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
+#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
+#define MD_MARK_HASNESTEDBRACKETS           0x20  /* For '[' to rule out invalid link labels early */
+
+/* Get the chain of '*' openers corresponding to the given mark flags.
+ *
+ * There is a dedicated chain for each combination of the flag
+ * MD_MARK_EMPH_INTRAWORD and the MD_MARK_EMPH_MOD3_x value; all other bits
+ * of the flags are ignored. The flags are expected to have some of the
+ * MD_MARK_EMPH_MOD3_x values set; otherwise MD_UNREACHABLE() is hit.
+ *
+ * (ctx is not referenced directly below; presumably it is used by the
+ * ASTERISK_OPENERS_xxxx macros -- they are not defined in this part of the
+ * file.)
+ */
+static MD_MARKCHAIN*
+md_asterisk_chain(MD_CTX* ctx, unsigned flags)
+{
+    switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return &ASTERISK_OPENERS_intraword_mod3_0;
+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return &ASTERISK_OPENERS_intraword_mod3_1;
+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return &ASTERISK_OPENERS_intraword_mod3_2;
+        case MD_MARK_EMPH_MOD3_0:                           return &ASTERISK_OPENERS_extraword_mod3_0;
+        case MD_MARK_EMPH_MOD3_1:                           return &ASTERISK_OPENERS_extraword_mod3_1;
+        case MD_MARK_EMPH_MOD3_2:                           return &ASTERISK_OPENERS_extraword_mod3_2;
+        default:                                            MD_UNREACHABLE();
+    }
+    return NULL;
+}
+
+/* Get the chain the mark of the given index belongs to. The chain is
+ * determined by the mark character (and, for '*' and '~', also by the mark
+ * flags or the mark length respectively). NULL is returned for marks of
+ * types which have no chain. */
+static MD_MARKCHAIN*
+md_mark_chain(MD_CTX* ctx, int mark_index)
+{
+    MD_MARK* m = ctx->marks + mark_index;
+    MD_MARKCHAIN* chain = NULL;
+
+    switch(m->ch) {
+        case _T('*'):
+            chain = md_asterisk_chain(ctx, m->flags);
+            break;
+
+        case _T('_'):
+            chain = &UNDERSCORE_OPENERS;
+            break;
+
+        case _T('~'):
+            /* Single and double tildes live in separate chains. */
+            if(m->end - m->beg == 1)
+                chain = &TILDE_OPENERS_1;
+            else
+                chain = &TILDE_OPENERS_2;
+            break;
+
+        case _T('!'):
+            MD_FALLTHROUGH();
+        case _T('['):
+            chain = &BRACKET_OPENERS;
+            break;
+
+        case _T('|'):
+            chain = &TABLECELLBOUNDARIES;
+            break;
+
+        default:
+            break;
+    }
+
+    return chain;
+}
+
+/* Append a new (uninitialized) mark to ctx->marks[], growing the array if
+ * needed, and count it into ctx->n_marks.
+ *
+ * Returns pointer to the new mark, or NULL on failure (out of memory). On
+ * failure, the array and its recorded capacity are left untouched.
+ */
+static MD_MARK*
+md_push_mark(MD_CTX* ctx)
+{
+    if(ctx->n_marks >= ctx->alloc_marks) {
+        MD_MARK* new_marks;
+        size_t new_alloc;
+
+        /* Grow by one half; start with 64 marks. */
+        new_alloc = (ctx->alloc_marks > 0
+                ? (size_t) ctx->alloc_marks + (size_t) ctx->alloc_marks / 2
+                : 64);
+
+        /* Guard the size computation below against overflow. */
+        if(new_alloc > ((size_t) -1) / sizeof(MD_MARK)) {
+            MD_LOG("realloc() failed.");
+            return NULL;
+        }
+
+        new_marks = (MD_MARK*) realloc(ctx->marks, new_alloc * sizeof(MD_MARK));
+        if(new_marks == NULL) {
+            MD_LOG("realloc() failed.");
+            return NULL;
+        }
+
+        /* Commit the new capacity only now, when the buffer really has it. */
+        ctx->marks = new_marks;
+        ctx->alloc_marks = new_alloc;
+    }
+
+    return &ctx->marks[ctx->n_marks++];
+}
+
+#define PUSH_MARK_()    /* Append one (uninitialized) mark to ctx->marks. */    \
+        do {    /* Uses ctx, mark, ret and label abort from the caller. */      \
+            mark = md_push_mark(ctx);                                   \
+            if(mark == NULL) {  /* md_push_mark() failed. */                    \
+                ret = -1;                                               \
+                goto abort;                                             \
+            }                                                           \
+        } while(0)
+
+#define PUSH_MARK(ch_, beg_, end_, flags_)  /* Append and fill in a mark. */    \
+        do {                                                            \
+            PUSH_MARK_();       /* May jump to abort with ret == -1. */         \
+            mark->beg = (beg_);                                         \
+            mark->end = (end_);                                         \
+            mark->prev = -1;    /* -1: no link to another mark yet. */          \
+            mark->next = -1;                                            \
+            mark->ch = (char)(ch_);     /* Note: narrowed to char. */           \
+            mark->flags = (flags_);                                     \
+        } while(0)
+
+
+/* Append the mark at 'mark_index' to the tail of the given chain (a doubly
+ * linked list threaded through the prev/next members of the marks, with -1
+ * standing for "none"). */
+static void
+md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
+{
+    MD_MARK* marks = ctx->marks;
+
+    /* Hook the new mark behind the current tail; for an empty chain it
+     * becomes the head as well. */
+    if(chain->tail < 0)
+        chain->head = mark_index;
+    else
+        marks[chain->tail].next = mark_index;
+
+    marks[mark_index].prev = chain->tail;
+    marks[mark_index].next = -1;
+
+    chain->tail = mark_index;
+}
+
+/* Stash a raw pointer inside a dummy mark (one whose ch is 'D'). This is
+ * needed rarely enough that MD_MARK is not turned into a union for it; the
+ * pointer simply overwrites the leading bytes of the mark structure. */
+static inline void
+md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
+{
+    MD_MARK* dummy = ctx->marks + mark_index;
+
+    /* Only dummy marks may be abused this way... */
+    MD_ASSERT(dummy->ch == 'D');
+    /* ...and the pointer must fit into the space of members beg and end. */
+    MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
+
+    memcpy(dummy, &ptr, sizeof(ptr));
+}
+
+/* Read back a pointer previously stored into the dummy mark ('D') at
+ * 'mark_index' with md_mark_store_ptr(). */
+static inline void*
+md_mark_get_ptr(MD_CTX* ctx, int mark_index)
+{
+    MD_MARK* dummy = ctx->marks + mark_index;
+    void* stored;
+
+    MD_ASSERT(dummy->ch == 'D');
+
+    memcpy(&stored, dummy, sizeof(stored));
+    return stored;
+}
+
+/* Pair up the marks at 'opener_index' and 'closer_index' as a resolved
+ * opener/closer couple. If 'chain' is not NULL, the opener is first unlinked
+ * from that chain of unresolved openers. */
+static void
+md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
+{
+    MD_MARK* marks = ctx->marks;
+    MD_MARK* opener = marks + opener_index;
+    MD_MARK* closer = marks + closer_index;
+
+    if(chain != NULL) {
+        /* Detach the opener from its predecessor (or from the chain head). */
+        if(opener->prev < 0)
+            chain->head = opener->next;
+        else
+            marks[opener->prev].next = opener->next;
+
+        /* Detach the opener from its successor (or from the chain tail). */
+        if(opener->next < 0)
+            chain->tail = opener->prev;
+        else
+            marks[opener->next].prev = opener->prev;
+    }
+
+    /* From now on, opener->next and closer->prev point at each other and
+     * both marks are flagged as resolved. */
+    opener->next = closer_index;
+    opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
+    closer->prev = opener_index;
+    closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
+}
+
+
+#define MD_ROLLBACK_ALL         0
+#define MD_ROLLBACK_CROSSING    1
+
+/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
+ * resolvings accordingly to these rules:
+ *
+ * (1) All openers BEFORE the range corresponding to any closer inside the
+ *     range are un-resolved and they are re-added to their respective chains
+ *     of unresolved openers. This ensures we can reuse the opener for closers
+ *     AFTER the range.
+ *
+ * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
+ *     are discarded.
+ *
+ * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
+ *     in (1) are discarded. I.e. pairs of openers and closers which are both
+ *     inside the range are retained as well as any unpaired marks.
+ *
+ * Implementation notes:
+ *
+ *  -- Before the rules above are applied, every chain of unresolved openers
+ *     (OPENERS_CHAIN_FIRST ... OPENERS_CHAIN_LAST) is trimmed from its tail
+ *     while the tail index is >= opener_index; the trimming of a chain stops
+ *     right after the mark opener_index itself has been removed.
+ *
+ *  -- Only the marks strictly between opener_index and closer_index are
+ *     visited; the marks at opener_index and closer_index themselves are
+ *     never modified by the second loop.
+ *
+ *  -- "Discarding" a mark means clearing its MD_MARK_OPENER, MD_MARK_CLOSER
+ *     and MD_MARK_RESOLVED flags; a zero-length closer is additionally
+ *     turned back into a dummy mark ('D').
+ */
+static void
+md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
+{
+    int i;
+    int mark_index;
+
+    /* Cut all unresolved openers at the mark index. */
+    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
+        MD_MARKCHAIN* chain = &ctx->mark_chains[i];
+
+        while(chain->tail >= opener_index) {
+            int same = chain->tail == opener_index;
+            chain->tail = ctx->marks[chain->tail].prev;
+            if (same) break;    /* The range opener itself has just been removed. */
+        }
+
+        /* Terminate the shortened chain properly (or mark it as empty). */
+        if(chain->tail >= 0)
+            ctx->marks[chain->tail].next = -1;
+        else
+            chain->head = -1;
+    }
+
+    /* Go backwards so that unresolved openers are re-added into their
+     * respective chains, in the right order. */
+    mark_index = closer_index - 1;
+    while(mark_index > opener_index) {
+        MD_MARK* mark = &ctx->marks[mark_index];
+        int mark_flags = mark->flags;   /* Snapshot; mark->flags may get cleared below. */
+        int discard_flag = (how == MD_ROLLBACK_ALL);
+
+        if(mark->flags & MD_MARK_CLOSER) {
+            int mark_opener_index = mark->prev;
+
+            /* Undo opener BEFORE the range.
+             * NOTE(review): the chain is looked up via opener_index (the
+             * opener of the rolled-back range), not via mark_opener_index
+             * (the mark which is being re-appended) -- confirm this is
+             * intended. */
+            if(mark_opener_index < opener_index) {
+                MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
+                MD_MARKCHAIN* chain;
+
+                mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
+                chain = md_mark_chain(ctx, opener_index);
+                if(chain != NULL) {
+                    md_mark_chain_append(ctx, chain, mark_opener_index);
+                    discard_flag = 1;   /* The closer lost its opener; discard it too. */
+                }
+            }
+        }
+
+        /* And reset our flags. */
+        if(discard_flag) {
+            /* Make zero-length closer a dummy mark as that's how it was born */
+            if((mark->flags & MD_MARK_CLOSER)  &&  mark->beg == mark->end)
+                mark->ch = 'D';
+
+            mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
+        }
+
+        /* Jump as far as we can over unresolved or non-interesting marks.
+         * Note the test below uses the flags snapshot taken before the mark
+         * could have been reset above. */
+        switch(how) {
+            case MD_ROLLBACK_CROSSING:
+                if((mark_flags & MD_MARK_CLOSER)  &&  mark->prev > opener_index) {
+                    /* If we are closer with opener INSIDE the range, there may
+                     * not be any other crosser inside the subrange. */
+                    mark_index = mark->prev;
+                    break;
+                }
+                MD_FALLTHROUGH();
+            default:
+                mark_index--;
+                break;
+        }
+    }
+}
+
+/* (Re)build ctx->mark_char_map[], the lookup table telling which characters
+ * can start a mark and hence deserve a closer look when collecting marks.
+ * Some characters are always included, others only when the corresponding
+ * parser flag (extension) is enabled. */
+static void
+md_build_mark_char_map(MD_CTX* ctx)
+{
+    /* Characters which are marks unconditionally. The terminating NUL of
+     * the literal is intentionally processed too, see the loop below. */
+    static const char always_marks[] = "\\*_`&;<>[!]";
+
+    /* Characters which are marks only if any of the given flags is set. */
+    static const struct {
+        unsigned flags;
+        char ch;
+    } optional_marks[] = {
+        { MD_FLAG_STRIKETHROUGH,                '~' },
+        { MD_FLAG_LATEXMATHSPANS,               '$' },
+        { MD_FLAG_PERMISSIVEEMAILAUTOLINKS,     '@' },
+        { MD_FLAG_PERMISSIVEURLAUTOLINKS,       ':' },
+        { MD_FLAG_PERMISSIVEWWWAUTOLINKS,       '.' },
+        { MD_FLAG_TABLES | MD_FLAG_WIKILINKS,   '|' }
+    };
+
+    int k;
+
+    memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
+
+    /* sizeof() (rather than strlen()) so that '\0' gets its entry as well. */
+    for(k = 0; k < (int) sizeof(always_marks); k++)
+        ctx->mark_char_map[(unsigned char) always_marks[k]] = 1;
+
+    for(k = 0; k < (int) (sizeof(optional_marks) / sizeof(optional_marks[0])); k++) {
+        if(ctx->parser.flags & optional_marks[k].flags)
+            ctx->mark_char_map[(unsigned char) optional_marks[k].ch] = 1;
+    }
+
+    /* When collapsing whitespace, every whitespace character is a mark. */
+    if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
+        for(k = 0; k < (int) sizeof(ctx->mark_char_map); k++) {
+            if(ISWHITESPACE_(k))
+                ctx->mark_char_map[k] = 1;
+        }
+    }
+}
+
+/* We limit code span marks to lower than 32 backticks. This solves the
+ * pathologic case of too many openers, each of different length: Their
+ * resolving would be then O(n^2).
+ *
+ * (Strictly speaking, the check in md_is_code_span() rejects only openers
+ * LONGER than CODESPAN_MARK_MAXLEN, so a run of exactly that many backticks
+ * is still accepted.) */
+#define CODESPAN_MARK_MAXLEN    32
+
+static int
+md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
+                OFF* p_opener_beg, OFF* p_opener_end,
+                OFF* p_closer_beg, OFF* p_closer_end,
+                OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
+                int* p_reached_paragraph_end)
+{   /* Check whether the run of backticks starting at offset 'beg' (which is
+     * scanned within lines[0]) opens a code span closed by a run of the same
+     * length somewhere in lines[0] ... lines[n_lines-1].
+     *
+     * Returns TRUE and fills all four p_opener_xxx/p_closer_xxx outputs on
+     * success. Returns FALSE otherwise; even then *p_opener_end is set to
+     * the end of the opening backtick run.
+     *
+     * last_potential_closers[len-1] is updated with the offset of the last
+     * seen non-matching backtick run of length 'len' (only for len lower
+     * than CODESPAN_MARK_MAXLEN) and *p_reached_paragraph_end is set to TRUE
+     * when a scan runs out of lines; both are consulted to skip futile
+     * re-scans, so the caller has to keep them between the calls. */
+    OFF opener_beg = beg;
+    OFF opener_end;
+    OFF closer_beg;
+    OFF closer_end;
+    SZ mark_len;
+    OFF line_end;
+    int has_space_after_opener = FALSE;
+    int has_eol_after_opener = FALSE;
+    int has_space_before_closer = FALSE;
+    int has_eol_before_closer = FALSE;
+    int has_only_space = TRUE;
+    int line_index = 0;
+
+    line_end = lines[0].end;
+    opener_end = opener_beg;
+    while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
+        opener_end++;
+    has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
+    has_eol_after_opener = (opener_end == line_end);
+
+    /* The caller needs to know end of the opening mark even if we fail. */
+    *p_opener_end = opener_end;
+
+    mark_len = opener_end - opener_beg;
+    if(mark_len > CODESPAN_MARK_MAXLEN)
+        return FALSE;
+
+    /* Check whether we already know there is no closer of this length.
+     * If so, re-scan does no sense. This fixes issue #59.
+     * (Note mark_len-1 is used as an index, i.e. the code relies on 'beg'
+     * really pointing at a backtick so that mark_len is at least 1.) */
+    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
+       (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
+        return FALSE;
+
+    closer_beg = opener_end;
+    closer_end = opener_end;
+
+    /* Find closer mark. */
+    while(TRUE) {
+        while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
+            if(CH(closer_beg) != _T(' '))
+                has_only_space = FALSE;
+            closer_beg++;
+        }
+        closer_end = closer_beg;
+        while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
+            closer_end++;
+
+        if(closer_end - closer_beg == mark_len) {
+            /* Success. */
+            has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
+            has_eol_before_closer = (closer_beg == lines[line_index].beg);
+            break;
+        }
+
+        if(closer_end - closer_beg > 0) {
+            /* We have found a back-tick which is not part of the closer. */
+            has_only_space = FALSE;
+
+            /* But if we eventually fail, remember it as a potential closer
+             * of its own length for future attempts. This mitigates needs for
+             * rescans. */
+            if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
+                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
+                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
+            }
+        }
+
+        if(closer_end >= line_end) {
+            line_index++;
+            if(line_index >= n_lines) {
+                /* Reached end of the paragraph and still nothing. */
+                *p_reached_paragraph_end = TRUE;
+                return FALSE;
+            }
+            /* Try on the next line. */
+            line_end = lines[line_index].end;
+            closer_beg = lines[line_index].beg;
+        } else {
+            closer_beg = closer_end;
+        }
+    }
+
+    /* If there is a space or a new line both after and before the opener
+     * (and if the code span is not made of spaces only), consume one initial
+     * and one trailing space as part of the marks. */
+    if(!has_only_space  &&
+       (has_space_after_opener || has_eol_after_opener)  &&
+       (has_space_before_closer || has_eol_before_closer))
+    {
+        if(has_space_after_opener)
+            opener_end++;
+        else
+            opener_end = lines[1].beg;  /* Opener ends its line: extend the mark to the next line. */
+
+        if(has_space_before_closer)
+            closer_beg--;
+        else {
+            closer_beg = lines[line_index-1].end;
+            /* We need to eat the preceding "\r\n" but not any line trailing
+             * spaces. */
+            while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
+                closer_beg++;
+        }
+    }
+
+    *p_opener_beg = opener_beg;
+    *p_opener_end = opener_end;
+    *p_closer_beg = closer_beg;
+    *p_closer_end = closer_end;
+    return TRUE;
+}
+
+/* Check whether an URI autolink ("<scheme:path>") starts at offset 'beg',
+ * which has to point at the '<'. Nothing at or beyond 'max_end' is examined.
+ * On success, TRUE is returned and *p_end is set to the offset just behind
+ * the terminating '>'; otherwise FALSE is returned and *p_end is untouched. */
+static int
+md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF pos = beg + 1;
+
+    MD_ASSERT(CH(beg) == _T('<'));
+
+    /* The scheme: The 1st character only has to be ASCII. The rest consists
+     * of alphanumeric characters, '+', '-' or '.' and it is terminated with
+     * a colon found at least 3 and at most 32 characters behind 'beg'. */
+    if(pos >= max_end  ||  !ISASCII(pos))
+        return FALSE;
+
+    for(pos++; ; pos++) {
+        if(pos >= max_end  ||  pos - beg > 32)
+            return FALSE;
+        if(CH(pos) == _T(':')  &&  pos - beg >= 3)
+            break;
+        if(!(ISALNUM(pos) || CH(pos) == _T('+') || CH(pos) == _T('-') || CH(pos) == _T('.')))
+            return FALSE;
+    }
+
+    /* The path: Anything up to '>', except whitespace, control characters
+     * and '<'. */
+    for( ; pos < max_end  &&  CH(pos) != _T('>'); pos++) {
+        if(ISWHITESPACE(pos) || ISCNTRL(pos) || CH(pos) == _T('<'))
+            return FALSE;
+    }
+
+    /* No terminating '>' found at all. */
+    if(pos >= max_end)
+        return FALSE;
+
+    MD_ASSERT(CH(pos) == _T('>'));
+    *p_end = pos + 1;
+    return TRUE;
+}
+
+/* Check whether an e-mail autolink ("<user@host>") starts at offset 'beg',
+ * which has to point at the '<'. Nothing at or beyond 'max_end' is examined.
+ * On success, TRUE is returned and *p_end is set to the offset just behind
+ * the terminating '>'; otherwise FALSE is returned and *p_end is untouched. */
+static int
+md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
+{
+    const OFF user_beg = beg + 1;
+    OFF pos = user_beg;
+    int label_len = 0;
+
+    MD_ASSERT(CH(beg) == _T('<'));
+
+    /* The code should correspond to this regexp:
+            /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
+            @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
+            (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
+     */
+
+    /* A non-empty user name... */
+    while(pos < max_end  &&  (ISALNUM(pos) || ISANYOF(pos, _T(".!#$%&'*+/=?^_`{|}~-"))))
+        pos++;
+    if(pos <= user_beg)
+        return FALSE;
+
+    /* ...followed by '@'... */
+    if(pos >= max_end  ||  CH(pos) != _T('@'))
+        return FALSE;
+    pos++;
+
+    /* ...followed by '.'-delimited labels. Each label is made of 1 - 63
+     * alphanumeric characters or '-', where '-' may be neither the first nor
+     * the last character of the label. */
+    for( ; pos < max_end; pos++) {
+        if(ISALNUM(pos)) {
+            label_len++;
+        } else if(CH(pos) == _T('-')  &&  label_len > 0) {
+            label_len++;
+        } else if(CH(pos) == _T('.')  &&  label_len > 0  &&  CH(pos-1) != _T('-')) {
+            label_len = 0;
+        } else {
+            break;
+        }
+
+        if(label_len > 63)
+            return FALSE;
+    }
+
+    /* The last label must not be empty and the '>' has to follow it. */
+    if(label_len <= 0)
+        return FALSE;
+    if(pos >= max_end  ||  CH(pos) != _T('>'))
+        return FALSE;
+    if(CH(pos-1) == _T('-'))
+        return FALSE;
+
+    *p_end = pos + 1;
+    return TRUE;
+}
+
+/* Check whether any kind of autolink ("<...>") starts at offset 'beg'. The
+ * URI form is tried first, the e-mail form second. On success, TRUE is
+ * returned, *p_end is set by the matching helper and *p_missing_mailto tells
+ * whether it was the e-mail form (TRUE) or the URI form (FALSE). */
+static int
+md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
+{
+    const int is_uri = md_is_autolink_uri(ctx, beg, max_end, p_end);
+
+    if(!is_uri  &&  !md_is_autolink_email(ctx, beg, max_end, p_end))
+        return FALSE;
+
+    *p_missing_mailto = (is_uri ? FALSE : TRUE);
+    return TRUE;
+}
+
+static int
+md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
+{   /* Scan the given lines and push a mark into ctx->marks[] for every
+     * character (or run of characters) which may take part in some inline
+     * construct. Backslash escapes, code spans, raw HTML and "<...>"
+     * autolinks are recognized right here and pushed as already resolved
+     * marks; everything else is pushed as a potential opener and/or closer.
+     *
+     * 'table_mode' makes '|' a mark (MD_FLAG_WIKILINKS does the same).
+     *
+     * Returns 0 on success, or -1 if a mark could not be pushed (see the
+     * macro PUSH_MARK, which jumps to the label abort in that case). */
+    const MD_LINE* line_term = lines + n_lines;
+    const MD_LINE* line;
+    int ret = 0;
+    MD_MARK* mark;
+    OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
+    int codespan_scanned_till_paragraph_end = FALSE;
+
+    for(line = lines; line < line_term; line++) {
+        OFF off = line->beg;
+        OFF line_end = line->end;
+
+        while(TRUE) {
+            CHAR ch;
+
+#ifdef MD4C_USE_UTF16
+    /* For UTF-16, mark_char_map[] covers only ASCII. */
+    #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
+                                (ctx->mark_char_map[(unsigned char) CH(off)]))
+#else
+    /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
+    #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
+#endif
+
+            /* Optimization: Use some loop unrolling. */
+            while(off + 3 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
+                                      &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
+                off += 4;
+            while(off < line_end  &&  !IS_MARK_CHAR(off+0))
+                off++;
+
+            if(off >= line_end)
+                break;
+
+            ch = CH(off);
+
+            /* A backslash escape.
+             * It can go beyond line->end as it may involve escaped new
+             * line to form a hard break. */
+            if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
+                /* Hard-break cannot be on the last line of the block. */
+                if(!ISNEWLINE(off+1)  ||  line+1 < line_term)
+                    PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
+                off += 2;
+                continue;
+            }
+
+            /* A potential (string) emphasis start/end. */
+            if(ch == _T('*')  ||  ch == _T('_')) {
+                OFF tmp = off+1;
+                int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
+                int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */
+
+                while(tmp < line_end  &&  CH(tmp) == ch)
+                    tmp++;
+
+                if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
+                    left_level = 0;
+                else if(ISUNICODEPUNCTBEFORE(off))
+                    left_level = 1;
+                else
+                    left_level = 2;
+
+                if(tmp == line_end  ||  ISUNICODEWHITESPACE(tmp))
+                    right_level = 0;
+                else if(ISUNICODEPUNCT(tmp))
+                    right_level = 1;
+                else
+                    right_level = 2;
+
+                /* Intra-word underscore doesn't have special meaning. */
+                if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
+                    left_level = 0;
+                    right_level = 0;
+                }
+
+                if(left_level != 0  ||  right_level != 0) {
+                    unsigned flags = 0;
+
+                    if(left_level > 0  &&  left_level >= right_level)
+                        flags |= MD_MARK_POTENTIAL_CLOSER;
+                    if(right_level > 0  &&  right_level >= left_level)
+                        flags |= MD_MARK_POTENTIAL_OPENER;
+                    if(left_level == 2  &&  right_level == 2)
+                        flags |= MD_MARK_EMPH_INTRAWORD;
+
+                    /* For "the rule of three" we need to remember the original
+                     * size of the mark (modulo three), before we potentially
+                     * split the mark when being later resolved partially by some
+                     * shorter closer. */
+                    switch((tmp - off) % 3) {
+                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
+                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
+                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
+                    }
+
+                    PUSH_MARK(ch, off, tmp, flags);
+
+                    /* During resolving, multiple asterisks may have to be
+                     * split into independent span start/ends. Consider e.g.
+                     * "**foo* bar*". Therefore we push also some empty dummy
+                     * marks to have enough space for that. */
+                    off++;
+                    while(off < tmp) {
+                        PUSH_MARK('D', off, off, 0);
+                        off++;
+                    }
+                    continue;
+                }
+
+                off = tmp;
+                continue;
+            }
+
+            /* A potential code span start/end. */
+            if(ch == _T('`')) {
+                OFF opener_beg, opener_end;
+                OFF closer_beg, closer_end;
+                int is_code_span;
+
+                is_code_span = md_is_code_span(ctx, line, line_term - line, off,
+                                    &opener_beg, &opener_end, &closer_beg, &closer_end,
+                                    codespan_last_potential_closers,
+                                    &codespan_scanned_till_paragraph_end);
+                if(is_code_span) {
+                    PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
+                    PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+
+                    off = closer_end;
+
+                    /* Advance the current line accordingly. */
+                    if(off > line_end) {
+                        line = md_lookup_line(off, line, line_term - line);
+                        line_end = line->end;
+                    }
+                    continue;
+                }
+
+                off = opener_end;   /* Set by md_is_code_span() even when it fails. */
+                continue;
+            }
+
+            /* A potential entity start. */
+            if(ch == _T('&')) {
+                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
+                off++;
+                continue;
+            }
+
+            /* A potential entity end. */
+            if(ch == _T(';')) {
+                /* We surely cannot be entity unless the previous mark is '&'. */
+                if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
+                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
+
+                off++;
+                continue;
+            }
+
+            /* A potential autolink or raw HTML start/end. */
+            if(ch == _T('<')) {
+                int is_autolink;
+                OFF autolink_end;
+                int missing_mailto;
+
+                if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
+                    int is_html;
+                    OFF html_end;
+
+                    /* Given the nature of the raw HTML, we have to recognize
+                     * it here. Doing so later in md_analyze_lt_gt() could
+                     * open can of worms of quadratic complexity. */
+                    is_html = md_is_html_any(ctx, line, line_term - line, off,
+                                    lines[n_lines-1].end, &html_end);
+                    if(is_html) {
+                        PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
+                        PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+                        off = html_end;
+
+                        /* Advance the current line accordingly. */
+                        if(off > line_end) {
+                            line = md_lookup_line(off, line, line_term - line);
+                            line_end = line->end;
+                        }
+                        continue;
+                    }
+                }
+
+                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
+                                    &autolink_end, &missing_mailto);
+                if(is_autolink) {
+                    PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
+                                MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
+                    PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
+                                MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
+                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+                    off = autolink_end;
+                    continue;
+                }
+
+                off++;
+                continue;
+            }
+
+            /* A potential link or its part. */
+            if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
+                OFF tmp = (ch == _T('[') ? off+1 : off+2);
+                PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
+                off = tmp;
+                /* Two dummies to make enough place for data we need if it is
+                 * a link. */
+                PUSH_MARK('D', off, off, 0);
+                PUSH_MARK('D', off, off, 0);
+                continue;
+            }
+            if(ch == _T(']')) {
+                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
+                off++;
+                continue;
+            }
+
+            /* A potential permissive e-mail autolink. */
+            if(ch == _T('@')) {
+                if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
+                    off + 3 < line->end  &&  ISALNUM(off+1))
+                {
+                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
+                    /* Push a dummy as a reserve for a closer. */
+                    PUSH_MARK('D', off, off, 0);
+                }
+
+                off++;
+                continue;
+            }
+
+            /* A potential permissive URL autolink. */
+            if(ch == _T(':')) {
+                static struct {
+                    const CHAR* scheme;
+                    SZ scheme_size;
+                    const CHAR* suffix;
+                    SZ suffix_size;
+                } scheme_map[] = {
+                    /* In the order from the most frequently used, arguably. */
+                    { _T("http"), 4,    _T("//"), 2 },
+                    { _T("https"), 5,   _T("//"), 2 },
+                    { _T("ftp"), 3,     _T("//"), 2 }
+                };
+                int scheme_index;
+
+                for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
+                    const CHAR* scheme = scheme_map[scheme_index].scheme;
+                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
+                    const CHAR* suffix = scheme_map[scheme_index].suffix;
+                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;
+
+                    if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
+                        (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~([")))  &&
+                        off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
+                    {
+                        PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
+                        /* Push a dummy as a reserve for a closer. */
+                        PUSH_MARK('D', off, off, 0);
+                        /* NOTE(review): the off++ behind the loop is executed
+                         * after this break too, so the character right after
+                         * the suffix is never examined as a mark -- confirm
+                         * this is intended. */
+                        off += 1 + suffix_size;
+                        break;
+                    }
+                }
+
+                off++;
+                continue;
+            }
+
+            /* A potential permissive WWW autolink. */
+            if(ch == _T('.')) {
+                if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
+                    (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~([")))  &&
+                    off + 1 < line_end)
+                {
+                    PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
+                    /* Push a dummy as a reserve for a closer. */
+                    PUSH_MARK('D', off, off, 0);
+                    off++;
+                    continue;
+                }
+
+                off++;
+                continue;
+            }
+
+            /* A potential table cell boundary or wiki link label delimiter. */
+            if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
+                PUSH_MARK(ch, off, off+1, 0);
+                off++;
+                continue;
+            }
+
+            /* A potential strikethrough start/end.
+             * (Runs of three or more tildes are skipped without any mark.) */
+            if(ch == _T('~')) {
+                OFF tmp = off+1;
+
+                while(tmp < line_end  &&  CH(tmp) == _T('~'))
+                    tmp++;
+
+                if(tmp - off < 3) {
+                    unsigned flags = 0;
+
+                    if(tmp < line_end  &&  !ISUNICODEWHITESPACE(tmp))
+                        flags |= MD_MARK_POTENTIAL_OPENER;
+                    if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off))
+                        flags |= MD_MARK_POTENTIAL_CLOSER;
+                    if(flags != 0)
+                        PUSH_MARK(ch, off, tmp, flags);
+                }
+
+                off = tmp;
+                continue;
+            }
+
+            /* A potential equation start/end */
+            if(ch == _T('$')) {
+                /* We can have at most two consecutive $ signs,
+                 * where two dollar signs signify a display equation. */
+                OFF tmp = off+1;
+
+                while(tmp < line_end && CH(tmp) == _T('$'))
+                    tmp++;
+
+                if (tmp - off <= 2)
+                    PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
+                off = tmp;
+                continue;
+            }
+
+            /* Turn non-trivial whitespace into single space.
+             * (A lone ' ' is trivial and gets no mark.) */
+            if(ISWHITESPACE_(ch)) {
+                OFF tmp = off+1;
+
+                while(tmp < line_end  &&  ISWHITESPACE(tmp))
+                    tmp++;
+
+                if(tmp - off > 1  ||  ch != _T(' '))
+                    PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
+
+                off = tmp;
+                continue;
+            }
+
+            /* NULL character. */
+            if(ch == _T('\0')) {
+                PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
+                off++;
+                continue;
+            }
+
+            off++;
+        }
+    }
+
+    /* Add a dummy mark at the end of the mark vector to simplify
+     * process_inlines(). */
+    PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);
+
+abort:
+    return ret;
+}
+
+/* Handle a bracket mark ('[', '![' or ']').
+ *
+ * Links cannot be really resolved at this point because that requires more
+ * context: e.g. whether another pair of brackets follows (a reference link)
+ * or whether the pair is enclosed in another one (if the inner pair is
+ * a link, the outer one cannot be).
+ *
+ * So only a list of '[' ']' pairs is built here, ordered by the position of
+ * the closer. That way nested brackets can be later examined in the right
+ * order, from the innermost pair to the outermost one. The real resolving is
+ * left to md_resolve_links().
+ */
+static void
+md_analyze_bracket(MD_CTX* ctx, int mark_index)
+{
+    MD_MARKCHAIN* openers = &BRACKET_OPENERS;
+    MD_MARK* cur = ctx->marks + mark_index;
+    MD_MARK* opener;
+    int opener_index;
+
+    /* An opener: Just remember it for later. If there already is a pending
+     * opener, flag the latest one as having nested brackets. */
+    if(cur->flags & MD_MARK_POTENTIAL_OPENER) {
+        if(openers->head != -1)
+            ctx->marks[openers->tail].flags |= MD_MARK_HASNESTEDBRACKETS;
+
+        md_mark_chain_append(ctx, openers, mark_index);
+        return;
+    }
+
+    /* A closer: It pairs with the most recent pending opener, if any. */
+    opener_index = openers->tail;
+    if(opener_index < 0)
+        return;
+    opener = ctx->marks + opener_index;
+
+    /* Pop that opener from the chain of pending openers. */
+    if(opener->prev < 0)
+        openers->head = -1;
+    else
+        ctx->marks[opener->prev].next = -1;
+    openers->tail = opener->prev;
+
+    /* Make the opener and the closer point at each other. */
+    opener->next = mark_index;
+    cur->prev = opener_index;
+
+    /* Enqueue the pair into the list of potential links for
+     * md_resolve_links(). As opener->next is occupied (it refers to the
+     * closer), the list is threaded through opener->prev instead. */
+    if(ctx->unresolved_link_tail < 0)
+        ctx->unresolved_link_head = opener_index;
+    else
+        ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
+    ctx->unresolved_link_tail = opener_index;
+    opener->prev = -1;
+}
+
+/* Forward declaration. */
+static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                                     int mark_beg, int mark_end);
+
+/* Walk the list of bracket pairs queued by md_analyze_bracket() and decide
+ * which of them form a wiki link, a reference link or an inline link.
+ *
+ * The pairs are visited through the list threaded via opener->prev, starting
+ * at ctx->unresolved_link_head, i.e. ordered by their closers (from inner to
+ * outer for nested brackets). For every pair recognized as a link:
+ *   - the opener and closer marks get flagged as resolved,
+ *   - for ordinary links/images the destination and title are stored into
+ *     the two dummy 'D' marks which follow the opener,
+ *   - the link text is analyzed with md_analyze_link_contents().
+ *
+ * Returns 0 on success, or -1 when md_is_link_reference() or
+ * md_is_inline_link_spec() reports an error (a negative return value).
+ */
+static int
+md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    int opener_index = ctx->unresolved_link_head;
+    OFF last_link_beg = 0;
+    OFF last_link_end = 0;
+    OFF last_img_beg = 0;
+    OFF last_img_end = 0;
+
+    while(opener_index >= 0) {
+        MD_MARK* opener = &ctx->marks[opener_index];
+        int closer_index = opener->next;
+        MD_MARK* closer = &ctx->marks[closer_index];
+        int next_index = opener->prev;
+        MD_MARK* next_opener;
+        MD_MARK* next_closer;
+        MD_LINK_ATTR attr;
+        int is_link = FALSE;
+
+        /* Peek at the following pair in the list (if any); it is needed for
+         * detecting wiki links and reference links below. */
+        if(next_index >= 0) {
+            next_opener = &ctx->marks[next_index];
+            next_closer = &ctx->marks[next_opener->next];
+        } else {
+            next_opener = NULL;
+            next_closer = NULL;
+        }
+
+        /* If nested ("[ [ ] ]"), we need to make sure that:
+         *   - The outer does not end inside of (...) belonging to the inner.
+         *   - The outer cannot be link if the inner is link (i.e. not image).
+         *
+         * (Note we here analyze from inner to outer as the marks are ordered
+         * by closer->beg.)
+         */
+        if((opener->beg < last_link_beg  &&  closer->end < last_link_end)  ||
+           (opener->beg < last_img_beg  &&  closer->end < last_img_end)  ||
+           (opener->beg < last_link_end  &&  opener->ch == '['))
+        {
+            opener_index = next_index;
+            continue;
+        }
+
+        /* Recognize and resolve wiki links.
+         * Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
+         */
+        if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
+            (opener->end - opener->beg == 1) &&         /* not image */
+            next_opener != NULL &&                      /* double '[' opener */
+            next_opener->ch == '[' &&
+            (next_opener->beg == opener->beg - 1) &&
+            (next_opener->end - next_opener->beg == 1) &&
+            next_closer != NULL &&                      /* double ']' closer */
+            next_closer->ch == ']' &&
+            (next_closer->beg == closer->beg + 1) &&
+            (next_closer->end - next_closer->beg == 1))
+        {
+            MD_MARK* delim = NULL;
+            int delim_index;
+            OFF dest_beg, dest_end;
+
+            is_link = TRUE;
+
+            /* We don't allow destination to be longer than 100 characters.
+             * Lets scan to see whether there is '|'. (If not then the whole
+             * wiki-link has to be below the 100 characters.) */
+            delim_index = opener_index + 1;
+            while(delim_index < closer_index) {
+                MD_MARK* m = &ctx->marks[delim_index];
+                if(m->ch == '|') {
+                    delim = m;
+                    break;
+                }
+                if(m->ch != 'D'  &&  m->beg - opener->end > 100)
+                    break;
+                delim_index++;
+            }
+            dest_beg = opener->end;
+            dest_end = (delim != NULL) ? delim->beg : closer->beg;
+            if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
+                is_link = FALSE;
+
+            /* There may not be any new line in the destination. */
+            if(is_link) {
+                OFF off;
+                for(off = dest_beg; off < dest_end; off++) {
+                    if(ISNEWLINE(off)) {
+                        is_link = FALSE;
+                        break;
+                    }
+                }
+            }
+
+            if(is_link) {
+                if(delim != NULL) {
+                    if(delim->end < closer->beg) {
+                        md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
+                        md_rollback(ctx, delim_index, closer_index, MD_ROLLBACK_CROSSING);
+                        delim->flags |= MD_MARK_RESOLVED;
+                        opener->end = delim->beg;
+                    } else {
+                        /* The pipe is just before the closer: [[foo|]] */
+                        md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
+                        closer->beg = delim->beg;
+                        delim = NULL;
+                    }
+                }
+
+                /* Grow the inner pair so that it covers also the outer
+                 * brackets, and resolve it. */
+                opener->beg = next_opener->beg;
+                opener->next = closer_index;
+                opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
+
+                closer->end = next_closer->end;
+                closer->prev = opener_index;
+                closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
+
+                last_link_beg = opener->beg;
+                last_link_end = closer->end;
+
+                if(delim != NULL)
+                    md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index);
+
+                /* The outer pair has been consumed too; skip over it. */
+                opener_index = next_opener->prev;
+                continue;
+            }
+        }
+
+        if(next_opener != NULL  &&  next_opener->beg == closer->end) {
+            if(next_closer->beg > closer->end + 1) {
+                /* Might be full reference link. */
+                if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
+                    is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
+            } else {
+                /* Might be shortcut reference link. */
+                if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
+                    is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
+            }
+
+            if(is_link < 0)
+                return -1;
+
+            if(is_link) {
+                /* Eat the 2nd "[...]". */
+                closer->end = next_closer->end;
+
+                /* Do not analyze the label as a standalone link in the next
+                 * iteration. */
+                next_index = ctx->marks[next_index].prev;
+            }
+        } else {
+            if(closer->end < ctx->size  &&  CH(closer->end) == _T('(')) {
+                /* Might be inline link. */
+                OFF inline_link_end = UINT_MAX;
+
+                is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
+                if(is_link < 0)
+                    return -1;
+
+                /* Check the closing ')' is not inside an already resolved range
+                 * (i.e. a range with a higher priority), e.g. a code span. */
+                if(is_link) {
+                    int i = closer_index + 1;
+
+                    while(i < ctx->n_marks) {
+                        MD_MARK* mark = &ctx->marks[i];
+
+                        if(mark->beg >= inline_link_end)
+                            break;
+                        if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
+                            if(ctx->marks[mark->next].beg >= inline_link_end) {
+                                /* Cancel the link status. */
+                                if(attr.title_needs_free)
+                                    free(attr.title);
+                                is_link = FALSE;
+                                break;
+                            }
+
+                            i = mark->next + 1;
+                        } else {
+                            i++;
+                        }
+                    }
+                }
+
+                if(is_link) {
+                    /* Eat the "(...)" */
+                    closer->end = inline_link_end;
+                }
+            }
+
+            if(!is_link) {
+                /* Might be collapsed reference link. */
+                if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
+                    is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
+                if(is_link < 0)
+                    return -1;
+            }
+        }
+
+        if(is_link) {
+            /* Resolve the brackets as a link. */
+            opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
+            closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
+
+            /* If it is a link, we store the destination and title in the two
+             * dummy marks after the opener. */
+            MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
+            md_mark_store_ptr(ctx, opener_index+1, attr.dest);
+            ctx->marks[opener_index+1].prev = attr.dest_size;
+
+            MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
+            md_mark_store_ptr(ctx, opener_index+2, attr.title);
+            /* The title might or might not have been allocated for us. */
+            if(attr.title_needs_free)
+                md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
+            ctx->marks[opener_index+2].prev = attr.title_size;
+
+            /* Remember the extent of the last link/image so that enclosing
+             * bracket pairs can be rejected by the nesting check above. */
+            if(opener->ch == '[') {
+                last_link_beg = opener->beg;
+                last_link_end = closer->end;
+            } else {
+                last_img_beg = opener->beg;
+                last_img_end = closer->end;
+            }
+
+            md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
+
+            /* If the link text is formed by nothing but permissive autolink,
+             * suppress the autolink.
+             * See https://github.com/mity/md4c/issues/152 for more info. */
+            if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
+                MD_MARK* first_nested;
+                MD_MARK* last_nested;
+
+                /* First non-dummy mark after the opener. */
+                first_nested = opener + 1;
+                while(first_nested->ch == _T('D')  &&  first_nested < closer)
+                    first_nested++;
+
+                /* Last non-dummy mark before the closer. Note the loop has to
+                 * test the mark being walked (last_nested); testing
+                 * first_nested here would never skip anything because the
+                 * loop above has already moved it off the dummy marks. */
+                last_nested = closer - 1;
+                while(last_nested->ch == _T('D')  &&  last_nested > opener)
+                    last_nested--;
+
+                if((first_nested->flags & MD_MARK_RESOLVED)  &&
+                   first_nested->beg == opener->end  &&
+                   ISANYOF_(first_nested->ch, _T("@:."))  &&
+                   first_nested->next == (last_nested - ctx->marks)  &&
+                   last_nested->end == closer->beg)
+                {
+                    /* Degrade the autolink opener and closer to dummies. */
+                    first_nested->ch = _T('D');
+                    first_nested->flags &= ~MD_MARK_RESOLVED;
+                    last_nested->ch = _T('D');
+                    last_nested->flags &= ~MD_MARK_RESOLVED;
+                }
+            }
+        }
+
+        opener_index = next_index;
+    }
+
+    return 0;
+}
+
+/* Check whether the '&' mark at mark_index begins an HTML entity.
+ * If it does, the mark is resolved together with the closing ';' mark and
+ * it is extended so that it covers the whole entity. */
+static void
+md_analyze_entity(MD_CTX* ctx, int mark_index)
+{
+    const int semicolon_index = mark_index + 1;
+    MD_MARK* amp;
+    MD_MARK* semicolon;
+    OFF entity_end;
+
+    /* It cannot be an entity unless the very next mark is the closing ';'.
+     * (Any other mark in between would mean a strange character which cannot
+     * be part of an entity.)
+     *
+     * That also means all the work can be done on '&' and there is no need
+     * to call this again for the closing mark ';'.
+     */
+    if(semicolon_index >= ctx->n_marks)
+        return;
+
+    amp = &ctx->marks[mark_index];
+    semicolon = &ctx->marks[semicolon_index];
+    if(semicolon->ch != ';')
+        return;
+
+    if(!md_is_entity(ctx, amp->beg, semicolon->end, &entity_end))
+        return;
+
+    MD_ASSERT(entity_end == semicolon->end);
+
+    md_resolve_range(ctx, NULL, mark_index, semicolon_index);
+    amp->end = semicolon->end;
+}
+
+/* Accept the '|' mark at mark_index as a table cell boundary: flag it as
+ * resolved, append it to the TABLECELLBOUNDARIES chain and count it in
+ * ctx->n_table_cell_boundaries. */
+static void
+md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
+{
+    ctx->marks[mark_index].flags |= MD_MARK_RESOLVED;
+
+    md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
+    ctx->n_table_cell_boundaries += 1;
+}
+
+/* Split one longer mark into two marks.
+ *
+ * The trailing n characters of the mark at mark_index are moved into a new
+ * mark; the original mark keeps the leading part. The new mark is a copy of
+ * the original one written over the dummy 'D' mark at index
+ * mark_index + (original length - n), so this may only be called when an
+ * adequate number of dummy marks follows.
+ *
+ * Returns the index of the new mark.
+ */
+static int
+md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
+{
+    MD_MARK* head = &ctx->marks[mark_index];
+    int tail_index = mark_index + (head->end - head->beg - n);
+    MD_MARK* tail = &ctx->marks[tail_index];
+
+    MD_ASSERT(head->end - head->beg > n);
+    MD_ASSERT(tail->ch == 'D');
+
+    /* Clone the mark, then cut the two copies apart at (end - n). */
+    *tail = *head;
+    head->end -= n;
+    tail->beg = head->end;
+
+    return tail_index;
+}
+
+/* Analyze an emphasis mark (a run of '*' or '_') at mark_index.
+ *
+ * If the mark may act as a closer, it is matched with the most recent
+ * suitable opener; the longer of the two is split so that both sides have
+ * the same length, and the pair is resolved. Otherwise, if the mark may act
+ * as an opener, it is appended to its opener chain to wait for a closer.
+ */
+static void
+md_analyze_emph(MD_CTX* ctx, int mark_index)
+{
+    MD_MARK* mark = &ctx->marks[mark_index];
+    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
+
+    /* If we can be a closer, try to resolve with the preceding opener. */
+    if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
+        MD_MARK* opener = NULL;
+        int opener_index = 0;
+
+        if(mark->ch == _T('*')) {
+            MD_MARKCHAIN* opener_chains[6];
+            int i, n_opener_chains;
+            unsigned flags = mark->flags;
+
+            /* Apply the "rule of three". */
+            /* The asterisk openers live in six chains (intraword/extraword
+             * times three mod3 classes). Collect only those chains this
+             * closer is allowed to pair with: a mod3_1 chain is dropped for
+             * a MOD3_2 closer and a mod3_2 chain for a MOD3_1 closer; for
+             * the extraword chains that restriction applies only when the
+             * closer itself is flagged as intraword. */
+            n_opener_chains = 0;
+            opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
+            if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
+            if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
+            opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
+            if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
+            if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
+
+            /* Opener is the most recent mark from the allowed chains. */
+            /* ("Most recent" = the chain tail with the greatest end offset.) */
+            for(i = 0; i < n_opener_chains; i++) {
+                if(opener_chains[i]->tail >= 0) {
+                    int tmp_index = opener_chains[i]->tail;
+                    MD_MARK* tmp_mark = &ctx->marks[tmp_index];
+                    if(opener == NULL  ||  tmp_mark->end > opener->end) {
+                        opener_index = tmp_index;
+                        opener = tmp_mark;
+                    }
+                }
+            }
+        } else {
+            /* Simple emph. mark */
+            /* Only the tail of the mark's own chain is considered. */
+            if(chain->tail >= 0) {
+                opener_index = chain->tail;
+                opener = &ctx->marks[opener_index];
+            }
+        }
+
+        /* Resolve, if we have found matching opener. */
+        if(opener != NULL) {
+            SZ opener_size = opener->end - opener->beg;
+            SZ closer_size = mark->end - mark->beg;
+            MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
+
+            if(opener_size > closer_size) {
+                /* Opener is longer: split off its last closer_size characters
+                 * into a new mark, which becomes the opener of this pair and
+                 * is appended to the opener's chain. */
+                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
+                md_mark_chain_append(ctx, opener_chain, opener_index);
+            } else if(opener_size < closer_size) {
+                /* Closer is longer: the mark at mark_index keeps only its
+                 * first opener_size characters; the rest is split off into
+                 * a new mark behind it. */
+                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
+            }
+
+            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
+            md_resolve_range(ctx, opener_chain, opener_index, mark_index);
+            return;
+        }
+    }
+
+    /* If we could not resolve as closer, we may be yet be an opener. */
+    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
+        md_mark_chain_append(ctx, chain, mark_index);
+}
+
+/* Analyze a tilde mark (strike-through delimiter) at mark_index.
+ *
+ * The aim is to be compatible with Github Flavored Markdown: GFM accepts
+ * only tilde sequences of length 1 and 2, and the opener and the closer
+ * must have the same length. Matching happens only inside the chain
+ * md_mark_chain() returns for this mark; presumably that chain depends on
+ * the mark length, which is what enforces the length rule -- verify
+ * against md_mark_chain().
+ */
+static void
+md_analyze_tilde(MD_CTX* ctx, int mark_index)
+{
+    MD_MARK* tilde = &ctx->marks[mark_index];
+    MD_MARKCHAIN* openers = md_mark_chain(ctx, mark_index);
+
+    /* As a closer, pair up with the head of the opener chain, if any. */
+    if((tilde->flags & MD_MARK_POTENTIAL_CLOSER)  &&  openers->head >= 0) {
+        int opener_index = openers->head;
+
+        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
+        md_resolve_range(ctx, openers, opener_index, mark_index);
+        return;
+    }
+
+    /* Not resolved as a closer; it may still serve as an opener later. */
+    if(!(tilde->flags & MD_MARK_POTENTIAL_OPENER))
+        return;
+
+    md_mark_chain_append(ctx, openers, mark_index);
+}
+
+/* Analyze a '$' mark (LaTeX math delimiter) at mark_index.
+ *
+ * This should mimic the way inline equations work in LaTeX, so there can
+ * only ever be one item in the chain (i.e. the dollars cannot be nested).
+ * It is basically the same as md_analyze_tilde(), except that a matching
+ * opener and closer are required to be of the same length.
+ *
+ * E.g.: $abc$$def$$ => abc (display equation) def (end equation)
+ */
+static void
+md_analyze_dollar(MD_CTX* ctx, int mark_index)
+{
+    if(DOLLAR_OPENERS.head >= 0) {
+        int opener_index = DOLLAR_OPENERS.head;
+        MD_MARK* pending = &ctx->marks[opener_index];
+        MD_MARK* candidate = &ctx->marks[mark_index];
+
+        /* Everything between the pending opener and this mark is rolled
+         * back first, regardless of whether the lengths match. */
+        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
+
+        /* Only a mark with the same count of '$' closes the opener. */
+        if(pending->end - pending->beg == candidate->end - candidate->beg) {
+            md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
+            return;
+        }
+    }
+
+    /* No opener pending or the lengths differ: queue this mark as an opener. */
+    md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
+}
+
+/* Analyze a '.' or ':' mark at mark_index as a potential permissive URL
+ * autolink.
+ *
+ * Starting at the end of the mark, a domain is scanned, followed by an
+ * optional path, and trailing punctuation is trimmed. On success the opener
+ * is shrunk to zero length at its start and the mark right after it is
+ * turned into a zero-length closer at the end of the URL, so that the whole
+ * URL becomes the link text. On failure nothing is changed.
+ */
+static void
+md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
+{
+    const int closer_index = mark_index + 1;
+    MD_MARK* opener = &ctx->marks[mark_index];
+    MD_MARK* closer = &ctx->marks[closer_index];
+    MD_MARK* stop_mark;
+    OFF pos = opener->end;
+    int dot_count = 0;
+    int underscore_in_last_seg = FALSE;
+    int underscore_in_prev_seg = FALSE;
+    int open_parens = 0;
+    int stray_close_parens = 0;
+
+    /* Check for domain. */
+    for(; pos < ctx->size; pos++) {
+        if(ISALNUM(pos) || CH(pos) == _T('-'))
+            continue;
+
+        if(CH(pos) == _T('.')) {
+            /* We must see at least one period. A period also starts a new
+             * domain segment. */
+            dot_count++;
+            underscore_in_prev_seg = underscore_in_last_seg;
+            underscore_in_last_seg = FALSE;
+        } else if(CH(pos) == _T('_')) {
+            /* No underscore may be present in the last two domain segments. */
+            underscore_in_last_seg = TRUE;
+        } else {
+            break;
+        }
+    }
+
+    /* A period at the very end does not belong to the domain. */
+    if(pos > opener->end  &&  CH(pos-1) == _T('.')) {
+        pos--;
+        dot_count--;
+    }
+
+    if(pos <= opener->end)
+        return;
+    if(dot_count == 0)
+        return;
+    if(underscore_in_prev_seg || underscore_in_last_seg)
+        return;
+
+    /* Check for path. It may extend at most to the next resolved mark. */
+    for(stop_mark = closer + 1;
+        stop_mark->ch == 'D' || !(stop_mark->flags & MD_MARK_RESOLVED);
+        stop_mark++)
+    {
+        /* Just skipping dummy and unresolved marks. */
+    }
+
+    for(; pos < stop_mark->beg; pos++) {
+        if(CH(pos) == _T('<')  ||  ISWHITESPACE(pos)  ||  ISNEWLINE(pos))
+            break;
+
+        /* Parenthesis must be balanced. */
+        if(CH(pos) == _T('(')) {
+            open_parens++;
+        } else if(CH(pos) == _T(')')) {
+            if(open_parens > 0)
+                open_parens--;
+            else
+                stray_close_parens++;
+        }
+    }
+
+    /* Trim a trailing punctuation from the end. */
+    for(;;) {
+        if(ISANYOF(pos-1, _T("?!.,:*_~"))) {
+            pos--;
+            continue;
+        }
+
+        /* Unmatched ')' can be in an interior of the path but not at the
+         * end of it, so the auto-link may be safely nested in a parenthesis
+         * pair. */
+        if(CH(pos-1) == ')'  &&  stray_close_parens > 0) {
+            pos--;
+            stray_close_parens--;
+            continue;
+        }
+
+        break;
+    }
+
+    /* Ok. Lets call it an auto-link. Adapt opener and create closer to zero
+     * length so all the contents becomes the link text. */
+    MD_ASSERT(closer->ch == 'D' ||
+              ((ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) &&
+               (closer->ch == '.' || closer->ch == ':' || closer->ch == '@')));
+    opener->end = opener->beg;
+    closer->ch = opener->ch;
+    closer->beg = pos;
+    closer->end = pos;
+    md_resolve_range(ctx, NULL, mark_index, closer_index);
+}
+
+/* Analyze a '@' mark at mark_index as a potential permissive e-mail
+ * autolink.
+ *
+ * The permissive autolinks do not have to be enclosed in '<' '>', but in
+ * exchange stricter rules are imposed on what is understood as an e-mail
+ * address: the user name may consist only of alphanumeric characters and
+ * ".-_+", the domain only of alphanumeric characters and ".-_". The domain
+ * has to contain at least one period (not counting a final one) and it must
+ * not end with '-' or '_'.
+ *
+ * On success the opener is shrunk to zero length at the start of the
+ * address and the following dummy mark becomes a zero-length closer at the
+ * end of it.
+ */
+static void
+md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
+{
+    const int closer_index = mark_index + 1;
+    MD_MARK* at_mark = &ctx->marks[mark_index];
+    MD_MARK* closer;
+    OFF addr_beg = at_mark->beg;
+    OFF addr_end = at_mark->end;
+    int n_dots = 0;
+
+    MD_ASSERT(at_mark->ch == _T('@'));
+
+    /* Scan for name before '@'. */
+    while(addr_beg > 0  &&  (ISALNUM(addr_beg-1) || ISANYOF(addr_beg-1, _T(".-_+"))))
+        addr_beg--;
+
+    /* Scan for domain after '@'. */
+    for(; addr_end < ctx->size  &&  (ISALNUM(addr_end) || ISANYOF(addr_end, _T(".-_"))); addr_end++) {
+        if(CH(addr_end) == _T('.'))
+            n_dots++;
+    }
+
+    if(CH(addr_end-1) == _T('.')) {
+        /* Final '.' not part of it. */
+        n_dots--;
+        addr_end--;
+    } else if(ISANYOF2(addr_end-1, _T('-'), _T('_'))) {
+        /* These are forbidden at the end. */
+        return;
+    }
+
+    /* Empty domain, or a domain without any period. */
+    if(CH(addr_end-1) == _T('@')  ||  n_dots == 0)
+        return;
+
+    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
+     * length so all the contents becomes the link text. That requires the
+     * next mark to be an unused dummy. */
+    closer = &ctx->marks[closer_index];
+    if(closer->ch != 'D')
+        return;
+
+    at_mark->beg = addr_beg;
+    at_mark->end = addr_beg;
+    closer->ch = at_mark->ch;
+    closer->beg = addr_end;
+    closer->end = addr_end;
+    md_resolve_range(ctx, NULL, mark_index, closer_index);
+}
+
+/* Run the per-character analyzers over the marks in the index range
+ * [mark_beg, mark_end).
+ *
+ * Only unresolved marks whose character is listed in mark_chars are
+ * analyzed. Already resolved spans are skipped as a whole (from an opener
+ * right behind its closer). The parameters lines and n_lines are not used.
+ */
+static inline void
+md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                 int mark_beg, int mark_end, const CHAR* mark_chars)
+{
+    int idx;
+
+    MD_UNUSED(lines);
+    MD_UNUSED(n_lines);
+
+    for(idx = mark_beg; idx < mark_end; ) {
+        MD_MARK* cur = &ctx->marks[idx];
+
+        /* Skip resolved spans. */
+        if(cur->flags & MD_MARK_RESOLVED) {
+            if(cur->flags & MD_MARK_OPENER) {
+                MD_ASSERT(idx < cur->next);
+                idx = cur->next + 1;
+            } else {
+                idx++;
+            }
+            continue;
+        }
+
+        /* Analyze the mark, unless it is one we do not want to deal with. */
+        if(ISANYOF_(cur->ch, mark_chars)) {
+            switch(cur->ch) {
+                case '[':
+                case '!':
+                case ']':
+                    md_analyze_bracket(ctx, idx);
+                    break;
+
+                case '&':
+                    md_analyze_entity(ctx, idx);
+                    break;
+
+                case '|':
+                    md_analyze_table_cell_boundary(ctx, idx);
+                    break;
+
+                case '_':
+                case '*':
+                    md_analyze_emph(ctx, idx);
+                    break;
+
+                case '~':
+                    md_analyze_tilde(ctx, idx);
+                    break;
+
+                case '$':
+                    md_analyze_dollar(ctx, idx);
+                    break;
+
+                case '.':
+                case ':':
+                    md_analyze_permissive_url_autolink(ctx, idx);
+                    break;
+
+                case '@':
+                    md_analyze_permissive_email_autolink(ctx, idx);
+                    break;
+            }
+        }
+
+        idx++;
+    }
+}
+
+/* Analyze marks (build ctx->marks).
+ *
+ * The marks are collected from the given lines and then resolved in passes:
+ * links first, then either table cell boundaries (when table_mode is
+ * non-zero) or everything else (entities, emphasis, permissive autolinks).
+ *
+ * Returns a negative value if md_collect_marks() or md_resolve_links()
+ * fails; otherwise the (non-negative) result of the last of those calls.
+ */
+static int
+md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
+{
+    int ret;
+
+    /* Start with an empty stack of marks and fill it. */
+    ctx->n_marks = 0;
+    MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
+
+    /* (1) Links. */
+    md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"));
+    MD_CHECK(md_resolve_links(ctx, lines, n_lines));
+    BRACKET_OPENERS.head = BRACKET_OPENERS.tail = -1;
+    ctx->unresolved_link_head = ctx->unresolved_link_tail = -1;
+
+    if(!table_mode) {
+        /* (3) Emphasis and strong emphasis; permissive autolinks. */
+        md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
+    } else {
+        /* (2) Analyze table cell boundaries.
+         * Note the TABLECELLBOUNDARIES chain is reset prior to the call of
+         * md_analyze_marks(), not after, because caller may need it. */
+        MD_ASSERT(n_lines == 1);
+        TABLECELLBOUNDARIES.head = -1;
+        TABLECELLBOUNDARIES.tail = -1;
+        ctx->n_table_cell_boundaries = 0;
+        md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"));
+    }
+
+abort:
+    return ret;
+}
+
+/* Analyze the marks in the index range [mark_beg, mark_end) which are not
+ * related to links: entities in the first pass; emphasis, strike-through,
+ * LaTeX math and permissive autolinks in the second one. All opener chains
+ * from OPENERS_CHAIN_FIRST to OPENERS_CHAIN_LAST are emptied afterwards. */
+static void
+md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
+                         int mark_beg, int mark_end)
+{
+    int chain_index = OPENERS_CHAIN_FIRST;
+
+    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&"));
+    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
+
+    /* Leave no pending openers behind for the next analyzed range. */
+    while(chain_index <= OPENERS_CHAIN_LAST) {
+        ctx->mark_chains[chain_index].head = -1;
+        ctx->mark_chains[chain_index].tail = -1;
+        chain_index++;
+    }
+}
+
+/* Notify the application about entering (enter != 0) or leaving
+ * (enter == 0) a link or image span of the given type.
+ *
+ * The destination and title are converted to attributes for the callback;
+ * if prohibit_escapes_in_dest is non-zero, the destination is built with
+ * MD_BUILD_ATTR_NO_ESCAPES. The attribute builders are always released
+ * before returning.
+ *
+ * Returns 0 on success; otherwise the value set by the failing MD_CHECK()
+ * or span callback macro.
+ */
+static int
+md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
+                      const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
+                      const CHAR* title, SZ title_size)
+{
+    MD_ATTRIBUTE_BUILD href_build = { 0 };
+    MD_ATTRIBUTE_BUILD title_build = { 0 };
+    MD_SPAN_A_DETAIL detail;
+    int ret = 0;
+
+    /* Note this relies on the fact that MD_SPAN_A_DETAIL and
+     * MD_SPAN_IMG_DETAIL are binary-compatible. */
+    memset(&detail, 0, sizeof(detail));
+
+    MD_CHECK(md_build_attribute(ctx, dest, dest_size,
+                    (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
+                    &detail.href, &href_build));
+    MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &detail.title, &title_build));
+
+    if(!enter) {
+        MD_LEAVE_SPAN(type, &detail);
+    } else {
+        MD_ENTER_SPAN(type, &detail);
+    }
+
+abort:
+    md_free_attribute(ctx, &href_build);
+    md_free_attribute(ctx, &title_build);
+    return ret;
+}
+
+/* Notify the application about entering (enter != 0) or leaving
+ * (enter == 0) a wiki link span whose target is the given string.
+ *
+ * Returns 0 on success; otherwise the value set by the failing MD_CHECK()
+ * or span callback macro. The attribute builder is always released before
+ * returning.
+ */
+static int
+md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
+{
+    MD_ATTRIBUTE_BUILD target_build = { 0 };
+    MD_SPAN_WIKILINK_DETAIL detail;
+    int ret = 0;
+
+    memset(&detail, 0, sizeof(detail));
+    MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &detail.target, &target_build));
+
+    if(!enter) {
+        MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &detail);
+    } else {
+        MD_ENTER_SPAN(MD_SPAN_WIKILINK, &detail);
+    }
+
+abort:
+    md_free_attribute(ctx, &target_build);
+    return ret;
+}
+
+/** forward declaration */
+static int md_output_toc(MD_CTX *ctx);
+
+/* Render the output, accordingly to the analyzed ctx->marks. */
+static int
+md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    MD_TEXTTYPE text_type;
+    const MD_LINE* line = lines;
+    MD_MARK* prev_mark = NULL;
+    MD_MARK* mark;
+    OFF off = lines[0].beg;
+    OFF end = lines[n_lines-1].end;
+    int enforce_hardbreak = 0;
+    int ret = 0;
+
+    /* Find first resolved mark. Note there is always at least one resolved
+     * mark,  the dummy last one after the end of the latest line we actually
+     * never really reach. This saves us of a lot of special checks and cases
+     * in this function. */
+    mark = ctx->marks;
+    while(!(mark->flags & MD_MARK_RESOLVED))
+        mark++;
+
+    text_type = MD_TEXT_NORMAL;
+
+    while(1) {
+        /* Process the text up to the next mark or end-of-line. */
+        OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
+        if(tmp > off) {
+            MD_TEXT(text_type, STR(off), tmp - off);
+            off = tmp;
+        }
+
+        /* If reached the mark, process it and move to next one. */
+        if(off >= mark->beg) {
+            switch(mark->ch) {
+                case '\\':      /* Backslash escape. */
+                    if(ISNEWLINE(mark->beg+1))
+                        enforce_hardbreak = 1;
+                    else
+                        MD_TEXT(text_type, STR(mark->beg+1), 1);
+                    break;
+
+                case ' ':       /* Non-trivial space. */
+                    MD_TEXT(text_type, _T(" "), 1);
+                    break;
+
+                case '`':       /* Code span. */
+                    if(mark->flags & MD_MARK_OPENER) {
+                        MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
+                        text_type = MD_TEXT_CODE;
+                    } else {
+                        MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
+                        text_type = MD_TEXT_NORMAL;
+                    }
+                    break;
+
+                case '_':       /* Underline (or emphasis if we fall through). */
+                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
+                        if(mark->flags & MD_MARK_OPENER) {
+                            while(off < mark->end) {
+                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
+                                off++;
+                            }
+                        } else {
+                            while(off < mark->end) {
+                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
+                                off++;
+                            }
+                        }
+                        break;
+                    }
+                    MD_FALLTHROUGH();
+
+                case '*':       /* Emphasis, strong emphasis. */
+                    if(mark->flags & MD_MARK_OPENER) {
+                        if((mark->end - off) % 2) {
+                            MD_ENTER_SPAN(MD_SPAN_EM, NULL);
+                            off++;
+                        }
+                        while(off + 1 < mark->end) {
+                            MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
+                            off += 2;
+                        }
+                    } else {
+                        while(off + 1 < mark->end) {
+                            MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
+                            off += 2;
+                        }
+                        if((mark->end - off) % 2) {
+                            MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
+                            off++;
+                        }
+                    }
+                    break;
+
+                case '~':
+                    if(mark->flags & MD_MARK_OPENER)
+                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
+                    else
+                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
+                    break;
+
+                case '$':
+                    if(mark->flags & MD_MARK_OPENER) {
+                        MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
+                        text_type = MD_TEXT_LATEXMATH;
+                    } else {
+                        MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
+                        text_type = MD_TEXT_NORMAL;
+                    }
+                    break;
+
+                case '[':       /* Link, wiki link, image. */
+                case '!':
+                case ']':
+                {
+                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
+                    const MD_MARK* closer = &ctx->marks[opener->next];
+                    const MD_MARK* dest_mark;
+                    const MD_MARK* title_mark;
+
+                    if ((opener->ch == '[' && closer->ch == ']') &&
+                        opener->end - opener->beg >= 2 &&
+                        closer->end - closer->beg >= 2)
+                    {
+                        int has_label = (opener->end - opener->beg > 2);
+                        SZ target_sz;
+
+                        if(has_label)
+                            target_sz = opener->end - (opener->beg+2);
+                        else
+                            target_sz = closer->beg - opener->end;
+
+                        MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
+                                 has_label ? STR(opener->beg+2) : STR(opener->end),
+                                 target_sz));
+
+                        break;
+                    }
+
+                    dest_mark = opener+1;
+                    MD_ASSERT(dest_mark->ch == 'D');
+                    title_mark = opener+2;
+                    if (title_mark->ch != 'D') break;
+
+                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
+                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
+                                md_mark_get_ptr(ctx, (int)(dest_mark - ctx->marks)),
+                                dest_mark->prev, FALSE,
+                                md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
+								title_mark->prev));
+
+                    /* link/image closer may span multiple lines. */
+                    if(mark->ch == ']') {
+                        while(mark->end > line->end)
+                            line++;
+                    }
+
+                    break;
+                }
+
+                case '<':
+                case '>':       /* Autolink or raw HTML. */
+                    if(!(mark->flags & MD_MARK_AUTOLINK)) {
+                        /* Raw HTML. */
+                        if(mark->flags & MD_MARK_OPENER)
+                            text_type = MD_TEXT_HTML;
+                        else
+                            text_type = MD_TEXT_NORMAL;
+                        break;
+                    }
+                    /* Pass through, if auto-link. */
+                    MD_FALLTHROUGH();
+
+                case '@':       /* Permissive e-mail autolink. */
+                case ':':       /* Permissive URL autolink. */
+                case '.':       /* Permissive WWW autolink. */
+                {
+                    MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
+                    MD_MARK* closer = &ctx->marks[opener->next];
+                    const CHAR* dest = STR(opener->end);
+                    SZ dest_size = closer->beg - opener->end;
+
+                    /* For permissive auto-links we do not know closer mark
+                     * position at the time of md_collect_marks(), therefore
+                     * it can be out-of-order in ctx->marks[].
+                     *
+                     * With this flag, we make sure that we output the closer
+                     * only if we processed the opener. */
+                    if(mark->flags & MD_MARK_OPENER)
+                        closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;
+
+                    if(opener->ch == '@' || opener->ch == '.') {
+                        dest_size += 7;
+                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
+                        memcpy(ctx->buffer,
+                                (opener->ch == '@' ? _T("mailto:") : _T("http://")),
+                                7 * sizeof(CHAR));
+                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
+                        dest = ctx->buffer;
+                    }
+
+                    if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
+                        MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
+                                    MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
+                    break;
+                }
+
+                case '&':       /* Entity. */
+                    MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
+                    break;
+
+                case '\0':
+                    MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
+                    break;
+
+                case 127:
+                    goto abort;
+            }
+
+            off = mark->end;
+
+            /* Move to next resolved mark. */
+            prev_mark = mark;
+            mark++;
+            while(!(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off)
+                mark++;
+        }
+
+        /* If reached end of line, move to next one. */
+        if(off >= line->end) {
+            /* If it is the last line, we are done. */
+            if(off >= end)
+                break;
+
+            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
+                OFF tmp;
+
+                MD_ASSERT(prev_mark != NULL);
+                MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$')  &&  (prev_mark->flags & MD_MARK_OPENER));
+                MD_ASSERT(ISANYOF2_(mark->ch, '`', '$')  &&  (mark->flags & MD_MARK_CLOSER));
+
+                /* Inside a code span, trailing line whitespace has to be
+                 * outputted. */
+                tmp = off;
+                while(off < ctx->size  &&  ISBLANK(off))
+                    off++;
+                if(off > tmp)
+                    MD_TEXT(text_type, STR(tmp), off-tmp);
+
+                /* and new lines are transformed into single spaces. */
+                if(prev_mark->end < off  &&  off < mark->beg)
+                    MD_TEXT(text_type, _T(" "), 1);
+            } else if(text_type == MD_TEXT_HTML) {
+                /* Inside raw HTML, we output the new line verbatim, including
+                 * any trailing spaces. */
+                OFF tmp = off;
+
+                while(tmp < end  &&  ISBLANK(tmp))
+                    tmp++;
+                if(tmp > off)
+                    MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
+                MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
+            } else {
+                /* Output soft or hard line break. */
+                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
+
+                if(text_type == MD_TEXT_NORMAL) {
+                    if(enforce_hardbreak)
+                        break_type = MD_TEXT_BR;
+                    else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
+                        break_type = MD_TEXT_BR;
+                }
+
+                MD_TEXT(break_type, _T("\n"), 1);
+            }
+
+            /* Move to the next line. */
+            line++;
+            off = line->beg;
+
+            enforce_hardbreak = 0;
+        }
+    }
+
+abort:
+    return ret;
+}
+
+
+/***************************
+ ***  Processing Tables  ***
+ ***************************/
+
+/* Derive the alignment of each table column from the table underline.
+ *
+ * Walks the text in [beg, end) and, for each of the n_align columns, finds
+ * the run of dashes and checks whether a colon sits right before and/or
+ * right after it. The result is written to align[0 .. n_align-1].
+ *
+ * Note the search for the first dash of a column is not bounded by 'end';
+ * the function relies on the line really holding n_align dash runs.
+ */
+static void
+md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
+{
+    /* Indexed by a 2-bit mask: bit 0 = colon before the dashes,
+     * bit 1 = colon after the dashes. */
+    static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
+    OFF pos = beg;
+    int col;
+
+    for(col = 0; col < n_align; col++) {
+        int colon_mask = 0;
+
+        /* Advance to the first dash of this column. */
+        while(CH(pos) != _T('-'))
+            pos++;
+        if(pos > beg  &&  CH(pos-1) == _T(':'))
+            colon_mask |= 1;
+
+        /* Skip the whole run of dashes. */
+        while(pos < end  &&  CH(pos) == _T('-'))
+            pos++;
+        if(pos < end  &&  CH(pos) == _T(':'))
+            colon_mask |= 2;
+
+        align[col] = align_map[colon_mask];
+    }
+}
+
+/* Forward declaration. */
+static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
+
+/* Emit one table cell (cell_type is the block type reported to the
+ * enter/leave callbacks) whose contents lie in [beg, end).
+ *
+ * Leading and trailing whitespace of the cell is trimmed, then the rest is
+ * processed as a single-line normal block between the enter/leave events.
+ */
+static int
+md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
+{
+    MD_BLOCK_TD_DETAIL det;
+    MD_LINE cell_line;
+    int ret = 0;
+
+    /* Trim whitespace on both sides of the cell. */
+    for( ; beg < end  &&  ISWHITESPACE(beg); beg++)
+        ;
+    for( ; end > beg  &&  ISWHITESPACE(end-1); end--)
+        ;
+
+    cell_line.beg = beg;
+    cell_line.end = end;
+    det.align = align;
+
+    MD_ENTER_BLOCK(cell_type, &det);
+    MD_CHECK(md_process_normal_block_contents(ctx, &cell_line, 1));
+    MD_LEAVE_BLOCK(cell_type, &det);
+
+abort:
+    return ret;
+}
+
+/* Emit one table row spanning [beg, end).
+ *
+ * The row is first analyzed for inlines in table mode so the pipes acting
+ * as cell boundaries are known; the row is then cut at those boundaries and
+ * each non-empty piece is emitted as a cell. At most col_count cells are
+ * emitted, and empty cells are appended if the row has fewer.
+ */
+static int
+md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
+                     const MD_ALIGN* align, int col_count)
+{
+    MD_LINE row_line;
+    OFF* cell_offs = NULL;
+    int mark_idx, cell, col, n_offs, capacity;
+    int ret = 0;
+
+    row_line.beg = beg;
+    row_line.end = end;
+
+    /* Find the pipe characters which form the cell boundaries. */
+    MD_CHECK(md_analyze_inlines(ctx, &row_line, 1, TRUE));
+
+    /* ctx->marks[] gets reused while the cell contents are processed, so
+     * the boundaries have to be copied into a private buffer first. */
+    capacity = ctx->n_table_cell_boundaries + 2;
+    cell_offs = (OFF*) malloc(capacity * sizeof(OFF));
+    if(cell_offs == NULL) {
+        MD_LOG("malloc() failed.");
+        ret = -1;
+        goto abort;
+    }
+
+    n_offs = 0;
+    cell_offs[n_offs++] = beg;
+    mark_idx = TABLECELLBOUNDARIES.head;
+    while(mark_idx >= 0) {
+        cell_offs[n_offs++] = ctx->marks[mark_idx].end;
+        mark_idx = ctx->marks[mark_idx].next;
+    }
+    cell_offs[n_offs++] = end+1;
+
+    /* Emit the cells. Each stored offset points just past a boundary, hence
+     * the "- 1" to get the end of the preceding cell. */
+    MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
+    col = 0;
+    for(cell = 0; cell < n_offs-1  &&  col < col_count; cell++) {
+        OFF cell_beg = cell_offs[cell];
+        OFF cell_end = cell_offs[cell+1] - 1;
+
+        if(cell_beg < cell_end) {
+            MD_CHECK(md_process_table_cell(ctx, cell_type, align[col], cell_beg, cell_end));
+            col++;
+        }
+    }
+    /* Pad the row with empty cells if it has fewer of them than columns. */
+    while(col < col_count) {
+        MD_CHECK(md_process_table_cell(ctx, cell_type, align[col], 0, 0));
+        col++;
+    }
+    MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
+
+abort:
+    free(cell_offs);
+
+    /* Release temporary memory blocks held by the dummy marks. */
+    for(mark_idx = PTR_CHAIN.head; mark_idx >= 0; mark_idx = ctx->marks[mark_idx].next)
+        free(md_mark_get_ptr(ctx, mark_idx));
+    PTR_CHAIN.head = -1;
+    PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+
+/* Emit the contents of a table block.
+ *
+ * lines[0] is the header row, lines[1] the underline (it only determines
+ * column alignment) and any further lines are body rows. The header is
+ * wrapped into THEAD; TBODY is emitted only if there is at least one body
+ * row.
+ */
+static int
+md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
+{
+    MD_ALIGN* col_align;
+    int ret = 0;
+
+    /* The header row and the underline are always present. */
+    MD_ASSERT(n_lines >= 2);
+
+    col_align = (MD_ALIGN*) malloc(col_count * sizeof(MD_ALIGN));
+    if(col_align == NULL) {
+        MD_LOG("malloc() failed.");
+        ret = -1;
+        goto abort;
+    }
+
+    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, col_align, col_count);
+
+    /* Table head. */
+    MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
+    MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
+                        lines[0].beg, lines[0].end, col_align, col_count));
+    MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
+
+    /* Table body (if any). */
+    if(n_lines > 2) {
+        const MD_LINE* body_line = lines + 2;
+        const MD_LINE* body_end = lines + n_lines;
+
+        MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
+        for( ; body_line < body_end; body_line++) {
+            MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
+                     body_line->beg, body_line->end, col_align, col_count));
+        }
+        MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
+    }
+
+abort:
+    free(col_align);
+    return ret;
+}
+
+
+/**************************
+ ***  Processing Block  ***
+ **************************/
+
+#define MD_BLOCK_CONTAINER_OPENER   0x01    /* Record opens a container: md_process_all_blocks() calls MD_ENTER_BLOCK(). */
+#define MD_BLOCK_CONTAINER_CLOSER   0x02    /* Record closes a container: md_process_all_blocks() calls MD_LEAVE_BLOCK(). */
+#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)    /* Mask: any bit set means a container record (no lines follow it), otherwise a leaf block. */
+#define MD_BLOCK_LOOSE_LIST         0x04    /* List is loose; reported as is_tight == FALSE for UL/OL. */
+#define MD_BLOCK_SETEXT_HEADER      0x08    /* MD_BLOCK_H built from a setext header; checked in md_end_current_block(). */
+
+struct MD_BLOCK_tag {
+    MD_BLOCKTYPE type  :  8;    /* Kind of the block. */
+    unsigned flags     :  8;    /* Combination of the MD_BLOCK_xxxx flag bits defined above. */
+
+    /* MD_BLOCK_H:      Header level (1 - 6)
+     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
+     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
+     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
+     * MD_BLOCK_UL:     List mark character (reported as MD_BLOCK_UL_DETAIL::mark).
+     * MD_BLOCK_OL:     Mark delimiter character (reported as MD_BLOCK_OL_DETAIL::mark_delimiter).
+     */
+    unsigned data      : 16;
+
+    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
+     *                  The line records are stored right after this structure.
+     * MD_BLOCK_LI:     Task mark offset in the input doc.
+     * MD_BLOCK_OL:     Start item number.
+     */
+    unsigned n_lines;
+    /* MD_BLOCK_H:      Index of the heading's record in ctx->heading_defs[].
+     *                  Assigned by md_make_heading() and read by
+     *                  md_setup_H_identifier(); both are reached only when
+     *                  MD_FLAG_HEADINGAUTOID is set, otherwise the field is
+     *                  left unset.
+    */
+    unsigned heading_def; // TODO: rename to heading_idx?
+
+};
+
+struct MD_CONTAINER_tag {    /* One nesting level of a container block. NOTE(review): only is_loose is used in this part of the file; the other members are presumably filled by the line analysis code -- confirm there. */
+    CHAR ch;
+    unsigned is_loose    : 8;    /* Non-zero: paragraphs directly inside get their own block events (see md_process_leaf_block()). Block quotes are always recorded as loose. */
+    unsigned is_task     : 8;
+    unsigned start;
+    unsigned mark_indent;
+    unsigned contents_indent;
+    OFF block_byte_off;
+    OFF task_mark_off;
+};
+
+
+/* Process the contents of a normal (i.e. not verbatim) block made of the
+ * given lines: the inlines are analyzed first and then rendered.
+ *
+ * Any temporary memory blocks attached to dummy marks during the analysis
+ * are released before returning, on success as well as on failure.
+ *
+ * Returns the status left in 'ret' by the MD_CHECK()-wrapped steps.
+ */
+static int
+md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    int i;
+    /* Initialized like in all the sibling functions so the return value is
+     * well defined no matter how MD_CHECK() is implemented. */
+    int ret = 0;
+
+    MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
+    MD_CHECK(md_process_inlines(ctx, lines, n_lines));
+
+abort:
+    /* Free any temporary memory blocks stored within some dummy marks. */
+    for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
+        free(md_mark_get_ptr(ctx, i));
+    PTR_CHAIN.head = -1;
+    PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+
+/* Output the lines of a verbatim block (code or raw HTML) as text of the
+ * given type.
+ *
+ * For every line the indentation is emitted as spaces, then the line text
+ * itself follows, and finally a new line character.
+ */
+static int
+md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
+{
+    /* Indentation is emitted in chunks taken from this string of spaces. */
+    static const CHAR indent_chunk_str[] = _T("                ");
+    static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
+
+    int ret = 0;
+
+    for( ; n_lines > 0; lines++, n_lines--) {
+        int pending_indent = lines->indent;
+
+        MD_ASSERT(pending_indent >= 0);
+
+        /* Whole chunks first, then the remainder (if any). */
+        for( ; pending_indent > (int) indent_chunk_size; pending_indent -= indent_chunk_size)
+            MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
+        if(pending_indent > 0)
+            MD_TEXT(text_type, indent_chunk_str, pending_indent);
+
+        /* The text of the line. */
+        MD_TEXT_INSECURE(text_type, STR(lines->beg), lines->end - lines->beg);
+
+        /* Every line is terminated with a new line character. */
+        MD_TEXT(text_type, _T("\n"), 1);
+    }
+
+abort:
+    return ret;
+}
+
+/* Output the contents of a code block.
+ *
+ * Indented code: blank lines at the start and at the end are dropped.
+ * Fenced code: the first line (the opening fence) is dropped.
+ * Whatever remains is emitted verbatim as MD_TEXT_CODE.
+ */
+static int
+md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
+{
+    if(!is_fenced) {
+        /* Strip leading blank lines... */
+        for( ; n_lines > 0  &&  lines[0].beg == lines[0].end; lines++, n_lines--)
+            ;
+        /* ...and trailing ones. */
+        for( ; n_lines > 0  &&  lines[n_lines-1].beg == lines[n_lines-1].end; n_lines--)
+            ;
+    } else {
+        /* The first line is the opening fence, not code. (The closing
+         * fence is not present here due to logic in md_analyze_line().) */
+        lines++;
+        n_lines--;
+    }
+
+    return (n_lines == 0)
+                ? 0
+                : md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
+}
+
+/* Fill the detail structure of a fenced code block from its opening fence
+ * line (the first verbatim line stored right after the block header).
+ *
+ * det->info is built from the text following the fence with the
+ * surrounding spaces trimmed, det->lang from the first
+ * whitespace-delimited word of it. det->fence_char is set only if both
+ * attributes have been built.
+ */
+static int
+md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
+                            MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
+{
+    const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
+    const CHAR fence_ch = CH(fence_line->beg);
+    OFF info_beg = fence_line->beg;
+    OFF info_end = fence_line->end;
+    OFF lang_end;
+    int ret = 0;
+
+    /* Step over the fence characters and the spaces behind them. */
+    for( ; info_beg < ctx->size  &&  CH(info_beg) == fence_ch; info_beg++)
+        ;
+    for( ; info_beg < ctx->size  &&  CH(info_beg) == _T(' '); info_beg++)
+        ;
+
+    /* Drop the spaces at the end of the line. */
+    for( ; info_end > info_beg  &&  CH(info_end-1) == _T(' '); info_end--)
+        ;
+
+    /* The info string attribute: everything what has left. */
+    MD_CHECK(md_build_attribute(ctx, STR(info_beg), info_end - info_beg, 0, &det->info, info_build));
+
+    /* The lang attribute: the info string up to the first whitespace. */
+    for(lang_end = info_beg; lang_end < info_end  &&  !ISWHITESPACE(lang_end); lang_end++)
+        ;
+    MD_CHECK(md_build_attribute(ctx, STR(info_beg), lang_end - info_beg, 0, &det->lang, lang_build));
+
+    det->fence_char = fence_ch;
+
+abort:
+    return ret;
+}
+
+/* Build det->identifier of a heading block from the heading record the
+ * block refers to (block->heading_def indexes ctx->heading_defs[]).
+ *
+ * The first character of the stored identifier is skipped.
+ * NOTE(review): presumably it is a leading '#' kept for use as a link
+ * destination -- confirm in md_heading_build_ident().
+ * A non-zero postfix of the heading record selects the postfix-aware
+ * attribute builder.
+ */
+static int
+md_setup_H_identifier(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_H_DETAIL* det,
+                            MD_ATTRIBUTE_BUILD* id_build)
+{
+    MD_HEADING_DEF* heading = ctx->heading_defs + block->heading_def;
+    int ret = 0;
+
+    /* Build the identifier attribute. */
+    if(heading->postfix != 0) {
+        MD_CHECK(md_build_attribute_postfix(ctx, ctx->identifiers + heading->ident_beg + 1,
+            heading->ident_size-1, heading->postfix, &det->identifier, id_build));
+    } else {
+        MD_CHECK(md_build_trivial_attribute(ctx, ctx->identifiers + heading->ident_beg + 1,
+            heading->ident_size-1, &det->identifier, id_build));
+    }
+
+abort:
+    return ret;
+}
+
+/* Process one leaf block: prepare its detail structure, fire the enter
+ * block callback, output the block contents according to the block type
+ * and fire the leave block callback.
+ *
+ * A paragraph placed directly in a tight list is special: its contents are
+ * output without the enter/leave pair.
+ *
+ * The line records of the block are stored right after the block header.
+ */
+static int
+md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
+{
+    union {
+        MD_BLOCK_H_DETAIL header;
+        MD_BLOCK_CODE_DETAIL code;
+        MD_BLOCK_TABLE_DETAIL table;
+    } det;
+    MD_ATTRIBUTE_BUILD identifier_build;
+    MD_ATTRIBUTE_BUILD info_build;
+    MD_ATTRIBUTE_BUILD lang_build;
+    int free_identifier = FALSE;    /* identifier_build has to be released */
+    int free_fence_attrs = FALSE;   /* info_build & lang_build have to be released */
+    int emit_block_events = TRUE;
+    int ret = 0;
+
+    memset(&det, 0, sizeof(det));
+
+    /* No enter/leave events for a paragraph whose innermost container is
+     * not loose. */
+    if(block->type == MD_BLOCK_P  &&  ctx->n_containers != 0  &&
+       !ctx->containers[ctx->n_containers-1].is_loose)
+        emit_block_events = FALSE;
+
+    /* Set up the block detail. */
+    if(block->type == MD_BLOCK_H) {
+        det.header.level = block->data;
+        if(ctx->parser.flags & MD_FLAG_HEADINGAUTOID) {
+            free_identifier = TRUE;
+            MD_CHECK(md_setup_H_identifier(ctx, block, &det.header, &identifier_build));
+        }
+    } else if(block->type == MD_BLOCK_CODE) {
+        /* Only fenced code blocks have the info string. */
+        if(block->data != 0) {
+            memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
+            free_fence_attrs = TRUE;
+            MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
+        }
+    } else if(block->type == MD_BLOCK_TABLE) {
+        /* The two lines not counted as body rows are the header row and
+         * the underline. */
+        det.table.col_count = block->data;
+        det.table.head_row_count = 1;
+        det.table.body_row_count = block->n_lines - 2;
+    }
+
+    if(emit_block_events)
+        MD_ENTER_BLOCK(block->type, (void*) &det);
+
+    /* Output the block contents according to its type. */
+    switch(block->type) {
+        case MD_BLOCK_TABLE:
+            MD_CHECK(md_process_table_block_contents(ctx, block->data,
+                            (const MD_LINE*)(block + 1), block->n_lines));
+            break;
+
+        case MD_BLOCK_CODE:
+            MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
+            break;
+
+        case MD_BLOCK_HTML:
+            MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
+            break;
+
+        case MD_BLOCK_NAV:
+            MD_CHECK(md_output_toc(ctx));
+            break;
+
+        case MD_BLOCK_HR:
+            /* Horizontal rule has no contents. */
+            break;
+
+        default:
+            MD_CHECK(md_process_normal_block_contents(ctx,
+                            (const MD_LINE*)(block + 1), block->n_lines));
+            break;
+    }
+
+    if(emit_block_events)
+        MD_LEAVE_BLOCK(block->type, (void*) &det);
+
+abort:
+    if(free_identifier)
+        md_free_attribute(ctx, &identifier_build);
+    if(free_fence_attrs) {
+        md_free_attribute(ctx, &info_build);
+        md_free_attribute(ctx, &lang_build);
+    }
+    return ret;
+}
+
+/* Walk through all the block records accumulated in ctx->block_bytes and
+ * process them: container records fire the leave and/or enter block
+ * callbacks, leaf blocks are handed over to md_process_leaf_block().
+ *
+ * When done with success, the buffer is marked as empty.
+ */
+static int
+md_process_all_blocks(MD_CTX* ctx)
+{
+    int pos = 0;
+    int ret = 0;
+
+    /* ctx->containers is not needed anymore for detection of lists and
+     * list items, so it gets reused here for tracking which of the open
+     * containers are loose. We rely on the vector being large enough for
+     * the deepest nesting level. */
+    ctx->n_containers = 0;
+
+    while(pos < ctx->n_block_bytes) {
+        MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + pos);
+        union {
+            MD_BLOCK_UL_DETAIL ul;
+            MD_BLOCK_OL_DETAIL ol;
+            MD_BLOCK_LI_DETAIL li;
+        } det;
+        int is_list = (block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL);
+        int is_quote = (block->type == MD_BLOCK_QUOTE);
+
+        /* Set up the detail structure for lists and list items. */
+        if(block->type == MD_BLOCK_UL) {
+            det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
+            det.ul.mark = (CHAR) block->data;
+        } else if(block->type == MD_BLOCK_OL) {
+            det.ol.start = block->n_lines;
+            det.ol.is_tight =  (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
+            det.ol.mark_delimiter = (CHAR) block->data;
+        } else if(block->type == MD_BLOCK_LI) {
+            det.li.is_task = (block->data != 0);
+            det.li.task_mark = (CHAR) block->data;
+            det.li.task_mark_offset = (OFF) block->n_lines;
+        }
+
+        if(!(block->flags & MD_BLOCK_CONTAINER)) {
+            /* Leaf block: its line records follow the header, step over
+             * them too. */
+            MD_CHECK(md_process_leaf_block(ctx, block));
+
+            pos += block->n_lines *
+                   ((block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
+                        ? sizeof(MD_VERBATIMLINE) : sizeof(MD_LINE));
+        } else {
+            if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
+                MD_LEAVE_BLOCK(block->type, &det);
+
+                if(is_list || is_quote)
+                    ctx->n_containers--;
+            }
+
+            if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
+                MD_ENTER_BLOCK(block->type, &det);
+
+                if(is_list || is_quote) {
+                    /* A block quote always counts as loose so that any
+                     * text in it, even if nested inside a tight list item,
+                     * is wrapped with <p>...</p>. */
+                    ctx->containers[ctx->n_containers].is_loose =
+                            is_quote ? TRUE : (block->flags & MD_BLOCK_LOOSE_LIST);
+                    ctx->n_containers++;
+                }
+            }
+        }
+
+        pos += sizeof(MD_BLOCK);
+    }
+
+    ctx->n_block_bytes = 0;
+
+abort:
+    return ret;
+}
+
+
+/************************************
+ ***  Grouping Lines into Blocks  ***
+ ************************************/
+
+/* Reserve n_bytes at the end of ctx->block_bytes, growing the buffer when
+ * needed, and return pointer to the reserved region.
+ *
+ * Growing may move the buffer in memory; ctx->current_block is re-based
+ * accordingly.
+ *
+ * Returns NULL if the buffer could not be enlarged. The context is left
+ * untouched in that case, i.e. ctx->alloc_block_bytes keeps describing the
+ * real size of the (still valid) old buffer.
+ */
+static void*
+md_push_block_bytes(MD_CTX* ctx, int n_bytes)
+{
+    void* ptr;
+
+    if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
+        void* new_block_bytes;
+        int new_alloc_block_bytes;
+        int has_current_block = (ctx->current_block != NULL);
+        OFF off_current_block = 0;
+
+        /* Grow by one half (or start with 512 bytes), but never to less
+         * than what is actually needed for this request. */
+        new_alloc_block_bytes = (ctx->alloc_block_bytes > 0
+                ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
+                : 512);
+        if(new_alloc_block_bytes < ctx->n_block_bytes + n_bytes)
+            new_alloc_block_bytes = ctx->n_block_bytes + n_bytes;
+
+        /* Remember where ->current_block lives before realloc() can
+         * release the old buffer; the old pointers must not be used for
+         * any arithmetic afterwards. */
+        if(has_current_block)
+            off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes);
+
+        new_block_bytes = realloc(ctx->block_bytes, new_alloc_block_bytes);
+        if(new_block_bytes == NULL) {
+            MD_LOG("realloc() failed.");
+            return NULL;
+        }
+
+        /* Fix the ->current_block after the reallocation. */
+        if(has_current_block)
+            ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
+
+        /* Commit the new buffer and its size only now, when we know the
+         * reallocation has succeeded. */
+        ctx->block_bytes = new_block_bytes;
+        ctx->alloc_block_bytes = new_alloc_block_bytes;
+    }
+
+    ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
+    ctx->n_block_bytes += n_bytes;
+    return ptr;
+}
+
+/* Start a new leaf block of the type implied by the given line analysis
+ * and make it the current block. No block may be under construction when
+ * this is called.
+ *
+ * The block starts with no lines, no flags, and with the data copied from
+ * the line analysis.
+ *
+ * Returns 0 on success, -1 if the block header could not be allocated.
+ */
+static int
+md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
+{
+    MD_BLOCK* new_block;
+
+    MD_ASSERT(ctx->current_block == NULL);
+
+    new_block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
+    if(new_block == NULL)
+        return -1;
+
+    new_block->flags = 0;
+    new_block->data = line->data;
+    new_block->n_lines = 0;
+
+    /* Map the line type to the block type. */
+    switch(line->type) {
+        case MD_LINE_TEXT:
+            new_block->type = MD_BLOCK_P;
+            break;
+
+        case MD_LINE_ATXHEADER:
+        case MD_LINE_SETEXTHEADER:
+            new_block->type = MD_BLOCK_H;
+            break;
+
+        case MD_LINE_INDENTEDCODE:
+        case MD_LINE_FENCEDCODE:
+            new_block->type = MD_BLOCK_CODE;
+            break;
+
+        case MD_LINE_HTML:
+            new_block->type = MD_BLOCK_HTML;
+            break;
+
+        case MD_LINE_HR:
+            new_block->type = MD_BLOCK_HR;
+            break;
+
+        case MD_LINE_TOC:
+            new_block->type = MD_BLOCK_NAV;
+            break;
+
+        /* These line types never start a block. */
+        case MD_LINE_BLANK:
+        case MD_LINE_SETEXTUNDERLINE:
+        case MD_LINE_TABLEUNDERLINE:
+        default:
+            MD_UNREACHABLE();
+            break;
+    }
+
+    ctx->current_block = new_block;
+    return 0;
+}
+
+/* Consume any link reference definitions found at the start of the current
+ * (textual) block and remember them, so that links referring to them can
+ * be resolved.
+ *
+ * (Only the start of the block has to be examined because a reference
+ * definition cannot interrupt a paragraph.)
+ *
+ * The consumed lines are removed from the block. If nothing remains, the
+ * whole block is discarded and ctx->current_block is reset to NULL.
+ *
+ * Returns 0 on success, -1 if a definition could not be stored.
+ */
+static int
+md_consume_link_reference_definitions(MD_CTX* ctx)
+{
+    MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
+    int total = ctx->current_block->n_lines;
+    int consumed = 0;
+
+    /* Count the leading lines which form one or more reference
+     * definitions. */
+    while(consumed < total) {
+        int def_lines = md_is_link_reference_definition(ctx,
+                                    lines + consumed, total - consumed);
+
+        /* The definition has been recognized but it could not be stored
+         * due to a memory allocation error. */
+        if(def_lines < 0)
+            return -1;
+
+        /* No (more) reference definition here. */
+        if(def_lines == 0)
+            break;
+
+        consumed += def_lines;
+    }
+
+    if(consumed == 0)
+        return 0;
+
+    if(consumed == total) {
+        /* Nothing has left: Drop all the lines as well as the block
+         * header. */
+        ctx->n_block_bytes -= consumed * sizeof(MD_LINE) + sizeof(MD_BLOCK);
+        ctx->current_block = NULL;
+    } else {
+        /* Shift the remaining lines to the start of the block. */
+        memmove(lines, lines + consumed, (total - consumed) * sizeof(MD_LINE));
+        ctx->current_block->n_lines -= consumed;
+        ctx->n_block_bytes -= consumed * sizeof(MD_LINE);
+    }
+
+    return 0;
+}
+
+/* Register the current block as a heading: build its identifier, store it
+ * as a new record in ctx->heading_defs[] (the block remembers the index of
+ * the record), and add a reference definition mapping the heading text to
+ * the identifier, so that links referring to the heading can be resolved.
+ *
+ * NOTE(review): the reference definition keeps a raw pointer into
+ * ctx->identifiers. If that buffer can be reallocated when later headings
+ * are added, the pointer would dangle -- confirm how the buffer is managed.
+ */
+static int
+md_make_heading(MD_CTX* ctx)
+{
+    MD_BLOCK* block = ctx->current_block;
+    MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
+    MD_HEADING_DEF* hdef = NULL;
+    MD_REF_DEF* ref = NULL;
+    int ret = 0;
+
+    /* Make room for the new heading record and fill it in. It gets counted
+     * only after its identifier has been built. */
+    MD_CHECK(md_push_heading_def(ctx));
+    hdef = ctx->heading_defs + ctx->n_heading_defs;
+    memset(hdef, 0, sizeof(MD_HEADING_DEF));
+
+    MD_CHECK(md_heading_build_ident(ctx, hdef, lines, block->n_lines, block->data));
+    block->heading_def = ctx->n_heading_defs++;
+
+    /* Remember the heading also as a reference definition: the heading
+     * text is the label, the identifier is the destination. */
+    MD_CHECK(md_push_ref_def(ctx));
+    ref = ctx->ref_defs + ctx->n_ref_defs;
+    memset(ref, 0, sizeof(MD_REF_DEF));
+
+    ref->label = hdef->heading;
+    ref->label_size = hdef->heading_size;
+    ref->dest = &ctx->identifiers[hdef->ident_beg];
+    ref->dest_size = hdef->ident_size;
+
+    /* Success. */
+    ctx->n_ref_defs++;
+
+abort:
+    return ret;
+}
+
+/* Finish the block being built (if any).
+ *
+ * - Paragraphs and setext headers starting with '[' have any leading link
+ *   reference definitions consumed. That may eat the whole block.
+ * - A setext header gets rid of its underline line. If the underline is
+ *   the only line left, the block is turned into a paragraph instead and
+ *   it is kept open.
+ * - With MD_FLAG_HEADINGAUTOID, headings are registered via
+ *   md_make_heading().
+ */
+static int
+md_end_current_block(MD_CTX* ctx)
+{
+    MD_BLOCK* block = ctx->current_block;
+    int ret = 0;
+
+    if(block == NULL)
+        return ret;
+
+    /* Reference definitions are handled here rather than in
+     * md_analyze_line() because a definition can span multiple lines. */
+    if(block->type == MD_BLOCK_P  ||
+       (block->type == MD_BLOCK_H  &&  (block->flags & MD_BLOCK_SETEXT_HEADER)))
+    {
+        const MD_LINE* first_line = (const MD_LINE*) (block + 1);
+
+        if(CH(first_line->beg) == _T('[')) {
+            MD_CHECK(md_consume_link_reference_definitions(ctx));
+
+            /* The whole block may have been eaten. */
+            block = ctx->current_block;
+            if(block == NULL)
+                return ret;
+        }
+    }
+
+    if(block->type == MD_BLOCK_H) {
+        if(block->flags & MD_BLOCK_SETEXT_HEADER) {
+            int n_lines = block->n_lines;
+
+            if(n_lines <= 1) {
+                /* Only the underline has left after eating the ref. defs.
+                 * Keep the line as beginning of a new ordinary paragraph. */
+                block->type = MD_BLOCK_P;
+                return 0;
+            }
+
+            /* Get rid of the underline. */
+            block->n_lines--;
+            ctx->n_block_bytes -= sizeof(MD_LINE);
+        }
+
+        if(ctx->parser.flags & MD_FLAG_HEADINGAUTOID)
+            MD_CHECK(md_make_heading(ctx));
+    }
+
+    /* No block is being built anymore. */
+    ctx->current_block = NULL;
+
+abort:
+    return ret;
+}
+
+/* Append the analyzed line to the block being built.
+ *
+ * Code and raw HTML blocks store their lines as MD_VERBATIMLINE (which
+ * records also the indentation), all the other blocks as MD_LINE.
+ *
+ * Returns 0 on success, -1 if the line record could not be allocated.
+ */
+static int
+md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
+{
+    int is_verbatim;
+
+    MD_ASSERT(ctx->current_block != NULL);
+
+    is_verbatim = (ctx->current_block->type == MD_BLOCK_CODE ||
+                   ctx->current_block->type == MD_BLOCK_HTML);
+
+    if(is_verbatim) {
+        MD_VERBATIMLINE* vline = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
+
+        if(vline == NULL)
+            return -1;
+
+        vline->beg = analysis->beg;
+        vline->end = analysis->end;
+        vline->indent = analysis->indent;
+    } else {
+        MD_LINE* tline = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
+
+        if(tline == NULL)
+            return -1;
+
+        tline->beg = analysis->beg;
+        tline->end = analysis->end;
+    }
+
+    /* md_push_block_bytes() may have moved the block, hence it is reached
+     * through the context here. */
+    ctx->current_block->n_lines++;
+
+    return 0;
+}
+
+/* Finish the current block (if any) and append a container record of the
+ * given type to the block buffer.
+ *
+ * 'start' is stored into the n_lines member of the record (for ordered
+ * lists it holds the start item number), 'data' and 'flags' into the
+ * members of the same name.
+ *
+ * Returns -1 if the record could not be allocated.
+ */
+static int
+md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
+                        unsigned data, unsigned flags)
+{
+    MD_BLOCK* container;
+    int ret = 0;
+
+    MD_CHECK(md_end_current_block(ctx));
+
+    container = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
+    if(container == NULL) {
+        ret = -1;
+        goto abort;
+    }
+
+    container->n_lines = start;
+    container->data = data;
+    container->flags = flags;
+    container->type = type;
+
+abort:
+    return ret;
+}
+
+
+
+/***********************
+ ***  Line Analysis  ***
+ ***********************/
+
+/* Check whether the line starting at 'beg' is a horizontal rule, i.e.
+ * whether it consists of at least three occurrences of the character at
+ * 'beg', optionally mixed with spaces and tabs, and of nothing else.
+ *
+ * On success TRUE is returned and *p_end is set to the end of the line.
+ * Otherwise FALSE is returned and *p_killer is set to the offset where
+ * the scan has stopped.
+ */
+static int
+md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
+{
+    const CHAR rule_ch = CH(beg);
+    OFF off;
+    int n_rule_chars = 1;
+
+    for(off = beg + 1; off < ctx->size; off++) {
+        const CHAR ch = CH(off);
+
+        if(ch == rule_ch)
+            n_rule_chars++;
+        else if(ch != _T(' ')  &&  ch != _T('\t'))
+            break;
+    }
+
+    /* Too few rule characters, or something else is present on the line. */
+    if(n_rule_chars < 3  ||  (off < ctx->size  &&  !ISNEWLINE(off))) {
+        *p_killer = off;
+        return FALSE;
+    }
+
+    *p_end = off;
+    return TRUE;
+}
+
+/* Check whether the line starting at 'beg' opens an ATX heading.
+ *
+ * The run of '#' beginning at 'beg' must not be longer than six characters.
+ * Unless MD_FLAG_PERMISSIVEATXHEADERS is set, the run has to be followed by
+ * a space, a tab, a new line or the end of input.
+ *
+ * *p_level receives the length of the run once the length check passes (even
+ * if the function then fails on the following character). On success, both
+ * *p_beg and *p_end receive the offset just past the spaces following the run.
+ */
+static int
+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
+{
+    OFF pos = beg + 1;
+    int n_hashes;
+
+    while(pos - beg < 7  &&  pos < ctx->size  &&  CH(pos) == _T('#'))
+        pos++;
+
+    n_hashes = pos - beg;
+    if(n_hashes > 6)
+        return FALSE;
+    *p_level = n_hashes;
+
+    if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) {
+        /* Strict mode: the mark must be separated from the heading text. */
+        if(pos < ctx->size  &&  !ISNEWLINE(pos)  &&
+           CH(pos) != _T(' ')  &&  CH(pos) != _T('\t'))
+            return FALSE;
+    }
+
+    for(; pos < ctx->size  &&  CH(pos) == _T(' '); pos++)
+        ;
+
+    *p_beg = pos;
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Check whether the line starting at 'beg' is a Setext heading underline:
+ * a run of the character found at 'beg', optionally followed by spaces, and
+ * nothing more.
+ *
+ * On success, *p_level is set to 1 if the run is made of '=' and to 2
+ * otherwise, and *p_end receives the offset where the scan stopped.
+ */
+static int
+md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
+{
+    OFF pos;
+
+    /* The underline itself. */
+    for(pos = beg + 1; pos < ctx->size  &&  CH(pos) == CH(beg); pos++)
+        ;
+
+    /* Optional trailing spaces. */
+    for(; pos < ctx->size  &&  CH(pos) == _T(' '); pos++)
+        ;
+
+    /* Anything else on the line disqualifies it. */
+    if(pos < ctx->size  &&  !ISNEWLINE(pos))
+        return FALSE;
+
+    if(CH(beg) == _T('='))
+        *p_level = 1;
+    else
+        *p_level = 2;
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Check whether the line starting at 'beg' is a table header underline, i.e.
+ * a sequence of cell underlines ("-----", ":----", "----:" or ":----:")
+ * separated by pipes. The leading and trailing pipe are optional but at
+ * least one pipe has to be present somewhere on the line.
+ *
+ * On success, *p_end receives the offset where the scan stopped and
+ * *p_col_count the number of cell underlines found.
+ */
+static int
+md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
+{
+    OFF pos = beg;
+    unsigned n_cols = 0;
+    int seen_pipe = FALSE;
+
+    /* Optional leading pipe. */
+    if(pos < ctx->size  &&  CH(pos) == _T('|')) {
+        seen_pipe = TRUE;
+        pos++;
+        while(pos < ctx->size  &&  ISWHITESPACE(pos))
+            pos++;
+    }
+
+    for(;;) {
+        int has_delimiter = FALSE;
+
+        /* One cell underline: optional ':', one or more '-', optional ':'. */
+        if(pos < ctx->size  &&  CH(pos) == _T(':'))
+            pos++;
+        if(pos >= ctx->size  ||  CH(pos) != _T('-'))
+            return FALSE;
+        do {
+            pos++;
+        } while(pos < ctx->size  &&  CH(pos) == _T('-'));
+        if(pos < ctx->size  &&  CH(pos) == _T(':'))
+            pos++;
+
+        n_cols++;
+
+        /* Pipe delimiter (optional at the end of line). */
+        while(pos < ctx->size  &&  ISWHITESPACE(pos))
+            pos++;
+        if(pos < ctx->size  &&  CH(pos) == _T('|')) {
+            has_delimiter = TRUE;
+            seen_pipe = TRUE;
+            pos++;
+            while(pos < ctx->size  &&  ISWHITESPACE(pos))
+                pos++;
+        }
+
+        /* End of line reached: the verdict depends only on the pipe presence. */
+        if(pos >= ctx->size  ||  ISNEWLINE(pos)) {
+            if(!seen_pipe)
+                return FALSE;
+
+            *p_end = pos;
+            *p_col_count = n_cols;
+            return TRUE;
+        }
+
+        /* Another cell may follow only after a pipe. */
+        if(!has_delimiter)
+            return FALSE;
+    }
+}
+
+/* Check whether the line starting at 'beg' begins (after optional blank
+ * characters) with the table-of-contents placeholder configured in
+ * ctx->parser.toc_options.toc_placeholder.
+ *
+ * Whatever follows the placeholder on the line is not examined here.
+ *
+ * On success, both *p_beg and *p_end receive the offset just past the
+ * placeholder and non-zero is returned. Otherwise zero is returned and the
+ * output parameters are left untouched.
+ */
+static int
+md_is_toc_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
+{
+    OFF off = beg;
+    const CHAR* toc = ctx->parser.toc_options.toc_placeholder;
+
+    /* Without a placeholder there is nothing to look for. This also keeps us
+     * from dereferencing NULL and from treating an empty placeholder as
+     * a match on every non-blank line. */
+    if(toc == NULL  ||  '\0' == *toc)
+        return FALSE;
+
+    /* Allow for blank chars before the TOC mark. */
+    while(off < ctx->size  &&  ISBLANK(off))
+        off++;
+
+    if(off < ctx->size  &&  ISNEWLINE(off))
+        return FALSE;
+
+    /* Compare the input with the placeholder, character by character. */
+    while(off < ctx->size  &&  '\0' != *toc) {
+        if(CH(off) != *toc)
+            return FALSE;
+        toc++;
+        off++;
+    }
+
+    /* If the input ended prematurely, the placeholder is not fully consumed. */
+    if('\0' == *toc) {
+        *p_beg = off;
+        *p_end = off;
+    }
+    return '\0' == *toc;
+}
+
+/* Check whether the line starting at 'beg' opens a fenced code block.
+ *
+ * The fence is a run of at least three characters equal to the one at 'beg'.
+ * Its length is remembered in ctx->code_fence_length as soon as the length
+ * check passes. Spaces and an info string may follow, but a backtick-based
+ * fence must not have any '`' in the rest of the line.
+ *
+ * On success, *p_end receives the offset of the end of the line.
+ */
+static int
+md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    OFF pos = beg;
+    int is_backtick_fence;
+
+    while(pos < ctx->size  &&  CH(pos) == CH(beg))
+        pos++;
+
+    /* Fence must have at least three characters. */
+    if(pos - beg < 3)
+        return FALSE;
+
+    ctx->code_fence_length = pos - beg;
+    is_backtick_fence = (CH(beg) == _T('`'));
+
+    /* Optional space(s). */
+    for(; pos < ctx->size  &&  CH(pos) == _T(' '); pos++)
+        ;
+
+    /* Optional info string. */
+    for(; pos < ctx->size  &&  !ISNEWLINE(pos); pos++) {
+        if(is_backtick_fence  &&  CH(pos) == _T('`'))
+            return FALSE;
+    }
+
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Check whether the line starting at 'beg' closes the current fenced code
+ * block: a run of 'ch' at least ctx->code_fence_length long, optionally
+ * followed by spaces, and nothing more.
+ *
+ * Note *p_end is set even on failure (to the offset where the scan stopped):
+ * If the line is not a closing fence, the caller eats it anyway without any
+ * parsing.
+ */
+static int
+md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
+{
+    OFF pos = beg;
+    int is_closer = FALSE;
+
+    while(pos < ctx->size  &&  CH(pos) == ch)
+        pos++;
+
+    /* Only a long enough fence is worth further inspection. */
+    if(pos - beg >= ctx->code_fence_length) {
+        /* Optional space(s). */
+        while(pos < ctx->size  &&  CH(pos) == _T(' '))
+            pos++;
+
+        /* Nothing more is allowed on the line. */
+        if(pos >= ctx->size  ||  ISNEWLINE(pos))
+            is_closer = TRUE;
+    }
+
+    *p_end = pos;
+    return is_closer;
+}
+
+/* Returns type of the raw HTML block (1 to 7), or FALSE if it is not HTML
+ * block. (Refer to CommonMark specification for details about the types.)
+ *
+ * The checks for types 1 to 6 start at the character following 'beg'; the
+ * character at 'beg' itself is not examined by them (presumably the caller
+ * has already verified it is '<').
+ */
+static int
+md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
+{
+    typedef struct TAG_tag TAG;
+    struct TAG_tag {
+        const CHAR* name;
+        unsigned len    : 8;
+    };
+
+    /* Type 6 is started by a long list of allowed tags. We use two-level
+     * tree to speed-up the search. */
+#ifdef X
+    #undef X
+#endif
+#define X(name)     { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
+#define Xend        { NULL, 0 }
+    static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend };
+
+    static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
+    static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
+    static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
+    static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
+                              X("div"), X("dl"), X("dt"), Xend };
+    static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
+                              X("form"), X("frame"), X("frameset"), Xend };
+    static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
+    static const TAG i6[] = { X("iframe"), Xend };
+    static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
+    static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
+    static const TAG n6[] = { X("nav"), X("noframes"), Xend };
+    static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
+    static const TAG p6[] = { X("p"), X("param"), Xend };
+    static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
+    static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
+                              X("thead"), X("title"), X("tr"), X("track"), Xend };
+    static const TAG u6[] = { X("ul"), Xend };
+    static const TAG xx[] = { Xend };
+#undef X
+
+    /* First level of the tree: one slot per initial letter 'a' to 'z'. */
+    static const TAG* map6[26] = {
+        a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
+        n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
+    };
+    OFF off = beg + 1;
+    int i;
+
+    /* Check for type 1: <script, <pre, <style or <textarea */
+    for(i = 0; t1[i].name != NULL; i++) {
+        if(off + t1[i].len <= ctx->size) {
+            if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
+                return 1;
+        }
+    }
+
+    /* Check for type 2: <!--
+     * (Three characters are inspected, so the last one must lie inside the
+     * input; nothing is required to follow it.) */
+    if(off + 2 < ctx->size  &&  CH(off) == _T('!')  &&  CH(off+1) == _T('-')  &&  CH(off+2) == _T('-'))
+        return 2;
+
+    /* Check for type 3: <? */
+    if(off < ctx->size  &&  CH(off) == _T('?'))
+        return 3;
+
+    /* Check for type 4 or 5: <! */
+    if(off < ctx->size  &&  CH(off) == _T('!')) {
+        /* Check for type 4: <! followed by an ASCII letter.
+         * (Accepting any ASCII character here would also swallow "<![CDATA["
+         * and make the type 5 check below unreachable.) */
+        if(off + 1 < ctx->size  &&  ISALPHA(off+1))
+            return 4;
+
+        /* Check for type 5: <![CDATA[ */
+        if(off + 8 <= ctx->size) {
+            if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
+                return 5;
+        }
+    }
+
+    /* Check for type 6: Many possible starting tags listed above. */
+    if(off + 1 < ctx->size  &&  (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
+        int slot;
+        const TAG* tags;
+
+        /* Closing tags are handled the same way as the opening ones. */
+        if(CH(off) == _T('/'))
+            off++;
+
+        slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
+        tags = map6[slot];
+
+        for(i = 0; tags[i].name != NULL; i++) {
+            if(off + tags[i].len <= ctx->size) {
+                if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
+                    /* The tag name must be properly terminated. */
+                    OFF tmp = off + tags[i].len;
+                    if(tmp >= ctx->size)
+                        return 6;
+                    if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
+                        return 6;
+                    if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
+                        return 6;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* Check for type 7: any COMPLETE other opening or closing tag. */
+    if(off + 1 < ctx->size) {
+        OFF end;
+
+        if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
+            /* Only optional whitespace and new line may follow. */
+            while(end < ctx->size  &&  ISWHITESPACE(end))
+                end++;
+            if(end >= ctx->size  ||  ISNEWLINE(end))
+                return 7;
+        }
+    }
+
+    return FALSE;
+}
+
+/* Case sensitive check whether there is a substring 'what' (of 'what_len'
+ * characters) between 'beg' and end of line.
+ *
+ * On success, *p_end receives the offset just past the found substring.
+ * On failure, it receives the offset where the search stopped; note this
+ * may lie before the end of the line if the search ran out of input.
+ */
+static int
+md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
+{
+    OFF i;
+
+    /* A candidate at 'i' occupies [i, i + what_len), so it still fits when it
+     * ends exactly at the end of input. */
+    for(i = beg; i < ctx->size  &&  i + what_len <= ctx->size; i++) {
+        if(ISNEWLINE(i))
+            break;
+        if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
+            *p_end = i + what_len;
+            return TRUE;
+        }
+    }
+
+    *p_end = i;
+    return FALSE;
+}
+
+/* Returns type of HTML block end condition or FALSE if not an end condition.
+ * The condition tested depends on ctx->html_block_type.
+ *
+ * Note it fills p_end even when it is not end condition as the caller
+ * does not need to analyze contents of a raw HTML block.
+ */
+static int
+md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    switch(ctx->html_block_type) {
+        case 1:
+        {
+            /* The line has to contain a closing tag matching any of the tags
+             * able to start the block of type 1. */
+            OFF off = beg;
+
+            while(off < ctx->size  &&  !ISNEWLINE(off)) {
+                if(CH(off) == _T('<')) {
+                  #define FIND_TAG_END(string, length) \
+                    if(off + length <= ctx->size && \
+                       md_ascii_case_eq(STR(off), _T(string), length)) { \
+                        *p_end = off + length; \
+                        return TRUE; \
+                    }
+                  FIND_TAG_END("</script>", 9)
+                  FIND_TAG_END("</style>", 8)
+                  FIND_TAG_END("</pre>", 6)
+                  FIND_TAG_END("</textarea>", 11)
+                  #undef FIND_TAG_END
+                }
+
+                off++;
+            }
+            *p_end = off;
+            return FALSE;
+        }
+
+        case 2:
+            return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
+
+        case 3:
+            return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
+
+        case 4:
+            return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
+
+        case 5:
+            return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
+
+        case 6:     /* Pass through */
+        case 7:
+            /* These types are ended by a blank line. */
+            *p_end = beg;
+            return (beg >= ctx->size || ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
+
+        default:
+            MD_UNREACHABLE();
+    }
+    return FALSE;
+}
+
+
+/* Check whether 'container' can continue the list described by 'pivot'.
+ *
+ * It can if it is not a block quote (which has no "items" like lists), if it
+ * uses the same mark character as the pivot and if its mark is not indented
+ * more than the pivot's contents.
+ */
+static int
+md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
+{
+    int is_list_item = (container->ch != _T('>'));
+    int has_same_mark = (container->ch == pivot->ch);
+    int is_within_indent = (container->mark_indent <= pivot->contents_indent);
+
+    return (is_list_item  &&  has_same_mark  &&  is_within_indent) ? TRUE : FALSE;
+}
+
+/* Push a copy of 'container' on top of the ctx->containers stack, growing the
+ * stack if it is full (16 slots initially, then by one half each time).
+ *
+ * Returns 0 on success, -1 if the stack could not be grown.
+ */
+static int
+md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
+{
+    if(ctx->n_containers >= ctx->alloc_containers) {
+        MD_CONTAINER* grown;
+
+        if(ctx->alloc_containers > 0)
+            ctx->alloc_containers = ctx->alloc_containers + ctx->alloc_containers / 2;
+        else
+            ctx->alloc_containers = 16;
+
+        grown = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
+        if(grown == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+
+        ctx->containers = grown;
+    }
+
+    memcpy(&ctx->containers[ctx->n_containers], container, sizeof(MD_CONTAINER));
+    ctx->n_containers++;
+    return 0;
+}
+
+/* Emit opener records for the topmost 'n_children' containers on the
+ * ctx->containers stack.
+ *
+ * A list container emits the list opener (MD_BLOCK_OL or MD_BLOCK_UL)
+ * followed by the list item opener (MD_BLOCK_LI); a block quote emits
+ * MD_BLOCK_QUOTE.
+ *
+ * Returns 0 on success, negative value on failure.
+ */
+static int
+md_enter_child_containers(MD_CTX* ctx, int n_children)
+{
+    int i;
+    int ret = 0;
+
+    for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
+        MD_CONTAINER* c = &ctx->containers[i];
+        int is_ordered_list = FALSE;
+
+        switch(c->ch) {
+            case _T(')'):
+            case _T('.'):
+                is_ordered_list = TRUE;
+                MD_FALLTHROUGH();
+
+            case _T('-'):
+            case _T('+'):
+            case _T('*'):
+                /* Remember offset in ctx->block_bytes so we can revisit the
+                 * block if we detect it is a loose list.
+                 *
+                 * The current block has to be ended first so that the offset
+                 * points at the list opener; a failure to do so is propagated
+                 * like any other error in this function. */
+                MD_CHECK(md_end_current_block(ctx));
+                c->block_byte_off = ctx->n_block_bytes;
+
+                MD_CHECK(md_push_container_bytes(ctx,
+                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
+                                c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
+                                c->task_mark_off,
+                                (c->is_task ? CH(c->task_mark_off) : 0),
+                                MD_BLOCK_CONTAINER_OPENER));
+                break;
+
+            case _T('>'):
+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
+                break;
+
+            default:
+                MD_UNREACHABLE();
+                break;
+        }
+    }
+
+abort:
+    return ret;
+}
+
+/* Pop containers from the ctx->containers stack until only 'n_keep' of them
+ * remain, emitting the corresponding closer records on the way.
+ *
+ * A list container emits the list item closer (MD_BLOCK_LI) followed by the
+ * list closer (MD_BLOCK_OL or MD_BLOCK_UL); a block quote emits
+ * MD_BLOCK_QUOTE.
+ *
+ * Returns 0 on success, negative value on failure.
+ */
+static int
+md_leave_child_containers(MD_CTX* ctx, int n_keep)
+{
+    int ret = 0;
+
+    while(ctx->n_containers > n_keep) {
+        MD_CONTAINER* top = &ctx->containers[ctx->n_containers-1];
+        int is_ordered = (top->ch == _T(')')  ||  top->ch == _T('.'));
+        int is_bullet = (top->ch == _T('-')  ||  top->ch == _T('+')  ||  top->ch == _T('*'));
+
+        if(is_ordered  ||  is_bullet) {
+            /* Close the list item first, then the list itself. */
+            MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
+                            top->task_mark_off, (top->is_task ? CH(top->task_mark_off) : 0),
+                            MD_BLOCK_CONTAINER_CLOSER));
+            MD_CHECK(md_push_container_bytes(ctx,
+                            (is_ordered ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
+                            top->ch, MD_BLOCK_CONTAINER_CLOSER));
+        } else if(top->ch == _T('>')) {
+            MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
+                            0, MD_BLOCK_CONTAINER_CLOSER));
+        } else {
+            MD_UNREACHABLE();
+        }
+
+        ctx->n_containers--;
+    }
+
+abort:
+    return ret;
+}
+
+/* Check whether there is a container mark at offset 'beg': a block quote
+ * mark '>', a bullet ('-', '+' or '*') or an ordered list mark (up to nine
+ * digits followed by '.' or ')'). List marks have to be followed by a blank
+ * character, a new line or the end of input.
+ *
+ * 'indent' is the indentation of the mark; no mark is recognized if it
+ * reaches ctx->code_indent_offset.
+ *
+ * On success, the members ch, is_loose, is_task, mark_indent and
+ * contents_indent of *p_container are filled and *p_end receives the offset
+ * just past the mark. Note p_container->start is overwritten whenever the
+ * ordered list check is reached, even if it fails then.
+ */
+static int
+md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
+{
+    OFF pos = beg;
+    OFF digits_limit;
+
+    if(indent >= ctx->code_indent_offset  ||  pos >= ctx->size)
+        return FALSE;
+
+    /* Block quote mark. */
+    if(CH(pos) == _T('>')) {
+        p_container->ch = _T('>');
+        p_container->is_loose = FALSE;
+        p_container->is_task = FALSE;
+        p_container->mark_indent = indent;
+        p_container->contents_indent = indent + 1;
+        *p_end = pos + 1;
+        return TRUE;
+    }
+
+    /* List item bullet mark. */
+    if(ISANYOF(pos, _T("-+*"))  &&  (pos+1 >= ctx->size || ISBLANK(pos+1) || ISNEWLINE(pos+1))) {
+        p_container->ch = CH(pos);
+        p_container->is_loose = FALSE;
+        p_container->is_task = FALSE;
+        p_container->mark_indent = indent;
+        p_container->contents_indent = indent + 1;
+        *p_end = pos + 1;
+        return TRUE;
+    }
+
+    /* Ordered list item mark: Collect the number first. */
+    digits_limit = pos + 9;
+    if(digits_limit > ctx->size)
+        digits_limit = ctx->size;
+
+    p_container->start = 0;
+    for(; pos < digits_limit  &&  ISDIGIT(pos); pos++)
+        p_container->start = p_container->start * 10 + CH(pos) - _T('0');
+
+    /* There has to be at least one digit ... */
+    if(pos <= beg)
+        return FALSE;
+    /* ... followed by the delimiter ... */
+    if(pos >= ctx->size  ||  (CH(pos) != _T('.')  &&  CH(pos) != _T(')')))
+        return FALSE;
+    /* ... which is properly separated from the item contents. */
+    if(pos+1 < ctx->size  &&  !ISBLANK(pos+1)  &&  !ISNEWLINE(pos+1))
+        return FALSE;
+
+    p_container->ch = CH(pos);
+    p_container->is_loose = FALSE;
+    p_container->is_task = FALSE;
+    p_container->mark_indent = indent;
+    p_container->contents_indent = indent + pos - beg + 1;
+    *p_end = pos + 1;
+    return TRUE;
+}
+
+/* Build the identifier of a heading from its text and record it in 'def'.
+ *
+ * The heading text (pointer and size) and 'level' are stored in 'def' as
+ * well. The identifier is written into ctx->identifiers and is formed as
+ * follows:
+ *   - it starts with '#';
+ *   - '-' is copied verbatim;
+ *   - a run of whitespace, as well as a line boundary, becomes one '-';
+ *   - punctuation and symbols are dropped;
+ *   - any other character is case-folded;
+ *   - the characters covered by resolved inline marks are skipped.
+ *
+ * On success, def->ident_size holds the real size of the identifier and
+ * ctx->identifiers_size is advanced by it.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+md_heading_build_ident(MD_CTX* ctx, MD_HEADING_DEF* def, MD_LINE* lines, int n_lines, int level)
+{
+    MD_MARK* mark;
+    CHAR* ptr;
+    int ret = 0;
+
+    const MD_LINE* line = lines;
+    OFF beg = lines[0].beg;
+    OFF off = beg;
+    OFF end = lines[n_lines-1].end;
+
+    /* store the heading */
+    def->heading = (CHAR*)STR(beg);
+    def->heading_size = end-beg;
+    /* store the heading level */
+    def->level = level;
+
+    /* Reset the previously collected stack of marks. */
+    ctx->n_marks = 0;
+
+    MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
+
+    /* Find first resolved mark. Note there is always at least one resolved
+     * mark,  the dummy last one after the end of the latest line we actually
+     * never really reach. This saves us of a lot of special checks and cases
+     * in this function. */
+    mark = ctx->marks;
+    while(!(mark->flags & MD_MARK_RESOLVED))
+        mark++;
+
+    /* Reserve space for the worst case. The identifier is NOT bounded by the
+     * heading size: case folding may expand one character into several code
+     * points (up to 3 in Unicode's full case folding), and each of them may
+     * take several CHARs once encoded (assuming at most 4, the UTF-8 maximum,
+     * for md_encode_unicode() -- TODO confirm). The extra CHAR is for the
+     * leading '#'. The real size is written back when the loop below ends.
+     * NOTE(review): assumes md_alloc_identifiers() reserves def->ident_size
+     * CHARs at def->ident_beg; verify against its definition. */
+    def->ident_size = (end - beg) * 12 + 1;
+    MD_CHECK(md_alloc_identifiers(ctx, def));
+
+    /* copy the ident and transform as needed */
+    ptr = &ctx->identifiers[def->ident_beg];
+    *ptr++ = _T('#'); /* start with a '#' */
+    while(1) {
+
+        OFF line_end = line->end;
+        /* Process the text up to the next mark or end-of-line. */
+        OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
+        if(end < line_end)
+            line_end = end;
+
+        while(off < tmp) {
+            unsigned codepoint;
+            SZ char_size;
+
+            if( CH(off) == _T('-') ){   /* '-' are not replaced */
+                *ptr++ = _T('-');
+                off++;
+                continue;
+            }
+
+            codepoint = md_decode_unicode(ctx->text, off, line_end, &char_size);
+            if(ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE(off)) { /* replace white spaces by '-' */
+                *ptr++ = _T('-');
+                off = md_skip_unicode_whitespace(ctx->text, off, line_end);
+            } else if (ISUNICODEPUNCT_(codepoint) || ISUNICODESYMBOL_(codepoint)) {    /* skip punctuation and symbols */
+                off += char_size;
+                continue;
+            } else {                /* make lower case */
+                MD_UNICODE_FOLD_INFO fold_info;
+                md_get_unicode_fold_info(codepoint, &fold_info);
+                for (unsigned i = 0; i < fold_info.n_codepoints; i++) {
+                    SZ n = md_encode_unicode(fold_info.codepoints[i], ptr);
+                    ptr += n;
+                }
+                off += char_size;
+            }
+        }
+        /* If reached the mark, process it and move to next one. */
+        if(off >= mark->beg) {
+            switch(mark->ch) {
+
+                case '[':       /* Link, wiki link, image. */
+                case '!':
+                case ']':
+                {
+                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
+                    const MD_MARK* closer = &ctx->marks[opener->next];
+                    const MD_MARK* dest_mark;
+                    const MD_MARK* title_mark;
+
+                    if ((opener->ch == '[' && closer->ch == ']') &&
+                        opener->end - opener->beg >= 2 &&
+                        closer->end - closer->beg >= 2)
+                    {
+                        break;
+                    }
+
+                    dest_mark = opener+1;
+                    MD_ASSERT(dest_mark->ch == 'D');
+                    title_mark = opener+2;
+                    if (title_mark->ch != 'D') break;
+
+                    /* link/image closer may span multiple lines. */
+                    if(mark->ch == ']') {
+                        while(mark->end > line->end)
+                            line++;
+                    }
+
+                    break;
+                }
+            }
+
+            /* The characters forming the mark do not go into the identifier. */
+            off = mark->end;
+
+            /* Move to next resolved mark. But not past the last mark */
+            if(mark < &ctx->marks[ctx->n_marks])
+                mark++;
+            while((mark < &ctx->marks[ctx->n_marks])  &&
+                ( !(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off))
+            {
+                mark++;
+            }
+        }
+
+        /* If reached end of line, move to next one. */
+        if(off >= line->end) {
+            /* If it is the last line, we are done. */
+            if(off >= end) {
+                /* update real identifier size */
+                def->ident_size = (MD_SIZE)(ptr - &ctx->identifiers[def->ident_beg]);
+                break;
+            }
+
+            *ptr = _T('-'); /* end of line */
+            ptr++;
+
+            /* Move to the next line. */
+            line++;
+            off = line->beg;
+        }
+    }
+    /* update used identifier buffer size */
+    ctx->identifiers_size += def->ident_size;
+
+    return 0;
+abort:
+
+    return -1;
+}
+
+/* Measure the indentation formed by blank characters starting at 'beg'.
+ *
+ * 'total_indent' is the column where the measuring starts; it matters because
+ * a tab advances to the next multiple of four columns while any other blank
+ * character advances by one.
+ *
+ * Returns the number of columns gained and stores the offset of the first
+ * non-blank character (or the end of input) into *p_end.
+ */
+static unsigned
+md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
+{
+    unsigned column = total_indent;
+    OFF pos;
+
+    for(pos = beg; pos < ctx->size  &&  ISBLANK(pos); pos++)
+        column = (CH(pos) == _T('\t') ? ((column + 4) & ~3) : column + 1);
+
+    *p_end = pos;
+    return column - total_indent;
+}
+
+/* Line analysis of type MD_LINE_BLANK with all other members zeroed.
+ * md_analyze_line() substitutes it for the pivot line once it recognizes
+ * another item of an already started list. */
+static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
+
+/* Analyze type of the line and find some its properties. This serves as a
+ * main input for determining type and boundaries of a block. */
+static int
+md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
+                const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
+{
+    unsigned total_indent = 0;
+    int n_parents = 0;
+    int n_brothers = 0;
+    int n_children = 0;
+    MD_CONTAINER container = { 0 };
+    int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
+    OFF off = beg;
+    OFF hr_killer = 0;
+    int ret = 0;
+
+    line->indent = md_line_indentation(ctx, total_indent, off, &off);
+    total_indent += line->indent;
+    line->beg = off;
+
+    /* Given the indentation and block quote marks '>', determine how many of
+     * the current containers are our parents. */
+    while(n_parents < ctx->n_containers) {
+        MD_CONTAINER* c = &ctx->containers[n_parents];
+
+        if(c->ch == _T('>')  &&  line->indent < ctx->code_indent_offset  &&
+            off < ctx->size  &&  CH(off) == _T('>'))
+        {
+            /* Block quote mark. */
+            off++;
+            total_indent++;
+            line->indent = md_line_indentation(ctx, total_indent, off, &off);
+            total_indent += line->indent;
+
+            /* The optional 1st space after '>' is part of the block quote mark. */
+            if(line->indent > 0)
+                line->indent--;
+
+            line->beg = off;
+
+        } else if(c->ch != _T('>')  &&  line->indent >= c->contents_indent) {
+            /* List. */
+            line->indent -= c->contents_indent;
+        } else {
+            break;
+        }
+
+        n_parents++;
+    }
+
+    if(off >= ctx->size  ||  ISNEWLINE(off)) {
+        /* Blank line does not need any real indentation to be nested inside
+         * a list. */
+        if(n_brothers + n_children == 0) {
+            while(n_parents < ctx->n_containers  &&  ctx->containers[n_parents].ch != _T('>'))
+                n_parents++;
+        }
+    }
+
+    while(TRUE) {
+        /* Check whether we are fenced code continuation. */
+        if(pivot_line->type == MD_LINE_FENCEDCODE) {
+            line->beg = off;
+
+            /* We are another MD_LINE_FENCEDCODE unless we are closing fence
+             * which we transform into MD_LINE_BLANK. */
+            if(line->indent < ctx->code_indent_offset) {
+                if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
+                    line->type = MD_LINE_BLANK;
+                    ctx->last_line_has_list_loosening_effect = FALSE;
+                    break;
+                }
+            }
+
+            /* Change indentation accordingly to the initial code fence. */
+            if(n_parents == ctx->n_containers) {
+                if(line->indent > pivot_line->indent)
+                    line->indent -= pivot_line->indent;
+                else
+                    line->indent = 0;
+
+                line->type = MD_LINE_FENCEDCODE;
+                break;
+            }
+        }
+
+        /* Check whether we are HTML block continuation. */
+        if(pivot_line->type == MD_LINE_HTML  &&  ctx->html_block_type > 0) {
+            if(n_parents < ctx->n_containers) {
+                /* HTML block is implicitly ended if the enclosing container
+                 * block ends. */
+                ctx->html_block_type = 0;
+            } else {
+                int html_block_type;
+
+                html_block_type = md_is_html_block_end_condition(ctx, off, &off);
+                if(html_block_type > 0) {
+                    MD_ASSERT(html_block_type == ctx->html_block_type);
+
+                    /* Make sure this is the last line of the block. */
+                    ctx->html_block_type = 0;
+
+                    /* Some end conditions serve as blank lines at the same time. */
+                    if(html_block_type == 6 || html_block_type == 7) {
+                        line->type = MD_LINE_BLANK;
+                        line->indent = 0;
+                        break;
+                    }
+                }
+
+                line->type = MD_LINE_HTML;
+                n_parents = ctx->n_containers;
+                break;
+            }
+        }
+
+        /* Check for blank line. */
+        if(off >= ctx->size  ||  ISNEWLINE(off)) {
+            if(pivot_line->type == MD_LINE_INDENTEDCODE  &&  n_parents == ctx->n_containers) {
+                line->type = MD_LINE_INDENTEDCODE;
+                if(line->indent > ctx->code_indent_offset)
+                    line->indent -= ctx->code_indent_offset;
+                else
+                    line->indent = 0;
+                ctx->last_line_has_list_loosening_effect = FALSE;
+            } else {
+                line->type = MD_LINE_BLANK;
+                ctx->last_line_has_list_loosening_effect = (n_parents > 0  &&
+                        n_brothers + n_children == 0  &&
+                        ctx->containers[n_parents-1].ch != _T('>'));
+
+    #if 1
+                /* See https://github.com/mity/md4c/issues/6
+                 *
+                 * This ugly checking tests we are in (yet empty) list item but
+                 * not its very first line (i.e. not the line with the list
+                 * item mark).
+                 *
+                 * If we are such a blank line, then any following non-blank
+                 * line which would be part of the list item actually has to
+                 * end the list because according to the specification, "a list
+                 * item can begin with at most one blank line."
+                 */
+                if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
+                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
+                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
+                {
+                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
+                    if(top_block->type == MD_BLOCK_LI)
+                        ctx->last_list_item_starts_with_two_blank_lines = TRUE;
+                }
+    #endif
+            }
+            break;
+        } else {
+    #if 1
+            /* This is the 2nd half of the hack. If the flag is set (i.e. there
+             * was a 2nd blank line at the beginning of the list item) and if
+             * we would otherwise still belong to the list item, we enforce
+             * the end of the list. */
+            ctx->last_line_has_list_loosening_effect = FALSE;
+            if(ctx->last_list_item_starts_with_two_blank_lines) {
+                if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
+                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
+                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
+                {
+                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
+                    if(top_block->type == MD_BLOCK_LI)
+                        n_parents--;
+                }
+
+                ctx->last_list_item_starts_with_two_blank_lines = FALSE;
+            }
+    #endif
+        }
+
+        /* Check whether we are Setext underline. */
+        if(line->indent < ctx->code_indent_offset  &&  pivot_line->type == MD_LINE_TEXT
+            &&  off < ctx->size  &&  ISANYOF2(off, _T('='), _T('-'))
+            &&  (n_parents == ctx->n_containers))
+        {
+            unsigned level;
+
+            if(md_is_setext_underline(ctx, off, &off, &level)) {
+                line->type = MD_LINE_SETEXTUNDERLINE;
+                line->data = level;
+                break;
+            }
+        }
+
+        /* Check for thematic break line. */
+        if(line->indent < ctx->code_indent_offset
+            &&  off < ctx->size  &&  off >= hr_killer
+            &&  ISANYOF(off, _T("-_*")))
+        {
+            if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
+                line->type = MD_LINE_HR;
+                break;
+            }
+        }
+
+        /* Check for "brother" container. I.e. whether we are another list item
+         * in already started list. */
+        if(n_parents < ctx->n_containers  &&  n_brothers + n_children == 0) {
+            OFF tmp;
+
+            if(md_is_container_mark(ctx, line->indent, off, &tmp, &container)  &&
+               md_is_container_compatible(&ctx->containers[n_parents], &container))
+            {
+                pivot_line = &md_dummy_blank_line;
+
+                off = tmp;
+
+                total_indent += container.contents_indent - container.mark_indent;
+                line->indent = md_line_indentation(ctx, total_indent, off, &off);
+                total_indent += line->indent;
+                line->beg = off;
+
+                /* Some of the following whitespace actually still belongs to the mark. */
+                if(off >= ctx->size || ISNEWLINE(off)) {
+                    container.contents_indent++;
+                } else if(line->indent <= ctx->code_indent_offset) {
+                    container.contents_indent += line->indent;
+                    line->indent = 0;
+                } else {
+                    container.contents_indent += 1;
+                    line->indent--;
+                }
+
+                ctx->containers[n_parents].mark_indent = container.mark_indent;
+                ctx->containers[n_parents].contents_indent = container.contents_indent;
+
+                n_brothers++;
+                continue;
+            }
+        }
+
+        /* Check for indented code.
+         * Note indented code block cannot interrupt a paragraph. */
+        if(line->indent >= ctx->code_indent_offset  &&
+            (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE))
+        {
+            line->type = MD_LINE_INDENTEDCODE;
+            MD_ASSERT(line->indent >= ctx->code_indent_offset);
+            line->indent -= ctx->code_indent_offset;
+            line->data = 0;
+            break;
+        }
+
+        /* Check for start of a new container block. */
+        if(line->indent < ctx->code_indent_offset  &&
+           md_is_container_mark(ctx, line->indent, off, &off, &container))
+        {
+            if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
+                        (off >= ctx->size || ISNEWLINE(off))  &&  container.ch != _T('>'))
+            {
+                /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
+            } else if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
+                        ISANYOF2_(container.ch, _T('.'), _T(')'))  &&  container.start != 1)
+            {
+                /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
+            } else {
+                total_indent += container.contents_indent - container.mark_indent;
+                line->indent = md_line_indentation(ctx, total_indent, off, &off);
+                total_indent += line->indent;
+
+                line->beg = off;
+                line->data = container.ch;
+
+                /* Some of the following whitespace actually still belongs to the mark. */
+                if(off >= ctx->size || ISNEWLINE(off)) {
+                    container.contents_indent++;
+                } else if(line->indent <= ctx->code_indent_offset) {
+                    container.contents_indent += line->indent;
+                    line->indent = 0;
+                } else {
+                    container.contents_indent += 1;
+                    line->indent--;
+                }
+
+                if(n_brothers + n_children == 0)
+                    pivot_line = &md_dummy_blank_line;
+
+                if(n_children == 0)
+                    MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
+
+                n_children++;
+                MD_CHECK(md_push_container(ctx, &container));
+                continue;
+            }
+        }
+
+        /* Check whether we are table continuation. */
+        if(pivot_line->type == MD_LINE_TABLE  &&  n_parents == ctx->n_containers) {
+            line->type = MD_LINE_TABLE;
+            break;
+        }
+
+        /* Check for ATX header. */
+        if(line->indent < ctx->code_indent_offset  &&
+                off < ctx->size  &&  CH(off) == _T('#'))
+        {
+            unsigned level;
+
+            if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
+                line->type = MD_LINE_ATXHEADER;
+                line->data = level;
+                break;
+            }
+        }
+
+        /* Check whether we are starting code fence. */
+        if(off < ctx->size  &&  ISANYOF2(off, _T('`'), _T('~'))) {
+            if(md_is_opening_code_fence(ctx, off, &off)) {
+                line->type = MD_LINE_FENCEDCODE;
+                line->data = 1;
+                break;
+            }
+        }
+
+        /* Check for start of raw HTML block. */
+        if(off < ctx->size  &&  CH(off) == _T('<')
+            &&  !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
+        {
+            ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
+
+            /* HTML block type 7 cannot interrupt paragraph. */
+            if(ctx->html_block_type == 7  &&  pivot_line->type == MD_LINE_TEXT)
+                ctx->html_block_type = 0;
+
+            if(ctx->html_block_type > 0) {
+                /* The line itself also may immediately close the block. */
+                if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
+                    /* Make sure this is the last line of the block. */
+                    ctx->html_block_type = 0;
+                }
+
+                line->type = MD_LINE_HTML;
+                break;
+            }
+        }
+
+        /* Check for table underline. */
+        if((ctx->parser.flags & MD_FLAG_TABLES)  &&  pivot_line->type == MD_LINE_TEXT
+            &&  off < ctx->size  &&  ISANYOF3(off, _T('|'), _T('-'), _T(':'))
+            &&  n_parents == ctx->n_containers)
+        {
+            unsigned col_count;
+
+            if(ctx->current_block != NULL  &&  ctx->current_block->n_lines == 1  &&
+                md_is_table_underline(ctx, off, &off, &col_count))
+            {
+                line->data = col_count;
+                line->type = MD_LINE_TABLEUNDERLINE;
+                break;
+            }
+        }
+
+        /* check for TOC mark */
+        if(ctx->parser.toc_options.toc_placeholder != NULL  &&  !ctx->toc_found  &&
+            md_is_toc_line(ctx, off, &line->beg, &off)) 
+        {
+                line->type = MD_LINE_TOC;
+                ctx->toc_found = TRUE;
+                break;
+        }
+
+        /* By default, we are normal text line. */
+        line->type = MD_LINE_TEXT;
+        if(pivot_line->type == MD_LINE_TEXT  &&  n_brothers + n_children == 0) {
+            /* Lazy continuation. */
+            n_parents = ctx->n_containers;
+        }
+
+        /* Check for task mark. */
+        if((ctx->parser.flags & MD_FLAG_TASKLISTS)  &&  n_brothers + n_children > 0  &&
+           ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
+        {
+            OFF tmp = off;
+
+            while(tmp < ctx->size  &&  tmp < off + 3  &&  ISBLANK(tmp))
+                tmp++;
+            if(tmp + 2 < ctx->size  &&  CH(tmp) == _T('[')  &&
+               ISANYOF(tmp+1, _T("xX "))  &&  CH(tmp+2) == _T(']')  &&
+               (tmp + 3 == ctx->size  ||  ISBLANK(tmp+3)  ||  ISNEWLINE(tmp+3)))
+            {
+                MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
+                task_container->is_task = TRUE;
+                task_container->task_mark_off = tmp + 1;
+                off = tmp + 3;
+                while(off < ctx->size && ISWHITESPACE(off))
+                    off++;
+                if (off == ctx->size) break;
+                line->beg = off;
+            }
+        }
+
+        break;
+    }
+
+    /* Scan for end of the line.
+     *
+     * Note this is quite a bottleneck of the parsing as we here iterate almost
+     * over the complete document.
+     */
+#if defined __linux__ && !defined MD4C_USE_UTF16
+    /* Recent glibc versions have superbly optimized strcspn(), even using
+     * vectorization if available. */
+    if(ctx->doc_ends_with_newline  &&  off < ctx->size) {
+        while(TRUE) {
+            off += (OFF) strcspn(STR(off), "\r\n");
+
+            /* strcspn() can stop on zero terminator; but that can appear
+             * anywhere in the Markdown input... */
+            if(CH(off) == _T('\0'))
+                off++;
+            else
+                break;
+        }
+    } else
+#endif
+    {
+        /* Optimization: Use some loop unrolling. */
+        while(off + 3 < ctx->size  &&  !ISNEWLINE(off+0)  &&  !ISNEWLINE(off+1)
+                                   &&  !ISNEWLINE(off+2)  &&  !ISNEWLINE(off+3))
+            off += 4;
+        while(off < ctx->size  &&  !ISNEWLINE(off))
+            off++;
+    }
+
+    /* Set end of the line. */
+    line->end = off;
+
+    /* But for ATX header, we should exclude the optional trailing mark. */
+    if(line->type == MD_LINE_ATXHEADER) {
+        OFF tmp = line->end;
+        while(tmp > line->beg && CH(tmp-1) == _T(' '))
+            tmp--;
+        while(tmp > line->beg && CH(tmp-1) == _T('#'))
+            tmp--;
+        if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
+            line->end = tmp;
+    }
+
+    /* Trim trailing spaces. */
+    if(line->type != MD_LINE_INDENTEDCODE  &&  line->type != MD_LINE_FENCEDCODE) {
+        while(line->end > line->beg && CH(line->end-1) == _T(' '))
+            line->end--;
+    }
+
+    /* Eat also the new line. */
+    if(off < ctx->size && CH(off) == _T('\r'))
+        off++;
+    if(off < ctx->size && CH(off) == _T('\n'))
+        off++;
+
+    *p_end = off;
+
+    /* If we belong to a list after seeing a blank line, the list is loose. */
+    if(prev_line_has_list_loosening_effect  &&  line->type != MD_LINE_BLANK  &&  n_parents + n_brothers > 0) {
+        MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
+        if(c->ch != _T('>')) {
+            MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
+            block->flags |= MD_BLOCK_LOOSE_LIST;
+        }
+    }
+
+    /* Leave any containers we are not part of anymore. */
+    if(n_children == 0  &&  n_parents + n_brothers < ctx->n_containers)
+        MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
+
+    /* Enter any container we found a mark for. */
+    if(n_brothers > 0) {
+        MD_ASSERT(n_brothers == 1);
+        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
+                    ctx->containers[n_parents].task_mark_off,
+                    (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
+                    MD_BLOCK_CONTAINER_CLOSER));
+        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
+                    container.task_mark_off,
+                    (container.is_task ? CH(container.task_mark_off) : 0),
+                    MD_BLOCK_CONTAINER_OPENER));
+        ctx->containers[n_parents].is_task = container.is_task;
+        ctx->containers[n_parents].task_mark_off = container.task_mark_off;
+    }
+
+    if(n_children > 0)
+        MD_CHECK(md_enter_child_containers(ctx, n_children));
+
+abort:
+    return ret;
+}
+
+/* Feed one already analyzed line into the block that is being assembled.
+ *
+ * Depending on line->type this closes the current leaf block, emits a
+ * single-line block, re-types the current block (Setext header, table) or
+ * appends the line to the current block, opening a new block first when none
+ * is open. *p_pivot_line is updated to the line the following lines have to
+ * be compared against.
+ *
+ * Returns 0 from the early exits; otherwise ret, which stays 0 unless one of
+ * the MD_CHECK()-ed helpers made the function leave through the abort label.
+ */
+static int
+md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
+{
+    const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
+    int ret = 0;
+
+    /* Line types with a dedicated treatment are dispatched first; each of
+     * them is fully handled inside its case and returns from there. */
+    switch(line->type) {
+        case MD_LINE_BLANK:
+            /* A blank line terminates the leaf block which is open. */
+            MD_CHECK(md_end_current_block(ctx));
+            *p_pivot_line = &md_dummy_blank_line;
+            return 0;
+
+        case MD_LINE_HR:
+        case MD_LINE_ATXHEADER:
+            /* These are complete blocks of their own: finish what is open,
+             * then open, fill and close a block made of just this line. */
+            MD_CHECK(md_end_current_block(ctx));
+            MD_CHECK(md_start_new_block(ctx, line));
+            MD_CHECK(md_add_line_into_current_block(ctx, line));
+            MD_CHECK(md_end_current_block(ctx));
+            *p_pivot_line = &md_dummy_blank_line;
+            return 0;
+
+        case MD_LINE_SETEXTUNDERLINE:
+            /* The underline turns the current block into a header and
+             * finishes it. */
+            MD_ASSERT(ctx->current_block != NULL);
+            ctx->current_block->type = MD_BLOCK_H;
+            ctx->current_block->data = line->data;
+            ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
+            MD_CHECK(md_add_line_into_current_block(ctx, line));
+            MD_CHECK(md_end_current_block(ctx));
+            if(ctx->current_block != NULL) {
+                /* The whole body was consumed as link reference definitions
+                 * and the underline got downgraded into the first line of
+                 * a new paragraph block. */
+                line->type = MD_LINE_TEXT;
+                *p_pivot_line = line;
+            } else {
+                *p_pivot_line = &md_dummy_blank_line;
+            }
+            return 0;
+
+        case MD_LINE_TABLEUNDERLINE:
+            /* The underline turns the current one-line block into a table;
+             * the block stays open for the table body. */
+            MD_ASSERT(ctx->current_block != NULL);
+            MD_ASSERT(ctx->current_block->n_lines == 1);
+            ctx->current_block->type = MD_BLOCK_TABLE;
+            ctx->current_block->data = line->data;
+            MD_ASSERT(pivot_line != &md_dummy_blank_line);
+            ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
+            MD_CHECK(md_add_line_into_current_block(ctx, line));
+            return 0;
+
+        default:
+            break;
+    }
+
+    /* A change of the line type also finishes the current block. */
+    if(pivot_line->type != line->type)
+        MD_CHECK(md_end_current_block(ctx));
+
+    /* With no block open, this line opens one and becomes the new pivot. */
+    if(ctx->current_block == NULL) {
+        MD_CHECK(md_start_new_block(ctx, line));
+        *p_pivot_line = line;
+    }
+
+    /* Either way, the line now belongs to the current block. */
+    MD_CHECK(md_add_line_into_current_block(ctx, line));
+
+abort:
+    return ret;
+}
+/* Emit the table of contents as nested MD_BLOCK_UL lists built from
+ * ctx->heading_defs.
+ *
+ * Every heading whose level does not exceed toc_options.depth becomes one
+ * MD_BLOCK_LI holding a single MD_SPAN_A: its href attribute is built from
+ * the heading identifier (through md_build_attribute_postfix() when
+ * hd->postfix is non-zero), its title attribute is built empty, and the link
+ * text is the heading text.
+ *
+ * Returns ret. The abort label is also the regular exit, so both attribute
+ * builds are handed to md_free_attribute() on every path.
+ */
+static int
+md_output_toc(MD_CTX *ctx)
+{
+    MD_HEADING_DEF *hd;
+    MD_BLOCK_LI_DETAIL li_det = {0};    /* All-zero detail, i.e. is_task == 0 for every TOC item. */
+
+    MD_ATTRIBUTE_BUILD href_build = {0};    /* NOTE(review): both builds are refilled on every
+                                             * loop iteration but released only once at abort;
+                                             * confirm md_build_attribute*() cannot allocate per
+                                             * call, otherwise earlier allocations would leak. */
+    MD_ATTRIBUTE_BUILD title_build = {0};
+    MD_SPAN_A_DETAIL a_det;
+    int ret = 0;
+    int level = 0;      /* Heading level reached so far; lists are only really opened while level <= depth. */
+    int i;
+
+    for (i = 0; i < ctx->n_heading_defs; ++i){
+        hd = &ctx->heading_defs[i];
+        while (hd->level > level){      /* Heading is deeper: step down one level at a time. */
+            ++level;
+            if (level <= ctx->parser.toc_options.depth)
+                MD_ENTER_BLOCK(MD_BLOCK_UL, NULL);
+        }
+        while (hd->level < level){      /* Heading is shallower: step back up, closing what was opened. */
+            if (level <= ctx->parser.toc_options.depth)
+                MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL);
+            --level;
+        }
+
+        if (level <= ctx->parser.toc_options.depth){    /* Headings deeper than the configured depth get no entry. */
+            MD_ENTER_BLOCK(MD_BLOCK_LI, &li_det);
+            memset(&a_det, 0, sizeof(MD_SPAN_A_DETAIL));
+            if (hd->postfix == 0){      /* NOTE(review): postfix presumably disambiguates repeated identifiers -- confirm. */
+                MD_CHECK(md_build_attribute(ctx, hd->identifier, hd->ident_size,
+                                            MD_BUILD_ATTR_NO_ESCAPES,
+                                            &a_det.href, &href_build));
+            } else {
+                MD_CHECK(md_build_attribute_postfix(ctx,
+                                                    hd->identifier, hd->ident_size,
+                                                    hd->postfix, &a_det.href, &href_build));
+            }
+
+            MD_CHECK(md_build_attribute(ctx, NULL, 0, 0, &a_det.title, &title_build));   /* Title built from a NULL string of size 0. */
+
+            MD_ENTER_SPAN(MD_SPAN_A, &a_det);
+
+            MD_TEXT(MD_TEXT_NORMAL, hd->heading, hd->heading_size);
+            MD_LEAVE_SPAN(MD_SPAN_A, NULL);
+            MD_LEAVE_BLOCK(MD_BLOCK_LI, NULL);
+        }
+      
+    }
+
+    /* Walk back to level 0, closing every list which is still open after the
+     * last heading. */
+    while (level > 0){
+        if (level <= ctx->parser.toc_options.depth)
+            MD_LEAVE_BLOCK(MD_BLOCK_UL, NULL);
+        --level;
+    }
+
+abort:
+    md_free_attribute(ctx, &href_build);
+    md_free_attribute(ctx, &title_build);
+    return ret;
+}
+
+/* Run the complete parse of the input held in ctx.
+ *
+ * The input is analyzed and processed line by line (md_analyze_line()
+ * followed by md_process_line()), the block left open by the last line is
+ * closed, the lookup tables for heading definitions (only with
+ * MD_FLAG_HEADINGAUTOID) and for reference definitions are built, the table
+ * of contents is emitted up front when requested, and finally all collected
+ * blocks are processed. All of it happens between the enter and leave
+ * notifications for MD_BLOCK_DOC.
+ *
+ * Returns ret: it starts as 0 and carries the result of the step which made
+ * the function jump to the abort label, if any.
+ */
+static int
+md_process_doc(MD_CTX *ctx)
+{
+    const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
+    /* Two line buffers: the pivot line may live in one of them, so the next
+     * line is always analyzed into the other one. */
+    MD_LINE_ANALYSIS line_buf[2];
+    MD_LINE_ANALYSIS* line = &line_buf[0];
+    OFF off = 0;
+    int ret = 0;
+
+    MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
+
+    while(off < ctx->size) {
+        /* Never overwrite the buffer the pivot line points to. */
+        if(line == pivot_line)
+            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
+
+        MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
+        MD_CHECK(md_process_line(ctx, &pivot_line, line));
+    }
+
+    /* Close the block left open by the last line. The result is checked like
+     * everywhere else md_end_current_block() is called, so that a failure
+     * aborts the parse instead of being silently dropped. */
+    MD_CHECK(md_end_current_block(ctx));
+
+    if(ctx->parser.flags & MD_FLAG_HEADINGAUTOID) {
+        MD_CHECK(md_build_heading_def_hashtable(ctx));
+    }
+    MD_CHECK(md_build_ref_def_hashtable(ctx));
+
+    /* Output the TOC here, i.e. before any other block of the document, when
+     * a depth is configured and line analysis has not recognized any TOC
+     * placeholder line (ctx->toc_found). */
+    if(ctx->parser.toc_options.depth > 0 && !ctx->toc_found) {
+        MD_ENTER_BLOCK(MD_BLOCK_NAV, NULL);
+        MD_CHECK(md_output_toc(ctx));
+        MD_LEAVE_BLOCK(MD_BLOCK_NAV, NULL);
+    }
+
+    /* Process all blocks. */
+    MD_CHECK(md_leave_child_containers(ctx, 0));
+    MD_CHECK(md_process_all_blocks(ctx));
+
+    MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
+
+abort:
+
+#if 0
+    /* Output some memory consumption statistics. */
+    {
+        char buffer[256];
+        sprintf(buffer, "Alloced %u bytes for block buffer.",
+                    (unsigned)(ctx->alloc_block_bytes));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for containers buffer.",
+                    (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for marks buffer.",
+                    (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
+                    (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for reference definition buffer.",
+                    (unsigned)(ctx->alloc_ref_defs * sizeof(MD_REF_DEF)));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for identifiers buffer.",
+                    (unsigned)(ctx->alloc_identifiers * sizeof(MD_CHAR)));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for heading definition buffer.",
+                    (unsigned)(ctx->alloc_heading_defs * sizeof(MD_HEADING_DEF)));
+        MD_LOG(buffer);
+
+    }
+#endif
+
+    return ret;
+}
+
+
+/********************
+ ***  Public API  ***
+ ********************/
+/* Parse the document text[0..size) and report it through *parser.
+ *
+ * A context is set up on the stack from the arguments, md_process_doc() does
+ * the actual work, and every buffer the context has acquired meanwhile is
+ * released before returning.
+ *
+ * Returns -1 when parser->abi_version is not 1 (the message is also sent to
+ * parser->debug_log if that is set); otherwise the result of
+ * md_process_doc().
+ */
+int
+md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
+{
+    MD_CTX ctx;
+    int i;
+    int ret;
+
+    if(parser->abi_version != 1) {      /* Checked before anything is allocated, so there is nothing to clean up. */
+        if(parser->debug_log != NULL)
+            parser->debug_log("Unsupported abi_version.", userdata);
+        return -1;
+    }
+
+    /* Setup context structure. Everything not set explicitly below starts
+     * out as zero, including the buffer pointers released at the end. */
+    memset(&ctx, 0, sizeof(MD_CTX));
+    ctx.text = text;
+    ctx.size = size;
+    memcpy(&ctx.parser, parser, sizeof(MD_PARSER));     /* Private copy of the caller's parser structure. */
+    ctx.userdata = userdata;
+    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;  /* (OFF)(-1) is meant
+                                     * to be out of reach of any real indentation (assuming OFF is unsigned), so the
+                                     * "indent >= code_indent_offset" test for indented code never succeeds. */
+    md_build_mark_char_map(&ctx);
+    ctx.doc_ends_with_newline = (size > 0  &&  ISNEWLINE_(text[size-1]));   /* Precondition of the strcspn() based
+                                                                             * end-of-line scan in md_analyze_line(). */
+
+    /* Reset all unresolved opener mark chains; -1 is the initial value of
+     * both ends of every chain. */
+    for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
+        ctx.mark_chains[i].head = -1;
+        ctx.mark_chains[i].tail = -1;
+    }
+    ctx.unresolved_link_head = -1;
+    ctx.unresolved_link_tail = -1;
+
+    /* All the work. */
+    ret = md_process_doc(&ctx);
+
+    /* Clean-up. Done regardless of the value of ret. */
+    md_free_heading_defs(&ctx);
+    md_free_heading_def_hashtable(&ctx);
+    free(ctx.identifiers);
+    md_free_ref_defs(&ctx);
+    md_free_ref_def_hashtable(&ctx);
+    free(ctx.buffer);
+    free(ctx.marks);
+    free(ctx.block_bytes);
+    free(ctx.containers);
+
+    return ret;
+}

+ 430 - 0
markdown.mod/md4c/src/md4c.h

@@ -0,0 +1,430 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016-2020 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef MD4C_H
+#define MD4C_H
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#if defined MD4C_USE_UTF16
+    /* Magic to support UTF-16. Note that in order to use it, you have to define
+     * the macro MD4C_USE_UTF16 both when building MD4C and when including
+     * this header in your code. */
+    #ifdef _WIN32
+        #include <windows.h>
+        typedef WCHAR       MD_CHAR;
+    #else
+        #error MD4C_USE_UTF16 is only supported on Windows.
+    #endif
+#else
+    typedef char            MD_CHAR;    /* Default build: the input is an array of plain char. */
+#endif
+
+typedef unsigned MD_SIZE;       /* Length counted in MD_CHAR elements (md_parse() reads text[size-1]). */
+typedef unsigned MD_OFFSET;     /* Position within a text, e.g. MD_BLOCK_LI_DETAIL::task_mark_offset. */
+
+
+/* A block represents a part of the document hierarchy, like a paragraph or
+ * a list item. The HTML shown with each value is the markup the block
+ * corresponds to.
+ */
+typedef enum MD_BLOCKTYPE {
+    /* <body>...</body> */
+    MD_BLOCK_DOC = 0,
+
+    /* <blockquote>...</blockquote> */
+    MD_BLOCK_QUOTE,
+
+    /* <ul>...</ul>
+     * Detail: Structure MD_BLOCK_UL_DETAIL. */
+    MD_BLOCK_UL,
+
+    /* <ol>...</ol>
+     * Detail: Structure MD_BLOCK_OL_DETAIL. */
+    MD_BLOCK_OL,
+
+    /* <li>...</li>
+     * Detail: Structure MD_BLOCK_LI_DETAIL. */
+    MD_BLOCK_LI,
+
+    /* <hr> */
+    MD_BLOCK_HR,
+
+    /* <h1>...</h1> (for levels up to 6)
+     * Detail: Structure MD_BLOCK_H_DETAIL. */
+    MD_BLOCK_H,
+
+    /* <pre><code>...</code></pre>
+     * Note the text lines within code blocks are terminated with '\n'
+     * instead of explicit MD_TEXT_BR. */
+    MD_BLOCK_CODE,
+
+    /* Raw HTML block. This itself does not correspond to any particular HTML
+     * tag. Its contents _are_ raw HTML source intended to be put verbatim
+     * into the HTML output. */
+    MD_BLOCK_HTML,
+
+    /* <p>...</p> */
+    MD_BLOCK_P,
+
+    /* <table>...</table> and its contents.
+     * Detail: Structure MD_BLOCK_TABLE_DETAIL (for MD_BLOCK_TABLE),
+     *         structure MD_BLOCK_TD_DETAIL (for MD_BLOCK_TH and MD_BLOCK_TD)
+     * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
+    MD_BLOCK_TABLE,
+    MD_BLOCK_THEAD,
+    MD_BLOCK_TBODY,
+    MD_BLOCK_TR,
+    MD_BLOCK_TH,
+    MD_BLOCK_TD,
+    MD_BLOCK_NAV    /* Wrapper around the generated table of contents. md_process_doc()
+                     * enters and leaves it, with a NULL detail, around md_output_toc()
+                     * when the TOC is emitted at the start of the document.
+                     * NOTE(review): presumably rendered as <nav>...</nav> -- confirm
+                     * against the renderer. */
+} MD_BLOCKTYPE;
+
+/* A span represents an in-line piece of a document which should be rendered
+ * with the same font, color and other attributes. A sequence of spans forms
+ * a block like a paragraph or a list item. */
+typedef enum MD_SPANTYPE {
+    /* <em>...</em> */
+    MD_SPAN_EM,
+
+    /* <strong>...</strong> */
+    MD_SPAN_STRONG,
+
+    /* <a href="xxx">...</a>
+     * Detail: Structure MD_SPAN_A_DETAIL. */
+    MD_SPAN_A,
+
+    /* <img src="xxx">
+     * Detail: Structure MD_SPAN_IMG_DETAIL.
+     * Note: The image text can contain nested spans and even nested images.
+     * If it is rendered into the ALT attribute of the HTML <IMG> tag, it is
+     * the responsibility of the callbacks supplied through MD_PARSER to deal
+     * with that.
+     */
+    MD_SPAN_IMG,
+
+    /* <code>...</code> */
+    MD_SPAN_CODE,
+
+    /* <del>...</del>
+     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
+     */
+    MD_SPAN_DEL,
+
+    /* For recognizing inline ($) and display ($$) equations
+     * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
+     */
+    MD_SPAN_LATEXMATH,
+    MD_SPAN_LATEXMATH_DISPLAY,
+
+    /* Wiki links
+     * Detail: Structure MD_SPAN_WIKILINK_DETAIL.
+     * Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
+     */
+    MD_SPAN_WIKILINK,
+
+    /* <u>...</u>
+     * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
+    MD_SPAN_U
+} MD_SPANTYPE;
+
+/* Text is the actual textual contents of a span. */
+typedef enum MD_TEXTTYPE {
+    /* Normal text. */
+    MD_TEXT_NORMAL = 0,
+
+    /* NULL character. CommonMark requires replacing NULL character with
+     * the replacement char U+FFFD, so this allows the caller to do that
+     * easily. */
+    MD_TEXT_NULLCHAR,
+
+    /* Line breaks.
+     * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
+     * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
+    MD_TEXT_BR,         /* <br> (hard break) */
+    MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */
+
+    /* Entity.
+     * (a) Named entity, e.g. &nbsp;
+     *     (Note MD4C does not have a list of known entities.
+     *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
+     *     treated as a named entity.)
+     * (b) Numerical entity, e.g. &#1234;
+     * (c) Hexadecimal entity, e.g. &#x12AB;
+     *
+     * As MD4C is mostly encoding agnostic, the application gets the verbatim
+     * entity text through the text callback of MD_PARSER. */
+    MD_TEXT_ENTITY,
+
+    /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
+     * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
+     * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
+     * kind of text. */
+    MD_TEXT_CODE,
+
+    /* Text is raw HTML. If it is the contents of a raw HTML block (i.e. not
+     * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
+     * The text contains verbatim '\n' for the new lines. */
+    MD_TEXT_HTML,
+
+    /* Text is inside an equation. This is processed the same way as inlined code
+     * spans (`code`). */
+    MD_TEXT_LATEXMATH
+} MD_TEXTTYPE;
+
+
+/* Alignment enumeration; the type of MD_BLOCK_TD_DETAIL::align. */
+typedef enum MD_ALIGN {
+    MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
+    MD_ALIGN_LEFT,
+    MD_ALIGN_CENTER,
+    MD_ALIGN_RIGHT
+} MD_ALIGN;
+
+
+/* String attribute.
+ *
+ * This wraps strings which are outside of the normal text flow and which are
+ * propagated within various detailed structures, but which still may contain
+ * string portions of different types like e.g. entities.
+ *
+ * So, for example, let's consider this image:
+ *
+ *     ![image alt text](http://example.org/image.png 'foo &quot; bar')
+ *
+ * The image alt text is propagated as a normal text via the MD_PARSER::text()
+ * callback. However, the image title ('foo &quot; bar') is propagated as
+ * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
+ *
+ * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
+ *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
+ *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
+ *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
+ *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
+ *
+ * Note that these invariants are always guaranteed:
+ *  -- substr_offsets[0] == 0
+ *  -- substr_offsets[LAST+1] == size
+ *  -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
+ *     substrings can appear. This could change only if the specification
+ *     changes.
+ */
+typedef struct MD_ATTRIBUTE {
+    const MD_CHAR* text;                /* The attribute string; substr_offsets[] index into it. */
+    MD_SIZE size;                       /* Its length; equals substr_offsets[LAST+1]. */
+    const MD_TEXTTYPE* substr_types;    /* Type of each substring. */
+    const MD_OFFSET* substr_offsets;    /* Start of each substring, plus one final entry equal to size. */
+} MD_ATTRIBUTE;
+
+
+/* Detailed info for MD_BLOCK_UL. */
+typedef struct MD_BLOCK_UL_DETAIL {
+    int is_tight;           /* Non-zero if tight list, zero if loose. */
+    MD_CHAR mark;           /* Item bullet character in the Markdown source of the list, e.g. '-', '+', '*'. */
+} MD_BLOCK_UL_DETAIL;
+
+/* Detailed info for MD_BLOCK_OL. */
+typedef struct MD_BLOCK_OL_DETAIL {
+    unsigned start;         /* Start index of the ordered list. */
+    int is_tight;           /* Non-zero if tight list, zero if loose. */
+    MD_CHAR mark_delimiter; /* Character delimiting the item marks in the Markdown source, e.g. '.' or ')' */
+} MD_BLOCK_OL_DETAIL;
+
+/* Detailed info for MD_BLOCK_LI. */
+typedef struct MD_BLOCK_LI_DETAIL {
+    int is_task;            /* Can be non-zero only with MD_FLAG_TASKLISTS. */
+    MD_CHAR task_mark;      /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
+    MD_OFFSET task_mark_offset;  /* If is_task, then the offset in the input of the char between '[' and ']'. */
+} MD_BLOCK_LI_DETAIL;
+
+/* Detailed info for MD_BLOCK_H. */
+typedef struct MD_BLOCK_H_DETAIL {
+    unsigned level;         /* Header level (1 - 6) */
+    MD_ATTRIBUTE identifier;  /* Identifier, e.g. from {#some-id}, or autogenerated from the heading text. */
+} MD_BLOCK_H_DETAIL;
+
+/* Detailed info for MD_BLOCK_CODE. */
+typedef struct MD_BLOCK_CODE_DETAIL {
+    MD_ATTRIBUTE info;      /* NOTE(review): presumably the info string following the opening fence -- confirm. */
+    MD_ATTRIBUTE lang;      /* NOTE(review): presumably the language part of the info string -- confirm. */
+    MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
+} MD_BLOCK_CODE_DETAIL;
+
+/* Detailed info for MD_BLOCK_TABLE. */
+typedef struct MD_BLOCK_TABLE_DETAIL {
+    unsigned col_count;         /* Count of columns in the table. */
+    unsigned head_row_count;    /* Count of rows in the table header (currently always 1). */
+    unsigned body_row_count;    /* Count of rows in the table body. */
+} MD_BLOCK_TABLE_DETAIL;
+
+/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
+typedef struct MD_BLOCK_TD_DETAIL {
+    MD_ALIGN align;     /* Alignment of the cell; MD_ALIGN_DEFAULT when unspecified. */
+} MD_BLOCK_TD_DETAIL;
+
+/* Detailed info for MD_SPAN_A. */
+typedef struct MD_SPAN_A_DETAIL {
+    MD_ATTRIBUTE href;      /* The "xxx" of <a href="xxx">. */
+    MD_ATTRIBUTE title;     /* Link title; can be empty (md_output_toc() builds it from a NULL string of size 0). */
+} MD_SPAN_A_DETAIL;
+
+/* Detailed info for MD_SPAN_IMG. */
+typedef struct MD_SPAN_IMG_DETAIL {
+    MD_ATTRIBUTE src;       /* The "xxx" of <img src="xxx">. */
+    MD_ATTRIBUTE title;     /* Image title, e.g. 'foo &quot; bar' in ![alt](http://example.org/image.png 'foo &quot; bar'). */
+} MD_SPAN_IMG_DETAIL;
+
+/* Detailed info for MD_SPAN_WIKILINK.
+ * Note the struct tag is MD_SPAN_WIKILINK, without the _DETAIL suffix which
+ * the tags of the other detail structures carry; use the typedef name. */
+typedef struct MD_SPAN_WIKILINK {
+    MD_ATTRIBUTE target;    /* NOTE(review): presumably the link target inside the wiki link -- confirm. */
+} MD_SPAN_WIKILINK_DETAIL;
+
+/* Flags specifying extensions/deviations from CommonMark specification.
+ *
+ * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
+ * The following flags may allow some extensions or deviations from it.
+ *
+ * Each flag is a single bit, so they can be combined with '|'. Note the bit
+ * 0x0080 is not assigned to any flag in this list.
+ */
+#define MD_FLAG_COLLAPSEWHITESPACE          0x0001  /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
+#define MD_FLAG_PERMISSIVEATXHEADERS        0x0002  /* Do not require space in ATX headers ( ###header ) */
+#define MD_FLAG_PERMISSIVEURLAUTOLINKS      0x0004  /* Recognize URLs as autolinks even without '<', '>' */
+#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS    0x0008  /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
+#define MD_FLAG_NOINDENTEDCODEBLOCKS        0x0010  /* Disable indented code blocks. (Only fenced code works.) */
+#define MD_FLAG_NOHTMLBLOCKS                0x0020  /* Disable raw HTML blocks. */
+#define MD_FLAG_NOHTMLSPANS                 0x0040  /* Disable raw HTML (inline). */
+#define MD_FLAG_TABLES                      0x0100  /* Enable tables extension. */
+#define MD_FLAG_STRIKETHROUGH               0x0200  /* Enable strikethrough extension. */
+#define MD_FLAG_PERMISSIVEWWWAUTOLINKS      0x0400  /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
+#define MD_FLAG_TASKLISTS                   0x0800  /* Enable task list extension. */
+#define MD_FLAG_LATEXMATHSPANS              0x1000  /* Enable $ and $$ containing LaTeX equations. */
+#define MD_FLAG_WIKILINKS                   0x2000  /* Enable wiki links extension. */
+#define MD_FLAG_UNDERLINE                   0x4000  /* Enable underline extension (and disables '_' for normal emphasis). */
+#define MD_FLAG_HEADINGAUTOID               0x8000  /* Enable automatic heading identifiers, GitHub style. */
+
+#define MD_FLAG_PERMISSIVEAUTOLINKS         (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
+#define MD_FLAG_NOHTML                      (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
+
+/* Convenient sets of flags corresponding to well-known Markdown dialects.
+ *
+ * Note we may only support subset of features of the referred dialect.
+ * The constant just enables those extensions which bring us as close as
+ * possible given what features we implement.
+ *
+ * ABI compatibility note: Meaning of these can change in time as new
+ * extensions, bringing the dialect closer to the original, are implemented.
+ */
+#define MD_DIALECT_COMMONMARK               0
+#define MD_DIALECT_GITHUB                   (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS | MD_FLAG_HEADINGAUTOID)
+
+/* Table of contents (TOC) options.
+ */
+typedef struct MD_TOC_OPTIONS {
+  /* Maximum heading level to include in the table of contents.
+   * A value of 0 disables table of contents generation.
+   */
+  int depth;
+
+  /* Table of contents placeholder.
+   *
+   * If the placeholder is empty or NULL, the TOC is output at the start of
+   * the document.
+   *
+   * NOTE(review): presumably a non-empty placeholder is a marker string in
+   * the input which gets replaced by the generated TOC -- confirm against
+   * the implementation.
+   */
+  const MD_CHAR* toc_placeholder;
+
+} MD_TOC_OPTIONS;
+
+/* Parser structure.
+ *
+ * Bundles the dialect flags, the caller-provided callbacks and the table of
+ * contents options. A pointer to it is passed to md_parse().
+ */
+typedef struct MD_PARSER {
+    /* Reserved. Set to 1.
+     */
+    unsigned abi_version;
+
+    /* Dialect options. Bitmask of MD_FLAG_xxxx values.
+     */
+    unsigned flags;
+
+    /* Caller-provided rendering callbacks.
+     *
+     * For some block/span types, more detailed information is provided in a
+     * type-specific structure pointed by the argument 'detail'.
+     *
+     * The last argument of all callbacks, 'userdata', is just propagated from
+     * md_parse() and is available for any use by the application.
+     *
+     * Note any strings provided to the callbacks as their arguments or as
+     * members of any detail structure are generally not zero-terminated.
+     * Application has to take the respective size information into account.
+     *
+     * Any rendering callback may abort further parsing of the document by
+     * returning non-zero.
+     */
+    int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+    int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+
+    int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+    int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+
+    int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
+
+    /* Debug callback. Optional (may be NULL).
+     *
+     * If provided and something goes wrong, this function gets called.
+     * This is intended for debugging and problem diagnosis for developers;
+     * it is not intended to provide any errors suitable for displaying to an
+     * end user.
+     */
+    void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
+
+    /* Table of contents options (see MD_TOC_OPTIONS).
+     *
+     * A 'depth' of 0 disables table of contents generation.
+     */
+    MD_TOC_OPTIONS toc_options;
+
+    /* Reserved. Set to NULL.
+     */
+    void (*syntax)(void);
+} MD_PARSER;
+
+
+/* For backward compatibility. Do not use in new code.
+ */
+typedef MD_PARSER MD_RENDERER;
+
+
+/* Parse the Markdown document stored in the string 'text' of size 'size'.
+ * The parser provides callbacks to be called during the parsing so the
+ * caller can render the document on the screen or convert the Markdown
+ * to another format.
+ *
+ * Zero is returned on success. If a runtime error occurs (e.g. a memory
+ * allocation fails), -1 is returned. If the processing is aborted due to
+ * any callback returning non-zero, the return value of that callback is
+ * returned.
+ */
+int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
+
+
+#ifdef __cplusplus
+    }  /* extern "C" { */
+#endif
+
+#endif  /* MD4C_H */

+ 13 - 0
markdown.mod/md4c/src/md4c.pc.in

@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: @PROJECT_NAME@
+Description: Markdown parser library with a SAX-like callback-based interface.
+Version: @PROJECT_VERSION@
+URL: @PROJECT_URL@
+
+Requires:
+Libs: -L${libdir} -lmd4c
+Cflags: -I${includedir}

+ 64 - 0
markdown.mod/md4c/test/LICENSE

@@ -0,0 +1,64 @@
+The CommonMark spec (spec.txt) and DTD (CommonMark.dtd) are
+
+Copyright (C) 2014-16 John MacFarlane
+
+Released under the Creative Commons CC-BY-SA 4.0 license:
+<http://creativecommons.org/licenses/by-sa/4.0/>.
+
+---
+
+The test software in test/ and the programs in tools/ are
+
+Copyright (c) 2014, John MacFarlane
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---
+
+The normalization code in runtests.py was derived from the
+markdowntest project, Copyright 2013 Karl Dubost:
+
+The MIT License (MIT)
+
+Copyright (c) 2013 Karl Dubost
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+ 40 - 0
markdown.mod/md4c/test/cmark.py

@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from ctypes import CDLL, c_char_p, c_long
+from subprocess import *
+import platform
+import os
+
+def pipe_through_prog(prog, text):
+    p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
+    [result, err] = p1.communicate(input=text.encode('utf-8'))
+    return [p1.returncode, result.decode('utf-8'), err]
+
+def use_library(lib, text):
+    textbytes = text.encode('utf-8')
+    textlen = len(textbytes)
+    return [0, lib(textbytes, textlen, 0).decode('utf-8'), '']
+
+class CMark:
+    def __init__(self, prog=None, library_dir=None):
+        self.prog = prog
+        if prog:
+            self.to_html = lambda x: pipe_through_prog(prog, x)
+        else:
+            sysname = platform.system()
+            if sysname == 'Darwin':
+                libname = "libcmark.dylib"
+            elif sysname == 'Windows':
+                libname = "cmark.dll"
+            else:
+                libname = "libcmark.so"
+            if library_dir:
+                libpath = os.path.join(library_dir, libname)
+            else:
+                libpath = os.path.join("build", "src", libname)
+            cmark = CDLL(libpath)
+            markdown = cmark.cmark_markdown_to_html
+            markdown.restype = c_char_p
+            markdown.argtypes = [c_char_p, c_long]
+            self.to_html = lambda x: use_library(markdown, x)

+ 522 - 0
markdown.mod/md4c/test/coverage.txt

@@ -0,0 +1,522 @@
+
+# Coverage
+
+This file is just a collection of unit tests not covered elsewhere.
+
+Most notably regression tests, tests improving code coverage and other useful
+things may drop here.
+
+(However any tests requiring any additional command line option, like enabling
+an extension, must be included in their respective files.)
+
+
+## GitHub Issues
+
+### [Issue 2](https://github.com/mity/md4c/issues/2)
+
+Raw HTML block:
+
+```````````````````````````````` example
+<gi att1=tok1 att2=tok2>
+.
+<gi att1=tok1 att2=tok2>
+````````````````````````````````
+
+Inline:
+
+```````````````````````````````` example
+foo <gi att1=tok1 att2=tok2> bar
+.
+<p>foo <gi att1=tok1 att2=tok2> bar</p>
+````````````````````````````````
+
+Inline with a line break:
+
+```````````````````````````````` example
+foo <gi att1=tok1
+att2=tok2> bar
+.
+<p>foo <gi att1=tok1
+att2=tok2> bar</p>
+````````````````````````````````
+
+
+### [Issue 4](https://github.com/mity/md4c/issues/4)
+
+```````````````````````````````` example
+![alt text with *entity* &copy;](img.png 'title')
+.
+<p><img src="img.png" alt="alt text with entity ©" title="title"></p>
+````````````````````````````````
+
+
+### [Issue 9](https://github.com/mity/md4c/issues/9)
+
+```````````````````````````````` example
+> [foo
+> bar]: /url
+>
+> [foo bar]
+.
+<blockquote>
+<p><a href="/url">foo
+bar</a></p>
+</blockquote>
+````````````````````````````````
+
+
+### [Issue 10](https://github.com/mity/md4c/issues/10)
+
+```````````````````````````````` example
+[x]:
+x
+- <?
+
+  x
+.
+<ul>
+<li><?
+
+x
+</li>
+</ul>
+````````````````````````````````
+
+
+### [Issue 11](https://github.com/mity/md4c/issues/11)
+
+```````````````````````````````` example
+x [link](/url "foo &ndash; bar") x
+.
+<p>x <a href="/url" title="foo – bar">link</a> x</p>
+````````````````````````````````
+
+
+### [Issue 14](https://github.com/mity/md4c/issues/14)
+
+```````````````````````````````` example
+a***b* c*
+.
+<p>a*<em><em>b</em> c</em></p>
+````````````````````````````````
+
+
+### [Issue 15](https://github.com/mity/md4c/issues/15)
+
+```````````````````````````````` example
+***b* c*
+.
+<p>*<em><em>b</em> c</em></p>
+````````````````````````````````
+
+
+### [Issue 21](https://github.com/mity/md4c/issues/21)
+
+```````````````````````````````` example
+a*b**c*
+.
+<p>a<em>b**c</em></p>
+````````````````````````````````
+
+
+### [Issue 33](https://github.com/mity/md4c/issues/33)
+
+```````````````````````````````` example
+```&amp;&amp;&amp;&amp;&amp;&amp;&amp;&amp;
+.
+<pre><code class="language-&amp;&amp;&amp;&amp;&amp;&amp;&amp;&amp;"></code></pre>
+````````````````````````````````
+
+
+### [Issue 36](https://github.com/mity/md4c/issues/36)
+
+```````````````````````````````` example
+__x_ _x___
+.
+<p><em><em>x</em> <em>x</em></em>_</p>
+````````````````````````````````
+
+
+### [Issue 39](https://github.com/mity/md4c/issues/39)
+
+```````````````````````````````` example
+[\\]: x
+.
+````````````````````````````````
+
+
+### [Issue 40](https://github.com/mity/md4c/issues/40)
+
+```````````````````````````````` example
+[x](url
+'title'
+)x
+.
+<p><a href="url" title="title">x</a>x</p>
+````````````````````````````````
+
+
+### [Issue 65](https://github.com/mity/md4c/issues/65)
+
+```````````````````````````````` example
+`
+.
+<p>`</p>
+````````````````````````````````
+
+
+### [Issue 74](https://github.com/mity/md4c/issues/74)
+
+```````````````````````````````` example
+[f]:
+-
+    xx
+-
+.
+<pre><code>xx
+</code></pre>
+<ul>
+<li></li>
+</ul>
+````````````````````````````````
+
+
+### [Issue 78](https://github.com/mity/md4c/issues/78)
+
+```````````````````````````````` example
+[SS ẞ]: /url
+[ẞ SS]
+.
+<p><a href="/url">ẞ SS</a></p>
+````````````````````````````````
+
+
+### [Issue 83](https://github.com/mity/md4c/issues/83)
+
+```````````````````````````````` example
+foo
+>
+.
+<p>foo</p>
+<blockquote>
+</blockquote>
+
+````````````````````````````````
+
+
+### [Issue 95](https://github.com/mity/md4c/issues/95)
+
+```````````````````````````````` example
+. foo
+.
+<p>. foo</p>
+````````````````````````````````
+
+
+### [Issue 96](https://github.com/mity/md4c/issues/96)
+
+```````````````````````````````` example
+[ab]: /foo
+[a] [ab] [abc]
+.
+<p>[a] <a href="/foo">ab</a> [abc]</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[a b]: /foo
+[a   b]
+.
+<p><a href="/foo">a   b</a></p>
+````````````````````````````````
+
+
+### [Issue 97](https://github.com/mity/md4c/issues/97)
+
+```````````````````````````````` example
+*a **b c* d**
+.
+<p><em>a <em><em>b c</em> d</em></em></p>
+
+````````````````````````````````
+
+
+### [Issue 100](https://github.com/mity/md4c/issues/100)
+
+```````````````````````````````` example
+<foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123>
+.
+<p><a href="mailto:foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123">foo@123456789012345678901234567890123456789012345678901234567890123.123456789012345678901234567890123456789012345678901234567890123</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+<foo@123456789012345678901234567890123456789012345678901234567890123x.123456789012345678901234567890123456789012345678901234567890123>
+.
+<p>&lt;foo@123456789012345678901234567890123456789012345678901234567890123x.123456789012345678901234567890123456789012345678901234567890123&gt;</p>
+````````````````````````````````
+(Note the `x` here which turns it over the max. allowed length limit.)
+
+
+### [Issue 107](https://github.com/mity/md4c/issues/107)
+
+```````````````````````````````` example
+***foo *bar baz***
+.
+<p>*<strong>foo <em>bar baz</em></strong></p>
+
+````````````````````````````````
+
+
+### [Issue 124](https://github.com/mity/md4c/issues/124)
+
+```````````````````````````````` example
+~~~
+                x
+~~~
+
+~~~
+                 x
+~~~
+.
+<pre><code>                x
+</code></pre>
+<pre><code>                 x
+</code></pre>
+````````````````````````````````
+
+
+### [Issue 131](https://github.com/mity/md4c/issues/131)
+
+```````````````````````````````` example
+[![alt][img]][link]
+
+[img]: img_url
+[link]: link_url
+.
+<p><a href="link_url"><img src="img_url" alt="alt"></a></p>
+````````````````````````````````
+
+
+### [Issue 142](https://github.com/mity/md4c/issues/142)
+
+```````````````````````````````` example
+[fooﬗ]: /url
+[fooﬕ]
+.
+<p>[fooﬕ]</p>
+````````````````````````````````
+
+
+### [Issue 149](https://github.com/mity/md4c/issues/149)
+
+```````````````````````````````` example
+- <script>
+- foo
+bar
+</script>
+.
+<ul>
+<li><script>
+</li>
+<li>foo
+bar
+</script></li>
+</ul>
+````````````````````````````````
+
+
+## Code coverage
+
+### `md_is_unicode_whitespace__()`
+
+Unicode whitespace (here U+2000) forms a word boundary so these cannot be
+resolved as emphasis span because there is no closer mark.
+
+```````````````````````````````` example
+*foo *bar
+.
+<p>*foo *bar</p>
+````````````````````````````````
+
+
+### `md_is_unicode_punct__()`
+
+Ditto for Unicode punctuation (here U+00A1).
+
+```````````````````````````````` example
+*foo¡*bar
+.
+<p>*foo¡*bar</p>
+````````````````````````````````
+
+
+### `md_get_unicode_fold_info()`
+
+```````````````````````````````` example
+[Příliš žluťoučký kůň úpěl ďábelské ódy.]
+
+[PŘÍLIŠ ŽLUŤOUČKÝ KŮŇ ÚPĚL ĎÁBELSKÉ ÓDY.]: /url
+.
+<p><a href="/url">Příliš žluťoučký kůň úpěl ďábelské ódy.</a></p>
+````````````````````````````````
+
+
+### `md_decode_utf8__()` and `md_decode_utf8_before__()`
+
+```````````````````````````````` example
+á*Á (U+00E1, i.e. two byte UTF-8 sequence)
+ *  (U+2000, i.e. three byte UTF-8 sequence)
+.
+<p>á*Á (U+00E1, i.e. two byte UTF-8 sequence)
+ *  (U+2000, i.e. three byte UTF-8 sequence)</p>
+````````````````````````````````
+
+
+### `md_is_link_destination_A()`
+
+```````````````````````````````` example
+[link](</url\.with\.escape>)
+.
+<p><a href="/url.with.escape">link</a></p>
+````````````````````````````````
+
+
+### `md_link_label_eq()`
+
+```````````````````````````````` example
+[foo bar]
+
+[foo bar]: /url
+.
+<p><a href="/url">foo bar</a></p>
+````````````````````````````````
+
+
+### `md_is_inline_link_spec()`
+
+```````````````````````````````` example
+> [link](/url 'foo
+> bar')
+.
+<blockquote>
+<p><a href="/url" title="foo
+bar">link</a></p>
+</blockquote>
+````````````````````````````````
+
+
+### `md_build_ref_def_hashtable()`
+
+All link labels in the following example have the same FNV1a hash (after
+normalization of the label, which means after converting to a vector of Unicode
+codepoints and lowercase folding).
+
+So the example triggers quite complex code paths which are not otherwise easily
+tested.
+
+```````````````````````````````` example
+[foo]: /foo
+[qnptgbh]: /qnptgbh
+[abgbrwcv]: /abgbrwcv
+[abgbrwcv]: /abgbrwcv2
+[abgbrwcv]: /abgbrwcv3
+[abgbrwcv]: /abgbrwcv4
+[alqadfgn]: /alqadfgn
+
+[foo]
+[qnptgbh]
+[abgbrwcv]
+[alqadfgn]
+[axgydtdu]
+.
+<p><a href="/foo">foo</a>
+<a href="/qnptgbh">qnptgbh</a>
+<a href="/abgbrwcv">abgbrwcv</a>
+<a href="/alqadfgn">alqadfgn</a>
+[axgydtdu]</p>
+````````````````````````````````
+
+For the sake of completeness, the following C program was used to find the hash
+collisions by brute force:
+
+~~~
+
+#include <stdio.h>
+#include <string.h>
+
+
+static unsigned etalon;
+
+
+
+#define MD_FNV1A_BASE       2166136261
+#define MD_FNV1A_PRIME      16777619
+
+static inline unsigned
+fnv1a(unsigned base, const void* data, size_t n)
+{
+    const unsigned char* buf = (const unsigned char*) data;
+    unsigned hash = base;
+    size_t i;
+
+    for(i = 0; i < n; i++) {
+        hash ^= buf[i];
+        hash *= MD_FNV1A_PRIME;
+    }
+
+    return hash;
+}
+
+
+static unsigned
+unicode_hash(const char* data, size_t n)
+{
+    unsigned value;
+    unsigned hash = MD_FNV1A_BASE;
+    int i;
+
+    for(i = 0; i < n; i++) {
+        value = data[i];
+        hash = fnv1a(hash, &value, sizeof(unsigned));
+    }
+
+    return hash;
+}
+
+
+static void
+recurse(char* buffer, size_t off, size_t len)
+{
+    int ch;
+
+    if(off < len - 1) {
+        for(ch = 'a'; ch <= 'z'; ch++) {
+            buffer[off] = ch;
+            recurse(buffer, off+1, len);
+        }
+    } else {
+        for(ch = 'a'; ch <= 'z'; ch++) {
+            buffer[off] = ch;
+            if(unicode_hash(buffer, len) == etalon) {
+                printf("Dup: %.*s\n", (int)len, buffer);
+            }
+        }
+    }
+}
+
+int
+main(int argc, char** argv)
+{
+    char buffer[32];
+    int len;
+
+    if(argc < 2)
+        etalon = unicode_hash("foo", 3);
+    else
+        etalon = unicode_hash(argv[1], strlen(argv[1]));
+
+    for(len = 1; len <= sizeof(buffer); len++)
+        recurse(buffer, 0, len);
+
+    return 0;
+}
+~~~

+ 40 - 0
markdown.mod/md4c/test/fuzz-input/commonmark.md

@@ -0,0 +1,40 @@
+
+# h1
+## h2
+### h3
+#### h4
+##### h5
+###### h6
+
+h1
+==
+
+h2
+--
+
+--------------------
+
+    indented code
+
+```
+fenced code
+```
+
+<tag attr='val' attr2="val2">
+
+> quote
+
+* list item
+1. list item
+
+[ref]: /url
+
+paragraph
+&copy; &#1234; &#xabcd;
+`code`
+*emph* **strong** ***strong emph***
+_emph_ __strong__ ___strong emph___
+[ref] [ref][] [link](/url)
+![ref] ![ref][] ![img](/url)
+<http://example.com> <[email protected]>
+\\ \* \. \` \

+ 10 - 0
markdown.mod/md4c/test/fuzz-input/gfm.md

@@ -0,0 +1,10 @@
+* [ ] unchecked
+* [x] checked
+
+ A | B | C
+---|--:|:-:
+aaa|bbb|ccc
+
+~del~ ~~del~~
+
+http://example.com www.example.com [email protected]

+ 1 - 0
markdown.mod/md4c/test/fuzz-input/latex-math.md

@@ -0,0 +1 @@
+$a^2+b^2=c^2$ $$a^2+b^2=c^2$$

+ 1 - 0
markdown.mod/md4c/test/fuzz-input/wiki.md

@@ -0,0 +1 @@
+[[wiki]] [[wiki|label]]

+ 35 - 0
markdown.mod/md4c/test/fuzzers/fuzz-mdhtml.c

@@ -0,0 +1,35 @@
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "md4c-html.h"
+
+
+static void
+process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
+{
+    /* Intentionally a no-op: the fuzzer does not need the generated output,
+     * so everything passed in is simply discarded. */
+    (void) text;
+    (void) size;
+    (void) userdata;
+}
+
+/* libFuzzer entry point.
+ *
+ * The first 2 * sizeof(unsigned) bytes of the input (8 bytes on typical
+ * platforms) are consumed as the parser flags followed by the renderer
+ * flags; the rest of the input is fed to md_html() as the document.
+ * Inputs too short to carry both flag words are ignored.
+ *
+ * Always returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+    unsigned parser_flags, renderer_flags;
+
+    if(size < 2 * sizeof(unsigned)) {
+        /* Not enough bytes for the two flag words. */
+        return 0;
+    }
+
+    /* Use memcpy() instead of dereferencing (unsigned*)data: it does not
+     * cast away const and makes no assumption about the alignment of the
+     * fuzzer-provided buffer. */
+    memcpy(&parser_flags, data, sizeof(unsigned));
+    data += sizeof(unsigned); size -= sizeof(unsigned);
+
+    memcpy(&renderer_flags, data, sizeof(unsigned));
+    data += sizeof(unsigned); size -= sizeof(unsigned);
+
+    /* Render the remaining bytes; the output is discarded by process_output(). */
+    md_html(data, size, process_output, NULL, parser_flags, renderer_flags);
+
+    return 0;
+}

+ 163 - 0
markdown.mod/md4c/test/heading-auto-identifier.txt

@@ -0,0 +1,163 @@
+
+# Heading auto identifiers
+
+With the flag `MD_FLAG_HEADINGAUTOID`, MD4C generates an identifier for each heading.
+
+```````````````````````````````` example
+# heading
+.
+<h1 id="heading">heading</h1>
+````````````````````````````````
+
+Spaces are replaced by `-` and uppercase letters are converted to lowercase.
+
+```````````````````````````````` example
+# The Heading
+.
+<h1 id="the-heading">The Heading</h1>
+````````````````````````````````
+
+Unicode characters are converted to lowercase as well.
+
+```````````````````````````````` example
+# ĀĄŁŇŢŰŽבあИЯ𐒰
+.
+<h1 id="āąłňţűžבあия𐓘">ĀĄŁŇŢŰŽבあИЯ𐒰</h1>
+````````````````````````````````
+
+
+Non-alphanumeric characters are discarded, except for `-`.
+
+```````````````````````````````` example
+# The %@!= stupid _ heading !
+.
+<h1 id="the--stupid--heading-">The %@!= stupid _ heading !</h1>
+````````````````````````````````
+
+As a result, a heading may end up with no identifier at all.
+
+```````````````````````````````` example
+# !
+.
+<h1>!</h1>
+````````````````````````````````
+
+Headings starting with numbers are not treated differently.
+
+```````````````````````````````` example
+# 1.1 The start
+.
+<h1 id="11-the-start">1.1 The start</h1>
+````````````````````````````````
+
+Heading can contain link inside
+
+```````````````````````````````` example
+# Title with a [link](hidden) inside  
+.
+<h1 id="title-with-a-link-inside">Title with a <a href="hidden">link</a> inside</h1>
+````````````````````````````````
+
+Headings can contain wiki links, but that requires the `MD_FLAG_WIKILINKS` flag.
+
+```````````````````````````````` example
+# Title with a [[hidden-wiki|link]] inside  
+.
+<h1 id="title-with-a-hidden-wikilink-inside">Title with a [[hidden-wiki|link]] inside</h1>
+````````````````````````````````
+
+Heading can contain formatting
+
+```````````````````````````````` example
+# Title with *emphasis* inside 
+.
+<h1 id="title-with-emphasis-inside">Title with <em>emphasis</em> inside</h1>
+````````````````````````````````
+
+Heading can contain some emoji code like :emoji:, they are treated as normal text
+
+```````````````````````````````` example
+# emoji1 :+1:
+# emoji2 :-1:
+# emoji3 :100:
+.
+<h1 id="emoji1-1">emoji1 :+1:</h1>
+<h1 id="emoji2--1">emoji2 :-1:</h1>
+<h1 id="emoji3-100">emoji3 :100:</h1>
+````````````````````````````````
+
+But unicode emoji characters are stripped 
+
+```````````````````````````````` example
+# emoji4 👍
+# emoji5 💯
+# the + sign
+.
+<h1 id="emoji4-">emoji4 👍</h1>
+<h1 id="emoji5-">emoji5 💯</h1>
+<h1 id="the--sign">the + sign</h1>
+````````````````````````````````
+
+Headings that produce the same identifier get a numeric suffix.
+
+```````````````````````````````` example 
+# title
+# title
+## title
+### title
+# Title
+# title
+# ti!tle
+# title
+# title
+# title
+# title
+# title
+.
+<h1 id="title">title</h1>
+<h1 id="title-1">title</h1>
+<h2 id="title-2">title</h2>
+<h3 id="title-3">title</h3>
+<h1 id="title-4">Title</h1>
+<h1 id="title-5">title</h1>
+<h1 id="title-6">ti!tle</h1>
+<h1 id="title-7">title</h1>
+<h1 id="title-8">title</h1>
+<h1 id="title-9">title</h1>
+<h1 id="title-10">title</h1>
+<h1 id="title-11">title</h1>
+````````````````````````````````
+
+# Coverage
+
+additional test to improve test coverage.
+
+No heading in a document
+
+```````````````````````````````` example 
+no heading
+.
+<p>no heading</p>
+````````````````````````````````
+
+A multi-line heading requires a link so that it can contain a new line.
+
+```````````````````````````````` example 
+Title with a [multi 
+line
+link](link) inside 
+======================
+.
+<h1 id="title-with-a-multi-line-link-inside">Title with a <a href="link">multi
+line
+link</a> inside</h1>
+
+````````````````````````````````
+
+We need to be able to parse empty title
+```````````````````````````````` example 
+#
+.
+<h1></h1>
+````````````````````````````````
+

+ 39 - 0
markdown.mod/md4c/test/latex-math.txt

@@ -0,0 +1,39 @@
+
+# LaTeX Math
+
+With the flag `MD_FLAG_LATEXMATHSPANS`, MD4C enables extension for recognition
+of LaTeX style math spans.
+
+A math span is any text wrapped in dollars or double dollars (`$...$` or
+`$$...$$`).
+
+```````````````````````````````` example
+$a+b=c$ Hello, world!
+.
+<p><x-equation>a+b=c</x-equation> Hello, world!</p>
+````````````````````````````````
+
+If the double dollar sign is used, the math span is a display math span.
+
+```````````````````````````````` example
+This is a display equation: $$\int_a^b x dx$$.
+.
+<p>This is a display equation: <x-equation type="display">\int_a^b x dx</x-equation>.</p>
+````````````````````````````````
+
+Math spans may span multiple lines as they are normal spans:
+
+```````````````````````````````` example
+$$
+\int_a^b
+f(x) dx
+$$
+.
+<p><x-equation type="display">\int_a^b f(x) dx </x-equation></p>
+````````````````````````````````
+
+Note though that many (simple) renderers may output the math spans just as a
+verbatim text. (This includes the HTML renderer used by the `md2html` utility.)
+
+Only advanced renderers which implement LaTeX math syntax can be expected to
+provide better results.

+ 194 - 0
markdown.mod/md4c/test/normalize.py

@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+from html.parser import HTMLParser
+import urllib
+
+try:
+    from html.parser import HTMLParseError
+except ImportError:
+    # HTMLParseError was removed in Python 3.5. It could never be
+    # thrown, so we define a placeholder instead.
+    class HTMLParseError(Exception):
+        pass
+
+from html.entities import name2codepoint
+import sys
+import re
+import html
+
+# Normalization code, adapted from
+# https://github.com/karlcow/markdown-testsuite/
+significant_attrs = ["alt", "href", "src", "title"]
+whitespace_re = re.compile('\s+')
+class MyHTMLParser(HTMLParser):
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.convert_charrefs = False
+        self.last = "starttag"
+        self.in_pre = False
+        self.output = ""
+        self.last_tag = ""
+    def handle_data(self, data):
+        after_tag = self.last == "endtag" or self.last == "starttag"
+        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
+        if after_tag and self.last_tag == "br":
+            data = data.lstrip('\n')
+        if not self.in_pre:
+            data = whitespace_re.sub(' ', data)
+        if after_block_tag and not self.in_pre:
+            if self.last == "starttag":
+                data = data.lstrip()
+            elif self.last == "endtag":
+                data = data.strip()
+        self.output += data
+        self.last = "data"
+    def handle_endtag(self, tag):
+        if tag == "pre":
+            self.in_pre = False
+        elif self.is_block_tag(tag):
+            self.output = self.output.rstrip()
+        self.output += "</" + tag + ">"
+        self.last_tag = tag
+        self.last = "endtag"
+    def handle_starttag(self, tag, attrs):
+        if tag == "pre":
+            self.in_pre = True
+        if self.is_block_tag(tag):
+            self.output = self.output.rstrip()
+        self.output += "<" + tag
+        # For now we don't strip out 'extra' attributes, because of
+        # raw HTML test cases.
+        # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
+        if attrs:
+            attrs.sort()
+            for (k,v) in attrs:
+                self.output += " " + k
+                if v in ['href','src']:
+                    self.output += ("=" + '"' +
+                            urllib.quote(urllib.unquote(v), safe='/') + '"')
+                elif v != None:
+                    self.output += ("=" + '"' + html.escape(v,quote=True) + '"')
+        self.output += ">"
+        self.last_tag = tag
+        self.last = "starttag"
+    def handle_startendtag(self, tag, attrs):
+        """Ignore closing tag for self-closing """
+        self.handle_starttag(tag, attrs)
+        self.last_tag = tag
+        self.last = "endtag"
+    def handle_comment(self, data):
+        self.output += '<!--' + data + '-->'
+        self.last = "comment"
+    def handle_decl(self, data):
+        self.output += '<!' + data + '>'
+        self.last = "decl"
+    def unknown_decl(self, data):
+        self.output += '<!' + data + '>'
+        self.last = "decl"
+    def handle_pi(self,data):
+        self.output += '<?' + data + '>'
+        self.last = "pi"
+    def handle_entityref(self, name):
+        try:
+            c = chr(name2codepoint[name])
+        except KeyError:
+            c = None
+        self.output_char(c, '&' + name + ';')
+        self.last = "ref"
+    def handle_charref(self, name):
+        try:
+            if name.startswith("x"):
+                c = chr(int(name[1:], 16))
+            else:
+                c = chr(int(name))
+        except ValueError:
+                c = None
+        self.output_char(c, '&' + name + ';')
+        self.last = "ref"
+    # Helpers.
+    def output_char(self, c, fallback):
+        if c == '<':
+            self.output += "&lt;"
+        elif c == '>':
+            self.output += "&gt;"
+        elif c == '&':
+            self.output += "&amp;"
+        elif c == '"':
+            self.output += "&quot;"
+        elif c == None:
+            self.output += fallback
+        else:
+            self.output += c
+
+    def is_block_tag(self,tag):
+        return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
+            'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
+            'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
+            'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
+            'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
+            'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
+
+def normalize_html(html):
+    r"""
+    Return normalized form of HTML which ignores insignificant output
+    differences:
+
+    Multiple inner whitespaces are collapsed to a single space (except
+    in pre tags):
+
+        >>> normalize_html("<p>a  \t b</p>")
+        '<p>a b</p>'
+
+        >>> normalize_html("<p>a  \t\nb</p>")
+        '<p>a b</p>'
+
+    * Whitespace surrounding block-level tags is removed.
+
+        >>> normalize_html("<p>a  b</p>")
+        '<p>a b</p>'
+
+        >>> normalize_html(" <p>a  b</p>")
+        '<p>a b</p>'
+
+        >>> normalize_html("<p>a  b</p> ")
+        '<p>a b</p>'
+
+        >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
+        '<p>a b</p>'
+
+        >>> normalize_html("<i>a  b</i> ")
+        '<i>a b</i> '
+
+    * Self-closing tags are converted to open tags.
+
+        >>> normalize_html("<br />")
+        '<br>'
+
+    * Attributes are sorted and lowercased.
+
+        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
+        '<a href="foo" title="bar">x</a>'
+
+    * References are converted to unicode, except that '<', '>', '&', and
+      '"' are rendered using entities.
+
+        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
+        '\u2200&amp;&gt;&lt;&quot;'
+
+    """
+    html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
+    try:
+        parser = MyHTMLParser()
+        # We work around HTMLParser's limitations parsing CDATA
+        # by breaking the input into chunks and passing CDATA chunks
+        # through verbatim.
+        for chunk in re.finditer(html_chunk_re, html):
+            if chunk.group(0)[:8] == "<![CDATA":
+                parser.output += chunk.group(0)
+            else:
+                parser.feed(chunk.group(0))
+        parser.close()
+        return parser.output
+    except HTMLParseError as e:
+        sys.stderr.write("Normalization error: " + e.msg + "\n")
+        return html  # on error, return unnormalized HTML

+ 63 - 0
markdown.mod/md4c/test/pathological_auto_ident_tests.py

@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+import argparse
+import sys
+import platform
+from cmark import CMark
+from timeit import default_timer as timer
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    args = parser.parse_args(sys.argv[1:])
+
+cmark = CMark(prog=args.program, library_dir=args.library_dir)
+
+# list of pairs consisting of input and a regex that must match the output.
+pathological = {
+    # note - some pythons have limit of 65535 for {num-matches} in re.
+
+    "many identical heading":
+            (("# a\n" * (50000+1)),
+            re.compile("^<h1 id=\"a\">a</h1>\n(<h1 id=\"a-\d+\">a</h1>\n){50000}$")),
+    "too many identical heading":
+            (("# a\n" * (70000+2)),
+            re.compile("^<h1 id=\"a\">a</h1>\n(<h1 id=\"a-\d+\">a</h1>\n){70000}(<h1 id=\"a-65535\">a</h1>\n)$")),
+    "heading realocation":
+            (("# A long title to trigger a reallocation\n"*(300+1)),
+            re.compile("^<h1 id=\"a-long-title-to-trigger-a-reallocation\">A long title to trigger a reallocation</h1>\n(<h1 id=\"a-long-title-to-trigger-a-reallocation-\d+\">A long title to trigger a reallocation</h1>\n){300}$"))      
+}
+
+whitespace_re = re.compile('/s+/')
+passed = 0
+errored = 0
+failed = 0
+
+#print("Testing pathological cases:")
+for description in pathological:
+    (inp, regex) = pathological[description]
+    start = timer()
+    [rc, actual, err] = cmark.to_html(inp)
+    end = timer()
+    if rc != 0:
+        errored += 1
+        print('{:35} [ERRORED (return code %d)]'.format(description, rc))
+        print(err)
+    elif regex.search(actual):
+        print('{:35} [PASSED] {:.3f} secs'.format(description, end-start))
+        passed += 1
+    else:
+        print('{:35} [FAILED]'.format(description))
+        print(repr(actual))
+        failed += 1
+
+print("%d passed, %d failed, %d errored" % (passed, failed, errored))
+if (failed == 0 and errored == 0):
+    exit(0)
+else:
+    exit(1)

+ 128 - 0
markdown.mod/md4c/test/pathological_tests.py

@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+import argparse
+import sys
+import platform
+from cmark import CMark
+from timeit import default_timer as timer
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    args = parser.parse_args(sys.argv[1:])
+
+cmark = CMark(prog=args.program, library_dir=args.library_dir)
+
+# list of pairs consisting of input and a regex that must match the output.
+pathological = {
+    # note - some pythons have limit of 65535 for {num-matches} in re.
+    "U+0000":
+            ("abc\u0000de\u0000",
+            re.compile("abc\ufffd?de\ufffd?")),
+    "U+FEFF (Unicode BOM)":
+            ("\ufefffoo",
+            re.compile("<p>foo</p>")),
+    "nested strong emph":
+            (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
+            re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
+    "many emph closers with no openers":
+            (("a_ " * 65000),
+            re.compile("(a[_] ){64999}a_")),
+    "many emph openers with no closers":
+            (("_a " * 65000),
+            re.compile("(_a ){64999}_a")),
+    "many 3-emph openers with no closers":
+            (("a***" * 65000),
+            re.compile("(a<em><strong>a</strong></em>){32500}")),
+    "many link closers with no openers":
+            (("a]" * 65000),
+            re.compile("(a\]){65000}")),
+    "many link openers with no closers":
+            (("[a" * 65000),
+            re.compile("(\[a){65000}")),
+    "mismatched openers and closers":
+            (("*a_ " * 50000),
+            re.compile("([*]a[_] ){49999}[*]a_")),
+    "openers and closers multiple of 3":
+            (("a**b" + ("c* " * 50000)),
+            re.compile("a[*][*]b(c[*] ){49999}c[*]")),
+    "link openers and emph closers":
+            (("[ a_" * 50000),
+            re.compile("(\[ a_){50000}")),
+    "hard link/emph case":
+            ("**x [a*b**c*](d)",
+            re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
+    "nested brackets":
+            (("[" * 50000) + "a" + ("]" * 50000),
+            re.compile("\[{50000}a\]{50000}")),
+    "nested block quotes":
+            ((("> " * 50000) + "a"),
+            re.compile("(<blockquote>\r?\n){50000}")),
+    "backticks":
+            ("".join(map(lambda x: ("e" + "`" * x), range(1,1000))),
+            re.compile("^<p>[e`]*</p>\r?\n$")),
+    "many links":
+            ("[t](/u) " * 50000,
+            re.compile("(<a href=\"/u\">t</a> ?){50000}")),
+    "many references":
+            ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,20000 * 16))) + "[0] " * 20000,
+            re.compile("(\[0\] ){19999}")),
+    "deeply nested lists":
+            ("".join(map(lambda x: ("  " * x + "* a\n"), range(0,1000))),
+            re.compile("<ul>\r?\n(<li>a<ul>\r?\n){999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){999}")),
+    "many html openers and closers":
+            (("<>" * 50000),
+            re.compile("(&lt;&gt;){50000}")),
+    "many html proc. inst. openers":
+            (("x" + "<?" * 50000),
+            re.compile("x(&lt;\\?){50000}")),
+    "many html CDATA openers":
+            (("x" + "<![CDATA[" * 50000),
+            re.compile("x(&lt;!\\[CDATA\\[){50000}")),
+    "many backticks and escapes":
+            (("\\``" * 50000),
+            re.compile("(``){50000}")),
+    "many broken link titles":
+            (("[ (](" * 50000),
+            re.compile("(\[ \(\]\(){50000}")),
+    "broken thematic break":
+            (("* " * 50000 + "a"),
+            re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
+    "nested invalid link references":
+            (("[" * 50000 + "]" * 50000 + "\n\n[a]: /b"),
+            re.compile("\[{50000}\]{50000}"))
+}
+
+whitespace_re = re.compile('/s+/')
+passed = 0
+errored = 0
+failed = 0
+
+#print("Testing pathological cases:")
+for description in pathological:
+    (inp, regex) = pathological[description]
+    start = timer()
+    [rc, actual, err] = cmark.to_html(inp)
+    end = timer()
+    if rc != 0:
+        errored += 1
+        print('{:35} [ERRORED (return code %d)]'.format(description, rc))
+        print(err)
+    elif regex.search(actual):
+        print('{:35} [PASSED] {:.3f} secs'.format(description, end-start))
+        passed += 1
+    else:
+        print('{:35} [FAILED]'.format(description))
+        print(repr(actual))
+        failed += 1
+
+print("%d passed, %d failed, %d errored" % (passed, failed, errored))
+if (failed == 0 and errored == 0):
+    exit(0)
+else:
+    exit(1)

+ 50 - 0
markdown.mod/md4c/test/permissive-email-autolinks.txt

@@ -0,0 +1,50 @@
+
+# Permissive E-mail Autolinks
+
+With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, MD4C enables more permissive
+recognition of e-mail addresses and transforms them to autolinks, even if they
+do not exactly follow the syntax of autolink as specified in CommonMark
+specification.
+
+This is standard CommonMark e-mail autolink:
+
+```````````````````````````````` example
+E-mail: <mailto:john.doe@gmail.com>
+.
+<p>E-mail: <a href="mailto:john.doe@gmail.com">mailto:john.doe@gmail.com</a></p>
+````````````````````````````````
+
+With the permissive autolinks enabled, this is sufficient:
+
+```````````````````````````````` example
+E-mail: john.doe@gmail.com
+.
+<p>E-mail: <a href="mailto:john.doe@gmail.com">john.doe@gmail.com</a></p>
+````````````````````````````````
+
+`+` can occur before the `@`, but not after.
+
+```````````````````````````````` example
+hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
+.
+<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
+````````````````````````````````
+
+`.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at
+the end of the email address, in which case it will not be considered part of
+the address:
+
+```````````````````````````````` example
+a.b-c_d@a.b
+
+a.b-c_d@a.b.
+
+a.b-c_d@a.b-
+
+a.b-c_d@a.b_
+.
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
+<p>a.b-c_d@a.b-</p>
+<p>a.b-c_d@a.b_</p>
+````````````````````````````````

+ 99 - 0
markdown.mod/md4c/test/permissive-url-autolinks.txt

@@ -0,0 +1,99 @@
+
+# Permissive URL Autolinks
+
+With the flag `MD_FLAG_PERMISSIVEURLAUTOLINKS`, MD4C enables more permissive recognition
+of URLs and transforms them to autolinks, even if they do not exactly follow the syntax
+of autolink as specified in CommonMark specification.
+
+This is a standard CommonMark autolink:
+
+```````````````````````````````` example
+Homepage: <https://github.com/mity/md4c>
+.
+<p>Homepage: <a href="https://github.com/mity/md4c">https://github.com/mity/md4c</a></p>
+````````````````````````````````
+
+With the permissive autolinks enabled, this is sufficient:
+
+```````````````````````````````` example
+Homepage: https://github.com/mity/md4c
+.
+<p>Homepage: <a href="https://github.com/mity/md4c">https://github.com/mity/md4c</a></p>
+````````````````````````````````
+
+But this permissive autolink feature can work only for very widely used URL
+schemes, in alphabetical order `ftp:`, `http:`, `https:`.
+
+That's why this is not a permissive autolink:
+
+```````````````````````````````` example
+ssh://root@example.com
+.
+<p>ssh://root@example.com</p>
+````````````````````````````````
+
+The same rules for path validation as for permissive WWW autolinks apply.
+Therefore the final question mark here is not part of the autolink:
+
+```````````````````````````````` example
+Have you ever visited http://www.zombo.com?
+.
+<p>Have you ever visited <a href="http://www.zombo.com">http://www.zombo.com</a>?</p>
+````````````````````````````````
+
+But in contrast, in this example it is:
+
+```````````````````````````````` example
+http://www.bing.com/search?q=md4c
+.
+<p><a href="http://www.bing.com/search?q=md4c">http://www.bing.com/search?q=md4c</a></p>
+````````````````````````````````
+
+And finally one complex example:
+
+```````````````````````````````` example
+http://commonmark.org
+
+(Visit https://encrypted.google.com/search?q=Markup+(business))
+
+Anonymous FTP is available at ftp://foo.bar.baz.
+.
+<p><a href="http://commonmark.org">http://commonmark.org</a></p>
+<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
+<p>Anonymous FTP is available at <a href="ftp://foo.bar.baz">ftp://foo.bar.baz</a>.</p>
+````````````````````````````````
+
+
+## GitHub Issues
+
+### [Issue 53](https://github.com/mity/md4c/issues/53)
+
+```````````````````````````````` example
+This is [link](http://github.com/).
+.
+<p>This is <a href="http://github.com/">link</a>.</p>
+````````````````````````````````
+
+```````````````````````````````` example
+This is [link](http://github.com/)X
+.
+<p>This is <a href="http://github.com/">link</a>X</p>
+````````````````````````````````
+
+
+## [Issue 76](https://github.com/mity/md4c/issues/76)
+
+```````````````````````````````` example
+*(http://example.com)*
+.
+<p><em>(<a href="http://example.com">http://example.com</a>)</em></p>
+````````````````````````````````
+
+
+## [Issue 152](https://github.com/mity/md4c/issues/152)
+
+```````````````````````````````` example
+[http://example.com](http://example.com)
+.
+<p><a href="http://example.com">http://example.com</a></p>
+````````````````````````````````

+ 107 - 0
markdown.mod/md4c/test/permissive-www-autolinks.txt

@@ -0,0 +1,107 @@
+
+# Permissive WWW Autolinks
+
+With the flag `MD_FLAG_PERMISSIVEWWWAUTOLINKS`, MD4C enables recognition of
+autolinks starting with `www.`, even if they do not exactly follow the syntax
+of autolink as specified in CommonMark specification.
+
+These do not have to be enclosed in `<` and `>`, and they do not even need
+any preceding scheme specification.
+
+The WWW autolink will be recognized when the text `www.` is found followed by a
+valid domain. A valid domain consists of segments of alphanumeric characters,
+underscores (`_`) and hyphens (`-`) separated by periods (`.`). There must be
+at least one period, and no underscores may be present in the last two segments
+of the domain.
+
+The scheme `http` will be inserted automatically:
+
+```````````````````````````````` example
+www.commonmark.org
+.
+<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
+````````````````````````````````
+
+After a valid domain, zero or more non-space non-`<` characters may follow:
+
+```````````````````````````````` example
+Visit www.commonmark.org/help for more information.
+.
+<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>
+````````````````````````````````
+
+We then apply extended autolink path validation as follows:
+
+Trailing punctuation (specifically, `?`, `!`, `.`, `,`, `:`, `*`, `_`, and `~`)
+will not be considered part of the autolink, though they may be included in the
+interior of the link:
+
+```````````````````````````````` example
+Visit www.commonmark.org.
+
+Visit www.commonmark.org/a.b.
+.
+<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
+<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
+````````````````````````````````
+
+When an autolink ends in `)`, we scan the entire autolink for the total number
+of parentheses.  If there is a greater number of closing parentheses than
+opening ones, we don't consider the last character part of the autolink, in
+order to facilitate including an autolink inside a parenthesis:
+
+```````````````````````````````` example
+www.google.com/search?q=Markup+(business)
+
+(www.google.com/search?q=Markup+(business))
+.
+<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
+<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>
+````````````````````````````````
+
+This check is only done when the link ends in a closing parenthesis `)`, so if
+the only parentheses are in the interior of the autolink, no special rules are
+applied:
+
+```````````````````````````````` example
+www.google.com/search?q=(business))+ok
+.
+<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>
+````````````````````````````````
+
+If an autolink ends in a semicolon (`;`), we check to see if it appears to
+resemble an [entity reference][entity references]; if the preceding text is `&`
+followed by one or more alphanumeric characters.  If so, it is excluded from
+the autolink:
+
+```````````````````````````````` example
+www.google.com/search?q=commonmark&hl=en
+
+www.google.com/search?q=commonmark&hl;
+.
+<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
+<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p>
+````````````````````````````````
+
+`<` immediately ends an autolink.
+
+```````````````````````````````` example
+www.commonmark.org/he<lp
+.
+<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>
+````````````````````````````````
+
+
+## GitHub Issues
+
+### [Issue 53](https://github.com/mity/md4c/issues/53)
+```````````````````````````````` example
+This is [link](www.github.com/).
+.
+<p>This is <a href="www.github.com/">link</a>.</p>
+````````````````````````````````
+```````````````````````````````` example
+This is [link](www.github.com/)X
+.
+<p>This is <a href="www.github.com/">link</a>X</p>
+````````````````````````````````

+ 9756 - 0
markdown.mod/md4c/test/spec.txt

@@ -0,0 +1,9756 @@
+---
+title: CommonMark Spec
+author: John MacFarlane
+version: '0.30'
+date: '2021-06-19'
+license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
+...
+
+# Introduction
+
+## What is Markdown?
+
+Markdown is a plain text format for writing structured documents,
+based on conventions for indicating formatting in email
+and usenet posts.  It was developed by John Gruber (with
+help from Aaron Swartz) and released in 2004 in the form of a
+[syntax description](http://daringfireball.net/projects/markdown/syntax)
+and a Perl script (`Markdown.pl`) for converting Markdown to
+HTML.  In the next decade, dozens of implementations were
+developed in many languages.  Some extended the original
+Markdown syntax with conventions for footnotes, tables, and
+other document elements.  Some allowed Markdown documents to be
+rendered in formats other than HTML.  Websites like Reddit,
+StackOverflow, and GitHub had millions of people using Markdown.
+And Markdown started to be used beyond the web, to author books,
+articles, slide shows, letters, and lecture notes.
+
+What distinguishes Markdown from many other lightweight markup
+syntaxes, which are often easier to write, is its readability.
+As Gruber writes:
+
+> The overriding design goal for Markdown's formatting syntax is
+> to make it as readable as possible. The idea is that a
+> Markdown-formatted document should be publishable as-is, as
+> plain text, without looking like it's been marked up with tags
+> or formatting instructions.
+> (<http://daringfireball.net/projects/markdown/>)
+
+The point can be illustrated by comparing a sample of
+[AsciiDoc](http://www.methods.co.nz/asciidoc/) with
+an equivalent sample of Markdown.  Here is a sample of
+AsciiDoc from the AsciiDoc manual:
+
+```
+1. List item one.
++
+List item one continued with a second paragraph followed by an
+Indented block.
++
+.................
+$ ls *.sh
+$ mv *.sh ~/tmp
+.................
++
+List item continued with a third paragraph.
+
+2. List item two continued with an open block.
++
+--
+This paragraph is part of the preceding list item.
+
+a. This list is nested and does not require explicit item
+continuation.
++
+This paragraph is part of the preceding list item.
+
+b. List item b.
+
+This paragraph belongs to item two of the outer list.
+--
+```
+
+And here is the equivalent in Markdown:
+```
+1.  List item one.
+
+    List item one continued with a second paragraph followed by an
+    Indented block.
+
+        $ ls *.sh
+        $ mv *.sh ~/tmp
+
+    List item continued with a third paragraph.
+
+2.  List item two continued with an open block.
+
+    This paragraph is part of the preceding list item.
+
+    1. This list is nested and does not require explicit item continuation.
+
+       This paragraph is part of the preceding list item.
+
+    2. List item b.
+
+    This paragraph belongs to item two of the outer list.
+```
+
+The AsciiDoc version is, arguably, easier to write. You don't need
+to worry about indentation.  But the Markdown version is much easier
+to read.  The nesting of list items is apparent to the eye in the
+source, not just in the processed document.
+
+## Why is a spec needed?
+
+John Gruber's [canonical description of Markdown's
+syntax](http://daringfireball.net/projects/markdown/syntax)
+does not specify the syntax unambiguously.  Here are some examples of
+questions it does not answer:
+
+1.  How much indentation is needed for a sublist?  The spec says that
+    continuation paragraphs need to be indented four spaces, but is
+    not fully explicit about sublists.  It is natural to think that
+    they, too, must be indented four spaces, but `Markdown.pl` does
+    not require that.  This is hardly a "corner case," and divergences
+    between implementations on this issue often lead to surprises for
+    users in real documents. (See [this comment by John
+    Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
+
+2.  Is a blank line needed before a block quote or heading?
+    Most implementations do not require the blank line.  However,
+    this can lead to unexpected results in hard-wrapped text, and
+    also to ambiguities in parsing (note that some implementations
+    put the heading inside the blockquote, while others do not).
+    (John Gruber has also spoken [in favor of requiring the blank
+    lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
+
+3.  Is a blank line needed before an indented code block?
+    (`Markdown.pl` requires it, but this is not mentioned in the
+    documentation, and some implementations do not require it.)
+
+    ``` markdown
+    paragraph
+        code?
+    ```
+
+4.  What is the exact rule for determining when list items get
+    wrapped in `<p>` tags?  Can a list be partially "loose" and partially
+    "tight"?  What should we do with a list like this?
+
+    ``` markdown
+    1. one
+
+    2. two
+    3. three
+    ```
+
+    Or this?
+
+    ``` markdown
+    1.  one
+        - a
+
+        - b
+    2.  two
+    ```
+
+    (There are some relevant comments by John Gruber
+    [here](http://article.gmane.org/gmane.text.markdown.general/2554).)
+
+5.  Can list markers be indented?  Can ordered list markers be right-aligned?
+
+    ``` markdown
+     8. item 1
+     9. item 2
+    10. item 2a
+    ```
+
+6.  Is this one list with a thematic break in its second item,
+    or two lists separated by a thematic break?
+
+    ``` markdown
+    * a
+    * * * * *
+    * b
+    ```
+
+7.  When list markers change from numbers to bullets, do we have
+    two lists or one?  (The Markdown syntax description suggests two,
+    but the perl scripts and many other implementations produce one.)
+
+    ``` markdown
+    1. fee
+    2. fie
+    -  foe
+    -  fum
+    ```
+
+8.  What are the precedence rules for the markers of inline structure?
+    For example, is the following a valid link, or does the code span
+    take precedence ?
+
+    ``` markdown
+    [a backtick (`)](/url) and [another backtick (`)](/url).
+    ```
+
+9.  What are the precedence rules for markers of emphasis and strong
+    emphasis?  For example, how should the following be parsed?
+
+    ``` markdown
+    *foo *bar* baz*
+    ```
+
+10. What are the precedence rules between block-level and inline-level
+    structure?  For example, how should the following be parsed?
+
+    ``` markdown
+    - `a long code span can contain a hyphen like this
+      - and it can screw things up`
+    ```
+
+11. Can list items include section headings?  (`Markdown.pl` does not
+    allow this, but does allow blockquotes to include headings.)
+
+    ``` markdown
+    - # Heading
+    ```
+
+12. Can list items be empty?
+
+    ``` markdown
+    * a
+    *
+    * b
+    ```
+
+13. Can link references be defined inside block quotes or list items?
+
+    ``` markdown
+    > Blockquote [foo].
+    >
+    > [foo]: /url
+    ```
+
+14. If there are multiple definitions for the same reference, which takes
+    precedence?
+
+    ``` markdown
+    [foo]: /url1
+    [foo]: /url2
+
+    [foo][]
+    ```
+
+In the absence of a spec, early implementers consulted `Markdown.pl`
+to resolve these ambiguities.  But `Markdown.pl` was quite buggy, and
+gave manifestly bad results in many cases, so it was not a
+satisfactory replacement for a spec.
+
+Because there is no unambiguous spec, implementations have diverged
+considerably.  As a result, users are often surprised to find that
+a document that renders one way on one system (say, a GitHub wiki)
+renders differently on another (say, converting to docbook using
+pandoc).  To make matters worse, because nothing in Markdown counts
+as a "syntax error," the divergence often isn't discovered right away.
+
+## About this document
+
+This document attempts to specify Markdown syntax unambiguously.
+It contains many examples with side-by-side Markdown and
+HTML.  These are intended to double as conformance tests.  An
+accompanying script `spec_tests.py` can be used to run the tests
+against any Markdown program:
+
+    python test/spec_tests.py --spec spec.txt --program PROGRAM
+
+Since this document describes how Markdown is to be parsed into
+an abstract syntax tree, it would have made sense to use an abstract
+representation of the syntax tree instead of HTML.  But HTML is capable
+of representing the structural distinctions we need to make, and the
+choice of HTML for the tests makes it possible to run the tests against
+an implementation without writing an abstract syntax tree renderer.
+
+Note that not every feature of the HTML samples is mandated by
+the spec.  For example, the spec says what counts as a link
+destination, but it doesn't mandate that non-ASCII characters in
+the URL be percent-encoded.  To use the automatic tests,
+implementers will need to provide a renderer that conforms to
+the expectations of the spec examples (percent-encoding
+non-ASCII characters in URLs).  But a conforming implementation
+can use a different renderer and may choose not to
+percent-encode non-ASCII characters in URLs.
+
+This document is generated from a text file, `spec.txt`, written
+in Markdown with a small extension for the side-by-side tests.
+The script `tools/makespec.py` can be used to convert `spec.txt` into
+HTML or CommonMark (which can then be converted into other formats).
+
+In the examples, the `→` character is used to represent tabs.
+
+# Preliminaries
+
+## Characters and lines
+
+Any sequence of [characters] is a valid CommonMark
+document.
+
+A [character](@) is a Unicode code point.  Although some
+code points (for example, combining accents) do not correspond to
+characters in an intuitive sense, all code points count as characters
+for purposes of this spec.
+
+This spec does not specify an encoding; it thinks of lines as composed
+of [characters] rather than bytes.  A conforming parser may be limited
+to a certain encoding.
+
+A [line](@) is a sequence of zero or more [characters]
+other than line feed (`U+000A`) or carriage return (`U+000D`),
+followed by a [line ending] or by the end of file.
+
+A [line ending](@) is a line feed (`U+000A`), a carriage return
+(`U+000D`) not followed by a line feed, or a carriage return and a
+following line feed.
+
+A line containing no characters, or a line containing only spaces
+(`U+0020`) or tabs (`U+0009`), is called a [blank line](@).
+
+The following definitions of character classes will be used in this spec:
+
+A [Unicode whitespace character](@) is
+any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
+line feed (`U+000A`), form feed (`U+000C`), or carriage return (`U+000D`).
+
+[Unicode whitespace](@) is a sequence of one or more
+[Unicode whitespace characters].
+
+A [tab](@) is `U+0009`.
+
+A [space](@) is `U+0020`.
+
+An [ASCII control character](@) is a character between `U+0000–1F` (both
+inclusive) or `U+007F`.
+
+An [ASCII punctuation character](@)
+is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
+`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), 
+`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040),
+`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), 
+`{`, `|`, `}`, or `~` (U+007B–007E).
+
+A [Unicode punctuation character](@) is an [ASCII
+punctuation character] or anything in
+the general Unicode categories  `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
+
+## Tabs
+
+Tabs in lines are not expanded to [spaces].  However,
+in contexts where spaces help to define block structure,
+tabs behave as if they were replaced by spaces with a tab stop
+of 4 characters.
+
+Thus, for example, a tab can be used instead of four spaces
+in an indented code block.  (Note, however, that internal
+tabs are passed through as literal tabs, not expanded to
+spaces.)
+
+```````````````````````````````` example
+→foo→baz→→bim
+.
+<pre><code>foo→baz→→bim
+</code></pre>
+````````````````````````````````
+
+```````````````````````````````` example
+  →foo→baz→→bim
+.
+<pre><code>foo→baz→→bim
+</code></pre>
+````````````````````````````````
+
+```````````````````````````````` example
+    a→a
+    ὐ→a
+.
+<pre><code>a→a
+ὐ→a
+</code></pre>
+````````````````````````````````
+
+In the following example, a continuation paragraph of a list
+item is indented with a tab; this has exactly the same effect
+as indentation with four spaces would:
+
+```````````````````````````````` example
+  - foo
+
+→bar
+.
+<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+- foo
+
+→→bar
+.
+<ul>
+<li>
+<p>foo</p>
+<pre><code>  bar
+</code></pre>
+</li>
+</ul>
+````````````````````````````````
+
+Normally the `>` that begins a block quote may be followed
+optionally by a space, which is not considered part of the
+content.  In the following case `>` is followed by a tab,
+which is treated as if it were expanded into three spaces.
+Since one of these spaces is considered part of the
+delimiter, `foo` is considered to be indented six spaces
+inside the block quote context, so we get an indented
+code block starting with two spaces.
+
+```````````````````````````````` example
+>→→foo
+.
+<blockquote>
+<pre><code>  foo
+</code></pre>
+</blockquote>
+````````````````````````````````
+
+```````````````````````````````` example
+-→→foo
+.
+<ul>
+<li>
+<pre><code>  foo
+</code></pre>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+    foo
+→bar
+.
+<pre><code>foo
+bar
+</code></pre>
+````````````````````````````````
+
+```````````````````````````````` example
+ - foo
+   - bar
+→ - baz
+.
+<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>baz</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+#→Foo
+.
+<h1>Foo</h1>
+````````````````````````````````
+
+```````````````````````````````` example
+*→*→*→
+.
+<hr />
+````````````````````````````````
+
+
+## Insecure characters
+
+For security reasons, the Unicode character `U+0000` must be replaced
+with the REPLACEMENT CHARACTER (`U+FFFD`).
+
+
+## Backslash escapes
+
+Any ASCII punctuation character may be backslash-escaped:
+
+```````````````````````````````` example
+\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
+.
+<p>!&quot;#$%&amp;'()*+,-./:;&lt;=&gt;?@[\]^_`{|}~</p>
+````````````````````````````````
+
+
+Backslashes before other characters are treated as literal
+backslashes:
+
+```````````````````````````````` example
+\→\A\a\ \3\φ\«
+.
+<p>\→\A\a\ \3\φ\«</p>
+````````````````````````````````
+
+
+Escaped characters are treated as regular characters and do
+not have their usual Markdown meanings:
+
+```````````````````````````````` example
+\*not emphasized*
+\<br/> not a tag
+\[not a link](/foo)
+\`not code`
+1\. not a list
+\* not a list
+\# not a heading
+\[foo]: /url "not a reference"
+\&ouml; not a character entity
+.
+<p>*not emphasized*
+&lt;br/&gt; not a tag
+[not a link](/foo)
+`not code`
+1. not a list
+* not a list
+# not a heading
+[foo]: /url &quot;not a reference&quot;
+&amp;ouml; not a character entity</p>
+````````````````````````````````
+
+
+If a backslash is itself escaped, the following character is not:
+
+```````````````````````````````` example
+\\*emphasis*
+.
+<p>\<em>emphasis</em></p>
+````````````````````````````````
+
+
+A backslash at the end of the line is a [hard line break]:
+
+```````````````````````````````` example
+foo\
+bar
+.
+<p>foo<br />
+bar</p>
+````````````````````````````````
+
+
+Backslash escapes do not work in code blocks, code spans, autolinks, or
+raw HTML:
+
+```````````````````````````````` example
+`` \[\` ``
+.
+<p><code>\[\`</code></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+    \[\]
+.
+<pre><code>\[\]
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+~~~
+\[\]
+~~~
+.
+<pre><code>\[\]
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<http://example.com?find=\*>
+.
+<p><a href="http://example.com?find=%5C*">http://example.com?find=\*</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<a href="/bar\/)">
+.
+<a href="/bar\/)">
+````````````````````````````````
+
+
+But they work in all other contexts, including URLs and link titles,
+link references, and [info strings] in [fenced code blocks]:
+
+```````````````````````````````` example
+[foo](/bar\* "ti\*tle")
+.
+<p><a href="/bar*" title="ti*tle">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo]
+
+[foo]: /bar\* "ti\*tle"
+.
+<p><a href="/bar*" title="ti*tle">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+``` foo\+bar
+foo
+```
+.
+<pre><code class="language-foo+bar">foo
+</code></pre>
+````````````````````````````````
+
+
+## Entity and numeric character references
+
+Valid HTML entity references and numeric character references
+can be used in place of the corresponding Unicode character,
+with the following exceptions:
+
+- Entity and character references are not recognized in code
+  blocks and code spans.
+
+- Entity and character references cannot stand in place of
+  special characters that define structural elements in
+  CommonMark.  For example, although `&#42;` can be used
+  in place of a literal `*` character, `&#42;` cannot replace
+  `*` in emphasis delimiters, bullet list markers, or thematic
+  breaks.
+
+Conforming CommonMark parsers need not store information about
+whether a particular character was represented in the source
+using a Unicode character or an entity reference.
+
+[Entity references](@) consist of `&` + any of the valid
+HTML5 entity names + `;`. The
+document <https://html.spec.whatwg.org/entities.json>
+is used as an authoritative source for the valid entity
+references and their corresponding code points.
+
+```````````````````````````````` example
+&nbsp; &amp; &copy; &AElig; &Dcaron;
+&frac34; &HilbertSpace; &DifferentialD;
+&ClockwiseContourIntegral; &ngE;
+.
+<p>  &amp; © Æ Ď
+¾ ℋ ⅆ
+∲ ≧̸</p>
+````````````````````````````````
+
+
+[Decimal numeric character
+references](@)
+consist of `&#` + a string of 1--7 Arabic digits + `;`. A
+numeric character reference is parsed as the corresponding
+Unicode character. Invalid Unicode code points will be replaced by
+the REPLACEMENT CHARACTER (`U+FFFD`).  For security reasons,
+the code point `U+0000` will also be replaced by `U+FFFD`.
+
+```````````````````````````````` example
+&#35; &#1234; &#992; &#0;
+.
+<p># Ӓ Ϡ �</p>
+````````````````````````````````
+
+
+[Hexadecimal numeric character
+references](@) consist of `&#` +
+either `X` or `x` + a string of 1--6 hexadecimal digits + `;`.
+They too are parsed as the corresponding Unicode character (this
+time specified with a hexadecimal numeral instead of decimal).
+
+```````````````````````````````` example
+&#X22; &#XD06; &#xcab;
+.
+<p>&quot; ആ ಫ</p>
+````````````````````````````````
+
+
+Here are some nonentities:
+
+```````````````````````````````` example
+&nbsp &x; &#; &#x;
+&#87654321;
+&#abcdef0;
+&ThisIsNotDefined; &hi?;
+.
+<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;
+&amp;#87654321;
+&amp;#abcdef0;
+&amp;ThisIsNotDefined; &amp;hi?;</p>
+````````````````````````````````
+
+
+Although HTML5 does accept some entity references
+without a trailing semicolon (such as `&copy`), these are not
+recognized here, because accepting them would make the grammar too ambiguous:
+
+```````````````````````````````` example
+&copy
+.
+<p>&amp;copy</p>
+````````````````````````````````
+
+
+Strings that are not on the list of HTML5 named entities are not
+recognized as entity references either:
+
+```````````````````````````````` example
+&MadeUpEntity;
+.
+<p>&amp;MadeUpEntity;</p>
+````````````````````````````````
+
+
+Entity and numeric character references are recognized in any
+context besides code spans or code blocks, including
+URLs, [link titles], and [fenced code block][] [info strings]:
+
+```````````````````````````````` example
+<a href="&ouml;&ouml;.html">
+.
+<a href="&ouml;&ouml;.html">
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo](/f&ouml;&ouml; "f&ouml;&ouml;")
+.
+<p><a href="/f%C3%B6%C3%B6" title="föö">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo]
+
+[foo]: /f&ouml;&ouml; "f&ouml;&ouml;"
+.
+<p><a href="/f%C3%B6%C3%B6" title="föö">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+``` f&ouml;&ouml;
+foo
+```
+.
+<pre><code class="language-föö">foo
+</code></pre>
+````````````````````````````````
+
+
+Entity and numeric character references are treated as literal
+text in code spans and code blocks:
+
+```````````````````````````````` example
+`f&ouml;&ouml;`
+.
+<p><code>f&amp;ouml;&amp;ouml;</code></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+    f&ouml;f&ouml;
+.
+<pre><code>f&amp;ouml;f&amp;ouml;
+</code></pre>
+````````````````````````````````
+
+
+Entity and numeric character references cannot be used
+in place of symbols indicating structure in CommonMark
+documents.
+
+```````````````````````````````` example
+&#42;foo&#42;
+*foo*
+.
+<p>*foo*
+<em>foo</em></p>
+````````````````````````````````
+
+```````````````````````````````` example
+&#42; foo
+
+* foo
+.
+<p>* foo</p>
+<ul>
+<li>foo</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+foo&#10;&#10;bar
+.
+<p>foo
+
+bar</p>
+````````````````````````````````
+
+```````````````````````````````` example
+&#9;foo
+.
+<p>→foo</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[a](url &quot;tit&quot;)
+.
+<p>[a](url &quot;tit&quot;)</p>
+````````````````````````````````
+
+
+
+# Blocks and inlines
+
+We can think of a document as a sequence of
+[blocks](@)---structural elements like paragraphs, block
+quotations, lists, headings, rules, and code blocks.  Some blocks (like
+block quotes and list items) contain other blocks; others (like
+headings and paragraphs) contain [inline](@) content---text,
+links, emphasized text, images, code spans, and so on.
+
+## Precedence
+
+Indicators of block structure always take precedence over indicators
+of inline structure.  So, for example, the following is a list with
+two items, not a list with one item containing a code span:
+
+```````````````````````````````` example
+- `one
+- two`
+.
+<ul>
+<li>`one</li>
+<li>two`</li>
+</ul>
+````````````````````````````````
+
+
+This means that parsing can proceed in two steps:  first, the block
+structure of the document can be discerned; second, text lines inside
+paragraphs, headings, and other block constructs can be parsed for inline
+structure.  The second step requires information about link reference
+definitions that will be available only at the end of the first
+step.  Note that the first step requires processing lines in sequence,
+but the second can be parallelized, since the inline parsing of
+one block element does not affect the inline parsing of any other.
+
+## Container blocks and leaf blocks
+
+We can divide blocks into two types:
+[container blocks](#container-blocks),
+which can contain other blocks, and [leaf blocks](#leaf-blocks),
+which cannot.
+
+# Leaf blocks
+
+This section describes the different kinds of leaf block that make up a
+Markdown document.
+
+## Thematic breaks
+
+A line consisting of optionally up to three spaces of indentation, followed by a
+sequence of three or more matching `-`, `_`, or `*` characters, each followed
+optionally by any number of spaces or tabs, forms a
+[thematic break](@).
+
+```````````````````````````````` example
+***
+---
+___
+.
+<hr />
+<hr />
+<hr />
+````````````````````````````````
+
+
+Wrong characters:
+
+```````````````````````````````` example
++++
+.
+<p>+++</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+===
+.
+<p>===</p>
+````````````````````````````````
+
+
+Not enough characters:
+
+```````````````````````````````` example
+--
+**
+__
+.
+<p>--
+**
+__</p>
+````````````````````````````````
+
+
+Up to three spaces of indentation are allowed:
+
+```````````````````````````````` example
+ ***
+  ***
+   ***
+.
+<hr />
+<hr />
+<hr />
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+    ***
+.
+<pre><code>***
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+Foo
+    ***
+.
+<p>Foo
+***</p>
+````````````````````````````````
+
+
+More than three characters may be used:
+
+```````````````````````````````` example
+_____________________________________
+.
+<hr />
+````````````````````````````````
+
+
+Spaces and tabs are allowed between the characters:
+
+```````````````````````````````` example
+ - - -
+.
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+ **  * ** * ** * **
+.
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+-     -      -      -
+.
+<hr />
+````````````````````````````````
+
+
+Spaces and tabs are allowed at the end:
+
+```````````````````````````````` example
+- - - -    
+.
+<hr />
+````````````````````````````````
+
+
+However, no other characters may occur in the line:
+
+```````````````````````````````` example
+_ _ _ _ a
+
+a------
+
+---a---
+.
+<p>_ _ _ _ a</p>
+<p>a------</p>
+<p>---a---</p>
+````````````````````````````````
+
+
+It is required that all of the characters other than spaces or tabs be the same.
+So, this is not a thematic break:
+
+```````````````````````````````` example
+ *-*
+.
+<p><em>-</em></p>
+````````````````````````````````
+
+
+Thematic breaks do not need blank lines before or after:
+
+```````````````````````````````` example
+- foo
+***
+- bar
+.
+<ul>
+<li>foo</li>
+</ul>
+<hr />
+<ul>
+<li>bar</li>
+</ul>
+````````````````````````````````
+
+
+Thematic breaks can interrupt a paragraph:
+
+```````````````````````````````` example
+Foo
+***
+bar
+.
+<p>Foo</p>
+<hr />
+<p>bar</p>
+````````````````````````````````
+
+
+If a line of dashes that meets the above conditions for being a
+thematic break could also be interpreted as the underline of a [setext
+heading], the interpretation as a
+[setext heading] takes precedence. Thus, for example,
+this is a setext heading, not a paragraph followed by a thematic break:
+
+```````````````````````````````` example
+Foo
+---
+bar
+.
+<h2>Foo</h2>
+<p>bar</p>
+````````````````````````````````
+
+
+When both a thematic break and a list item are possible
+interpretations of a line, the thematic break takes precedence:
+
+```````````````````````````````` example
+* Foo
+* * *
+* Bar
+.
+<ul>
+<li>Foo</li>
+</ul>
+<hr />
+<ul>
+<li>Bar</li>
+</ul>
+````````````````````````````````
+
+
+If you want a thematic break in a list item, use a different bullet:
+
+```````````````````````````````` example
+- Foo
+- * * *
+.
+<ul>
+<li>Foo</li>
+<li>
+<hr />
+</li>
+</ul>
+````````````````````````````````
+
+
+## ATX headings
+
+An [ATX heading](@)
+consists of a string of characters, parsed as inline content, between an
+opening sequence of 1--6 unescaped `#` characters and an optional
+closing sequence of any number of unescaped `#` characters.
+The opening sequence of `#` characters must be followed by spaces or tabs, or
+by the end of line. The optional closing sequence of `#`s must be preceded by
+spaces or tabs and may be followed by spaces or tabs only.  The opening
+`#` character may be preceded by up to three spaces of indentation.  The raw
+contents of the heading are stripped of leading and trailing space or tabs
+before being parsed as inline content.  The heading level is equal to the number
+of `#` characters in the opening sequence.
+
+Simple headings:
+
+```````````````````````````````` example
+# foo
+## foo
+### foo
+#### foo
+##### foo
+###### foo
+.
+<h1>foo</h1>
+<h2>foo</h2>
+<h3>foo</h3>
+<h4>foo</h4>
+<h5>foo</h5>
+<h6>foo</h6>
+````````````````````````````````
+
+
+More than six `#` characters is not a heading:
+
+```````````````````````````````` example
+####### foo
+.
+<p>####### foo</p>
+````````````````````````````````
+
+
+At least one space or tab is required between the `#` characters and the
+heading's contents, unless the heading is empty.  Note that many
+implementations currently do not require the space.  However, the
+space was required by the
+[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
+and it helps prevent things like the following from being parsed as
+headings:
+
+```````````````````````````````` example
+#5 bolt
+
+#hashtag
+.
+<p>#5 bolt</p>
+<p>#hashtag</p>
+````````````````````````````````
+
+
+This is not a heading, because the first `#` is escaped:
+
+```````````````````````````````` example
+\## foo
+.
+<p>## foo</p>
+````````````````````````````````
+
+
+Contents are parsed as inlines:
+
+```````````````````````````````` example
+# foo *bar* \*baz\*
+.
+<h1>foo <em>bar</em> *baz*</h1>
+````````````````````````````````
+
+
+Leading and trailing spaces or tabs are ignored in parsing inline content:
+
+```````````````````````````````` example
+#                  foo                     
+.
+<h1>foo</h1>
+````````````````````````````````
+
+
+Up to three spaces of indentation are allowed:
+
+```````````````````````````````` example
+ ### foo
+  ## foo
+   # foo
+.
+<h3>foo</h3>
+<h2>foo</h2>
+<h1>foo</h1>
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+    # foo
+.
+<pre><code># foo
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo
+    # bar
+.
+<p>foo
+# bar</p>
+````````````````````````````````
+
+
+A closing sequence of `#` characters is optional:
+
+```````````````````````````````` example
+## foo ##
+  ###   bar    ###
+.
+<h2>foo</h2>
+<h3>bar</h3>
+````````````````````````````````
+
+
+It need not be the same length as the opening sequence:
+
+```````````````````````````````` example
+# foo ##################################
+##### foo ##
+.
+<h1>foo</h1>
+<h5>foo</h5>
+````````````````````````````````
+
+
+Spaces or tabs are allowed after the closing sequence:
+
+```````````````````````````````` example
+### foo ###     
+.
+<h3>foo</h3>
+````````````````````````````````
+
+
+A sequence of `#` characters with anything but spaces or tabs following it
+is not a closing sequence, but counts as part of the contents of the
+heading:
+
+```````````````````````````````` example
+### foo ### b
+.
+<h3>foo ### b</h3>
+````````````````````````````````
+
+
+The closing sequence must be preceded by a space or tab:
+
+```````````````````````````````` example
+# foo#
+.
+<h1>foo#</h1>
+````````````````````````````````
+
+
+Backslash-escaped `#` characters do not count as part
+of the closing sequence:
+
+```````````````````````````````` example
+### foo \###
+## foo #\##
+# foo \#
+.
+<h3>foo ###</h3>
+<h2>foo ###</h2>
+<h1>foo #</h1>
+````````````````````````````````
+
+
+ATX headings need not be separated from surrounding content by blank
+lines, and they can interrupt paragraphs:
+
+```````````````````````````````` example
+****
+## foo
+****
+.
+<hr />
+<h2>foo</h2>
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+Foo bar
+# baz
+Bar foo
+.
+<p>Foo bar</p>
+<h1>baz</h1>
+<p>Bar foo</p>
+````````````````````````````````
+
+
+ATX headings can be empty:
+
+```````````````````````````````` example
+## 
+#
+### ###
+.
+<h2></h2>
+<h1></h1>
+<h3></h3>
+````````````````````````````````
+
+
+## Setext headings
+
+A [setext heading](@) consists of one or more
+lines of text, not interrupted by a blank line, of which the first line does not
+have more than 3 spaces of indentation, followed by
+a [setext heading underline].  The lines of text must be such
+that, were they not followed by the setext heading underline,
+they would be interpreted as a paragraph:  they cannot be
+interpretable as a [code fence], [ATX heading][ATX headings],
+[block quote][block quotes], [thematic break][thematic breaks],
+[list item][list items], or [HTML block][HTML blocks].
+
+A [setext heading underline](@) is a sequence of
+`=` characters or a sequence of `-` characters, with no more than 3
+spaces of indentation and any number of trailing spaces or tabs.  If a line
+containing a single `-` can be interpreted as an
+empty [list item][list items], it should be interpreted this way
+and not as a [setext heading underline].
+
+The heading is a level 1 heading if `=` characters are used in
+the [setext heading underline], and a level 2 heading if `-`
+characters are used.  The contents of the heading are the result
+of parsing the preceding lines of text as CommonMark inline
+content.
+
+In general, a setext heading need not be preceded or followed by a
+blank line.  However, it cannot interrupt a paragraph, so when a
+setext heading comes after a paragraph, a blank line is needed between
+them.
+
+Simple examples:
+
+```````````````````````````````` example
+Foo *bar*
+=========
+
+Foo *bar*
+---------
+.
+<h1>Foo <em>bar</em></h1>
+<h2>Foo <em>bar</em></h2>
+````````````````````````````````
+
+
+The content of the heading may span more than one line:
+
+```````````````````````````````` example
+Foo *bar
+baz*
+====
+.
+<h1>Foo <em>bar
+baz</em></h1>
+````````````````````````````````
+
+The contents are the result of parsing the heading's raw
+content as inlines.  The heading's raw content is formed by
+concatenating the lines and removing initial and final
+spaces or tabs.
+
+```````````````````````````````` example
+  Foo *bar
+baz*→
+====
+.
+<h1>Foo <em>bar
+baz</em></h1>
+````````````````````````````````
+
+
+The underlining can be any length:
+
+```````````````````````````````` example
+Foo
+-------------------------
+
+Foo
+=
+.
+<h2>Foo</h2>
+<h1>Foo</h1>
+````````````````````````````````
+
+
+The heading content can be preceded by up to three spaces of indentation, and
+need not line up with the underlining:
+
+```````````````````````````````` example
+   Foo
+---
+
+  Foo
+-----
+
+  Foo
+  ===
+.
+<h2>Foo</h2>
+<h2>Foo</h2>
+<h1>Foo</h1>
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+    Foo
+    ---
+
+    Foo
+---
+.
+<pre><code>Foo
+---
+
+Foo
+</code></pre>
+<hr />
+````````````````````````````````
+
+
+The setext heading underline can be preceded by up to three spaces of
+indentation, and may have trailing spaces or tabs:
+
+```````````````````````````````` example
+Foo
+   ----      
+.
+<h2>Foo</h2>
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+Foo
+    ---
+.
+<p>Foo
+---</p>
+````````````````````````````````
+
+
+The setext heading underline cannot contain internal spaces or tabs:
+
+```````````````````````````````` example
+Foo
+= =
+
+Foo
+--- -
+.
+<p>Foo
+= =</p>
+<p>Foo</p>
+<hr />
+````````````````````````````````
+
+
+Trailing spaces or tabs in the content line do not cause a hard line break:
+
+```````````````````````````````` example
+Foo  
+-----
+.
+<h2>Foo</h2>
+````````````````````````````````
+
+
+Nor does a backslash at the end:
+
+```````````````````````````````` example
+Foo\
+----
+.
+<h2>Foo\</h2>
+````````````````````````````````
+
+
+Since indicators of block structure take precedence over
+indicators of inline structure, the following are setext headings:
+
+```````````````````````````````` example
+`Foo
+----
+`
+
+<a title="a lot
+---
+of dashes"/>
+.
+<h2>`Foo</h2>
+<p>`</p>
+<h2>&lt;a title=&quot;a lot</h2>
+<p>of dashes&quot;/&gt;</p>
+````````````````````````````````
+
+
+The setext heading underline cannot be a [lazy continuation
+line] in a list item or block quote:
+
+```````````````````````````````` example
+> Foo
+---
+.
+<blockquote>
+<p>Foo</p>
+</blockquote>
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+> foo
+bar
+===
+.
+<blockquote>
+<p>foo
+bar
+===</p>
+</blockquote>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- Foo
+---
+.
+<ul>
+<li>Foo</li>
+</ul>
+<hr />
+````````````````````````````````
+
+
+A blank line is needed between a paragraph and a following
+setext heading, since otherwise the paragraph becomes part
+of the heading's content:
+
+```````````````````````````````` example
+Foo
+Bar
+---
+.
+<h2>Foo
+Bar</h2>
+````````````````````````````````
+
+
+But in general a blank line is not required before or after
+setext headings:
+
+```````````````````````````````` example
+---
+Foo
+---
+Bar
+---
+Baz
+.
+<hr />
+<h2>Foo</h2>
+<h2>Bar</h2>
+<p>Baz</p>
+````````````````````````````````
+
+
+Setext headings cannot be empty:
+
+```````````````````````````````` example
+
+====
+.
+<p>====</p>
+````````````````````````````````
+
+
+Setext heading text lines must not be interpretable as block
+constructs other than paragraphs.  So, the line of dashes
+in these examples gets interpreted as a thematic break:
+
+```````````````````````````````` example
+---
+---
+.
+<hr />
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+- foo
+-----
+.
+<ul>
+<li>foo</li>
+</ul>
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+    foo
+---
+.
+<pre><code>foo
+</code></pre>
+<hr />
+````````````````````````````````
+
+
+```````````````````````````````` example
+> foo
+-----
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+<hr />
+````````````````````````````````
+
+
+If you want a heading with `> foo` as its literal text, you can
+use backslash escapes:
+
+```````````````````````````````` example
+\> foo
+------
+.
+<h2>&gt; foo</h2>
+````````````````````````````````
+
+
+**Compatibility note:**  Most existing Markdown implementations
+do not allow the text of setext headings to span multiple lines.
+But there is no consensus about how to interpret
+
+``` markdown
+Foo
+bar
+---
+baz
+```
+
+One can find four different interpretations:
+
+1. paragraph "Foo", heading "bar", paragraph "baz"
+2. paragraph "Foo bar", thematic break, paragraph "baz"
+3. paragraph "Foo bar --- baz"
+4. heading "Foo bar", paragraph "baz"
+
+We find interpretation 4 most natural, and interpretation 4
+increases the expressive power of CommonMark, by allowing
+multiline headings.  Authors who want interpretation 1 can
+put a blank line after the first paragraph:
+
+```````````````````````````````` example
+Foo
+
+bar
+---
+baz
+.
+<p>Foo</p>
+<h2>bar</h2>
+<p>baz</p>
+````````````````````````````````
+
+
+Authors who want interpretation 2 can put blank lines around
+the thematic break,
+
+```````````````````````````````` example
+Foo
+bar
+
+---
+
+baz
+.
+<p>Foo
+bar</p>
+<hr />
+<p>baz</p>
+````````````````````````````````
+
+
+or use a thematic break that cannot count as a [setext heading
+underline], such as
+
+```````````````````````````````` example
+Foo
+bar
+* * *
+baz
+.
+<p>Foo
+bar</p>
+<hr />
+<p>baz</p>
+````````````````````````````````
+
+
+Authors who want interpretation 3 can use backslash escapes:
+
+```````````````````````````````` example
+Foo
+bar
+\---
+baz
+.
+<p>Foo
+bar
+---
+baz</p>
+````````````````````````````````
+
+
+## Indented code blocks
+
+An [indented code block](@) is composed of one or more
+[indented chunks] separated by blank lines.
+An [indented chunk](@) is a sequence of non-blank lines,
+each preceded by four or more spaces of indentation. The contents of the code
+block are the literal contents of the lines, including trailing
+[line endings], minus four spaces of indentation.
+An indented code block has no [info string].
+
+An indented code block cannot interrupt a paragraph, so there must be
+a blank line between a paragraph and a following indented code block.
+(A blank line is not needed, however, between a code block and a following
+paragraph.)
+
+```````````````````````````````` example
+    a simple
+      indented code block
+.
+<pre><code>a simple
+  indented code block
+</code></pre>
+````````````````````````````````
+
+
+If there is any ambiguity between an interpretation of indentation
+as a code block and as indicating that material belongs to a [list
+item][list items], the list item interpretation takes precedence:
+
+```````````````````````````````` example
+  - foo
+
+    bar
+.
+<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1.  foo
+
+    - bar
+.
+<ol>
+<li>
+<p>foo</p>
+<ul>
+<li>bar</li>
+</ul>
+</li>
+</ol>
+````````````````````````````````
+
+
+
+The contents of a code block are literal text, and do not get parsed
+as Markdown:
+
+```````````````````````````````` example
+    <a/>
+    *hi*
+
+    - one
+.
+<pre><code>&lt;a/&gt;
+*hi*
+
+- one
+</code></pre>
+````````````````````````````````
+
+
+Here we have three chunks separated by blank lines:
+
+```````````````````````````````` example
+    chunk1
+
+    chunk2
+  
+ 
+ 
+    chunk3
+.
+<pre><code>chunk1
+
+chunk2
+
+
+
+chunk3
+</code></pre>
+````````````````````````````````
+
+
+Any initial spaces or tabs beyond four spaces of indentation will be included in
+the content, even in interior blank lines:
+
+```````````````````````````````` example
+    chunk1
+      
+      chunk2
+.
+<pre><code>chunk1
+  
+  chunk2
+</code></pre>
+````````````````````````````````
+
+
+An indented code block cannot interrupt a paragraph.  (This
+allows hanging indents and the like.)
+
+```````````````````````````````` example
+Foo
+    bar
+
+.
+<p>Foo
+bar</p>
+````````````````````````````````
+
+
+However, any non-blank line with fewer than four spaces of indentation ends
+the code block immediately.  So a paragraph may occur immediately
+after indented code:
+
+```````````````````````````````` example
+    foo
+bar
+.
+<pre><code>foo
+</code></pre>
+<p>bar</p>
+````````````````````````````````
+
+
+And indented code can occur immediately before and after other kinds of
+blocks:
+
+```````````````````````````````` example
+# Heading
+    foo
+Heading
+------
+    foo
+----
+.
+<h1>Heading</h1>
+<pre><code>foo
+</code></pre>
+<h2>Heading</h2>
+<pre><code>foo
+</code></pre>
+<hr />
+````````````````````````````````
+
+
+The first line can be preceded by more than four spaces of indentation:
+
+```````````````````````````````` example
+        foo
+    bar
+.
+<pre><code>    foo
+bar
+</code></pre>
+````````````````````````````````
+
+
+Blank lines preceding or following an indented code block
+are not included in it:
+
+```````````````````````````````` example
+
+    
+    foo
+    
+
+.
+<pre><code>foo
+</code></pre>
+````````````````````````````````
+
+
+Trailing spaces or tabs are included in the code block's content:
+
+```````````````````````````````` example
+    foo  
+.
+<pre><code>foo  
+</code></pre>
+````````````````````````````````
+
+
+
+## Fenced code blocks
+
+A [code fence](@) is a sequence
+of at least three consecutive backtick characters (`` ` ``) or
+tildes (`~`).  (Tildes and backticks cannot be mixed.)
+A [fenced code block](@)
+begins with a code fence, preceded by up to three spaces of indentation.
+
+The line with the opening code fence may optionally contain some text
+following the code fence; this is trimmed of leading and trailing
+spaces or tabs and called the [info string](@). If the [info string] comes
+after a backtick fence, it may not contain any backtick
+characters.  (The reason for this restriction is that otherwise
+some inline code would be incorrectly interpreted as the
+beginning of a fenced code block.)
+
+The content of the code block consists of all subsequent lines, until
+a closing [code fence] of the same type as the code block
+began with (backticks or tildes), and with at least as many backticks
+or tildes as the opening code fence.  If the leading code fence is
+preceded by N spaces of indentation, then up to N spaces of indentation are
+removed from each line of the content (if present).  (If a content line is not
+indented, it is preserved unchanged.  If it is indented N spaces or fewer, all
+of the indentation is removed.)
+
+The closing code fence may be preceded by up to three spaces of indentation, and
+may be followed only by spaces or tabs, which are ignored.  If the end of the
+containing block (or document) is reached and no closing code fence
+has been found, the code block contains all of the lines after the
+opening code fence until the end of the containing block (or
+document).  (An alternative spec would require backtracking in the
+event that a closing code fence is not found.  But this makes parsing
+much less efficient, and there seems to be no real down side to the
+behavior described here.)
+
+A fenced code block may interrupt a paragraph, and does not require
+a blank line either before or after.
+
+The content of a fenced code block is treated as literal text, not parsed
+as inlines.  The first word of the [info string] is typically used to
+specify the language of the code sample, and rendered in the `class`
+attribute of the `code` tag.  However, this spec does not mandate any
+particular treatment of the [info string].
+
+Here is a simple example with backticks:
+
+```````````````````````````````` example
+```
+<
+ >
+```
+.
+<pre><code>&lt;
+ &gt;
+</code></pre>
+````````````````````````````````
+
+
+With tildes:
+
+```````````````````````````````` example
+~~~
+<
+ >
+~~~
+.
+<pre><code>&lt;
+ &gt;
+</code></pre>
+````````````````````````````````
+
+Fewer than three backticks is not enough:
+
+```````````````````````````````` example
+``
+foo
+``
+.
+<p><code>foo</code></p>
+````````````````````````````````
+
+The closing code fence must use the same character as the opening
+fence:
+
+```````````````````````````````` example
+```
+aaa
+~~~
+```
+.
+<pre><code>aaa
+~~~
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+~~~
+aaa
+```
+~~~
+.
+<pre><code>aaa
+```
+</code></pre>
+````````````````````````````````
+
+
+The closing code fence must be at least as long as the opening fence:
+
+```````````````````````````````` example
+````
+aaa
+```
+``````
+.
+<pre><code>aaa
+```
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+~~~~
+aaa
+~~~
+~~~~
+.
+<pre><code>aaa
+~~~
+</code></pre>
+````````````````````````````````
+
+
+Unclosed code blocks are closed by the end of the document
+(or the enclosing [block quote][block quotes] or [list item][list items]):
+
+```````````````````````````````` example
+```
+.
+<pre><code></code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+`````
+
+```
+aaa
+.
+<pre><code>
+```
+aaa
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+> ```
+> aaa
+
+bbb
+.
+<blockquote>
+<pre><code>aaa
+</code></pre>
+</blockquote>
+<p>bbb</p>
+````````````````````````````````
+
+
+A code block can have all empty lines as its content:
+
+```````````````````````````````` example
+```
+
+  
+```
+.
+<pre><code>
+  
+</code></pre>
+````````````````````````````````
+
+
+A code block can be empty:
+
+```````````````````````````````` example
+```
+```
+.
+<pre><code></code></pre>
+````````````````````````````````
+
+
+Fences can be indented.  If the opening fence is indented,
+content lines will have equivalent opening indentation removed,
+if present:
+
+```````````````````````````````` example
+ ```
+ aaa
+aaa
+```
+.
+<pre><code>aaa
+aaa
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+  ```
+aaa
+  aaa
+aaa
+  ```
+.
+<pre><code>aaa
+aaa
+aaa
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+   ```
+   aaa
+    aaa
+  aaa
+   ```
+.
+<pre><code>aaa
+ aaa
+aaa
+</code></pre>
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+    ```
+    aaa
+    ```
+.
+<pre><code>```
+aaa
+```
+</code></pre>
+````````````````````````````````
+
+
+Closing fences may be preceded by up to three spaces of indentation, and their
+indentation need not match that of the opening fence:
+
+```````````````````````````````` example
+```
+aaa
+  ```
+.
+<pre><code>aaa
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+   ```
+aaa
+  ```
+.
+<pre><code>aaa
+</code></pre>
+````````````````````````````````
+
+
+This is not a closing fence, because it is indented 4 spaces:
+
+```````````````````````````````` example
+```
+aaa
+    ```
+.
+<pre><code>aaa
+    ```
+</code></pre>
+````````````````````````````````
+
+
+
+Code fences (opening and closing) cannot contain internal spaces or tabs:
+
+```````````````````````````````` example
+``` ```
+aaa
+.
+<p><code> </code>
+aaa</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+~~~~~~
+aaa
+~~~ ~~
+.
+<pre><code>aaa
+~~~ ~~
+</code></pre>
+````````````````````````````````
+
+
+Fenced code blocks can interrupt paragraphs, and can be followed
+directly by paragraphs, without a blank line between:
+
+```````````````````````````````` example
+foo
+```
+bar
+```
+baz
+.
+<p>foo</p>
+<pre><code>bar
+</code></pre>
+<p>baz</p>
+````````````````````````````````
+
+
+Other blocks can also occur before and after fenced code blocks
+without an intervening blank line:
+
+```````````````````````````````` example
+foo
+---
+~~~
+bar
+~~~
+# baz
+.
+<h2>foo</h2>
+<pre><code>bar
+</code></pre>
+<h1>baz</h1>
+````````````````````````````````
+
+
+An [info string] can be provided after the opening code fence.
+Although this spec doesn't mandate any particular treatment of
+the info string, the first word is typically used to specify
+the language of the code block. In HTML output, the language is
+normally indicated by adding a class to the `code` element consisting
+of `language-` followed by the language name.
+
+```````````````````````````````` example
+```ruby
+def foo(x)
+  return 3
+end
+```
+.
+<pre><code class="language-ruby">def foo(x)
+  return 3
+end
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+~~~~    ruby startline=3 $%@#$
+def foo(x)
+  return 3
+end
+~~~~~~~
+.
+<pre><code class="language-ruby">def foo(x)
+  return 3
+end
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+````;
+````
+.
+<pre><code class="language-;"></code></pre>
+````````````````````````````````
+
+
+[Info strings] for backtick code blocks cannot contain backticks:
+
+```````````````````````````````` example
+``` aa ```
+foo
+.
+<p><code>aa</code>
+foo</p>
+````````````````````````````````
+
+
+[Info strings] for tilde code blocks can contain backticks and tildes:
+
+```````````````````````````````` example
+~~~ aa ``` ~~~
+foo
+~~~
+.
+<pre><code class="language-aa">foo
+</code></pre>
+````````````````````````````````
+
+
+Closing code fences cannot have [info strings]:
+
+```````````````````````````````` example
+```
+``` aaa
+```
+.
+<pre><code>``` aaa
+</code></pre>
+````````````````````````````````
+
+
+
+## HTML blocks
+
+An [HTML block](@) is a group of lines that is treated
+as raw HTML (and will not be escaped in HTML output).
+
+There are seven kinds of [HTML block], which can be defined by their
+start and end conditions.  The block begins with a line that meets a
+[start condition](@) (after up to three optional spaces of indentation).
+It ends with the first subsequent line that meets a matching
+[end condition](@), or the last line of the document, or the last line of
+the [container block](#container-blocks) containing the current HTML
+block, if no line is encountered that meets the [end condition].  If
+the first line meets both the [start condition] and the [end
+condition], the block will contain just that line.
+
+1.  **Start condition:**  line begins with the string `<pre`,
+`<script`, `<style`, or `<textarea` (case-insensitive), followed by a space,
+a tab, the string `>`, or the end of the line.\
+**End condition:**  line contains an end tag
+`</pre>`, `</script>`, `</style>`, or `</textarea>` (case-insensitive; it
+need not match the start tag).
+
+2.  **Start condition:** line begins with the string `<!--`.\
+**End condition:**  line contains the string `-->`.
+
+3.  **Start condition:** line begins with the string `<?`.\
+**End condition:** line contains the string `?>`.
+
+4.  **Start condition:** line begins with the string `<!`
+followed by an ASCII letter.\
+**End condition:** line contains the character `>`.
+
+5.  **Start condition:**  line begins with the string
+`<![CDATA[`.\
+**End condition:** line contains the string `]]>`.
+
+6.  **Start condition:** line begins with the string `<` or `</`
+followed by one of the strings (case-insensitive) `address`,
+`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
+`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
+`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
+`footer`, `form`, `frame`, `frameset`,
+`h1`, `h2`, `h3`, `h4`, `h5`, `h6`, `head`, `header`, `hr`,
+`html`, `iframe`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`,
+`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`,
+`section`, `source`, `summary`, `table`, `tbody`, `td`,
+`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
+by a space, a tab, the end of the line, the string `>`, or
+the string `/>`.\
+**End condition:** line is followed by a [blank line].
+
+7.  **Start condition:**  line begins with a complete [open tag]
+(with any [tag name] other than `pre`, `script`,
+`style`, or `textarea`) or a complete [closing tag],
+followed by zero or more spaces and tabs, followed by the end of the line.\
+**End condition:** line is followed by a [blank line].
+
+HTML blocks continue until they are closed by their appropriate
+[end condition], or the last line of the document or other [container
+block](#container-blocks).  This means any HTML **within an HTML
+block** that might otherwise be recognised as a start condition will
+be ignored by the parser and passed through as-is, without changing
+the parser's state.
+
+For instance, `<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
+
+```````````````````````````````` example
+<table><tr><td>
+<pre>
+**Hello**,
+
+_world_.
+</pre>
+</td></tr></table>
+.
+<table><tr><td>
+<pre>
+**Hello**,
+<p><em>world</em>.
+</pre></p>
+</td></tr></table>
+````````````````````````````````
+
+In this case, the HTML block is terminated by the blank line — the `**Hello**`
+text remains verbatim — and regular parsing resumes, with a paragraph,
+emphasised `world` and inline and block HTML following.
+
+All types of [HTML blocks] except type 7 may interrupt
+a paragraph.  Blocks of type 7 may not interrupt a paragraph.
+(This restriction is intended to prevent unwanted interpretation
+of long tags inside a wrapped paragraph as starting HTML blocks.)
+
+Some simple examples follow.  Here are some basic HTML blocks
+of type 6:
+
+```````````````````````````````` example
+<table>
+  <tr>
+    <td>
+           hi
+    </td>
+  </tr>
+</table>
+
+okay.
+.
+<table>
+  <tr>
+    <td>
+           hi
+    </td>
+  </tr>
+</table>
+<p>okay.</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+ <div>
+  *hello*
+         <foo><a>
+.
+ <div>
+  *hello*
+         <foo><a>
+````````````````````````````````
+
+
+A block can also start with a closing tag:
+
+```````````````````````````````` example
+</div>
+*foo*
+.
+</div>
+*foo*
+````````````````````````````````
+
+
+Here we have two HTML blocks with a Markdown paragraph between them:
+
+```````````````````````````````` example
+<DIV CLASS="foo">
+
+*Markdown*
+
+</DIV>
+.
+<DIV CLASS="foo">
+<p><em>Markdown</em></p>
+</DIV>
+````````````````````````````````
+
+
+The tag on the first line can be partial, as long
+as it is split where there would be whitespace:
+
+```````````````````````````````` example
+<div id="foo"
+  class="bar">
+</div>
+.
+<div id="foo"
+  class="bar">
+</div>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<div id="foo" class="bar
+  baz">
+</div>
+.
+<div id="foo" class="bar
+  baz">
+</div>
+````````````````````````````````
+
+
+An open tag need not be closed:
+```````````````````````````````` example
+<div>
+*foo*
+
+*bar*
+.
+<div>
+*foo*
+<p><em>bar</em></p>
+````````````````````````````````
+
+
+
+A partial tag need not even be completed (garbage
+in, garbage out):
+
+```````````````````````````````` example
+<div id="foo"
+*hi*
+.
+<div id="foo"
+*hi*
+````````````````````````````````
+
+
+```````````````````````````````` example
+<div class
+foo
+.
+<div class
+foo
+````````````````````````````````
+
+
+The initial tag doesn't even need to be a valid
+tag, as long as it starts like one:
+
+```````````````````````````````` example
+<div *???-&&&-<---
+*foo*
+.
+<div *???-&&&-<---
+*foo*
+````````````````````````````````
+
+
+In type 6 blocks, the initial tag need not be on a line by
+itself:
+
+```````````````````````````````` example
+<div><a href="bar">*foo*</a></div>
+.
+<div><a href="bar">*foo*</a></div>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<table><tr><td>
+foo
+</td></tr></table>
+.
+<table><tr><td>
+foo
+</td></tr></table>
+````````````````````````````````
+
+
+Everything until the next blank line or end of document
+gets included in the HTML block.  So, in the following
+example, what looks like a Markdown code block
+is actually part of the HTML block, which continues until a blank
+line or the end of the document is reached:
+
+```````````````````````````````` example
+<div></div>
+``` c
+int x = 33;
+```
+.
+<div></div>
+``` c
+int x = 33;
+```
+````````````````````````````````
+
+
+To start an [HTML block] with a tag that is *not* in the
+list of block-level tags in (6), you must put the tag by
+itself on the first line (and it must be complete):
+
+```````````````````````````````` example
+<a href="foo">
+*bar*
+</a>
+.
+<a href="foo">
+*bar*
+</a>
+````````````````````````````````
+
+
+In type 7 blocks, the [tag name] can be anything:
+
+```````````````````````````````` example
+<Warning>
+*bar*
+</Warning>
+.
+<Warning>
+*bar*
+</Warning>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<i class="foo">
+*bar*
+</i>
+.
+<i class="foo">
+*bar*
+</i>
+````````````````````````````````
+
+
+```````````````````````````````` example
+</ins>
+*bar*
+.
+</ins>
+*bar*
+````````````````````````````````
+
+
+These rules are designed to allow us to work with tags that
+can function as either block-level or inline-level tags.
+The `<del>` tag is a nice example.  We can surround content with
+`<del>` tags in three different ways.  In this case, we get a raw
+HTML block, because the `<del>` tag is on a line by itself:
+
+```````````````````````````````` example
+<del>
+*foo*
+</del>
+.
+<del>
+*foo*
+</del>
+````````````````````````````````
+
+
+In this case, we get a raw HTML block that just includes
+the `<del>` tag (because it ends with the following blank
+line).  So the contents get interpreted as CommonMark:
+
+```````````````````````````````` example
+<del>
+
+*foo*
+
+</del>
+.
+<del>
+<p><em>foo</em></p>
+</del>
+````````````````````````````````
+
+
+Finally, in this case, the `<del>` tags are interpreted
+as [raw HTML] *inside* the CommonMark paragraph.  (Because
+the tag is not on a line by itself, we get inline HTML
+rather than an [HTML block].)
+
+```````````````````````````````` example
+<del>*foo*</del>
+.
+<p><del><em>foo</em></del></p>
+````````````````````````````````
+
+
+HTML tags designed to contain literal content
+(`pre`, `script`, `style`, `textarea`), comments, processing instructions,
+and declarations are treated somewhat differently.
+Instead of ending at the first blank line, these blocks
+end at the first line containing a corresponding end tag.
+As a result, these blocks can contain blank lines:
+
+A pre tag (type 1):
+
+```````````````````````````````` example
+<pre language="haskell"><code>
+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+</code></pre>
+okay
+.
+<pre language="haskell"><code>
+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+</code></pre>
+<p>okay</p>
+````````````````````````````````
+
+
+A script tag (type 1):
+
+```````````````````````````````` example
+<script type="text/javascript">
+// JavaScript example
+
+document.getElementById("demo").innerHTML = "Hello JavaScript!";
+</script>
+okay
+.
+<script type="text/javascript">
+// JavaScript example
+
+document.getElementById("demo").innerHTML = "Hello JavaScript!";
+</script>
+<p>okay</p>
+````````````````````````````````
+
+
+A textarea tag (type 1):
+
+```````````````````````````````` example
+<textarea>
+
+*foo*
+
+_bar_
+
+</textarea>
+.
+<textarea>
+
+*foo*
+
+_bar_
+
+</textarea>
+````````````````````````````````
+
+A style tag (type 1):
+
+```````````````````````````````` example
+<style
+  type="text/css">
+h1 {color:red;}
+
+p {color:blue;}
+</style>
+okay
+.
+<style
+  type="text/css">
+h1 {color:red;}
+
+p {color:blue;}
+</style>
+<p>okay</p>
+````````````````````````````````
+
+
+If there is no matching end tag, the block will end at the
+end of the document (or the enclosing [block quote][block quotes]
+or [list item][list items]):
+
+```````````````````````````````` example
+<style
+  type="text/css">
+
+foo
+.
+<style
+  type="text/css">
+
+foo
+````````````````````````````````
+
+
+```````````````````````````````` example
+> <div>
+> foo
+
+bar
+.
+<blockquote>
+<div>
+foo
+</blockquote>
+<p>bar</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- <div>
+- foo
+.
+<ul>
+<li>
+<div>
+</li>
+<li>foo</li>
+</ul>
+````````````````````````````````
+
+
+The end tag can occur on the same line as the start tag:
+
+```````````````````````````````` example
+<style>p{color:red;}</style>
+*foo*
+.
+<style>p{color:red;}</style>
+<p><em>foo</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<!-- foo -->*bar*
+*baz*
+.
+<!-- foo -->*bar*
+<p><em>baz</em></p>
+````````````````````````````````
+
+
+Note that anything on the last line after the
+end tag will be included in the [HTML block]:
+
+```````````````````````````````` example
+<script>
+foo
+</script>1. *bar*
+.
+<script>
+foo
+</script>1. *bar*
+````````````````````````````````
+
+
+A comment (type 2):
+
+```````````````````````````````` example
+<!-- Foo
+
+bar
+   baz -->
+okay
+.
+<!-- Foo
+
+bar
+   baz -->
+<p>okay</p>
+````````````````````````````````
+
+
+
+A processing instruction (type 3):
+
+```````````````````````````````` example
+<?php
+
+  echo '>';
+
+?>
+okay
+.
+<?php
+
+  echo '>';
+
+?>
+<p>okay</p>
+````````````````````````````````
+
+
+A declaration (type 4):
+
+```````````````````````````````` example
+<!DOCTYPE html>
+.
+<!DOCTYPE html>
+````````````````````````````````
+
+
+CDATA (type 5):
+
+```````````````````````````````` example
+<![CDATA[
+function matchwo(a,b)
+{
+  if (a < b && a < 0) then {
+    return 1;
+
+  } else {
+
+    return 0;
+  }
+}
+]]>
+okay
+.
+<![CDATA[
+function matchwo(a,b)
+{
+  if (a < b && a < 0) then {
+    return 1;
+
+  } else {
+
+    return 0;
+  }
+}
+]]>
+<p>okay</p>
+````````````````````````````````
+
+
+The opening tag can be preceded by up to three spaces of indentation, but not
+four:
+
+```````````````````````````````` example
+  <!-- foo -->
+
+    <!-- foo -->
+.
+  <!-- foo -->
+<pre><code>&lt;!-- foo --&gt;
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+  <div>
+
+    <div>
+.
+  <div>
+<pre><code>&lt;div&gt;
+</code></pre>
+````````````````````````````````
+
+
+An HTML block of types 1--6 can interrupt a paragraph, and need not be
+preceded by a blank line.
+
+```````````````````````````````` example
+Foo
+<div>
+bar
+</div>
+.
+<p>Foo</p>
+<div>
+bar
+</div>
+````````````````````````````````
+
+
+However, a following blank line is needed, except at the end of
+a document, and except for blocks of types 1--5, [above][HTML
+block]:
+
+```````````````````````````````` example
+<div>
+bar
+</div>
+*foo*
+.
+<div>
+bar
+</div>
+*foo*
+````````````````````````````````
+
+
+HTML blocks of type 7 cannot interrupt a paragraph:
+
+```````````````````````````````` example
+Foo
+<a href="bar">
+baz
+.
+<p>Foo
+<a href="bar">
+baz</p>
+````````````````````````````````
+
+
+This rule differs from John Gruber's original Markdown syntax
+specification, which says:
+
+> The only restrictions are that block-level HTML elements —
+> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from
+> surrounding content by blank lines, and the start and end tags of the
+> block should not be indented with spaces or tabs.
+
+In some ways Gruber's rule is more restrictive than the one given
+here:
+
+- It requires that an HTML block be preceded by a blank line.
+- It does not allow the start tag to be indented.
+- It requires a matching end tag, which it also does not allow to
+  be indented.
+
+Most Markdown implementations (including some of Gruber's own) do not
+respect all of these restrictions.
+
+There is one respect, however, in which Gruber's rule is more liberal
+than the one given here, since it allows blank lines to occur inside
+an HTML block.  There are two reasons for disallowing them here.
+First, it removes the need to parse balanced tags, which is
+expensive and can require backtracking from the end of the document
+if no matching end tag is found. Second, it provides a very simple
+and flexible way of including Markdown content inside HTML tags:
+simply separate the Markdown from the HTML using blank lines:
+
+Compare:
+
+```````````````````````````````` example
+<div>
+
+*Emphasized* text.
+
+</div>
+.
+<div>
+<p><em>Emphasized</em> text.</p>
+</div>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<div>
+*Emphasized* text.
+</div>
+.
+<div>
+*Emphasized* text.
+</div>
+````````````````````````````````
+
+
+Some Markdown implementations have adopted a convention of
+interpreting content inside tags as text if the open tag has
+the attribute `markdown=1`.  The rule given above seems a simpler and
+more elegant way of achieving the same expressive power, which is also
+much simpler to parse.
+
+The main potential drawback is that one can no longer paste HTML
+blocks into Markdown documents with 100% reliability.  However,
+*in most cases* this will work fine, because the blank lines in
+HTML are usually followed by HTML block tags.  For example:
+
+```````````````````````````````` example
+<table>
+
+<tr>
+
+<td>
+Hi
+</td>
+
+</tr>
+
+</table>
+.
+<table>
+<tr>
+<td>
+Hi
+</td>
+</tr>
+</table>
+````````````````````````````````
+
+
+There are problems, however, if the inner tags are indented
+*and* separated by blank lines, as then they will be interpreted as
+an indented code block:
+
+```````````````````````````````` example
+<table>
+
+  <tr>
+
+    <td>
+      Hi
+    </td>
+
+  </tr>
+
+</table>
+.
+<table>
+  <tr>
+<pre><code>&lt;td&gt;
+  Hi
+&lt;/td&gt;
+</code></pre>
+  </tr>
+</table>
+````````````````````````````````
+
+
+Fortunately, blank lines are usually not necessary and can be
+deleted.  The exception is inside `<pre>` tags, but as described
+[above][HTML blocks], raw HTML blocks starting with `<pre>`
+*can* contain blank lines.
+
+## Link reference definitions
+
+A [link reference definition](@)
+consists of a [link label], optionally preceded by up to three spaces of
+indentation, followed
+by a colon (`:`), optional spaces or tabs (including up to one
+[line ending]), a [link destination],
+optional spaces or tabs (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by spaces or tabs.
+No further character may occur.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
+
+```````````````````````````````` example
+[foo]: /url "title"
+
+[foo]
+.
+<p><a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+   [foo]: 
+      /url  
+           'the title'  
+
+[foo]
+.
+<p><a href="/url" title="the title">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[Foo*bar\]]:my_(url) 'title (with parens)'
+
+[Foo*bar\]]
+.
+<p><a href="my_(url)" title="title (with parens)">Foo*bar]</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[Foo bar]:
+<my url>
+'title'
+
+[Foo bar]
+.
+<p><a href="my%20url" title="title">Foo bar</a></p>
+````````````````````````````````
+
+
+The title may extend over multiple lines:
+
+```````````````````````````````` example
+[foo]: /url '
+title
+line1
+line2
+'
+
+[foo]
+.
+<p><a href="/url" title="
+title
+line1
+line2
+">foo</a></p>
+````````````````````````````````
+
+
+However, it may not contain a [blank line]:
+
+```````````````````````````````` example
+[foo]: /url 'title
+
+with blank line'
+
+[foo]
+.
+<p>[foo]: /url 'title</p>
+<p>with blank line'</p>
+<p>[foo]</p>
+````````````````````````````````
+
+
+The title may be omitted:
+
+```````````````````````````````` example
+[foo]:
+/url
+
+[foo]
+.
+<p><a href="/url">foo</a></p>
+````````````````````````````````
+
+
+The link destination may not be omitted:
+
+```````````````````````````````` example
+[foo]:
+
+[foo]
+.
+<p>[foo]:</p>
+<p>[foo]</p>
+````````````````````````````````
+
+However, an empty link destination may be specified using
+angle brackets:
+
+```````````````````````````````` example
+[foo]: <>
+
+[foo]
+.
+<p><a href="">foo</a></p>
+````````````````````````````````
+
+The title must be separated from the link destination by
+spaces or tabs:
+
+```````````````````````````````` example
+[foo]: <bar>(baz)
+
+[foo]
+.
+<p>[foo]: <bar>(baz)</p>
+<p>[foo]</p>
+````````````````````````````````
+
+
+Both title and destination can contain backslash escapes
+and literal backslashes:
+
+```````````````````````````````` example
+[foo]: /url\bar\*baz "foo\"bar\baz"
+
+[foo]
+.
+<p><a href="/url%5Cbar*baz" title="foo&quot;bar\baz">foo</a></p>
+````````````````````````````````
+
+
+A link can come before its corresponding definition:
+
+```````````````````````````````` example
+[foo]
+
+[foo]: url
+.
+<p><a href="url">foo</a></p>
+````````````````````````````````
+
+
+If there are several matching definitions, the first one takes
+precedence:
+
+```````````````````````````````` example
+[foo]
+
+[foo]: first
+[foo]: second
+.
+<p><a href="first">foo</a></p>
+````````````````````````````````
+
+
+As noted in the section on [Links], matching of labels is
+case-insensitive (see [matches]).
+
+```````````````````````````````` example
+[FOO]: /url
+
+[Foo]
+.
+<p><a href="/url">Foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[ΑΓΩ]: /φου
+
+[αγω]
+.
+<p><a href="/%CF%86%CE%BF%CF%85">αγω</a></p>
+````````````````````````````````
+
+
+Whether something is a [link reference definition] is
+independent of whether the link reference it defines is
+used in the document.  Thus, for example, the following
+document contains just a link reference definition, and
+no visible content:
+
+```````````````````````````````` example
+[foo]: /url
+.
+````````````````````````````````
+
+
+Here is another one:
+
+```````````````````````````````` example
+[
+foo
+]: /url
+bar
+.
+<p>bar</p>
+````````````````````````````````
+
+
+This is not a link reference definition, because there are
+characters other than spaces or tabs after the title:
+
+```````````````````````````````` example
+[foo]: /url "title" ok
+.
+<p>[foo]: /url &quot;title&quot; ok</p>
+````````````````````````````````
+
+
+This is a link reference definition, but it has no title:
+
+```````````````````````````````` example
+[foo]: /url
+"title" ok
+.
+<p>&quot;title&quot; ok</p>
+````````````````````````````````
+
+
+This is not a link reference definition, because it is indented
+four spaces:
+
+```````````````````````````````` example
+    [foo]: /url "title"
+
+[foo]
+.
+<pre><code>[foo]: /url &quot;title&quot;
+</code></pre>
+<p>[foo]</p>
+````````````````````````````````
+
+
+This is not a link reference definition, because it occurs inside
+a code block:
+
+```````````````````````````````` example
+```
+[foo]: /url
+```
+
+[foo]
+.
+<pre><code>[foo]: /url
+</code></pre>
+<p>[foo]</p>
+````````````````````````````````
+
+
+A [link reference definition] cannot interrupt a paragraph.
+
+```````````````````````````````` example
+Foo
+[bar]: /baz
+
+[bar]
+.
+<p>Foo
+[bar]: /baz</p>
+<p>[bar]</p>
+````````````````````````````````
+
+
+However, it can directly follow other block elements, such as headings
+and thematic breaks, and it need not be followed by a blank line.
+
+```````````````````````````````` example
+# [Foo]
+[foo]: /url
+> bar
+.
+<h1><a href="/url">Foo</a></h1>
+<blockquote>
+<p>bar</p>
+</blockquote>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo]: /url
+bar
+===
+[foo]
+.
+<h1>bar</h1>
+<p><a href="/url">foo</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo]: /url
+===
+[foo]
+.
+<p>===
+<a href="/url">foo</a></p>
+````````````````````````````````
+
+
+Several [link reference definitions]
+can occur one after another, without intervening blank lines.
+
+```````````````````````````````` example
+[foo]: /foo-url "foo"
+[bar]: /bar-url
+  "bar"
+[baz]: /baz-url
+
+[foo],
+[bar],
+[baz]
+.
+<p><a href="/foo-url" title="foo">foo</a>,
+<a href="/bar-url" title="bar">bar</a>,
+<a href="/baz-url">baz</a></p>
+````````````````````````````````
+
+
+[Link reference definitions] can occur
+inside block containers, like lists and block quotations.  They
+affect the entire document, not just the container in which they
+are defined:
+
+```````````````````````````````` example
+[foo]
+
+> [foo]: /url
+.
+<p><a href="/url">foo</a></p>
+<blockquote>
+</blockquote>
+````````````````````````````````
+
+
+## Paragraphs
+
+A sequence of non-blank lines that cannot be interpreted as other
+kinds of blocks forms a [paragraph](@).
+The contents of the paragraph are the result of parsing the
+paragraph's raw content as inlines.  The paragraph's raw content
+is formed by concatenating the lines and removing initial and final
+spaces or tabs.
+
+A simple example with two paragraphs:
+
+```````````````````````````````` example
+aaa
+
+bbb
+.
+<p>aaa</p>
+<p>bbb</p>
+````````````````````````````````
+
+
+Paragraphs can contain multiple lines, but no blank lines:
+
+```````````````````````````````` example
+aaa
+bbb
+
+ccc
+ddd
+.
+<p>aaa
+bbb</p>
+<p>ccc
+ddd</p>
+````````````````````````````````
+
+
+Multiple blank lines between paragraphs have no effect:
+
+```````````````````````````````` example
+aaa
+
+
+bbb
+.
+<p>aaa</p>
+<p>bbb</p>
+````````````````````````````````
+
+
+Leading spaces or tabs are skipped:
+
+```````````````````````````````` example
+  aaa
+ bbb
+.
+<p>aaa
+bbb</p>
+````````````````````````````````
+
+
+Lines after the first may be indented any amount, since indented
+code blocks cannot interrupt paragraphs.
+
+```````````````````````````````` example
+aaa
+             bbb
+                                       ccc
+.
+<p>aaa
+bbb
+ccc</p>
+````````````````````````````````
+
+
+However, the first line may be preceded by up to three spaces of indentation.
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+   aaa
+bbb
+.
+<p>aaa
+bbb</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+    aaa
+bbb
+.
+<pre><code>aaa
+</code></pre>
+<p>bbb</p>
+````````````````````````````````
+
+
+Final spaces or tabs are stripped before inline parsing, so a paragraph
+that ends with two or more spaces will not end with a [hard line
+break]:
+
+```````````````````````````````` example
+aaa     
+bbb     
+.
+<p>aaa<br />
+bbb</p>
+````````````````````````````````
+
+
+## Blank lines
+
+[Blank lines] between block-level elements are ignored,
+except for the role they play in determining whether a [list]
+is [tight] or [loose].
+
+Blank lines at the beginning and end of the document are also ignored.
+
+```````````````````````````````` example
+  
+
+aaa
+  
+
+# aaa
+
+  
+.
+<p>aaa</p>
+<h1>aaa</h1>
+````````````````````````````````
+
+
+
+# Container blocks
+
+A [container block](#container-blocks) is a block that has other
+blocks as its contents.  There are two basic kinds of container blocks:
+[block quotes] and [list items].
+[Lists] are meta-containers for [list items].
+
+We define the syntax for container blocks recursively.  The general
+form of the definition is:
+
+> If X is a sequence of blocks, then the result of
+> transforming X in such-and-such a way is a container of type Y
+> with these blocks as its content.
+
+So, we explain what counts as a block quote or list item by explaining
+how these can be *generated* from their contents. This should suffice
+to define the syntax, although it does not give a recipe for *parsing*
+these constructions.  (A recipe is provided below in the section entitled
+[A parsing strategy](#appendix-a-parsing-strategy).)
+
+## Block quotes
+
+A [block quote marker](@),
+optionally preceded by up to three spaces of indentation,
+consists of (a) the character `>` together with a following space of
+indentation, or (b) a single character `>` not followed by a space of
+indentation.
+
+The following rules define [block quotes]:
+
+1.  **Basic case.**  If a string of lines *Ls* constitute a sequence
+    of blocks *Bs*, then the result of prepending a [block quote
+    marker] to the beginning of each line in *Ls*
+    is a [block quote](#block-quotes) containing *Bs*.
+
+2.  **Laziness.**  If a string of lines *Ls* constitute a [block
+    quote](#block-quotes) with contents *Bs*, then the result of deleting
+    the initial [block quote marker] from one or
+    more lines in which the next character other than a space or tab after the
+    [block quote marker] is [paragraph continuation
+    text] is a block quote with *Bs* as its content.
+    [Paragraph continuation text](@) is text
+    that will be parsed as part of the content of a paragraph, but does
+    not occur at the beginning of the paragraph.
+
+3.  **Consecutiveness.**  A document cannot contain two [block
+    quotes] in a row unless there is a [blank line] between them.
+
+Nothing else counts as a [block quote](#block-quotes).
+
+Here is a simple example:
+
+```````````````````````````````` example
+> # Foo
+> bar
+> baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+````````````````````````````````
+
+
+The space or tab after the `>` characters can be omitted:
+
+```````````````````````````````` example
+># Foo
+>bar
+> baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+````````````````````````````````
+
+
+The `>` characters can be preceded by up to three spaces of indentation:
+
+```````````````````````````````` example
+   > # Foo
+   > bar
+ > baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+````````````````````````````````
+
+
+Four spaces of indentation is too many:
+
+```````````````````````````````` example
+    > # Foo
+    > bar
+    > baz
+.
+<pre><code>&gt; # Foo
+&gt; bar
+&gt; baz
+</code></pre>
+````````````````````````````````
+
+
+The Laziness clause allows us to omit the `>` before
+[paragraph continuation text]:
+
+```````````````````````````````` example
+> # Foo
+> bar
+baz
+.
+<blockquote>
+<h1>Foo</h1>
+<p>bar
+baz</p>
+</blockquote>
+````````````````````````````````
+
+
+A block quote can contain some lazy and some non-lazy
+continuation lines:
+
+```````````````````````````````` example
+> bar
+baz
+> foo
+.
+<blockquote>
+<p>bar
+baz
+foo</p>
+</blockquote>
+````````````````````````````````
+
+
+Laziness only applies to lines that would have been continuations of
+paragraphs had they been prepended with [block quote markers].
+For example, the `> ` cannot be omitted in the second line of
+
+``` markdown
+> foo
+> ---
+```
+
+without changing the meaning:
+
+```````````````````````````````` example
+> foo
+---
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+<hr />
+````````````````````````````````
+
+
+Similarly, if we omit the `> ` in the second line of
+
+``` markdown
+> - foo
+> - bar
+```
+
+then the block quote ends after the first line:
+
+```````````````````````````````` example
+> - foo
+- bar
+.
+<blockquote>
+<ul>
+<li>foo</li>
+</ul>
+</blockquote>
+<ul>
+<li>bar</li>
+</ul>
+````````````````````````````````
+
+
+For the same reason, we can't omit the `> ` in front of
+subsequent lines of an indented or fenced code block:
+
+```````````````````````````````` example
+>     foo
+    bar
+.
+<blockquote>
+<pre><code>foo
+</code></pre>
+</blockquote>
+<pre><code>bar
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+> ```
+foo
+```
+.
+<blockquote>
+<pre><code></code></pre>
+</blockquote>
+<p>foo</p>
+<pre><code></code></pre>
+````````````````````````````````
+
+
+Note that in the following case, we have a [lazy
+continuation line]:
+
+```````````````````````````````` example
+> foo
+    - bar
+.
+<blockquote>
+<p>foo
+- bar</p>
+</blockquote>
+````````````````````````````````
+
+
+To see why, note that in
+
+```markdown
+> foo
+>     - bar
+```
+
+the `- bar` is indented too far to start a list, and can't
+be an indented code block because indented code blocks cannot
+interrupt paragraphs, so it is [paragraph continuation text].
+
+A block quote can be empty:
+
+```````````````````````````````` example
+>
+.
+<blockquote>
+</blockquote>
+````````````````````````````````
+
+
+```````````````````````````````` example
+>
+>  
+> 
+.
+<blockquote>
+</blockquote>
+````````````````````````````````
+
+
+A block quote can have initial or final blank lines:
+
+```````````````````````````````` example
+>
+> foo
+>  
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+````````````````````````````````
+
+
+A blank line always separates block quotes:
+
+```````````````````````````````` example
+> foo
+
+> bar
+.
+<blockquote>
+<p>foo</p>
+</blockquote>
+<blockquote>
+<p>bar</p>
+</blockquote>
+````````````````````````````````
+
+
+(Most current Markdown implementations, including John Gruber's
+original `Markdown.pl`, will parse this example as a single block quote
+with two paragraphs.  But it seems better to allow the author to decide
+whether two block quotes or one are wanted.)
+
+Consecutiveness means that if we put these block quotes together,
+we get a single block quote:
+
+```````````````````````````````` example
+> foo
+> bar
+.
+<blockquote>
+<p>foo
+bar</p>
+</blockquote>
+````````````````````````````````
+
+
+To get a block quote with two paragraphs, use:
+
+```````````````````````````````` example
+> foo
+>
+> bar
+.
+<blockquote>
+<p>foo</p>
+<p>bar</p>
+</blockquote>
+````````````````````````````````
+
+
+Block quotes can interrupt paragraphs:
+
+```````````````````````````````` example
+foo
+> bar
+.
+<p>foo</p>
+<blockquote>
+<p>bar</p>
+</blockquote>
+````````````````````````````````
+
+
+In general, blank lines are not needed before or after block
+quotes:
+
+```````````````````````````````` example
+> aaa
+***
+> bbb
+.
+<blockquote>
+<p>aaa</p>
+</blockquote>
+<hr />
+<blockquote>
+<p>bbb</p>
+</blockquote>
+````````````````````````````````
+
+
+However, because of laziness, a blank line is needed between
+a block quote and a following paragraph:
+
+```````````````````````````````` example
+> bar
+baz
+.
+<blockquote>
+<p>bar
+baz</p>
+</blockquote>
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+
+baz
+.
+<blockquote>
+<p>bar</p>
+</blockquote>
+<p>baz</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+> bar
+>
+baz
+.
+<blockquote>
+<p>bar</p>
+</blockquote>
+<p>baz</p>
+````````````````````````````````
+
+
+It is a consequence of the Laziness rule that any number
+of initial `>`s may be omitted on a continuation line of a
+nested block quote:
+
+```````````````````````````````` example
+> > > foo
+bar
+.
+<blockquote>
+<blockquote>
+<blockquote>
+<p>foo
+bar</p>
+</blockquote>
+</blockquote>
+</blockquote>
+````````````````````````````````
+
+
+```````````````````````````````` example
+>>> foo
+> bar
+>>baz
+.
+<blockquote>
+<blockquote>
+<blockquote>
+<p>foo
+bar
+baz</p>
+</blockquote>
+</blockquote>
+</blockquote>
+````````````````````````````````
+
+
+When including an indented code block in a block quote,
+remember that the [block quote marker] includes
+both the `>` and a following space of indentation.  So *five spaces* are needed
+after the `>`:
+
+```````````````````````````````` example
+>     code
+
+>    not code
+.
+<blockquote>
+<pre><code>code
+</code></pre>
+</blockquote>
+<blockquote>
+<p>not code</p>
+</blockquote>
+````````````````````````````````
+
+
+
+## List items
+
+A [list marker](@) is a
+[bullet list marker] or an [ordered list marker].
+
+A [bullet list marker](@)
+is a `-`, `+`, or `*` character.
+
+An [ordered list marker](@)
+is a sequence of 1--9 arabic digits (`0-9`), followed by either a
+`.` character or a `)` character.  (The reason for the length
+limit is that with 10 digits we start seeing integer overflows
+in some browsers.)
+
+The following rules define [list items]:
+
+1.  **Basic case.**  If a sequence of lines *Ls* constitute a sequence of
+    blocks *Bs* starting with a character other than a space or tab, and *M* is
+    a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation,
+    then the result of prepending *M* and the following spaces to the first line
+    of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
+    list item with *Bs* as its contents.  The type of the list item
+    (bullet or ordered) is determined by the type of its list marker.
+    If the list item is ordered, then it is also assigned a start
+    number, based on the ordered list marker.
+
+    Exceptions:
+
+    1. When the first list item in a [list] interrupts
+       a paragraph---that is, when it starts on a line that would
+       otherwise count as [paragraph continuation text]---then (a)
+       the lines *Ls* must not begin with a blank line, and (b) if
+       the list item is ordered, the start number must be 1.
+    2. If any line is a [thematic break][thematic breaks] then
+       that line is not a list item.
+
+For example, let *Ls* be the lines
+
+```````````````````````````````` example
+A paragraph
+with two lines.
+
+    indented code
+
+> A block quote.
+.
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+````````````````````````````````
+
+
+And let *M* be the marker `1.`, and *N* = 2.  Then rule #1 says
+that the following is an ordered list item with start number 1,
+and the same contents as *Ls*:
+
+```````````````````````````````` example
+1.  A paragraph
+    with two lines.
+
+        indented code
+
+    > A block quote.
+.
+<ol>
+<li>
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+The most important thing to notice is that the position of
+the text after the list marker determines how much indentation
+is needed in subsequent blocks in the list item.  If the list
+marker takes up two spaces of indentation, and there are three spaces between
+the list marker and the next character other than a space or tab, then blocks
+must be indented five spaces in order to fall under the list
+item.
+
+Here are some examples showing how far content must be indented to be
+put under the list item:
+
+```````````````````````````````` example
+- one
+
+ two
+.
+<ul>
+<li>one</li>
+</ul>
+<p>two</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- one
+
+  two
+.
+<ul>
+<li>
+<p>one</p>
+<p>two</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+ -    one
+
+     two
+.
+<ul>
+<li>one</li>
+</ul>
+<pre><code> two
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+ -    one
+
+      two
+.
+<ul>
+<li>
+<p>one</p>
+<p>two</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+It is tempting to think of this in terms of columns:  the continuation
+blocks must be indented at least to the column of the first character other than
+a space or tab after the list marker.  However, that is not quite right.
+The spaces of indentation after the list marker determine how much relative
+indentation is needed.  Which column this indentation reaches will depend on
+how the list item is embedded in other constructions, as shown by
+this example:
+
+```````````````````````````````` example
+   > > 1.  one
+>>
+>>     two
+.
+<blockquote>
+<blockquote>
+<ol>
+<li>
+<p>one</p>
+<p>two</p>
+</li>
+</ol>
+</blockquote>
+</blockquote>
+````````````````````````````````
+
+
+Here `two` occurs in the same column as the list marker `1.`,
+but is actually contained in the list item, because there is
+sufficient indentation after the last containing blockquote marker.
+
+The converse is also possible.  In the following example, the word `two`
+occurs far to the right of the initial text of the list item, `one`, but
+it is not considered part of the list item, because it is not indented
+far enough past the blockquote marker:
+
+```````````````````````````````` example
+>>- one
+>>
+  >  > two
+.
+<blockquote>
+<blockquote>
+<ul>
+<li>one</li>
+</ul>
+<p>two</p>
+</blockquote>
+</blockquote>
+````````````````````````````````
+
+
+Note that at least one space or tab is needed between the list marker and
+any following content, so these are not list items:
+
+```````````````````````````````` example
+-one
+
+2.two
+.
+<p>-one</p>
+<p>2.two</p>
+````````````````````````````````
+
+
+A list item may contain blocks that are separated by more than
+one blank line.
+
+```````````````````````````````` example
+- foo
+
+
+  bar
+.
+<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+A list item may contain any kind of block:
+
+```````````````````````````````` example
+1.  foo
+
+    ```
+    bar
+    ```
+
+    baz
+
+    > bam
+.
+<ol>
+<li>
+<p>foo</p>
+<pre><code>bar
+</code></pre>
+<p>baz</p>
+<blockquote>
+<p>bam</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+A list item that contains an indented code block will preserve
+empty lines within the code block verbatim.
+
+```````````````````````````````` example
+- Foo
+
+      bar
+
+
+      baz
+.
+<ul>
+<li>
+<p>Foo</p>
+<pre><code>bar
+
+
+baz
+</code></pre>
+</li>
+</ul>
+````````````````````````````````
+
+Note that ordered list start numbers must be nine digits or less:
+
+```````````````````````````````` example
+123456789. ok
+.
+<ol start="123456789">
+<li>ok</li>
+</ol>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1234567890. not ok
+.
+<p>1234567890. not ok</p>
+````````````````````````````````
+
+
+A start number may begin with 0s:
+
+```````````````````````````````` example
+0. ok
+.
+<ol start="0">
+<li>ok</li>
+</ol>
+````````````````````````````````
+
+
+```````````````````````````````` example
+003. ok
+.
+<ol start="3">
+<li>ok</li>
+</ol>
+````````````````````````````````
+
+
+A start number may not be negative:
+
+```````````````````````````````` example
+-1. not ok
+.
+<p>-1. not ok</p>
+````````````````````````````````
+
+
+
+2.  **Item starting with indented code.**  If a sequence of lines *Ls*
+    constitute a sequence of blocks *Bs* starting with an indented code
+    block, and *M* is a list marker of width *W* followed by
+    one space of indentation, then the result of prepending *M* and the
+    following space to the first line of *Ls*, and indenting subsequent lines
+    of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
+    If a line is empty, then it need not be indented.  The type of the
+    list item (bullet or ordered) is determined by the type of its list
+    marker.  If the list item is ordered, then it is also assigned a
+    start number, based on the ordered list marker.
+
+An indented code block will have to be preceded by four spaces of indentation
+beyond the edge of the region where text will be included in the list item.
+In the following case that is 6 spaces:
+
+```````````````````````````````` example
+- foo
+
+      bar
+.
+<ul>
+<li>
+<p>foo</p>
+<pre><code>bar
+</code></pre>
+</li>
+</ul>
+````````````````````````````````
+
+
+And in this case it is 11 spaces:
+
+```````````````````````````````` example
+  10.  foo
+
+           bar
+.
+<ol start="10">
+<li>
+<p>foo</p>
+<pre><code>bar
+</code></pre>
+</li>
+</ol>
+````````````````````````````````
+
+
+If the *first* block in the list item is an indented code block,
+then by rule #2, the contents must be preceded by *one* space of indentation
+after the list marker:
+
+```````````````````````````````` example
+    indented code
+
+paragraph
+
+    more code
+.
+<pre><code>indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1.     indented code
+
+   paragraph
+
+       more code
+.
+<ol>
+<li>
+<pre><code>indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre>
+</li>
+</ol>
+````````````````````````````````
+
+
+Note that an additional space of indentation is interpreted as space
+inside the code block:
+
+```````````````````````````````` example
+1.      indented code
+
+   paragraph
+
+       more code
+.
+<ol>
+<li>
+<pre><code> indented code
+</code></pre>
+<p>paragraph</p>
+<pre><code>more code
+</code></pre>
+</li>
+</ol>
+````````````````````````````````
+
+
+Note that rules #1 and #2 only apply to two cases:  (a) cases
+in which the lines to be included in a list item begin with a
+character other than a space or tab, and (b) cases in which
+they begin with an indented code
+block.  In a case like the following, where the first block begins with
+three spaces of indentation, the rules do not allow us to form a list item by
+indenting the whole thing and prepending a list marker:
+
+```````````````````````````````` example
+   foo
+
+bar
+.
+<p>foo</p>
+<p>bar</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+-    foo
+
+  bar
+.
+<ul>
+<li>foo</li>
+</ul>
+<p>bar</p>
+````````````````````````````````
+
+
+This is not a significant restriction, because when a block is preceded by up to
+three spaces of indentation, the indentation can always be removed without
+a change in interpretation, allowing rule #1 to be applied.  So, in
+the above case:
+
+```````````````````````````````` example
+-  foo
+
+   bar
+.
+<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+3.  **Item starting with a blank line.**  If a sequence of lines *Ls*
+    starting with a single [blank line] constitute a (possibly empty)
+    sequence of blocks *Bs*, and *M* is a list marker of width *W*,
+    then the result of prepending *M* to the first line of *Ls*, and
+    preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a
+    list item with *Bs* as its contents.
+    If a line is empty, then it need not be indented.  The type of the
+    list item (bullet or ordered) is determined by the type of its list
+    marker.  If the list item is ordered, then it is also assigned a
+    start number, based on the ordered list marker.
+
+Here are some list items that start with a blank line but are not empty:
+
+```````````````````````````````` example
+-
+  foo
+-
+  ```
+  bar
+  ```
+-
+      baz
+.
+<ul>
+<li>foo</li>
+<li>
+<pre><code>bar
+</code></pre>
+</li>
+<li>
+<pre><code>baz
+</code></pre>
+</li>
+</ul>
+````````````````````````````````
+
+When the list item starts with a blank line, the number of spaces
+following the list marker doesn't change the required indentation:
+
+```````````````````````````````` example
+-   
+  foo
+.
+<ul>
+<li>foo</li>
+</ul>
+````````````````````````````````
+
+
+A list item can begin with at most one blank line.
+In the following example, `foo` is not part of the list
+item:
+
+```````````````````````````````` example
+-
+
+  foo
+.
+<ul>
+<li></li>
+</ul>
+<p>foo</p>
+````````````````````````````````
+
+
+Here is an empty bullet list item:
+
+```````````````````````````````` example
+- foo
+-
+- bar
+.
+<ul>
+<li>foo</li>
+<li></li>
+<li>bar</li>
+</ul>
+````````````````````````````````
+
+
+It does not matter whether there are spaces or tabs following the [list marker]:
+
+```````````````````````````````` example
+- foo
+-   
+- bar
+.
+<ul>
+<li>foo</li>
+<li></li>
+<li>bar</li>
+</ul>
+````````````````````````````````
+
+
+Here is an empty ordered list item:
+
+```````````````````````````````` example
+1. foo
+2.
+3. bar
+.
+<ol>
+<li>foo</li>
+<li></li>
+<li>bar</li>
+</ol>
+````````````````````````````````
+
+
+A list may start or end with an empty list item:
+
+```````````````````````````````` example
+*
+.
+<ul>
+<li></li>
+</ul>
+````````````````````````````````
+
+However, an empty list item cannot interrupt a paragraph:
+
+```````````````````````````````` example
+foo
+*
+
+foo
+1.
+.
+<p>foo
+*</p>
+<p>foo
+1.</p>
+````````````````````````````````
+
+
+4.  **Indentation.**  If a sequence of lines *Ls* constitutes a list item
+    according to rule #1, #2, or #3, then the result of preceding each line
+    of *Ls* by up to three spaces of indentation (the same for each line) also
+    constitutes a list item with the same contents and attributes.  If a line is
+    empty, then it need not be indented.
+
+Indented one space:
+
+```````````````````````````````` example
+ 1.  A paragraph
+     with two lines.
+
+         indented code
+
+     > A block quote.
+.
+<ol>
+<li>
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+Indented two spaces:
+
+```````````````````````````````` example
+  1.  A paragraph
+      with two lines.
+
+          indented code
+
+      > A block quote.
+.
+<ol>
+<li>
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+Indented three spaces:
+
+```````````````````````````````` example
+   1.  A paragraph
+       with two lines.
+
+           indented code
+
+       > A block quote.
+.
+<ol>
+<li>
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+Four spaces indent gives a code block:
+
+```````````````````````````````` example
+    1.  A paragraph
+        with two lines.
+
+            indented code
+
+        > A block quote.
+.
+<pre><code>1.  A paragraph
+    with two lines.
+
+        indented code
+
+    &gt; A block quote.
+</code></pre>
+````````````````````````````````
+
+
+
+5.  **Laziness.**  If a string of lines *Ls* constitute a [list
+    item](#list-items) with contents *Bs*, then the result of deleting
+    some or all of the indentation from one or more lines in which the
+    next character other than a space or tab after the indentation is
+    [paragraph continuation text] is a
+    list item with the same contents and attributes.  The unindented
+    lines are called
+    [lazy continuation line](@)s.
+
+Here is an example with [lazy continuation lines]:
+
+```````````````````````````````` example
+  1.  A paragraph
+with two lines.
+
+          indented code
+
+      > A block quote.
+.
+<ol>
+<li>
+<p>A paragraph
+with two lines.</p>
+<pre><code>indented code
+</code></pre>
+<blockquote>
+<p>A block quote.</p>
+</blockquote>
+</li>
+</ol>
+````````````````````````````````
+
+
+Indentation can be partially deleted:
+
+```````````````````````````````` example
+  1.  A paragraph
+    with two lines.
+.
+<ol>
+<li>A paragraph
+with two lines.</li>
+</ol>
+````````````````````````````````
+
+
+These examples show how laziness can work in nested structures:
+
+```````````````````````````````` example
+> 1. > Blockquote
+continued here.
+.
+<blockquote>
+<ol>
+<li>
+<blockquote>
+<p>Blockquote
+continued here.</p>
+</blockquote>
+</li>
+</ol>
+</blockquote>
+````````````````````````````````
+
+
+```````````````````````````````` example
+> 1. > Blockquote
+> continued here.
+.
+<blockquote>
+<ol>
+<li>
+<blockquote>
+<p>Blockquote
+continued here.</p>
+</blockquote>
+</li>
+</ol>
+</blockquote>
+````````````````````````````````
+
+
+
+6.  **That's all.** Nothing that is not counted as a list item by rules
+    #1--5 counts as a [list item](#list-items).
+
+The rules for sublists follow from the general rules
+[above][List items].  A sublist must be indented the same number
+of spaces of indentation a paragraph would need to be in order to be included
+in the list item.
+
+So, in this case we need two spaces indent:
+
+```````````````````````````````` example
+- foo
+  - bar
+    - baz
+      - boo
+.
+<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>baz
+<ul>
+<li>boo</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+
+One is not enough:
+
+```````````````````````````````` example
+- foo
+ - bar
+  - baz
+   - boo
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+<li>baz</li>
+<li>boo</li>
+</ul>
+````````````````````````````````
+
+
+Here we need four, because the list marker is wider:
+
+```````````````````````````````` example
+10) foo
+    - bar
+.
+<ol start="10">
+<li>foo
+<ul>
+<li>bar</li>
+</ul>
+</li>
+</ol>
+````````````````````````````````
+
+
+Three is not enough:
+
+```````````````````````````````` example
+10) foo
+   - bar
+.
+<ol start="10">
+<li>foo</li>
+</ol>
+<ul>
+<li>bar</li>
+</ul>
+````````````````````````````````
+
+
+A list may be the first block in a list item:
+
+```````````````````````````````` example
+- - foo
+.
+<ul>
+<li>
+<ul>
+<li>foo</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1. - 2. foo
+.
+<ol>
+<li>
+<ul>
+<li>
+<ol start="2">
+<li>foo</li>
+</ol>
+</li>
+</ul>
+</li>
+</ol>
+````````````````````````````````
+
+
+A list item can contain a heading:
+
+```````````````````````````````` example
+- # Foo
+- Bar
+  ---
+  baz
+.
+<ul>
+<li>
+<h1>Foo</h1>
+</li>
+<li>
+<h2>Bar</h2>
+baz</li>
+</ul>
+````````````````````````````````
+
+
+### Motivation
+
+John Gruber's Markdown spec says the following about list items:
+
+1. "List markers typically start at the left margin, but may be indented
+   by up to three spaces. List markers must be followed by one or more
+   spaces or a tab."
+
+2. "To make lists look nice, you can wrap items with hanging indents....
+   But if you don't want to, you don't have to."
+
+3. "List items may consist of multiple paragraphs. Each subsequent
+   paragraph in a list item must be indented by either 4 spaces or one
+   tab."
+
+4. "It looks nice if you indent every line of the subsequent paragraphs,
+   but here again, Markdown will allow you to be lazy."
+
+5. "To put a blockquote within a list item, the blockquote's `>`
+   delimiters need to be indented."
+
+6. "To put a code block within a list item, the code block needs to be
+   indented twice — 8 spaces or two tabs."
+
+These rules specify that a paragraph under a list item must be indented
+four spaces (presumably, from the left margin, rather than the start of
+the list marker, but this is not said), and that code under a list item
+must be indented eight spaces instead of the usual four.  They also say
+that a block quote must be indented, but not by how much; however, the
+example given has four spaces indentation.  Although nothing is said
+about other kinds of block-level content, it is certainly reasonable to
+infer that *all* block elements under a list item, including other
+lists, must be indented four spaces.  This principle has been called the
+*four-space rule*.
+
+The four-space rule is clear and principled, and if the reference
+implementation `Markdown.pl` had followed it, it probably would have
+become the standard.  However, `Markdown.pl` allowed paragraphs and
+sublists to start with only two spaces indentation, at least on the
+outer level.  Worse, its behavior was inconsistent: a sublist of an
+outer-level list needed two spaces indentation, but a sublist of this
+sublist needed three spaces.  It is not surprising, then, that different
+implementations of Markdown have developed very different rules for
+determining what comes under a list item.  (Pandoc and python-Markdown,
+for example, stuck with Gruber's syntax description and the four-space
+rule, while discount, redcarpet, marked, PHP Markdown, and others
+followed `Markdown.pl`'s behavior more closely.)
+
+Unfortunately, given the divergences between implementations, there
+is no way to give a spec for list items that will be guaranteed not
+to break any existing documents.  However, the spec given here should
+correctly handle lists formatted with either the four-space rule or
+the more forgiving `Markdown.pl` behavior, provided they are laid out
+in a way that is natural for a human to read.
+
+The strategy here is to let the width and indentation of the list marker
+determine the indentation necessary for blocks to fall under the list
+item, rather than having a fixed and arbitrary number.  The writer can
+think of the body of the list item as a unit which gets indented to the
+right enough to fit the list marker (and any indentation on the list
+marker).  (The laziness rule, #5, then allows continuation lines to be
+unindented if needed.)
+
+This rule is superior, we claim, to any rule requiring a fixed level of
+indentation from the margin.  The four-space rule is clear but
+unnatural. It is quite unintuitive that
+
+``` markdown
+- foo
+
+  bar
+
+  - baz
+```
+
+should be parsed as two lists with an intervening paragraph,
+
+``` html
+<ul>
+<li>foo</li>
+</ul>
+<p>bar</p>
+<ul>
+<li>baz</li>
+</ul>
+```
+
+as the four-space rule demands, rather than a single list,
+
+``` html
+<ul>
+<li>
+<p>foo</p>
+<p>bar</p>
+<ul>
+<li>baz</li>
+</ul>
+</li>
+</ul>
+```
+
+The choice of four spaces is arbitrary.  It can be learned, but it is
+not likely to be guessed, and it trips up beginners regularly.
+
+Would it help to adopt a two-space rule?  The problem is that such
+a rule, together with the rule allowing up to three spaces of indentation for
+the initial list marker, allows text that is indented *less than* the
+original list marker to be included in the list item. For example,
+`Markdown.pl` parses
+
+``` markdown
+   - one
+
+  two
+```
+
+as a single list item, with `two` a continuation paragraph:
+
+``` html
+<ul>
+<li>
+<p>one</p>
+<p>two</p>
+</li>
+</ul>
+```
+
+and similarly
+
+``` markdown
+>   - one
+>
+>  two
+```
+
+as
+
+``` html
+<blockquote>
+<ul>
+<li>
+<p>one</p>
+<p>two</p>
+</li>
+</ul>
+</blockquote>
+```
+
+This is extremely unintuitive.
+
+Rather than requiring a fixed indent from the margin, we could require
+a fixed indent (say, two spaces, or even one space) from the list marker (which
+may itself be indented).  This proposal would remove the last anomaly
+discussed.  Unlike the spec presented above, it would count the following
+as a list item with a subparagraph, even though the paragraph `bar`
+is not indented as far as the first paragraph `foo`:
+
+``` markdown
+ 10. foo
+
+   bar  
+```
+
+Arguably this text does read like a list item with `bar` as a subparagraph,
+which may count in favor of the proposal.  However, on this proposal indented
+code would have to be indented six spaces after the list marker.  And this
+would break a lot of existing Markdown, which has the pattern:
+
+``` markdown
+1.  foo
+
+        indented code
+```
+
+where the code is indented eight spaces.  The spec above, by contrast, will
+parse this text as expected, since the code block's indentation is measured
+from the beginning of `foo`.
+
+The one case that needs special treatment is a list item that *starts*
+with indented code.  How much indentation is required in that case, since
+we don't have a "first paragraph" to measure from?  Rule #2 simply stipulates
+that in such cases, we require one space indentation from the list marker
+(and then the normal four spaces for the indented code).  This will match the
+four-space rule in cases where the list marker plus its initial indentation
+takes four spaces (a common case), but diverge in other cases.
+
+## Lists
+
+A [list](@) is a sequence of one or more
+list items [of the same type].  The list items
+may be separated by any number of blank lines.
+
+Two list items are [of the same type](@)
+if they begin with a [list marker] of the same type.
+Two list markers are of the
+same type if (a) they are bullet list markers using the same character
+(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same
+delimiter (either `.` or `)`).
+
+A list is an [ordered list](@)
+if its constituent list items begin with
+[ordered list markers], and a
+[bullet list](@) if its constituent list
+items begin with [bullet list markers].
+
+The [start number](@)
+of an [ordered list] is determined by the list number of
+its initial list item.  The numbers of subsequent list items are
+disregarded.
+
+A list is [loose](@) if any of its constituent
+list items are separated by blank lines, or if any of its constituent
+list items directly contain two block-level elements with a blank line
+between them.  Otherwise a list is [tight](@).
+(The difference in HTML output is that paragraphs in a loose list are
+wrapped in `<p>` tags, while paragraphs in a tight list are not.)
+
+Changing the bullet or ordered list delimiter starts a new list:
+
+```````````````````````````````` example
+- foo
+- bar
++ baz
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+</ul>
+<ul>
+<li>baz</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1. foo
+2. bar
+3) baz
+.
+<ol>
+<li>foo</li>
+<li>bar</li>
+</ol>
+<ol start="3">
+<li>baz</li>
+</ol>
+````````````````````````````````
+
+
+In CommonMark, a list can interrupt a paragraph. That is,
+no blank line is needed to separate a paragraph from a following
+list:
+
+```````````````````````````````` example
+Foo
+- bar
+- baz
+.
+<p>Foo</p>
+<ul>
+<li>bar</li>
+<li>baz</li>
+</ul>
+````````````````````````````````
+
+`Markdown.pl` does not allow this, through fear of triggering a list
+via a numeral in a hard-wrapped line:
+
+``` markdown
+The number of windows in my house is
+14.  The number of doors is 6.
+```
+
+Oddly, though, `Markdown.pl` *does* allow a blockquote to
+interrupt a paragraph, even though the same considerations might
+apply.
+
+In CommonMark, we do allow lists to interrupt paragraphs, for
+two reasons.  First, it is natural and not uncommon for people
+to start lists without blank lines:
+
+``` markdown
+I need to buy
+- new shoes
+- a coat
+- a plane ticket
+```
+
+Second, we are attracted to a
+
+> [principle of uniformity](@):
+> if a chunk of text has a certain
+> meaning, it will continue to have the same meaning when put into a
+> container block (such as a list item or blockquote).
+
+(Indeed, the spec for [list items] and [block quotes] presupposes
+this principle.) This principle implies that if
+
+``` markdown
+  * I need to buy
+    - new shoes
+    - a coat
+    - a plane ticket
+```
+
+is a list item containing a paragraph followed by a nested sublist,
+as all Markdown implementations agree it is (though the paragraph
+may be rendered without `<p>` tags, since the list is "tight"),
+then
+
+``` markdown
+I need to buy
+- new shoes
+- a coat
+- a plane ticket
+```
+
+by itself should be a paragraph followed by a nested sublist.
+
+Since it is well-established Markdown practice to allow lists to
+interrupt paragraphs inside list items, the [principle of
+uniformity] requires us to allow this outside list items as
+well.  ([reStructuredText](http://docutils.sourceforge.net/rst.html)
+takes a different approach, requiring blank lines before lists
+even inside other list items.)
+
+In order to solve the problem of unwanted lists in paragraphs with
+hard-wrapped numerals, we allow only lists starting with `1` to
+interrupt paragraphs.  Thus,
+
+```````````````````````````````` example
+The number of windows in my house is
+14.  The number of doors is 6.
+.
+<p>The number of windows in my house is
+14.  The number of doors is 6.</p>
+````````````````````````````````
+
+We may still get an unintended result in cases like
+
+```````````````````````````````` example
+The number of windows in my house is
+1.  The number of doors is 6.
+.
+<p>The number of windows in my house is</p>
+<ol>
+<li>The number of doors is 6.</li>
+</ol>
+````````````````````````````````
+
+but this rule should prevent most spurious list captures.
+
+There can be any number of blank lines between items:
+
+```````````````````````````````` example
+- foo
+
+- bar
+
+
+- baz
+.
+<ul>
+<li>
+<p>foo</p>
+</li>
+<li>
+<p>bar</p>
+</li>
+<li>
+<p>baz</p>
+</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+- foo
+  - bar
+    - baz
+
+
+      bim
+.
+<ul>
+<li>foo
+<ul>
+<li>bar
+<ul>
+<li>
+<p>baz</p>
+<p>bim</p>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+
+To separate consecutive lists of the same type, or to separate a
+list from an indented code block that would otherwise be parsed
+as a subparagraph of the final list item, you can insert a blank HTML
+comment:
+
+```````````````````````````````` example
+- foo
+- bar
+
+<!-- -->
+
+- baz
+- bim
+.
+<ul>
+<li>foo</li>
+<li>bar</li>
+</ul>
+<!-- -->
+<ul>
+<li>baz</li>
+<li>bim</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+-   foo
+
+    notcode
+
+-   foo
+
+<!-- -->
+
+    code
+.
+<ul>
+<li>
+<p>foo</p>
+<p>notcode</p>
+</li>
+<li>
+<p>foo</p>
+</li>
+</ul>
+<!-- -->
+<pre><code>code
+</code></pre>
+````````````````````````````````
+
+
+List items need not be indented to the same level.  The following
+list items will be treated as items at the same list level,
+since none is indented enough to belong to the previous list
+item:
+
+```````````````````````````````` example
+- a
+ - b
+  - c
+   - d
+  - e
+ - f
+- g
+.
+<ul>
+<li>a</li>
+<li>b</li>
+<li>c</li>
+<li>d</li>
+<li>e</li>
+<li>f</li>
+<li>g</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+1. a
+
+  2. b
+
+   3. c
+.
+<ol>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+</li>
+<li>
+<p>c</p>
+</li>
+</ol>
+````````````````````````````````
+
+Note, however, that list items may not be preceded by more than
+three spaces of indentation.  Here `- e` is treated as a paragraph continuation
+line, because it is indented more than three spaces:
+
+```````````````````````````````` example
+- a
+ - b
+  - c
+   - d
+    - e
+.
+<ul>
+<li>a</li>
+<li>b</li>
+<li>c</li>
+<li>d
+- e</li>
+</ul>
+````````````````````````````````
+
+And here, `3. c` is treated as an indented code block,
+because it is indented four spaces and preceded by a
+blank line.
+
+```````````````````````````````` example
+1. a
+
+  2. b
+
+    3. c
+.
+<ol>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+</li>
+</ol>
+<pre><code>3. c
+</code></pre>
+````````````````````````````````
+
+
+This is a loose list, because there is a blank line between
+two of the list items:
+
+```````````````````````````````` example
+- a
+- b
+
+- c
+.
+<ul>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+</li>
+<li>
+<p>c</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+So is this, with an empty second item:
+
+```````````````````````````````` example
+* a
+*
+
+* c
+.
+<ul>
+<li>
+<p>a</p>
+</li>
+<li></li>
+<li>
+<p>c</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+These are loose lists, even though there are no blank lines between the items,
+because one of the items directly contains two block-level elements
+with a blank line between them:
+
+```````````````````````````````` example
+- a
+- b
+
+  c
+- d
+.
+<ul>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+<p>c</p>
+</li>
+<li>
+<p>d</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- a
+- b
+
+  [ref]: /url
+- d
+.
+<ul>
+<li>
+<p>a</p>
+</li>
+<li>
+<p>b</p>
+</li>
+<li>
+<p>d</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+This is a tight list, because the blank lines are in a code block:
+
+```````````````````````````````` example
+- a
+- ```
+  b
+
+
+  ```
+- c
+.
+<ul>
+<li>a</li>
+<li>
+<pre><code>b
+
+
+</code></pre>
+</li>
+<li>c</li>
+</ul>
+````````````````````````````````
+
+
+This is a tight list, because the blank line is between two
+paragraphs of a sublist.  So the sublist is loose while
+the outer list is tight:
+
+```````````````````````````````` example
+- a
+  - b
+
+    c
+- d
+.
+<ul>
+<li>a
+<ul>
+<li>
+<p>b</p>
+<p>c</p>
+</li>
+</ul>
+</li>
+<li>d</li>
+</ul>
+````````````````````````````````
+
+
+This is a tight list, because the blank line is inside the
+block quote:
+
+```````````````````````````````` example
+* a
+  > b
+  >
+* c
+.
+<ul>
+<li>a
+<blockquote>
+<p>b</p>
+</blockquote>
+</li>
+<li>c</li>
+</ul>
+````````````````````````````````
+
+
+This list is tight, because the consecutive block elements
+are not separated by blank lines:
+
+```````````````````````````````` example
+- a
+  > b
+  ```
+  c
+  ```
+- d
+.
+<ul>
+<li>a
+<blockquote>
+<p>b</p>
+</blockquote>
+<pre><code>c
+</code></pre>
+</li>
+<li>d</li>
+</ul>
+````````````````````````````````
+
+
+A single-paragraph list is tight:
+
+```````````````````````````````` example
+- a
+.
+<ul>
+<li>a</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- a
+  - b
+.
+<ul>
+<li>a
+<ul>
+<li>b</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+
+This list is loose, because of the blank line between the
+two block elements in the list item:
+
+```````````````````````````````` example
+1. ```
+   foo
+   ```
+
+   bar
+.
+<ol>
+<li>
+<pre><code>foo
+</code></pre>
+<p>bar</p>
+</li>
+</ol>
+````````````````````````````````
+
+
+Here the outer list is loose, the inner list tight:
+
+```````````````````````````````` example
+* foo
+  * bar
+
+  baz
+.
+<ul>
+<li>
+<p>foo</p>
+<ul>
+<li>bar</li>
+</ul>
+<p>baz</p>
+</li>
+</ul>
+````````````````````````````````
+
+
+```````````````````````````````` example
+- a
+  - b
+  - c
+
+- d
+  - e
+  - f
+.
+<ul>
+<li>
+<p>a</p>
+<ul>
+<li>b</li>
+<li>c</li>
+</ul>
+</li>
+<li>
+<p>d</p>
+<ul>
+<li>e</li>
+<li>f</li>
+</ul>
+</li>
+</ul>
+````````````````````````````````
+
+
+# Inlines
+
+Inlines are parsed sequentially from the beginning of the character
+stream to the end (left to right, in left-to-right languages).
+Thus, for example, in
+
+```````````````````````````````` example
+`hi`lo`
+.
+<p><code>hi</code>lo`</p>
+````````````````````````````````
+
+`hi` is parsed as code, leaving the backtick at the end as a literal
+backtick.
+
+
+
+## Code spans
+
+A [backtick string](@)
+is a string of one or more backtick characters (`` ` ``) that is neither
+preceded nor followed by a backtick.
+
+A [code span](@) begins with a backtick string and ends with
+a backtick string of equal length.  The contents of the code span are
+the characters between these two backtick strings, normalized in the
+following ways:
+
+- First, [line endings] are converted to [spaces].
+- If the resulting string both begins *and* ends with a [space]
+  character, but does not consist entirely of [space]
+  characters, a single [space] character is removed from the
+  front and back.  This allows you to include code that begins
+  or ends with backtick characters, which must be separated by
+  whitespace from the opening or closing backtick strings.
+
+This is a simple code span:
+
+```````````````````````````````` example
+`foo`
+.
+<p><code>foo</code></p>
+````````````````````````````````
+
+
+Here two backticks are used, because the code contains a backtick.
+This example also illustrates stripping of a single leading and
+trailing space:
+
+```````````````````````````````` example
+`` foo ` bar ``
+.
+<p><code>foo ` bar</code></p>
+````````````````````````````````
+
+
+This example shows the motivation for stripping leading and trailing
+spaces:
+
+```````````````````````````````` example
+` `` `
+.
+<p><code>``</code></p>
+````````````````````````````````
+
+Note that only *one* space is stripped:
+
+```````````````````````````````` example
+`  ``  `
+.
+<p><code> `` </code></p>
+````````````````````````````````
+
+The stripping only happens if the space is on both
+sides of the string:
+
+```````````````````````````````` example
+` a`
+.
+<p><code> a</code></p>
+````````````````````````````````
+
+Only [spaces], and not [unicode whitespace] in general, are
+stripped in this way:
+
+```````````````````````````````` example
+` b `
+.
+<p><code> b </code></p>
+````````````````````````````````
+
+No stripping occurs if the code span contains only spaces:
+
+```````````````````````````````` example
+` `
+`  `
+.
+<p><code> </code>
+<code>  </code></p>
+````````````````````````````````
+
+
+[Line endings] are treated like spaces:
+
+```````````````````````````````` example
+``
+foo
+bar  
+baz
+``
+.
+<p><code>foo bar   baz</code></p>
+````````````````````````````````
+
+```````````````````````````````` example
+``
+foo 
+``
+.
+<p><code>foo </code></p>
+````````````````````````````````
+
+
+Interior spaces are not collapsed:
+
+```````````````````````````````` example
+`foo   bar 
+baz`
+.
+<p><code>foo   bar  baz</code></p>
+````````````````````````````````
+
+Note that browsers will typically collapse consecutive spaces
+when rendering `<code>` elements, so it is recommended that
+the following CSS be used:
+
+    code{white-space: pre-wrap;}
+
+
+Note that backslash escapes do not work in code spans. All backslashes
+are treated literally:
+
+```````````````````````````````` example
+`foo\`bar`
+.
+<p><code>foo\</code>bar`</p>
+````````````````````````````````
+
+
+Backslash escapes are never needed, because one can always choose a
+string of *n* backtick characters as delimiters, where the code does
+not contain any strings of exactly *n* backtick characters.
+
+```````````````````````````````` example
+``foo`bar``
+.
+<p><code>foo`bar</code></p>
+````````````````````````````````
+
+```````````````````````````````` example
+` foo `` bar `
+.
+<p><code>foo `` bar</code></p>
+````````````````````````````````
+
+
+Code span backticks have higher precedence than any other inline
+constructs except HTML tags and autolinks.  Thus, for example, this is
+not parsed as emphasized text, since the second `*` is part of a code
+span:
+
+```````````````````````````````` example
+*foo`*`
+.
+<p>*foo<code>*</code></p>
+````````````````````````````````
+
+
+And this is not parsed as a link:
+
+```````````````````````````````` example
+[not a `link](/foo`)
+.
+<p>[not a <code>link](/foo</code>)</p>
+````````````````````````````````
+
+
+Code spans, HTML tags, and autolinks have the same precedence.
+Thus, this is code:
+
+```````````````````````````````` example
+`<a href="`">`
+.
+<p><code>&lt;a href=&quot;</code>&quot;&gt;`</p>
+````````````````````````````````
+
+
+But this is an HTML tag:
+
+```````````````````````````````` example
+<a href="`">`
+.
+<p><a href="`">`</p>
+````````````````````````````````
+
+
+And this is code:
+
+```````````````````````````````` example
+`<http://foo.bar.`baz>`
+.
+<p><code>&lt;http://foo.bar.</code>baz&gt;`</p>
+````````````````````````````````
+
+
+But this is an autolink:
+
+```````````````````````````````` example
+<http://foo.bar.`baz>`
+.
+<p><a href="http://foo.bar.%60baz">http://foo.bar.`baz</a>`</p>
+````````````````````````````````
+
+
+When a backtick string is not closed by a matching backtick string,
+we just have literal backticks:
+
+```````````````````````````````` example
+```foo``
+.
+<p>```foo``</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+`foo
+.
+<p>`foo</p>
+````````````````````````````````
+
+The following case also illustrates the need for opening and
+closing backtick strings to be equal in length:
+
+```````````````````````````````` example
+`foo``bar``
+.
+<p>`foo<code>bar</code></p>
+````````````````````````````````
+
+
+## Emphasis and strong emphasis
+
+John Gruber's original [Markdown syntax
+description](http://daringfireball.net/projects/markdown/syntax#em) says:
+
+> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of
+> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML
+> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>`
+> tag.
+
+This is enough for most users, but these rules leave much undecided,
+especially when it comes to nested emphasis.  The original
+`Markdown.pl` test suite makes it clear that triple `***` and
+`___` delimiters can be used for strong emphasis, and most
+implementations have also allowed the following patterns:
+
+``` markdown
+***strong emph***
+***strong** in emph*
+***emph* in strong**
+**in strong *emph***
+*in emph **strong***
+```
+
+The following patterns are less widely supported, but the intent
+is clear and they are useful (especially in contexts like bibliography
+entries):
+
+``` markdown
+*emph *with emph* in it*
+**strong **with strong** in it**
+```
+
+Many implementations have also restricted intraword emphasis to
+the `*` forms, to avoid unwanted emphasis in words containing
+internal underscores.  (It is best practice to put these in code
+spans, but users often do not.)
+
+``` markdown
+internal emphasis: foo*bar*baz
+no emphasis: foo_bar_baz
+```
+
+The rules given below capture all of these patterns, while allowing
+for efficient parsing strategies that do not backtrack.
+
+First, some definitions.  A [delimiter run](@) is either
+a sequence of one or more `*` characters that is not preceded or
+followed by a non-backslash-escaped `*` character, or a sequence
+of one or more `_` characters that is not preceded or followed by
+a non-backslash-escaped `_` character.
+
+A [left-flanking delimiter run](@) is
+a [delimiter run] that is (1) not followed by [Unicode whitespace],
+and either (2a) not followed by a [Unicode punctuation character], or
+(2b) followed by a [Unicode punctuation character] and
+preceded by [Unicode whitespace] or a [Unicode punctuation character].
+For purposes of this definition, the beginning and the end of
+the line count as Unicode whitespace.
+
+A [right-flanking delimiter run](@) is
+a [delimiter run] that is (1) not preceded by [Unicode whitespace],
+and either (2a) not preceded by a [Unicode punctuation character], or
+(2b) preceded by a [Unicode punctuation character] and
+followed by [Unicode whitespace] or a [Unicode punctuation character].
+For purposes of this definition, the beginning and the end of
+the line count as Unicode whitespace.
+
+Here are some examples of delimiter runs.
+
+  - left-flanking but not right-flanking:
+
+    ```
+    ***abc
+      _abc
+    **"abc"
+     _"abc"
+    ```
+
+  - right-flanking but not left-flanking:
+
+    ```
+     abc***
+     abc_
+    "abc"**
+    "abc"_
+    ```
+
+  - Both left and right-flanking:
+
+    ```
+     abc***def
+    "abc"_"def"
+    ```
+
+  - Neither left nor right-flanking:
+
+    ```
+    abc *** def
+    a _ b
+    ```
+
+(The idea of distinguishing left-flanking and right-flanking
+delimiter runs based on the character before and the character
+after comes from Roopesh Chander's
+[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags).
+vfmd uses the terminology "emphasis indicator string" instead of "delimiter
+run," and its rules for distinguishing left- and right-flanking runs
+are a bit more complex than the ones given here.)
+
+The following rules define emphasis and strong emphasis:
+
+1.  A single `*` character [can open emphasis](@)
+    iff (if and only if) it is part of a [left-flanking delimiter run].
+
+2.  A single `_` character [can open emphasis] iff
+    it is part of a [left-flanking delimiter run]
+    and either (a) not part of a [right-flanking delimiter run]
+    or (b) part of a [right-flanking delimiter run]
+    preceded by a [Unicode punctuation character].
+
+3.  A single `*` character [can close emphasis](@)
+    iff it is part of a [right-flanking delimiter run].
+
+4.  A single `_` character [can close emphasis] iff
+    it is part of a [right-flanking delimiter run]
+    and either (a) not part of a [left-flanking delimiter run]
+    or (b) part of a [left-flanking delimiter run]
+    followed by a [Unicode punctuation character].
+
+5.  A double `**` [can open strong emphasis](@)
+    iff it is part of a [left-flanking delimiter run].
+
+6.  A double `__` [can open strong emphasis] iff
+    it is part of a [left-flanking delimiter run]
+    and either (a) not part of a [right-flanking delimiter run]
+    or (b) part of a [right-flanking delimiter run]
+    preceded by a [Unicode punctuation character].
+
+7.  A double `**` [can close strong emphasis](@)
+    iff it is part of a [right-flanking delimiter run].
+
+8.  A double `__` [can close strong emphasis] iff
+    it is part of a [right-flanking delimiter run]
+    and either (a) not part of a [left-flanking delimiter run]
+    or (b) part of a [left-flanking delimiter run]
+    followed by a [Unicode punctuation character].
+
+9.  Emphasis begins with a delimiter that [can open emphasis] and ends
+    with a delimiter that [can close emphasis], and that uses the same
+    character (`_` or `*`) as the opening delimiter.  The
+    opening and closing delimiters must belong to separate
+    [delimiter runs].  If one of the delimiters can both
+    open and close emphasis, then the sum of the lengths of the
+    delimiter runs containing the opening and closing delimiters
+    must not be a multiple of 3 unless both lengths are
+    multiples of 3.
+
+10. Strong emphasis begins with a delimiter that
+    [can open strong emphasis] and ends with a delimiter that
+    [can close strong emphasis], and that uses the same character
+    (`_` or `*`) as the opening delimiter.  The
+    opening and closing delimiters must belong to separate
+    [delimiter runs].  If one of the delimiters can both open
+    and close strong emphasis, then the sum of the lengths of
+    the delimiter runs containing the opening and closing
+    delimiters must not be a multiple of 3 unless both lengths
+    are multiples of 3.
+
+11. A literal `*` character cannot occur at the beginning or end of
+    `*`-delimited emphasis or `**`-delimited strong emphasis, unless it
+    is backslash-escaped.
+
+12. A literal `_` character cannot occur at the beginning or end of
+    `_`-delimited emphasis or `__`-delimited strong emphasis, unless it
+    is backslash-escaped.
+
+Where rules 1--12 above are compatible with multiple parsings,
+the following principles resolve ambiguity:
+
+13. The number of nestings should be minimized. Thus, for example,
+    an interpretation `<strong>...</strong>` is always preferred to
+    `<em><em>...</em></em>`.
+
+14. An interpretation `<em><strong>...</strong></em>` is always
+    preferred to `<strong><em>...</em></strong>`.
+
+15. When two potential emphasis or strong emphasis spans overlap,
+    so that the second begins before the first ends and ends after
+    the first ends, the first takes precedence. Thus, for example,
+    `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather
+    than `*foo <em>bar* baz</em>`.
+
+16. When there are two potential emphasis or strong emphasis spans
+    with the same closing delimiter, the shorter one (the one that
+    opens later) takes precedence. Thus, for example,
+    `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>`
+    rather than `<strong>foo **bar baz</strong>`.
+
+17. Inline code spans, links, images, and HTML tags group more tightly
+    than emphasis.  So, when there is a choice between an interpretation
+    that contains one of these elements and one that does not, the
+    former always wins.  Thus, for example, `*[foo*](bar)` is
+    parsed as `*<a href="bar">foo*</a>` rather than as
+    `<em>[foo</em>](bar)`.
+
+These rules can be illustrated through a series of examples.
+
+Rule 1:
+
+```````````````````````````````` example
+*foo bar*
+.
+<p><em>foo bar</em></p>
+````````````````````````````````
+
+
+This is not emphasis, because the opening `*` is followed by
+whitespace, and hence not part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a * foo bar*
+.
+<p>a * foo bar*</p>
+````````````````````````````````
+
+
+This is not emphasis, because the opening `*` is preceded
+by an alphanumeric and followed by punctuation, and hence
+not part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a*"foo"*
+.
+<p>a*&quot;foo&quot;*</p>
+````````````````````````````````
+
+
+Unicode nonbreaking spaces count as whitespace, too:
+
+```````````````````````````````` example
+* a *
+.
+<p>* a *</p>
+````````````````````````````````
+
+
+Intraword emphasis with `*` is permitted:
+
+```````````````````````````````` example
+foo*bar*
+.
+<p>foo<em>bar</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+5*6*78
+.
+<p>5<em>6</em>78</p>
+````````````````````````````````
+
+
+Rule 2:
+
+```````````````````````````````` example
+_foo bar_
+.
+<p><em>foo bar</em></p>
+````````````````````````````````
+
+
+This is not emphasis, because the opening `_` is followed by
+whitespace:
+
+```````````````````````````````` example
+_ foo bar_
+.
+<p>_ foo bar_</p>
+````````````````````````````````
+
+
+This is not emphasis, because the opening `_` is preceded
+by an alphanumeric and followed by punctuation:
+
+```````````````````````````````` example
+a_"foo"_
+.
+<p>a_&quot;foo&quot;_</p>
+````````````````````````````````
+
+
+Emphasis with `_` is not allowed inside words:
+
+```````````````````````````````` example
+foo_bar_
+.
+<p>foo_bar_</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+5_6_78
+.
+<p>5_6_78</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+пристаням_стремятся_
+.
+<p>пристаням_стремятся_</p>
+````````````````````````````````
+
+
+Here `_` does not generate emphasis, because the first delimiter run
+is right-flanking and the second left-flanking:
+
+```````````````````````````````` example
+aa_"bb"_cc
+.
+<p>aa_&quot;bb&quot;_cc</p>
+````````````````````````````````
+
+
+This is emphasis, even though the opening delimiter is
+both left- and right-flanking, because it is preceded by
+punctuation:
+
+```````````````````````````````` example
+foo-_(bar)_
+.
+<p>foo-<em>(bar)</em></p>
+````````````````````````````````
+
+
+Rule 3:
+
+This is not emphasis, because the closing delimiter does
+not match the opening delimiter:
+
+```````````````````````````````` example
+_foo*
+.
+<p>_foo*</p>
+````````````````````````````````
+
+
+This is not emphasis, because the closing `*` is preceded by
+whitespace:
+
+```````````````````````````````` example
+*foo bar *
+.
+<p>*foo bar *</p>
+````````````````````````````````
+
+
+A line ending also counts as whitespace:
+
+```````````````````````````````` example
+*foo bar
+*
+.
+<p>*foo bar
+*</p>
+````````````````````````````````
+
+
+This is not emphasis, because the second `*` is
+preceded by punctuation and followed by an alphanumeric
+(hence it is not part of a [right-flanking delimiter run]):
+
+```````````````````````````````` example
+*(*foo)
+.
+<p>*(*foo)</p>
+````````````````````````````````
+
+
+The point of this restriction is more easily appreciated
+with this example:
+
+```````````````````````````````` example
+*(*foo*)*
+.
+<p><em>(<em>foo</em>)</em></p>
+````````````````````````````````
+
+
+Intraword emphasis with `*` is allowed:
+
+```````````````````````````````` example
+*foo*bar
+.
+<p><em>foo</em>bar</p>
+````````````````````````````````
+
+
+
+Rule 4:
+
+This is not emphasis, because the closing `_` is preceded by
+whitespace:
+
+```````````````````````````````` example
+_foo bar _
+.
+<p>_foo bar _</p>
+````````````````````````````````
+
+
+This is not emphasis, because the second `_` is
+preceded by punctuation and followed by an alphanumeric:
+
+```````````````````````````````` example
+_(_foo)
+.
+<p>_(_foo)</p>
+````````````````````````````````
+
+
+This is emphasis within emphasis:
+
+```````````````````````````````` example
+_(_foo_)_
+.
+<p><em>(<em>foo</em>)</em></p>
+````````````````````````````````
+
+
+Intraword emphasis is disallowed for `_`:
+
+```````````````````````````````` example
+_foo_bar
+.
+<p>_foo_bar</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_пристаням_стремятся
+.
+<p>_пристаням_стремятся</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_foo_bar_baz_
+.
+<p><em>foo_bar_baz</em></p>
+````````````````````````````````
+
+
+This is emphasis, even though the closing delimiter is
+both left- and right-flanking, because it is followed by
+punctuation:
+
+```````````````````````````````` example
+_(bar)_.
+.
+<p><em>(bar)</em>.</p>
+````````````````````````````````
+
+
+Rule 5:
+
+```````````````````````````````` example
+**foo bar**
+.
+<p><strong>foo bar</strong></p>
+````````````````````````````````
+
+
+This is not strong emphasis, because the opening delimiter is
+followed by whitespace:
+
+```````````````````````````````` example
+** foo bar**
+.
+<p>** foo bar**</p>
+````````````````````````````````
+
+
+This is not strong emphasis, because the opening `**` is preceded
+by an alphanumeric and followed by punctuation, and hence
+not part of a [left-flanking delimiter run]:
+
+```````````````````````````````` example
+a**"foo"**
+.
+<p>a**&quot;foo&quot;**</p>
+````````````````````````````````
+
+
+Intraword strong emphasis with `**` is permitted:
+
+```````````````````````````````` example
+foo**bar**
+.
+<p>foo<strong>bar</strong></p>
+````````````````````````````````
+
+
+Rule 6:
+
+```````````````````````````````` example
+__foo bar__
+.
+<p><strong>foo bar</strong></p>
+````````````````````````````````
+
+
+This is not strong emphasis, because the opening delimiter is
+followed by whitespace:
+
+```````````````````````````````` example
+__ foo bar__
+.
+<p>__ foo bar__</p>
+````````````````````````````````
+
+
+A line ending counts as whitespace:
+```````````````````````````````` example
+__
+foo bar__
+.
+<p>__
+foo bar__</p>
+````````````````````````````````
+
+
+This is not strong emphasis, because the opening `__` is preceded
+by an alphanumeric and followed by punctuation:
+
+```````````````````````````````` example
+a__"foo"__
+.
+<p>a__&quot;foo&quot;__</p>
+````````````````````````````````
+
+
+Intraword strong emphasis is forbidden with `__`:
+
+```````````````````````````````` example
+foo__bar__
+.
+<p>foo__bar__</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+5__6__78
+.
+<p>5__6__78</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+пристаням__стремятся__
+.
+<p>пристаням__стремятся__</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo, __bar__, baz__
+.
+<p><strong>foo, <strong>bar</strong>, baz</strong></p>
+````````````````````````````````
+
+
+This is strong emphasis, even though the opening delimiter is
+both left- and right-flanking, because it is preceded by
+punctuation:
+
+```````````````````````````````` example
+foo-__(bar)__
+.
+<p>foo-<strong>(bar)</strong></p>
+````````````````````````````````
+
+
+
+Rule 7:
+
+This is not strong emphasis, because the closing delimiter is preceded
+by whitespace:
+
+```````````````````````````````` example
+**foo bar **
+.
+<p>**foo bar **</p>
+````````````````````````````````
+
+
+(Nor can it be interpreted as an emphasized `*foo bar *`, because of
+Rule 11.)
+
+This is not strong emphasis, because the second `**` is
+preceded by punctuation and followed by an alphanumeric:
+
+```````````````````````````````` example
+**(**foo)
+.
+<p>**(**foo)</p>
+````````````````````````````````
+
+
+The point of this restriction is more easily appreciated
+with these examples:
+
+```````````````````````````````` example
+*(**foo**)*
+.
+<p><em>(<strong>foo</strong>)</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**Gomphocarpus (*Gomphocarpus physocarpus*, syn.
+*Asclepias physocarpa*)**
+.
+<p><strong>Gomphocarpus (<em>Gomphocarpus physocarpus</em>, syn.
+<em>Asclepias physocarpa</em>)</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo "*bar*" foo**
+.
+<p><strong>foo &quot;<em>bar</em>&quot; foo</strong></p>
+````````````````````````````````
+
+
+Intraword emphasis:
+
+```````````````````````````````` example
+**foo**bar
+.
+<p><strong>foo</strong>bar</p>
+````````````````````````````````
+
+
+Rule 8:
+
+This is not strong emphasis, because the closing delimiter is
+preceded by whitespace:
+
+```````````````````````````````` example
+__foo bar __
+.
+<p>__foo bar __</p>
+````````````````````````````````
+
+
+This is not strong emphasis, because the second `__` is
+preceded by punctuation and followed by an alphanumeric:
+
+```````````````````````````````` example
+__(__foo)
+.
+<p>__(__foo)</p>
+````````````````````````````````
+
+
+The point of this restriction is more easily appreciated
+with this example:
+
+```````````````````````````````` example
+_(__foo__)_
+.
+<p><em>(<strong>foo</strong>)</em></p>
+````````````````````````````````
+
+
+Intraword strong emphasis is forbidden with `__`:
+
+```````````````````````````````` example
+__foo__bar
+.
+<p>__foo__bar</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__пристаням__стремятся
+.
+<p>__пристаням__стремятся</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo__bar__baz__
+.
+<p><strong>foo__bar__baz</strong></p>
+````````````````````````````````
+
+
+This is strong emphasis, even though the closing delimiter is
+both left- and right-flanking, because it is followed by
+punctuation:
+
+```````````````````````````````` example
+__(bar)__.
+.
+<p><strong>(bar)</strong>.</p>
+````````````````````````````````
+
+
+Rule 9:
+
+Any nonempty sequence of inline elements can be the contents of an
+emphasized span.
+
+```````````````````````````````` example
+*foo [bar](/url)*
+.
+<p><em>foo <a href="/url">bar</a></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo
+bar*
+.
+<p><em>foo
+bar</em></p>
+````````````````````````````````
+
+
+In particular, emphasis and strong emphasis can be nested
+inside emphasis:
+
+```````````````````````````````` example
+_foo __bar__ baz_
+.
+<p><em>foo <strong>bar</strong> baz</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_foo _bar_ baz_
+.
+<p><em>foo <em>bar</em> baz</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo_ bar_
+.
+<p><em><em>foo</em> bar</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo *bar**
+.
+<p><em>foo <em>bar</em></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo **bar** baz*
+.
+<p><em>foo <strong>bar</strong> baz</em></p>
+````````````````````````````````
+
+```````````````````````````````` example
+*foo**bar**baz*
+.
+<p><em>foo<strong>bar</strong>baz</em></p>
+````````````````````````````````
+
+Note that in the preceding case, the interpretation
+
+``` markdown
+<p><em>foo</em><em>bar<em></em>baz</em></p>
+```
+
+
+is precluded by the condition that a delimiter that
+can both open and close (like the `*` after `foo`)
+cannot form emphasis if the sum of the lengths of
+the delimiter runs containing the opening and
+closing delimiters is a multiple of 3 unless
+both lengths are multiples of 3.
+
+
+For the same reason, we don't get two consecutive
+emphasis sections in this example:
+
+```````````````````````````````` example
+*foo**bar*
+.
+<p><em>foo**bar</em></p>
+````````````````````````````````
+
+
+The same condition ensures that the following
+cases are all strong emphasis nested inside
+emphasis, even when the interior whitespace is
+omitted:
+
+
+```````````````````````````````` example
+***foo** bar*
+.
+<p><em><strong>foo</strong> bar</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo **bar***
+.
+<p><em>foo <strong>bar</strong></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo**bar***
+.
+<p><em>foo<strong>bar</strong></em></p>
+````````````````````````````````
+
+
+When the lengths of the interior closing and opening
+delimiter runs are *both* multiples of 3, though,
+they can match to create emphasis:
+
+```````````````````````````````` example
+foo***bar***baz
+.
+<p>foo<em><strong>bar</strong></em>baz</p>
+````````````````````````````````
+
+```````````````````````````````` example
+foo******bar*********baz
+.
+<p>foo<strong><strong><strong>bar</strong></strong></strong>***baz</p>
+````````````````````````````````
+
+
+Indefinite levels of nesting are possible:
+
+```````````````````````````````` example
+*foo **bar *baz* bim** bop*
+.
+<p><em>foo <strong>bar <em>baz</em> bim</strong> bop</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo [*bar*](/url)*
+.
+<p><em>foo <a href="/url"><em>bar</em></a></em></p>
+````````````````````````````````
+
+
+There can be no empty emphasis or strong emphasis:
+
+```````````````````````````````` example
+** is not an empty emphasis
+.
+<p>** is not an empty emphasis</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**** is not an empty strong emphasis
+.
+<p>**** is not an empty strong emphasis</p>
+````````````````````````````````
+
+
+
+Rule 10:
+
+Any nonempty sequence of inline elements can be the contents of a
+strongly emphasized span.
+
+```````````````````````````````` example
+**foo [bar](/url)**
+.
+<p><strong>foo <a href="/url">bar</a></strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo
+bar**
+.
+<p><strong>foo
+bar</strong></p>
+````````````````````````````````
+
+
+In particular, emphasis and strong emphasis can be nested
+inside strong emphasis:
+
+```````````````````````````````` example
+__foo _bar_ baz__
+.
+<p><strong>foo <em>bar</em> baz</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo __bar__ baz__
+.
+<p><strong>foo <strong>bar</strong> baz</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+____foo__ bar__
+.
+<p><strong><strong>foo</strong> bar</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo **bar****
+.
+<p><strong>foo <strong>bar</strong></strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo *bar* baz**
+.
+<p><strong>foo <em>bar</em> baz</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo*bar*baz**
+.
+<p><strong>foo<em>bar</em>baz</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+***foo* bar**
+.
+<p><strong><em>foo</em> bar</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo *bar***
+.
+<p><strong>foo <em>bar</em></strong></p>
+````````````````````````````````
+
+
+Indefinite levels of nesting are possible:
+
+```````````````````````````````` example
+**foo *bar **baz**
+bim* bop**
+.
+<p><strong>foo <em>bar <strong>baz</strong>
+bim</em> bop</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo [*bar*](/url)**
+.
+<p><strong>foo <a href="/url"><em>bar</em></a></strong></p>
+````````````````````````````````
+
+
+There can be no empty emphasis or strong emphasis:
+
+```````````````````````````````` example
+__ is not an empty emphasis
+.
+<p>__ is not an empty emphasis</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+____ is not an empty strong emphasis
+.
+<p>____ is not an empty strong emphasis</p>
+````````````````````````````````
+
+
+
+Rule 11:
+
+```````````````````````````````` example
+foo ***
+.
+<p>foo ***</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo *\**
+.
+<p>foo <em>*</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo *_*
+.
+<p>foo <em>_</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo *****
+.
+<p>foo *****</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo **\***
+.
+<p>foo <strong>*</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo **_**
+.
+<p>foo <strong>_</strong></p>
+````````````````````````````````
+
+
+Note that when delimiters do not match evenly, Rule 11 determines
+that the excess literal `*` characters will appear outside of the
+emphasis, rather than inside it:
+
+```````````````````````````````` example
+**foo*
+.
+<p>*<em>foo</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo**
+.
+<p><em>foo</em>*</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+***foo**
+.
+<p>*<strong>foo</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+****foo*
+.
+<p>***<em>foo</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**foo***
+.
+<p><strong>foo</strong>*</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo****
+.
+<p><em>foo</em>***</p>
+````````````````````````````````
+
+
+
+Rule 12:
+
+```````````````````````````````` example
+foo ___
+.
+<p>foo ___</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo _\__
+.
+<p>foo <em>_</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo _*_
+.
+<p>foo <em>*</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo _____
+.
+<p>foo _____</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo __\___
+.
+<p>foo <strong>_</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo __*__
+.
+<p>foo <strong>*</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo_
+.
+<p>_<em>foo</em></p>
+````````````````````````````````
+
+
+Note that when delimiters do not match evenly, Rule 12 determines
+that the excess literal `_` characters will appear outside of the
+emphasis, rather than inside it:
+
+```````````````````````````````` example
+_foo__
+.
+<p><em>foo</em>_</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+___foo__
+.
+<p>_<strong>foo</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+____foo_
+.
+<p>___<em>foo</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo___
+.
+<p><strong>foo</strong>_</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_foo____
+.
+<p><em>foo</em>___</p>
+````````````````````````````````
+
+
+Rule 13 implies that if you want emphasis nested directly inside
+emphasis, you must use different delimiters:
+
+```````````````````````````````` example
+**foo**
+.
+<p><strong>foo</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*_foo_*
+.
+<p><em><em>foo</em></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__foo__
+.
+<p><strong>foo</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_*foo*_
+.
+<p><em><em>foo</em></em></p>
+````````````````````````````````
+
+
+However, strong emphasis within strong emphasis is possible without
+switching delimiters:
+
+```````````````````````````````` example
+****foo****
+.
+<p><strong><strong>foo</strong></strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+____foo____
+.
+<p><strong><strong>foo</strong></strong></p>
+````````````````````````````````
+
+
+
+Rule 13 can be applied to arbitrarily long sequences of
+delimiters:
+
+```````````````````````````````` example
+******foo******
+.
+<p><strong><strong><strong>foo</strong></strong></strong></p>
+````````````````````````````````
+
+
+Rule 14:
+
+```````````````````````````````` example
+***foo***
+.
+<p><em><strong>foo</strong></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_____foo_____
+.
+<p><em><strong><strong>foo</strong></strong></em></p>
+````````````````````````````````
+
+
+Rule 15:
+
+```````````````````````````````` example
+*foo _bar* baz_
+.
+<p><em>foo _bar</em> baz_</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo __bar *baz bim__ bam*
+.
+<p><em>foo <strong>bar *baz bim</strong> bam</em></p>
+````````````````````````````````
+
+
+Rule 16:
+
+```````````````````````````````` example
+**foo **bar baz**
+.
+<p>**foo <strong>bar baz</strong></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo *bar baz*
+.
+<p>*foo <em>bar baz</em></p>
+````````````````````````````````
+
+
+Rule 17:
+
+```````````````````````````````` example
+*[bar*](/url)
+.
+<p>*<a href="/url">bar*</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_foo [bar_](/url)
+.
+<p>_foo <a href="/url">bar_</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*<img src="foo" title="*"/>
+.
+<p>*<img src="foo" title="*"/></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**<a href="**">
+.
+<p>**<a href="**"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__<a href="__">
+.
+<p>__<a href="__"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*a `*`*
+.
+<p><em>a <code>*</code></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+_a `_`_
+.
+<p><em>a <code>_</code></em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+**a<http://foo.bar/?q=**>
+.
+<p>**a<a href="http://foo.bar/?q=**">http://foo.bar/?q=**</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+__a<http://foo.bar/?q=__>
+.
+<p>__a<a href="http://foo.bar/?q=__">http://foo.bar/?q=__</a></p>
+````````````````````````````````
+
+
+
+## Links
+
+A link contains [link text] (the visible text), a [link destination]
+(the URI that is the link destination), and optionally a [link title].
+There are two basic kinds of links in Markdown.  In [inline links] the
+destination and title are given immediately after the link text.  In
+[reference links] the destination and title are defined elsewhere in
+the document.
+
+A [link text](@) consists of a sequence of zero or more
+inline elements enclosed by square brackets (`[` and `]`).  The
+following rules apply:
+
+- Links may not contain other links, at any level of nesting. If
+  multiple otherwise valid link definitions appear nested inside each
+  other, the inner-most definition is used.
+
+- Brackets are allowed in the [link text] only if (a) they
+  are backslash-escaped or (b) they appear as a matched pair of brackets,
+  with an open bracket `[`, a sequence of zero or more inlines, and
+  a close bracket `]`.
+
+- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly
+  than the brackets in link text.  Thus, for example,
+  `` [foo`]` `` could not be a link text, since the second `]`
+  is part of a code span.
+
+- The brackets in link text bind more tightly than markers for
+  [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link.
+
+A [link destination](@) consists of either
+
+- a sequence of zero or more characters between an opening `<` and a
+  closing `>` that contains no line endings or unescaped
+  `<` or `>` characters, or
+
+- a nonempty sequence of characters that does not start with `<`,
+  does not include [ASCII control characters][ASCII control character]
+  or [space] characters, and includes parentheses only if (a) they are
+  backslash-escaped or (b) they are part of a balanced pair of
+  unescaped parentheses.
+  (Implementations may impose limits on parentheses nesting to
+  avoid performance issues, but at least three levels of nesting
+  should be supported.)
+
+A [link title](@)  consists of either
+
+- a sequence of zero or more characters between straight double-quote
+  characters (`"`), including a `"` character only if it is
+  backslash-escaped, or
+
+- a sequence of zero or more characters between straight single-quote
+  characters (`'`), including a `'` character only if it is
+  backslash-escaped, or
+
+- a sequence of zero or more characters between matching parentheses
+  (`(...)`), including a `(` or `)` character only if it is
+  backslash-escaped.
+
+Although [link titles] may span multiple lines, they may not contain
+a [blank line].
+
+An [inline link](@) consists of a [link text] followed immediately
+by a left parenthesis `(`, an optional [link destination], an optional
+[link title], and a right parenthesis `)`.
+These four components may be separated by spaces, tabs, and up to one line
+ending.
+If both [link destination] and [link title] are present, they *must* be
+separated by spaces, tabs, and up to one line ending.
+
+The link's text consists of the inlines contained
+in the [link text] (excluding the enclosing square brackets).
+The link's URI consists of the link destination, excluding enclosing
+`<...>` if present, with backslash-escapes in effect as described
+above.  The link's title consists of the link title, excluding its
+enclosing delimiters, with backslash-escapes in effect as described
+above.
+
+Here is a simple inline link:
+
+```````````````````````````````` example
+[link](/uri "title")
+.
+<p><a href="/uri" title="title">link</a></p>
+````````````````````````````````
+
+
+The title, the link text and even 
+the destination may be omitted:
+
+```````````````````````````````` example
+[link](/uri)
+.
+<p><a href="/uri">link</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[](./target.md)
+.
+<p><a href="./target.md"></a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link]()
+.
+<p><a href="">link</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link](<>)
+.
+<p><a href="">link</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[]()
+.
+<p><a href=""></a></p>
+````````````````````````````````
+
+The destination can only contain spaces if it is
+enclosed in pointy brackets:
+
+```````````````````````````````` example
+[link](/my uri)
+.
+<p>[link](/my uri)</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[link](</my uri>)
+.
+<p><a href="/my%20uri">link</a></p>
+````````````````````````````````
+
+The destination cannot contain line endings,
+even if enclosed in pointy brackets:
+
+```````````````````````````````` example
+[link](foo
+bar)
+.
+<p>[link](foo
+bar)</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[link](<foo
+bar>)
+.
+<p>[link](<foo
+bar>)</p>
+````````````````````````````````
+
+The destination can contain `)` if it is enclosed
+in pointy brackets:
+
+```````````````````````````````` example
+[a](<b)c>)
+.
+<p><a href="b)c">a</a></p>
+````````````````````````````````
+
+Pointy brackets that enclose link destinations must be unescaped:
+
+```````````````````````````````` example
+[link](<foo\>)
+.
+<p>[link](&lt;foo&gt;)</p>
+````````````````````````````````
+
+These are not links, because the opening pointy bracket
+is not matched properly:
+
+```````````````````````````````` example
+[a](<b)c
+[a](<b)c>
+[a](<b>c)
+.
+<p>[a](&lt;b)c
+[a](&lt;b)c&gt;
+[a](<b>c)</p>
+````````````````````````````````
+
+Parentheses inside the link destination may be escaped:
+
+```````````````````````````````` example
+[link](\(foo\))
+.
+<p><a href="(foo)">link</a></p>
+````````````````````````````````
+
+Any number of parentheses are allowed without escaping, as long as they are
+balanced:
+
+```````````````````````````````` example
+[link](foo(and(bar)))
+.
+<p><a href="foo(and(bar))">link</a></p>
+````````````````````````````````
+
+However, if you have unbalanced parentheses, you need to escape or use the
+`<...>` form:
+
+```````````````````````````````` example
+[link](foo(and(bar))
+.
+<p>[link](foo(and(bar))</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link](foo\(and\(bar\))
+.
+<p><a href="foo(and(bar)">link</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link](<foo(and(bar)>)
+.
+<p><a href="foo(and(bar)">link</a></p>
+````````````````````````````````
+
+
+Parentheses and other symbols can also be escaped, as usual
+in Markdown:
+
+```````````````````````````````` example
+[link](foo\)\:)
+.
+<p><a href="foo):">link</a></p>
+````````````````````````````````
+
+
+A link can contain fragment identifiers and queries:
+
+```````````````````````````````` example
+[link](#fragment)
+
+[link](http://example.com#fragment)
+
+[link](http://example.com?foo=3#frag)
+.
+<p><a href="#fragment">link</a></p>
+<p><a href="http://example.com#fragment">link</a></p>
+<p><a href="http://example.com?foo=3#frag">link</a></p>
+````````````````````````````````
+
+
+Note that a backslash before a non-escapable character is
+just a backslash:
+
+```````````````````````````````` example
+[link](foo\bar)
+.
+<p><a href="foo%5Cbar">link</a></p>
+````````````````````````````````
+
+
+URL-escaping should be left alone inside the destination, as all
+URL-escaped characters are also valid URL characters. Entity and
+numerical character references in the destination will be parsed
+into the corresponding Unicode code points, as usual.  These may
+be optionally URL-escaped when written as HTML, but this spec
+does not enforce any particular policy for rendering URLs in
+HTML or other formats.  Renderers may make different decisions
+about how to escape or normalize URLs in the output.
+
+```````````````````````````````` example
+[link](foo%20b&auml;)
+.
+<p><a href="foo%20b%C3%A4">link</a></p>
+````````````````````````````````
+
+
+Note that, because titles can often be parsed as destinations,
+if you try to omit the destination and keep the title, you'll
+get unexpected results:
+
+```````````````````````````````` example
+[link]("title")
+.
+<p><a href="%22title%22">link</a></p>
+````````````````````````````````
+
+
+Titles may be in single quotes, double quotes, or parentheses:
+
+```````````````````````````````` example
+[link](/url "title")
+[link](/url 'title')
+[link](/url (title))
+.
+<p><a href="/url" title="title">link</a>
+<a href="/url" title="title">link</a>
+<a href="/url" title="title">link</a></p>
+````````````````````````````````
+
+
+Backslash escapes and entity and numeric character references
+may be used in titles:
+
+```````````````````````````````` example
+[link](/url "title \"&quot;")
+.
+<p><a href="/url" title="title &quot;&quot;">link</a></p>
+````````````````````````````````
+
+
+Titles must be separated from the link destination using spaces, tabs, and up to one line
+ending.
+Other [Unicode whitespace] like non-breaking space doesn't work.
+
+```````````````````````````````` example
+[link](/url "title")
+.
+<p><a href="/url%C2%A0%22title%22">link</a></p>
+````````````````````````````````
+
+
+Nested balanced quotes are not allowed without escaping:
+
+```````````````````````````````` example
+[link](/url "title "and" title")
+.
+<p>[link](/url &quot;title &quot;and&quot; title&quot;)</p>
+````````````````````````````````
+
+
+But it is easy to work around this by using a different quote type:
+
+```````````````````````````````` example
+[link](/url 'title "and" title')
+.
+<p><a href="/url" title="title &quot;and&quot; title">link</a></p>
+````````````````````````````````
+
+
+(Note:  `Markdown.pl` did allow double quotes inside a double-quoted
+title, and its test suite included a test demonstrating this.
+But it is hard to see a good rationale for the extra complexity this
+brings, since there are already many ways---backslash escaping,
+entity and numeric character references, or using a different
+quote type for the enclosing title---to write titles containing
+double quotes.  `Markdown.pl`'s handling of titles has a number
+of other strange features.  For example, it allows single-quoted
+titles in inline links, but not reference links.  And, in
+reference links but not inline links, it allows a title to begin
+with `"` and end with `)`.  `Markdown.pl` 1.0.1 even allows
+titles with no closing quotation mark, though 1.0.2b8 does not.
+It seems preferable to adopt a simple, rational rule that works
+the same way in inline links and link reference definitions.)
+
+Spaces, tabs, and up to one line ending are allowed around the destination and
+title:
+
+```````````````````````````````` example
+[link](   /uri
+  "title"  )
+.
+<p><a href="/uri" title="title">link</a></p>
+````````````````````````````````
+
+
+But it is not allowed between the link text and the
+following parenthesis:
+
+```````````````````````````````` example
+[link] (/uri)
+.
+<p>[link] (/uri)</p>
+````````````````````````````````
+
+
+The link text may contain balanced brackets, but not unbalanced ones,
+unless they are escaped:
+
+```````````````````````````````` example
+[link [foo [bar]]](/uri)
+.
+<p><a href="/uri">link [foo [bar]]</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link] bar](/uri)
+.
+<p>[link] bar](/uri)</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link [bar](/uri)
+.
+<p>[link <a href="/uri">bar</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link \[bar](/uri)
+.
+<p><a href="/uri">link [bar</a></p>
+````````````````````````````````
+
+
+The link text may contain inline content:
+
+```````````````````````````````` example
+[link *foo **bar** `#`*](/uri)
+.
+<p><a href="/uri">link <em>foo <strong>bar</strong> <code>#</code></em></a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[![moon](moon.jpg)](/uri)
+.
+<p><a href="/uri"><img src="moon.jpg" alt="moon" /></a></p>
+````````````````````````````````
+
+
+However, links may not contain other links, at any level of nesting.
+
+```````````````````````````````` example
+[foo [bar](/uri)](/uri)
+.
+<p>[foo <a href="/uri">bar</a>](/uri)</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo *[bar [baz](/uri)](/uri)*](/uri)
+.
+<p>[foo <em>[bar <a href="/uri">baz</a>](/uri)</em>](/uri)</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![[[foo](uri1)](uri2)](uri3)
+.
+<p><img src="uri3" alt="[foo](uri2)" /></p>
+````````````````````````````````
+
+
+These cases illustrate the precedence of link text grouping over
+emphasis grouping:
+
+```````````````````````````````` example
+*[foo*](/uri)
+.
+<p>*<a href="/uri">foo*</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo *bar](baz*)
+.
+<p><a href="baz*">foo *bar</a></p>
+````````````````````````````````
+
+
+Note that brackets that *aren't* part of links do not take
+precedence:
+
+```````````````````````````````` example
+*foo [bar* baz]
+.
+<p><em>foo [bar</em> baz]</p>
+````````````````````````````````
+
+
+These cases illustrate the precedence of HTML tags, code spans,
+and autolinks over link grouping:
+
+```````````````````````````````` example
+[foo <bar attr="](baz)">
+.
+<p>[foo <bar attr="](baz)"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo`](/uri)`
+.
+<p>[foo<code>](/uri)</code></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo<http://example.com/?search=](uri)>
+.
+<p>[foo<a href="http://example.com/?search=%5D(uri)">http://example.com/?search=](uri)</a></p>
+````````````````````````````````
+
+
+There are three kinds of [reference link](@)s:
+[full](#full-reference-link), [collapsed](#collapsed-reference-link),
+and [shortcut](#shortcut-reference-link).
+
+A [full reference link](@)
+consists of a [link text] immediately followed by a [link label]
+that [matches] a [link reference definition] elsewhere in the document.
+
+A [link label](@)  begins with a left bracket (`[`) and ends
+with the first right bracket (`]`) that is not backslash-escaped.
+Between these brackets there must be at least one character that is not a space,
+tab, or line ending.
+Unescaped square bracket characters are not allowed inside the
+opening and closing square brackets of [link labels].  A link
+label can have at most 999 characters inside the square
+brackets.
+
+One label [matches](@)
+another if and only if their normalized forms are equal.  To normalize a
+label, strip off the opening and closing brackets,
+perform the *Unicode case fold*, strip leading and trailing
+spaces, tabs, and line endings, and collapse consecutive internal
+spaces, tabs, and line endings to a single space.  If there are multiple
+matching reference link definitions, the one that comes first in the
+document is used.  (It is desirable in such cases to emit a warning.)
+
+The link's URI and title are provided by the matching [link
+reference definition].
+
+Here is a simple example:
+
+```````````````````````````````` example
+[foo][bar]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+The rules for the [link text] are the same as with
+[inline links].  Thus:
+
+The link text may contain balanced brackets, but not unbalanced ones,
+unless they are escaped:
+
+```````````````````````````````` example
+[link [foo [bar]]][ref]
+
+[ref]: /uri
+.
+<p><a href="/uri">link [foo [bar]]</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[link \[bar][ref]
+
+[ref]: /uri
+.
+<p><a href="/uri">link [bar</a></p>
+````````````````````````````````
+
+
+The link text may contain inline content:
+
+```````````````````````````````` example
+[link *foo **bar** `#`*][ref]
+
+[ref]: /uri
+.
+<p><a href="/uri">link <em>foo <strong>bar</strong> <code>#</code></em></a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[![moon](moon.jpg)][ref]
+
+[ref]: /uri
+.
+<p><a href="/uri"><img src="moon.jpg" alt="moon" /></a></p>
+````````````````````````````````
+
+
+However, links may not contain other links, at any level of nesting.
+
+```````````````````````````````` example
+[foo [bar](/uri)][ref]
+
+[ref]: /uri
+.
+<p>[foo <a href="/uri">bar</a>]<a href="/uri">ref</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo *bar [baz][ref]*][ref]
+
+[ref]: /uri
+.
+<p>[foo <em>bar <a href="/uri">baz</a></em>]<a href="/uri">ref</a></p>
+````````````````````````````````
+
+
+(In the examples above, we have two [shortcut reference links]
+instead of one [full reference link].)
+
+The following cases illustrate the precedence of link text grouping over
+emphasis grouping:
+
+```````````````````````````````` example
+*[foo*][ref]
+
+[ref]: /uri
+.
+<p>*<a href="/uri">foo*</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo *bar][ref]*
+
+[ref]: /uri
+.
+<p><a href="/uri">foo *bar</a>*</p>
+````````````````````````````````
+
+
+These cases illustrate the precedence of HTML tags, code spans,
+and autolinks over link grouping:
+
+```````````````````````````````` example
+[foo <bar attr="][ref]">
+
+[ref]: /uri
+.
+<p>[foo <bar attr="][ref]"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo`][ref]`
+
+[ref]: /uri
+.
+<p>[foo<code>][ref]</code></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo<http://example.com/?search=][ref]>
+
+[ref]: /uri
+.
+<p>[foo<a href="http://example.com/?search=%5D%5Bref%5D">http://example.com/?search=][ref]</a></p>
+````````````````````````````````
+
+
+Matching is case-insensitive:
+
+```````````````````````````````` example
+[foo][BaR]
+
+[bar]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+Unicode case fold is used:
+
+```````````````````````````````` example
+[ẞ]
+
+[SS]: /url
+.
+<p><a href="/url">ẞ</a></p>
+````````````````````````````````
+
+
+Consecutive internal spaces, tabs, and line endings are treated as one space for
+purposes of determining matching:
+
+```````````````````````````````` example
+[Foo
+  bar]: /url
+
+[Baz][Foo bar]
+.
+<p><a href="/url">Baz</a></p>
+````````````````````````````````
+
+
+No spaces, tabs, or line endings are allowed between the [link text] and the
+[link label]:
+
+```````````````````````````````` example
+[foo] [bar]
+
+[bar]: /url "title"
+.
+<p>[foo] <a href="/url" title="title">bar</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo]
+[bar]
+
+[bar]: /url "title"
+.
+<p>[foo]
+<a href="/url" title="title">bar</a></p>
+````````````````````````````````
+
+
+This is a departure from John Gruber's original Markdown syntax
+description, which explicitly allows whitespace between the link
+text and the link label.  It brings reference links in line with
+[inline links], which (according to both original Markdown and
+this spec) cannot have whitespace after the link text.  More
+importantly, it prevents inadvertent capture of consecutive
+[shortcut reference links]. If whitespace is allowed between the
+link text and the link label, then in the following we will have
+a single reference link, not two shortcut reference links, as
+intended:
+
+``` markdown
+[foo]
+[bar]
+
+[foo]: /url1
+[bar]: /url2
+```
+
+(Note that [shortcut reference links] were introduced by Gruber
+himself in a beta version of `Markdown.pl`, but never included
+in the official syntax description.  Without shortcut reference
+links, it is harmless to allow space between the link text and
+link label; but once shortcut references are introduced, it is
+too dangerous to allow this, as it frequently leads to
+unintended results.)
+
+When there are multiple matching [link reference definitions],
+the first is used:
+
+```````````````````````````````` example
+[foo]: /url1
+
+[foo]: /url2
+
+[bar][foo]
+.
+<p><a href="/url1">bar</a></p>
+````````````````````````````````
+
+
+Note that matching is performed on normalized strings, not parsed
+inline content.  So the following does not match, even though the
+labels define equivalent inline content:
+
+```````````````````````````````` example
+[bar][foo\!]
+
+[foo!]: /url
+.
+<p>[bar][foo!]</p>
+````````````````````````````````
+
+
+[Link labels] cannot contain brackets, unless they are
+backslash-escaped:
+
+```````````````````````````````` example
+[foo][ref[]
+
+[ref[]: /uri
+.
+<p>[foo][ref[]</p>
+<p>[ref[]: /uri</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo][ref[bar]]
+
+[ref[bar]]: /uri
+.
+<p>[foo][ref[bar]]</p>
+<p>[ref[bar]]: /uri</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[[[foo]]]
+
+[[[foo]]]: /url
+.
+<p>[[[foo]]]</p>
+<p>[[[foo]]]: /url</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[foo][ref\[]
+
+[ref\[]: /uri
+.
+<p><a href="/uri">foo</a></p>
+````````````````````````````````
+
+
+Note that in this example `]` is not backslash-escaped:
+
+```````````````````````````````` example
+[bar\\]: /uri
+
+[bar\\]
+.
+<p><a href="/uri">bar\</a></p>
+````````````````````````````````
+
+
+A [link label] must contain at least one character that is not a space, tab, or
+line ending:
+
+```````````````````````````````` example
+[]
+
+[]: /uri
+.
+<p>[]</p>
+<p>[]: /uri</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[
+ ]
+
+[
+ ]: /uri
+.
+<p>[
+]</p>
+<p>[
+]: /uri</p>
+````````````````````````````````
+
+
+A [collapsed reference link](@)
+consists of a [link label] that [matches] a
+[link reference definition] elsewhere in the
+document, followed by the string `[]`.
+The contents of the first link label are parsed as inlines,
+which are used as the link's text.  The link's URI and title are
+provided by the matching link reference definition.  Thus,
+`[foo][]` is equivalent to `[foo][foo]`.
+
+```````````````````````````````` example
+[foo][]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[*foo* bar][]
+
+[*foo* bar]: /url "title"
+.
+<p><a href="/url" title="title"><em>foo</em> bar</a></p>
+````````````````````````````````
+
+
+The link labels are case-insensitive:
+
+```````````````````````````````` example
+[Foo][]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">Foo</a></p>
+````````````````````````````````
+
+
+
+As with full reference links, spaces, tabs, or line endings are not
+allowed between the two sets of brackets:
+
+```````````````````````````````` example
+[foo] 
+[]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a>
+[]</p>
+````````````````````````````````
+
+
+A [shortcut reference link](@)
+consists of a [link label] that [matches] a
+[link reference definition] elsewhere in the
+document and is not followed by `[]` or a link label.
+The contents of the first link label are parsed as inlines,
+which are used as the link's text.  The link's URI and title
+are provided by the matching link reference definition.
+Thus, `[foo]` is equivalent to `[foo][]`.
+
+```````````````````````````````` example
+[foo]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[*foo* bar]
+
+[*foo* bar]: /url "title"
+.
+<p><a href="/url" title="title"><em>foo</em> bar</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[[*foo* bar]]
+
+[*foo* bar]: /url "title"
+.
+<p>[<a href="/url" title="title"><em>foo</em> bar</a>]</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+[[bar [foo]
+
+[foo]: /url
+.
+<p>[[bar <a href="/url">foo</a></p>
+````````````````````````````````
+
+
+The link labels are case-insensitive:
+
+```````````````````````````````` example
+[Foo]
+
+[foo]: /url "title"
+.
+<p><a href="/url" title="title">Foo</a></p>
+````````````````````````````````
+
+
+A space after the link text should be preserved:
+
+```````````````````````````````` example
+[foo] bar
+
+[foo]: /url
+.
+<p><a href="/url">foo</a> bar</p>
+````````````````````````````````
+
+
+If you just want bracketed text, you can backslash-escape the
+opening bracket to avoid links:
+
+```````````````````````````````` example
+\[foo]
+
+[foo]: /url "title"
+.
+<p>[foo]</p>
+````````````````````````````````
+
+
+Note that this is a link, because a link label ends with the first
+following closing bracket:
+
+```````````````````````````````` example
+[foo*]: /url
+
+*[foo*]
+.
+<p>*<a href="/url">foo*</a></p>
+````````````````````````````````
+
+
+Full and collapsed references take precedence over shortcut
+references:
+
+```````````````````````````````` example
+[foo][bar]
+
+[foo]: /url1
+[bar]: /url2
+.
+<p><a href="/url2">foo</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo][]
+
+[foo]: /url1
+.
+<p><a href="/url1">foo</a></p>
+````````````````````````````````
+
+Inline links also take precedence:
+
+```````````````````````````````` example
+[foo]()
+
+[foo]: /url1
+.
+<p><a href="">foo</a></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo](not a link)
+
+[foo]: /url1
+.
+<p><a href="/url1">foo</a>(not a link)</p>
+````````````````````````````````
+
+In the following case `[bar][baz]` is parsed as a reference,
+`[foo]` as normal text:
+
+```````````````````````````````` example
+[foo][bar][baz]
+
+[baz]: /url
+.
+<p>[foo]<a href="/url">bar</a></p>
+````````````````````````````````
+
+
+Here, though, `[foo][bar]` is parsed as a reference, since
+`[bar]` is defined:
+
+```````````````````````````````` example
+[foo][bar][baz]
+
+[baz]: /url1
+[bar]: /url2
+.
+<p><a href="/url2">foo</a><a href="/url1">baz</a></p>
+````````````````````````````````
+
+
+Here `[foo]` is not parsed as a shortcut reference, because it
+is followed by a link label (even though `[bar]` is not defined):
+
+```````````````````````````````` example
+[foo][bar][baz]
+
+[baz]: /url1
+[foo]: /url2
+.
+<p>[foo]<a href="/url1">bar</a></p>
+````````````````````````````````
+
+
+
+## Images
+
+Syntax for images is like the syntax for links, with one
+difference. Instead of [link text], we have an
+[image description](@).  The rules for this are the
+same as for [link text], except that (a) an
+image description starts with `![` rather than `[`, and
+(b) an image description may contain links.
+An image description has inline elements
+as its contents.  When an image is rendered to HTML,
+this is standardly used as the image's `alt` attribute.
+
+```````````````````````````````` example
+![foo](/url "title")
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo *bar*]
+
+[foo *bar*]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo bar" title="train &amp; tracks" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo ![bar](/url)](/url2)
+.
+<p><img src="/url2" alt="foo bar" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo [bar](/url)](/url2)
+.
+<p><img src="/url2" alt="foo bar" /></p>
+````````````````````````````````
+
+
+Though this spec is concerned with parsing, not rendering, it is
+recommended that in rendering to HTML, only the plain string content
+of the [image description] be used.  Note that in
+the above example, the alt attribute's value is `foo bar`, not `foo
+[bar](/url)` or `foo <a href="/url">bar</a>`.  Only the plain string
+content is rendered, without formatting.
+
+```````````````````````````````` example
+![foo *bar*][]
+
+[foo *bar*]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo bar" title="train &amp; tracks" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo *bar*][foobar]
+
+[FOOBAR]: train.jpg "train & tracks"
+.
+<p><img src="train.jpg" alt="foo bar" title="train &amp; tracks" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo](train.jpg)
+.
+<p><img src="train.jpg" alt="foo" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+My ![foo bar](/path/to/train.jpg  "title"   )
+.
+<p>My <img src="/path/to/train.jpg" alt="foo bar" title="title" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo](<url>)
+.
+<p><img src="url" alt="foo" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![](/url)
+.
+<p><img src="/url" alt="" /></p>
+````````````````````````````````
+
+
+Reference-style:
+
+```````````````````````````````` example
+![foo][bar]
+
+[bar]: /url
+.
+<p><img src="/url" alt="foo" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![foo][bar]
+
+[BAR]: /url
+.
+<p><img src="/url" alt="foo" /></p>
+````````````````````````````````
+
+
+Collapsed:
+
+```````````````````````````````` example
+![foo][]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![*foo* bar][]
+
+[*foo* bar]: /url "title"
+.
+<p><img src="/url" alt="foo bar" title="title" /></p>
+````````````````````````````````
+
+
+The labels are case-insensitive:
+
+```````````````````````````````` example
+![Foo][]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="Foo" title="title" /></p>
+````````````````````````````````
+
+
+As with reference links, spaces, tabs, and line endings are not allowed
+between the two sets of brackets:
+
+```````````````````````````````` example
+![foo] 
+[]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" />
+[]</p>
+````````````````````````````````
+
+
+Shortcut:
+
+```````````````````````````````` example
+![foo]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="foo" title="title" /></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+![*foo* bar]
+
+[*foo* bar]: /url "title"
+.
+<p><img src="/url" alt="foo bar" title="title" /></p>
+````````````````````````````````
+
+
+Note that link labels cannot contain unescaped brackets:
+
+```````````````````````````````` example
+![[foo]]
+
+[[foo]]: /url "title"
+.
+<p>![[foo]]</p>
+<p>[[foo]]: /url &quot;title&quot;</p>
+````````````````````````````````
+
+
+The link labels are case-insensitive:
+
+```````````````````````````````` example
+![Foo]
+
+[foo]: /url "title"
+.
+<p><img src="/url" alt="Foo" title="title" /></p>
+````````````````````````````````
+
+
+If you just want a literal `!` followed by bracketed text, you can
+backslash-escape the opening `[`:
+
+```````````````````````````````` example
+!\[foo]
+
+[foo]: /url "title"
+.
+<p>![foo]</p>
+````````````````````````````````
+
+
+If you want a link after a literal `!`, backslash-escape the
+`!`:
+
+```````````````````````````````` example
+\![foo]
+
+[foo]: /url "title"
+.
+<p>!<a href="/url" title="title">foo</a></p>
+````````````````````````````````
+
+
+## Autolinks
+
+[Autolink](@)s are absolute URIs and email addresses inside
+`<` and `>`. They are parsed as links, with the URL or email address
+as the link label.
+
+A [URI autolink](@) consists of `<`, followed by an
+[absolute URI] followed by `>`.  It is parsed as
+a link to the URI, with the URI as the link's label.
+
+An [absolute URI](@),
+for these purposes, consists of a [scheme] followed by a colon (`:`)
+followed by zero or more characters other than [ASCII control
+characters][ASCII control character], [space], `<`, and `>`.
+If the URI includes these characters, they must be percent-encoded
+(e.g. `%20` for a space).
+
+For purposes of this spec, a [scheme](@) is any sequence
+of 2--32 characters beginning with an ASCII letter and followed
+by any combination of ASCII letters, digits, or the symbols plus
+("+"), period ("."), or hyphen ("-").
+
+Here are some valid autolinks:
+
+```````````````````````````````` example
+<http://foo.bar.baz>
+.
+<p><a href="http://foo.bar.baz">http://foo.bar.baz</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<http://foo.bar.baz/test?q=hello&id=22&boolean>
+.
+<p><a href="http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean">http://foo.bar.baz/test?q=hello&amp;id=22&amp;boolean</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<irc://foo.bar:2233/baz>
+.
+<p><a href="irc://foo.bar:2233/baz">irc://foo.bar:2233/baz</a></p>
+````````````````````````````````
+
+
+Uppercase is also fine:
+
+```````````````````````````````` example
+<MAILTO:FOO@BAR.BAZ>
+.
+<p><a href="MAILTO:FOO@BAR.BAZ">MAILTO:FOO@BAR.BAZ</a></p>
+````````````````````````````````
+
+
+Note that many strings that count as [absolute URIs] for
+purposes of this spec are not valid URIs, because their
+schemes are not registered or because of other problems
+with their syntax:
+
+```````````````````````````````` example
+<a+b+c:d>
+.
+<p><a href="a+b+c:d">a+b+c:d</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<made-up-scheme://foo,bar>
+.
+<p><a href="made-up-scheme://foo,bar">made-up-scheme://foo,bar</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<http://../>
+.
+<p><a href="http://../">http://../</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<localhost:5001/foo>
+.
+<p><a href="localhost:5001/foo">localhost:5001/foo</a></p>
+````````````````````````````````
+
+
+Spaces are not allowed in autolinks:
+
+```````````````````````````````` example
+<http://foo.bar/baz bim>
+.
+<p>&lt;http://foo.bar/baz bim&gt;</p>
+````````````````````````````````
+
+
+Backslash-escapes do not work inside autolinks:
+
+```````````````````````````````` example
+<http://example.com/\[\>
+.
+<p><a href="http://example.com/%5C%5B%5C">http://example.com/\[\</a></p>
+````````````````````````````````
+
+
+An [email autolink](@)
+consists of `<`, followed by an [email address],
+followed by `>`.  The link's label is the email address,
+and the URL is `mailto:` followed by the email address.
+
+An [email address](@),
+for these purposes, is anything that matches
+the [non-normative regex from the HTML5
+spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)):
+
+    /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
+    (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
+
+Examples of email autolinks:
+
+```````````````````````````````` example
+<foo@bar.example.com>
+.
+<p><a href="mailto:foo@bar.example.com">foo@bar.example.com</a></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<foo+special@Bar.baz-bar0.com>
+.
+<p><a href="mailto:foo+special@Bar.baz-bar0.com">foo+special@Bar.baz-bar0.com</a></p>
+````````````````````````````````
+
+
+Backslash-escapes do not work inside email autolinks:
+
+```````````````````````````````` example
+<foo\+@bar.example.com>
+.
+<p>&lt;foo+@bar.example.com&gt;</p>
+````````````````````````````````
+
+
+These are not autolinks:
+
+```````````````````````````````` example
+<>
+.
+<p>&lt;&gt;</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+< http://foo.bar >
+.
+<p>&lt; http://foo.bar &gt;</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<m:abc>
+.
+<p>&lt;m:abc&gt;</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<foo.bar.baz>
+.
+<p>&lt;foo.bar.baz&gt;</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+http://example.com
+.
+<p>http://example.com</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo@bar.example.com
+.
+<p>foo@bar.example.com</p>
+````````````````````````````````
+
+
+## Raw HTML
+
+Text between `<` and `>` that looks like an HTML tag is parsed as a
+raw HTML tag and will be rendered in HTML without escaping.
+Tag and attribute names are not limited to current HTML tags,
+so custom tags (and even, say, DocBook tags) may be used.
+
+Here is the grammar for tags:
+
+A [tag name](@) consists of an ASCII letter
+followed by zero or more ASCII letters, digits, or
+hyphens (`-`).
+
+An [attribute](@) consists of spaces, tabs, and up to one line ending,
+an [attribute name], and an optional
+[attribute value specification].
+
+An [attribute name](@)
+consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII
+letters, digits, `_`, `.`, `:`, or `-`.  (Note:  This is the XML
+specification restricted to ASCII.  HTML5 is laxer.)
+
+An [attribute value specification](@)
+consists of optional spaces, tabs, and up to one line ending,
+a `=` character, optional spaces, tabs, and up to one line ending,
+and an [attribute value].
+
+An [attribute value](@)
+consists of an [unquoted attribute value],
+a [single-quoted attribute value], or a [double-quoted attribute value].
+
+An [unquoted attribute value](@)
+is a nonempty string of characters not
+including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``.
+
+A [single-quoted attribute value](@)
+consists of `'`, zero or more
+characters not including `'`, and a final `'`.
+
+A [double-quoted attribute value](@)
+consists of `"`, zero or more
+characters not including `"`, and a final `"`.
+
+An [open tag](@) consists of a `<` character, a [tag name],
+zero or more [attributes], optional spaces, tabs, and up to one line ending,
+an optional `/` character, and a `>` character.
+
+A [closing tag](@) consists of the string `</`, a
+[tag name], optional spaces, tabs, and up to one line ending, and the character
+`>`.
+
+An [HTML comment](@) consists of `<!--` + *text* + `-->`,
+where *text* does not start with `>` or `->`, does not end with `-`,
+and does not contain `--`.  (See the
+[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).)
+
+A [processing instruction](@)
+consists of the string `<?`, a string
+of characters not including the string `?>`, and the string
+`?>`.
+
+A [declaration](@) consists of the string `<!`, an ASCII letter, zero or more
+characters not including the character `>`, and the character `>`.
+
+A [CDATA section](@) consists of
+the string `<![CDATA[`, a string of characters not including the string
+`]]>`, and the string `]]>`.
+
+An [HTML tag](@) consists of an [open tag], a [closing tag],
+an [HTML comment], a [processing instruction], a [declaration],
+or a [CDATA section].
+
+Here are some simple open tags:
+
+```````````````````````````````` example
+<a><bab><c2c>
+.
+<p><a><bab><c2c></p>
+````````````````````````````````
+
+
+Empty elements:
+
+```````````````````````````````` example
+<a/><b2/>
+.
+<p><a/><b2/></p>
+````````````````````````````````
+
+
+Whitespace is allowed:
+
+```````````````````````````````` example
+<a  /><b2
+data="foo" >
+.
+<p><a  /><b2
+data="foo" ></p>
+````````````````````````````````
+
+
+With attributes:
+
+```````````````````````````````` example
+<a foo="bar" bam = 'baz <em>"</em>'
+_boolean zoop:33=zoop:33 />
+.
+<p><a foo="bar" bam = 'baz <em>"</em>'
+_boolean zoop:33=zoop:33 /></p>
+````````````````````````````````
+
+
+Custom tag names can be used:
+
+```````````````````````````````` example
+Foo <responsive-image src="foo.jpg" />
+.
+<p>Foo <responsive-image src="foo.jpg" /></p>
+````````````````````````````````
+
+
+Illegal tag names, not parsed as HTML:
+
+```````````````````````````````` example
+<33> <__>
+.
+<p>&lt;33&gt; &lt;__&gt;</p>
+````````````````````````````````
+
+
+Illegal attribute names:
+
+```````````````````````````````` example
+<a h*#ref="hi">
+.
+<p>&lt;a h*#ref=&quot;hi&quot;&gt;</p>
+````````````````````````````````
+
+
+Illegal attribute values:
+
+```````````````````````````````` example
+<a href="hi'> <a href=hi'>
+.
+<p>&lt;a href=&quot;hi'&gt; &lt;a href=hi'&gt;</p>
+````````````````````````````````
+
+
+Illegal whitespace:
+
+```````````````````````````````` example
+< a><
+foo><bar/ >
+<foo bar=baz
+bim!bop />
+.
+<p>&lt; a&gt;&lt;
+foo&gt;&lt;bar/ &gt;
+&lt;foo bar=baz
+bim!bop /&gt;</p>
+````````````````````````````````
+
+
+Missing whitespace:
+
+```````````````````````````````` example
+<a href='bar'title=title>
+.
+<p>&lt;a href='bar'title=title&gt;</p>
+````````````````````````````````
+
+
+Closing tags:
+
+```````````````````````````````` example
+</a></foo >
+.
+<p></a></foo ></p>
+````````````````````````````````
+
+
+Illegal attributes in closing tag:
+
+```````````````````````````````` example
+</a href="foo">
+.
+<p>&lt;/a href=&quot;foo&quot;&gt;</p>
+````````````````````````````````
+
+
+Comments:
+
+```````````````````````````````` example
+foo <!-- this is a
+comment - with hyphen -->
+.
+<p>foo <!-- this is a
+comment - with hyphen --></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo <!-- not a comment -- two hyphens -->
+.
+<p>foo &lt;!-- not a comment -- two hyphens --&gt;</p>
+````````````````````````````````
+
+
+Not comments:
+
+```````````````````````````````` example
+foo <!--> foo -->
+
+foo <!-- foo--->
+.
+<p>foo &lt;!--&gt; foo --&gt;</p>
+<p>foo &lt;!-- foo---&gt;</p>
+````````````````````````````````
+
+
+Processing instructions:
+
+```````````````````````````````` example
+foo <?php echo $a; ?>
+.
+<p>foo <?php echo $a; ?></p>
+````````````````````````````````
+
+
+Declarations:
+
+```````````````````````````````` example
+foo <!ELEMENT br EMPTY>
+.
+<p>foo <!ELEMENT br EMPTY></p>
+````````````````````````````````
+
+
+CDATA sections:
+
+```````````````````````````````` example
+foo <![CDATA[>&<]]>
+.
+<p>foo <![CDATA[>&<]]></p>
+````````````````````````````````
+
+
+Entity and numeric character references are preserved in HTML
+attributes:
+
+```````````````````````````````` example
+foo <a href="&ouml;">
+.
+<p>foo <a href="&ouml;"></p>
+````````````````````````````````
+
+
+Backslash escapes do not work in HTML attributes:
+
+```````````````````````````````` example
+foo <a href="\*">
+.
+<p>foo <a href="\*"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<a href="\"">
+.
+<p>&lt;a href=&quot;&quot;&quot;&gt;</p>
+````````````````````````````````
+
+
+## Hard line breaks
+
+A line ending (not in a code span or HTML tag) that is preceded
+by two or more spaces and does not occur at the end of a block
+is parsed as a [hard line break](@) (rendered
+in HTML as a `<br />` tag):
+
+```````````````````````````````` example
+foo  
+baz
+.
+<p>foo<br />
+baz</p>
+````````````````````````````````
+
+
+For a more visible alternative, a backslash before the
+[line ending] may be used instead of two or more spaces:
+
+```````````````````````````````` example
+foo\
+baz
+.
+<p>foo<br />
+baz</p>
+````````````````````````````````
+
+
+More than two spaces can be used:
+
+```````````````````````````````` example
+foo       
+baz
+.
+<p>foo<br />
+baz</p>
+````````````````````````````````
+
+
+Leading spaces at the beginning of the next line are ignored:
+
+```````````````````````````````` example
+foo  
+     bar
+.
+<p>foo<br />
+bar</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo\
+     bar
+.
+<p>foo<br />
+bar</p>
+````````````````````````````````
+
+
+Hard line breaks can occur inside emphasis, links, and other constructs
+that allow inline content:
+
+```````````````````````````````` example
+*foo  
+bar*
+.
+<p><em>foo<br />
+bar</em></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+*foo\
+bar*
+.
+<p><em>foo<br />
+bar</em></p>
+````````````````````````````````
+
+
+Hard line breaks do not occur inside code spans
+
+```````````````````````````````` example
+`code  
+span`
+.
+<p><code>code   span</code></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+`code\
+span`
+.
+<p><code>code\ span</code></p>
+````````````````````````````````
+
+
+or HTML tags:
+
+```````````````````````````````` example
+<a href="foo  
+bar">
+.
+<p><a href="foo  
+bar"></p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+<a href="foo\
+bar">
+.
+<p><a href="foo\
+bar"></p>
+````````````````````````````````
+
+
+Hard line breaks are for separating inline content within a block.
+Neither syntax for hard line breaks works at the end of a paragraph or
+other block element:
+
+```````````````````````````````` example
+foo\
+.
+<p>foo\</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+foo  
+.
+<p>foo</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+### foo\
+.
+<h3>foo\</h3>
+````````````````````````````````
+
+
+```````````````````````````````` example
+### foo  
+.
+<h3>foo</h3>
+````````````````````````````````
+
+
+## Soft line breaks
+
+A regular line ending (not in a code span or HTML tag) that is not
+preceded by two or more spaces or a backslash is parsed as a
+[softbreak](@).  (A soft line break may be rendered in HTML either as a
+[line ending] or as a space. The result will be the same in
+browsers. In the examples here, a [line ending] will be used.)
+
+```````````````````````````````` example
+foo
+baz
+.
+<p>foo
+baz</p>
+````````````````````````````````
+
+
+Spaces at the end of the line and beginning of the next line are
+removed:
+
+```````````````````````````````` example
+foo 
+ baz
+.
+<p>foo
+baz</p>
+````````````````````````````````
+
+
+A conforming parser may render a soft line break in HTML either as a
+line ending or as a space.
+
+A renderer may also provide an option to render soft line breaks
+as hard line breaks.
+
+## Textual content
+
+Any characters not given an interpretation by the above rules will
+be parsed as plain textual content.
+
+```````````````````````````````` example
+hello $.;'there
+.
+<p>hello $.;'there</p>
+````````````````````````````````
+
+
+```````````````````````````````` example
+Foo χρῆν
+.
+<p>Foo χρῆν</p>
+````````````````````````````````
+
+
+Internal spaces are preserved verbatim:
+
+```````````````````````````````` example
+Multiple     spaces
+.
+<p>Multiple     spaces</p>
+````````````````````````````````
+
+
+<!-- END TESTS -->
+
+# Appendix: A parsing strategy
+
+In this appendix we describe some features of the parsing strategy
+used in the CommonMark reference implementations.
+
+## Overview
+
+Parsing has two phases:
+
+1. In the first phase, lines of input are consumed and the block
+structure of the document---its division into paragraphs, block quotes,
+list items, and so on---is constructed.  Text is assigned to these
+blocks but not parsed. Link reference definitions are parsed and a
+map of links is constructed.
+
+2. In the second phase, the raw text contents of paragraphs and headings
+are parsed into sequences of Markdown inline elements (strings,
+code spans, links, emphasis, and so on), using the map of link
+references constructed in phase 1.
+
+At each point in processing, the document is represented as a tree of
+**blocks**.  The root of the tree is a `document` block.  The `document`
+may have any number of other blocks as **children**.  These children
+may, in turn, have other blocks as children.  The last child of a block
+is normally considered **open**, meaning that subsequent lines of input
+can alter its contents.  (Blocks that are not open are **closed**.)
+Here, for example, is a possible document tree, with the open blocks
+marked by arrows:
+
+``` tree
+-> document
+  -> block_quote
+       paragraph
+         "Lorem ipsum dolor\nsit amet."
+    -> list (type=bullet tight=true bullet_char=-)
+         list_item
+           paragraph
+             "Qui *quodsi iracundia*"
+      -> list_item
+        -> paragraph
+             "aliquando id"
+```
+
+## Phase 1: block structure
+
+Each line that is processed has an effect on this tree.  The line is
+analyzed and, depending on its contents, the document may be altered
+in one or more of the following ways:
+
+1. One or more open blocks may be closed.
+2. One or more new blocks may be created as children of the
+   last open block.
+3. Text may be added to the last (deepest) open block remaining
+   on the tree.
+
+Once a line has been incorporated into the tree in this way,
+it can be discarded, so input can be read in a stream.
+
+For each line, we follow this procedure:
+
+1. First we iterate through the open blocks, starting with the
+root document, and descending through last children down to the last
+open block.  Each block imposes a condition that the line must satisfy
+if the block is to remain open.  For example, a block quote requires a
+`>` character.  A paragraph requires a non-blank line.
+In this phase we may match all or just some of the open
+blocks.  But we cannot close unmatched blocks yet, because we may have a
+[lazy continuation line].
+
+2.  Next, after consuming the continuation markers for existing
+blocks, we look for new block starts (e.g. `>` for a block quote).
+If we encounter a new block start, we close any blocks unmatched
+in step 1 before creating the new block as a child of the last
+matched container block.
+
+3.  Finally, we look at the remainder of the line (after block
+markers like `>`, list markers, and indentation have been consumed).
+This is text that can be incorporated into the last open
+block (a paragraph, code block, heading, or raw HTML).
+
+Setext headings are formed when we see a line of a paragraph
+that is a [setext heading underline].
+
+Link reference definitions are detected when a paragraph is closed;
+the accumulated text lines are parsed to see if they begin with
+one or more link reference definitions.  Any remainder becomes a
+normal paragraph.
+
+We can see how this works by considering how the tree above is
+generated by four lines of Markdown:
+
+``` markdown
+> Lorem ipsum dolor
+sit amet.
+> - Qui *quodsi iracundia*
+> - aliquando id
+```
+
+At the outset, our document model is just
+
+``` tree
+-> document
+```
+
+The first line of our text,
+
+``` markdown
+> Lorem ipsum dolor
+```
+
+causes a `block_quote` block to be created as a child of our
+open `document` block, and a `paragraph` block as a child of
+the `block_quote`.  Then the text is added to the last open
+block, the `paragraph`:
+
+``` tree
+-> document
+  -> block_quote
+    -> paragraph
+         "Lorem ipsum dolor"
+```
+
+The next line,
+
+``` markdown
+sit amet.
+```
+
+is a "lazy continuation" of the open `paragraph`, so it gets added
+to the paragraph's text:
+
+``` tree
+-> document
+  -> block_quote
+    -> paragraph
+         "Lorem ipsum dolor\nsit amet."
+```
+
+The third line,
+
+``` markdown
+> - Qui *quodsi iracundia*
+```
+
+causes the `paragraph` block to be closed, and a new `list` block
+opened as a child of the `block_quote`.  A `list_item` is also
+added as a child of the `list`, and a `paragraph` as a child of
+the `list_item`.  The text is then added to the new `paragraph`:
+
+``` tree
+-> document
+  -> block_quote
+       paragraph
+         "Lorem ipsum dolor\nsit amet."
+    -> list (type=bullet tight=true bullet_char=-)
+      -> list_item
+        -> paragraph
+             "Qui *quodsi iracundia*"
+```
+
+The fourth line,
+
+``` markdown
+> - aliquando id
+```
+
+causes the `list_item` (and its child the `paragraph`) to be closed,
+and a new `list_item` opened up as child of the `list`.  A `paragraph`
+is added as a child of the new `list_item`, to contain the text.
+We thus obtain the final tree:
+
+``` tree
+-> document
+  -> block_quote
+       paragraph
+         "Lorem ipsum dolor\nsit amet."
+    -> list (type=bullet tight=true bullet_char=-)
+         list_item
+           paragraph
+             "Qui *quodsi iracundia*"
+      -> list_item
+        -> paragraph
+             "aliquando id"
+```
+
+## Phase 2: inline structure
+
+Once all of the input has been parsed, all open blocks are closed.
+
+We then "walk the tree," visiting every node, and parse raw
+string contents of paragraphs and headings as inlines.  At this
+point we have seen all the link reference definitions, so we can
+resolve reference links as we go.
+
+``` tree
+document
+  block_quote
+    paragraph
+      str "Lorem ipsum dolor"
+      softbreak
+      str "sit amet."
+    list (type=bullet tight=true bullet_char=-)
+      list_item
+        paragraph
+          str "Qui "
+          emph
+            str "quodsi iracundia"
+      list_item
+        paragraph
+          str "aliquando id"
+```
+
+Notice how the [line ending] in the first paragraph has
+been parsed as a `softbreak`, and the asterisks in the first list item
+have become an `emph`.
+
+### An algorithm for parsing nested emphasis and links
+
+By far the trickiest part of inline parsing is handling emphasis,
+strong emphasis, links, and images.  This is done using the following
+algorithm.
+
+When we're parsing inlines and we hit either
+
+- a run of `*` or `_` characters, or
+- a `[` or `![`
+
+we insert a text node with these symbols as its literal content, and we
+add a pointer to this text node to the [delimiter stack](@).
+
+The [delimiter stack] is a doubly linked list.  Each
+element contains a pointer to a text node, plus information about
+
+- the type of delimiter (`[`, `![`, `*`, `_`)
+- the number of delimiters,
+- whether the delimiter is "active" (all are active to start), and
+- whether the delimiter is a potential opener, a potential closer,
+  or both (which depends on what sort of characters precede
+  and follow the delimiters).
+
+When we hit a `]` character, we call the *look for link or image*
+procedure (see below).
+
+When we hit the end of the input, we call the *process emphasis*
+procedure (see below), with `stack_bottom` = NULL.
+
+#### *look for link or image*
+
+Starting at the top of the delimiter stack, we look backwards
+through the stack for an opening `[` or `![` delimiter.
+
+- If we don't find one, we return a literal text node `]`.
+
+- If we do find one, but it's not *active*, we remove the inactive
+  delimiter from the stack, and return a literal text node `]`.
+
+- If we find one and it's active, then we parse ahead to see if
+  we have an inline link/image, reference link/image, compact reference
+  link/image, or shortcut reference link/image.
+
+  + If we don't, then we remove the opening delimiter from the
+    delimiter stack and return a literal text node `]`.
+
+  + If we do, then
+
+    * We return a link or image node whose children are the inlines
+      after the text node pointed to by the opening delimiter.
+
+    * We run *process emphasis* on these inlines, with the `[` opener
+      as `stack_bottom`.
+
+    * We remove the opening delimiter.
+
+    * If we have a link (and not an image), we also set all
+      `[` delimiters before the opening delimiter to *inactive*.  (This
+      will prevent us from getting links within links.)
+
+#### *process emphasis*
+
+Parameter `stack_bottom` sets a lower bound to how far we
+descend in the [delimiter stack].  If it is NULL, we can
+go all the way to the bottom.  Otherwise, we stop before
+visiting `stack_bottom`.
+
+Let `current_position` point to the element on the [delimiter stack]
+just above `stack_bottom` (or the first element if `stack_bottom`
+is NULL).
+
+We keep track of the `openers_bottom` for each delimiter
+type (`*`, `_`), indexed to the length of the closing delimiter run
+(modulo 3) and to whether the closing delimiter can also be an
+opener.  Initialize this to `stack_bottom`.
+
+Then we repeat the following until we run out of potential
+closers:
+
+- Move `current_position` forward in the delimiter stack (if needed)
+  until we find the first potential closer with delimiter `*` or `_`.
+  (This will be the potential closer closest
+  to the beginning of the input -- the first one in parse order.)
+
+- Now, look back in the stack (staying above `stack_bottom` and
+  the `openers_bottom` for this delimiter type) for the
+  first matching potential opener ("matching" means same delimiter).
+
+- If one is found:
+
+  + Figure out whether we have emphasis or strong emphasis:
+    if both closer and opener spans have length >= 2, we have
+    strong, otherwise regular.
+
+  + Insert an emph or strong emph node accordingly, after
+    the text node corresponding to the opener.
+
+  + Remove any delimiters between the opener and closer from
+    the delimiter stack.
+
+  + Remove 1 (for regular emph) or 2 (for strong emph) delimiters
+    from the opening and closing text nodes.  If they become empty
+    as a result, remove them and remove the corresponding element
+    of the delimiter stack.  If the closing node is removed, reset
+    `current_position` to the next element in the stack.
+
+- If none is found:
+
+  + Set `openers_bottom` to the element before `current_position`.
+    (We know that there are no openers for this kind of closer up to and
+    including this point, so this puts a lower bound on future searches.)
+
+  + If the closer at `current_position` is not a potential opener,
+    remove it from the delimiter stack (since we know it can't
+    be a closer either).
+
+  + Advance `current_position` to the next element in the stack.
+
+After we're done, we remove all delimiters above `stack_bottom` from the
+delimiter stack.

+ 144 - 0
markdown.mod/md4c/test/spec_tests.py

@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import sys
+from difflib import unified_diff
+import argparse
+import re
+import json
+from cmark import CMark
+from normalize import normalize_html
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
+            help='path to spec')
+    parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
+            default=None, help='limit to sections matching regex pattern')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    parser.add_argument('--no-normalize', dest='normalize',
+            action='store_const', const=False, default=True,
+            help='do not normalize HTML')
+    parser.add_argument('-d', '--dump-tests', dest='dump_tests',
+            action='store_const', const=True, default=False,
+            help='dump tests in JSON format')
+    parser.add_argument('--debug-normalization', dest='debug_normalization',
+            action='store_const', const=True,
+            default=False, help='filter stdin through normalizer for testing')
+    parser.add_argument('-n', '--number', type=int, default=None,
+            help='only consider the test with the given number')
+    args = parser.parse_args(sys.argv[1:])
+
+def out(str):
+    sys.stdout.buffer.write(str.encode('utf-8')) 
+
+def print_test_header(headertext, example_number, start_line, end_line):
+    out("Example %d (lines %d-%d) %s\n" % (example_number,start_line,end_line,headertext))
+
+def do_test(test, normalize, result_counts):
+    [retcode, actual_html, err] = cmark.to_html(test['markdown'])
+    if retcode == 0:
+        expected_html = test['html']
+        unicode_error = None
+        if normalize:
+            try:
+                passed = normalize_html(actual_html) == normalize_html(expected_html)
+            except UnicodeDecodeError as e:
+                unicode_error = e
+                passed = False
+        else:
+            passed = actual_html == expected_html
+        if passed:
+            result_counts['pass'] += 1
+        else:
+            print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
+            out(test['markdown'] + '\n')
+            if unicode_error:
+                out("Unicode error: " + str(unicode_error) + '\n')
+                out("Expected: " + repr(expected_html) + '\n')
+                out("Got:      " + repr(actual_html) + '\n')
+            else:
+                expected_html_lines = expected_html.splitlines(True)
+                actual_html_lines = actual_html.splitlines(True)
+                for diffline in unified_diff(expected_html_lines, actual_html_lines,
+                                "expected HTML", "actual HTML"):
+                    out(diffline)
+            out('\n')
+            result_counts['fail'] += 1
+    else:
+        print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
+        out("program returned error code %d\n" % retcode)
+        sys.stdout.buffer.write(err)
+        result_counts['error'] += 1
+
+def get_tests(specfile):
+    line_number = 0
+    start_line = 0
+    end_line = 0
+    example_number = 0
+    markdown_lines = []
+    html_lines = []
+    state = 0  # 0 regular text, 1 markdown example, 2 html output
+    headertext = ''
+    tests = []
+
+    header_re = re.compile('#+ ')
+
+    with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
+        for line in specf:
+            line_number = line_number + 1
+            l = line.strip()
+            #if l == "`" * 32 + " example":
+            if re.match("`{32} example( [a-z]{1,})?", l):
+                state = 1
+            elif state == 2 and l == "`" * 32:
+                state = 0
+                example_number = example_number + 1
+                end_line = line_number
+                tests.append({
+                    "markdown":''.join(markdown_lines).replace('→',"\t"),
+                    "html":''.join(html_lines).replace('→',"\t"),
+                    "example": example_number,
+                    "start_line": start_line,
+                    "end_line": end_line,
+                    "section": headertext})
+                start_line = 0
+                markdown_lines = []
+                html_lines = []
+            elif l == ".":
+                state = 2
+            elif state == 1:
+                if start_line == 0:
+                    start_line = line_number - 1
+                markdown_lines.append(line)
+            elif state == 2:
+                html_lines.append(line)
+            elif state == 0 and re.match(header_re, line):
+                headertext = header_re.sub('', line).strip()
+    return tests
+
+if __name__ == "__main__":
+    if args.debug_normalization:
+        out(normalize_html(sys.stdin.read()))
+        exit(0)
+
+    all_tests = get_tests(args.spec)
+    if args.pattern:
+        pattern_re = re.compile(args.pattern, re.IGNORECASE)
+    else:
+        pattern_re = re.compile('.')
+    tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ]
+    if args.dump_tests:
+        out(json.dumps(tests, ensure_ascii=False, indent=2))
+        exit(0)
+    else:
+        skipped = len(all_tests) - len(tests)
+        cmark = CMark(prog=args.program, library_dir=args.library_dir)
+        result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
+        for test in tests:
+            do_test(test, args.normalize, result_counts)
+        out("{pass} passed, {fail} failed, {error} errored, {skip} skipped\n".format(**result_counts))
+        exit(result_counts['fail'] + result_counts['error'])

+ 75 - 0
markdown.mod/md4c/test/strikethrough.txt

@@ -0,0 +1,75 @@
+
+# Strike-Through
+
+With the flag `MD_FLAG_STRIKETHROUGH`, MD4C enables extension for recognition
+of strike-through spans.
+
+Strike-through text is any text wrapped in one or two tildes (`~`).
+
+```````````````````````````````` example
+~Hi~ Hello, world!
+.
+<p><del>Hi</del> Hello, world!</p>
+````````````````````````````````
+
+If the length of the opener and closer doesn't match, the strike-through is
+not recognized.
+
+```````````````````````````````` example
+This ~text~~ is curious.
+.
+<p>This ~text~~ is curious.</p>
+````````````````````````````````
+
+A tilde sequence that is too long won't be recognized:
+
+```````````````````````````````` example
+foo ~~~bar~~~
+.
+<p>foo ~~~bar~~~</p>
+````````````````````````````````
+
+Also note the markers cannot open a strike-through span if they are followed
+by whitespace; and similarly, they cannot close the span if they are
+preceded by whitespace:
+
+```````````````````````````````` example
+~foo ~bar
+.
+<p>~foo ~bar</p>
+````````````````````````````````
+
+
+As with regular emphasis delimiters, a new paragraph will cause the cessation
+of parsing a strike-through:
+
+```````````````````````````````` example
+This ~~has a
+
+new paragraph~~.
+.
+<p>This ~~has a</p>
+<p>new paragraph~~.</p>
+````````````````````````````````
+
+
+## GitHub Issues
+
+### [Issue 69](https://github.com/mity/md4c/issues/69)
+```````````````````````````````` example
+~`foo`~
+.
+<p><del><code>foo</code></del></p>
+````````````````````````````````
+
+```````````````````````````````` example
+~*foo*~
+.
+<p><del><em>foo</em></del></p>
+````````````````````````````````
+
+```````````````````````````````` example
+*~foo~*
+.
+<p><em><del>foo</del></em></p>
+````````````````````````````````

+ 357 - 0
markdown.mod/md4c/test/tables.txt

@@ -0,0 +1,357 @@
+
+# Tables
+
+With the flag `MD_FLAG_TABLES`, MD4C enables extension for recognition of
+tables.
+
+A basic example of a table with two columns and three rows (not counting
+the header) is as follows:
+
+```````````````````````````````` example
+| Column 1 | Column 2 |
+|----------|----------|
+| foo      | bar      |
+| baz      | qux      |
+| quux     | quuz     |
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+The leading and trailing pipe characters (`|`) on each line are optional:
+
+```````````````````````````````` example
+Column 1 | Column 2 |
+---------|--------- |
+foo      | bar      |
+baz      | qux      |
+quux     | quuz     |
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+```````````````````````````````` example
+| Column 1 | Column 2
+|----------|---------
+| foo      | bar
+| baz      | qux
+| quux     | quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+```````````````````````````````` example
+Column 1 | Column 2
+---------|---------
+foo      | bar
+baz      | qux
+quux     | quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+However, for a one-column table, at least one pipe has to be used in the table
+header underline, otherwise it would be parsed as a Setext heading followed by
+a paragraph.
+
+```````````````````````````````` example
+Column 1
+--------
+foo
+baz
+quux
+.
+<h2>Column 1</h2>
+<p>foo
+baz
+quux</p>
+````````````````````````````````
+
+Leading and trailing whitespace in a table cell is ignored and the columns do
+not need to be aligned.
+
+```````````````````````````````` example
+Column 1 |Column 2
+---|---
+foo | bar
+baz| qux
+quux|quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+The table cannot interrupt a paragraph.
+
+```````````````````````````````` example
+Lorem ipsum dolor sit amet.
+| Column 1 | Column 2
+| ---------|---------
+| foo      | bar
+| baz      | qux
+| quux     | quuz
+.
+<p>Lorem ipsum dolor sit amet.
+| Column 1 | Column 2
+| ---------|---------
+| foo      | bar
+| baz      | qux
+| quux     | quuz</p>
+````````````````````````````````
+
+Similarly, a paragraph cannot interrupt a table:
+
+```````````````````````````````` example
+Column 1 | Column 2
+---------|---------
+foo      | bar
+baz      | qux
+quux     | quuz
+Lorem ipsum dolor sit amet.
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+<tr><td>Lorem ipsum dolor sit amet.</td><td></td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+The first, the last or both the first and the last dash in each column
+underline can be replaced with a colon (`:`) to request left, right or middle
+alignment of the respective column:
+
+```````````````````````````````` example
+| Column 1 | Column 2 | Column 3 | Column 4 |
+|----------|:---------|:--------:|---------:|
+| default  | left     | center   | right    |
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th align="left">Column 2</th><th align="center">Column 3</th><th align="right">Column 4</th></tr>
+</thead>
+<tbody>
+<tr><td>default</td><td align="left">left</td><td align="center">center</td><td align="right">right</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+To include a literal pipe character in any cell, it has to be escaped.
+
+```````````````````````````````` example
+Column 1 | Column 2
+---------|---------
+foo      | bar
+baz      | qux \| xyzzy
+quux     | quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td>foo</td><td>bar</td></tr>
+<tr><td>baz</td><td>qux | xyzzy</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+The contents of each cell are parsed as inline text, which may contain any
+inline Markdown spans such as emphasis, strong emphasis, links, etc.
+
+```````````````````````````````` example
+Column 1 | Column 2
+---------|---------
+*foo*    | bar
+**baz**  | [qux]
+quux     | [quuz](/url2)
+
+[qux]: /url
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td><em>foo</em></td><td>bar</td></tr>
+<tr><td><strong>baz</strong></td><td><a href="/url">qux</a></td></tr>
+<tr><td>quux</td><td><a href="/url2">quuz</a></td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+However pipes which are inside a code span are not recognized as cell
+boundaries.
+
+```````````````````````````````` example
+Column 1 | Column 2
+---------|---------
+`foo     | bar`
+baz      | qux
+quux     | quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th>Column 2</th></tr>
+</thead>
+<tbody>
+<tr><td><code>foo     | bar</code></td><td></td></tr>
+<tr><td>baz</td><td>qux</td></tr>
+<tr><td>quux</td><td>quuz</td></tr>
+</tbody>
+</table>
+````````````````````````````````
+
+
+## GitHub Issues
+
+### [Issue 41](https://github.com/mity/md4c/issues/41)
+```````````````````````````````` example
+* x|x
+---|---
+.
+<ul>
+<li>x|x
+---|---</li>
+</ul>
+````````````````````````````````
+(Not a table, because the underline has wrong indentation and is not part of the
+list item.)
+
+```````````````````````````````` example
+* x|x
+  ---|---
+x|x
+.
+<ul>
+<li><table>
+<thead>
+<tr>
+<th>x</th>
+<th>x</th>
+</tr>
+</thead>
+</table>
+</li>
+</ul>
+<p>x|x</p>
+````````````````````````````````
+(Here the underline has the right indentation so the table is detected.
+But the last line is not part of it due to its indentation.)
+
+
+### [Issue 42](https://github.com/mity/md4c/issues/42)
+
+```````````````````````````````` example
+] http://x.x *x*
+
+|x|x|
+|---|---|
+|x|
+.
+<p>] http://x.x <em>x</em></p>
+<table>
+<thead>
+<tr>
+<th>x</th>
+<th>x</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>x</td>
+<td></td>
+</tr>
+</tbody>
+</table>
+````````````````````````````````
+
+
+### [Issue 104](https://github.com/mity/md4c/issues/104)
+
+```````````````````````````````` example
+A | B
+--- | ---
+[x](url)
+.
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><a href="url">x</a></td>
+<td></td>
+</tr>
+</tbody>
+</table>
+````````````````````````````````
+
+
+### [Issue 138](https://github.com/mity/md4c/issues/138)
+
+```````````````````````````````` example
+| abc | def |
+| --- | --- |
+.
+<table>
+<thead>
+<tr>
+<th>abc</th>
+<th>def</th>
+</tr>
+</thead>
+</table>
+````````````````````````````````

+ 117 - 0
markdown.mod/md4c/test/tasklists.txt

@@ -0,0 +1,117 @@
+
+# Tasklists
+
+With the flag `MD_FLAG_TASKLISTS`, MD4C enables extension for recognition of
+task lists.
+
+A basic task list may look as follows:
+
+```````````````````````````````` example
+ * [x] foo
+ * [X] bar
+ * [ ] baz
+.
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
+</ul>
+````````````````````````````````
+
+Task lists can also be in ordered lists:
+
+```````````````````````````````` example
+ 1. [x] foo
+ 2. [X] bar
+ 3. [ ] baz
+.
+<ol>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
+</ol>
+````````````````````````````````
+
+Task lists can also be nested in ordinary lists:
+
+```````````````````````````````` example
+ * xxx:
+   * [x] foo
+   * [x] bar
+   * [ ] baz
+ * yyy:
+   * [ ] qux
+   * [x] quux
+   * [ ] quuz
+.
+<ul>
+<li>xxx:
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
+</ul></li>
+<li>yyy:
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>qux</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>quux</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>quuz</li>
+</ul></li>
+</ul>
+````````````````````````````````
+
+Or in a parent task list:
+
+```````````````````````````````` example
+ 1. [x] xxx:
+    * [x] foo
+    * [x] bar
+    * [ ] baz
+ 2. [ ] yyy:
+    * [ ] qux
+    * [x] quux
+    * [ ] quuz
+.
+<ol>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>xxx:
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>foo</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>bar</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>baz</li>
+</ul></li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>yyy:
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>qux</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>quux</li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>quuz</li>
+</ul></li>
+</ol>
+````````````````````````````````
+
+Also, ordinary lists can be nested in the task lists.
+
+```````````````````````````````` example
+ * [x] xxx:
+   * foo
+   * bar
+   * baz
+ * [ ] yyy:
+   * qux
+   * quux
+   * quuz
+.
+<ul>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled checked>xxx:
+<ul>
+<li>foo</li>
+<li>bar</li>
+<li>baz</li>
+</ul></li>
+<li class="task-list-item"><input type="checkbox" class="task-list-item-checkbox" disabled>yyy:
+<ul>
+<li>qux</li>
+<li>quux</li>
+<li>quuz</li>
+</ul></li>
+</ul>
+````````````````````````````````

+ 85 - 0
markdown.mod/md4c/test/toc-mark.txt

@@ -0,0 +1,85 @@
+# Table of content mark
+
+The TOC mark allows you to place the TOC where you need it.
+Run the example with --toc=[[__TOC__]]
+
+```````````````````````````````` example
+# title
+# table of content
+[[__TOC__]]
+# some chapter
+.
+<h1 id="title">title</h1>
+<h1 id="table-of-content">table of content</h1>
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+<li><a href="#table-of-content">table of content</a></li>
+<li><a href="#some-chapter">some chapter</a></li>
+</ul>
+</nav>
+<h1 id="some-chapter">some chapter</h1>
+````````````````````````````````
+
+Only the first mark is replaced by the TOC
+
+ 
+
+```````````````````````````````` example
+# title 
+[[__TOC__]]
+[[__TOC__]]
+.
+<h1 id="title">title</h1>
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+</ul>
+</nav>
+<p>[[<strong>TOC</strong>]]</p>
+````````````````````````````````
+  
+The TOC mark must be at the start of a line or it is invalid:
+
+```````````````````````````````` example
+# title 
+invalid [[__TOC__]] mark 
+.
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+</ul>
+</nav>
+<h1 id="title">title</h1>
+<p>invalid [[<strong>TOC</strong>]] mark </p>
+
+````````````````````````````````
+
+But you can have space at start of a line:
+
+```````````````````````````````` example
+# title 
+  [[__TOC__]] mark 
+.
+<h1 id="title">title</h1>
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+</ul>
+</nav>
+````````````````````````````````
+  
+The text after the TOC mark is discarded:
+
+```````````````````````````````` example
+# title 
+[[__TOC__]] discarded text 
+.
+<h1 id="title">title</h1>
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+</ul>
+</nav>
+````````````````````````````````
+

+ 104 - 0
markdown.mod/md4c/test/toc.txt

@@ -0,0 +1,104 @@
+# Table of content
+
+With the option `--table-of-content`, MD4C enables the extension for output
+of a table of contents (TOC).
+
+A basic TOC may look as follows:
+
+```````````````````````````````` example
+# title
+.
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+</ul>
+</nav>
+<h1 id="title">title</h1>
+````````````````````````````````
+
+By default, the toc-depth is limited to headings up to level 3:
+
+```````````````````````````````` example
+# title level 1
+## title level 2
+### title level 3
+#### title level 4
+##### title level 5
+.
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title-level-1">title level 1</a></li>
+<ul>
+<li><a href="#title-level-2">title level 2</a></li>
+<ul>
+<li><a href="#title-level-3">title level 3</a></li>
+</ul>
+</ul>
+</ul>
+</nav>
+<h1 id="title-level-1">title level 1</h1>
+<h2 id="title-level-2">title level 2</h2>
+<h3 id="title-level-3">title level 3</h3>
+<h4 id="title-level-4">title level 4</h4>
+<h5 id="title-level-5">title level 5</h5>
+````````````````````````````````
+
+The TOC can skip some levels:
+
+```````````````````````````````` example
+### title level 3
+# title level 1
+## title level 2
+##### title level 5
+### title level 3 again
+.
+<nav id="TOC" role="doc-toc">
+<ul>
+<ul>
+<ul>
+<li><a href="#title-level-3">title level 3</a></li>
+</ul>
+</ul>
+<li><a href="#title-level-1">title level 1</a></li>
+<ul>
+<li><a href="#title-level-2">title level 2</a></li>
+<ul>
+<li><a href="#title-level-3-again">title level 3 again</a></li>
+</ul>
+</ul>
+</ul>
+</nav>
+<h3 id="title-level-3">title level 3</h3>
+<h1 id="title-level-1">title level 1</h1>
+<h2 id="title-level-2">title level 2</h2>
+<h5 id="title-level-5">title level 5</h5>
+<h3 id="title-level-3-again">title level 3 again</h3> 
+````````````````````````````````
+
+# Coverage
+
+Additional test to improve test coverage.
+
+This sample will output TOC with heading suffix numbers. 
+
+
+```````````````````````````````` example
+# title
+## title
+### title
+.
+<nav id="TOC" role="doc-toc">
+<ul>
+<li><a href="#title">title</a></li>
+<ul>
+<li><a href="#title-1">title</a></li>
+<ul>
+<li><a href="#title-2">title</a></li>
+</ul>
+</ul>
+</ul>
+</nav>
+<h1 id="title">title</h1>
+<h2 id="title-1">title</h2>
+<h3 id="title-2">title</h3>
+````````````````````````````````

+ 39 - 0
markdown.mod/md4c/test/underline.txt

@@ -0,0 +1,39 @@
+
+# Underline
+
+With the flag `MD_FLAG_UNDERLINE`, MD4C treats the underscore `_` as a mark
+denoting an underlined span rather than ordinary emphasis (or strong
+emphasis).
+
+```````````````````````````````` example
+_foo_
+.
+<p><u>foo</u></p>
+````````````````````````````````
+
+In sequences of multiple underscores, each single one translates into an
+underline span mark.
+
+```````````````````````````````` example
+___foo___
+.
+<p><u><u><u>foo</u></u></u></p>
+````````````````````````````````
+
+Intra-word underscores are not recognized as underline marks:
+
+```````````````````````````````` example
+foo_bar_baz
+.
+<p>foo_bar_baz</p>
+````````````````````````````````
+
+Also the parser follows the standard rules for when the underscore can
+or cannot open or close a span. Therefore there is no underline in the following
+example because no underscore can be seen as a closing mark.
+
+```````````````````````````````` example
+_foo _bar
+.
+<p>_foo _bar</p>
+````````````````````````````````

+ 232 - 0
markdown.mod/md4c/test/wiki-links.txt

@@ -0,0 +1,232 @@
+
+# Wiki Links
+
+With the flag `MD_FLAG_WIKILINKS`, MD4C recognizes wiki links.
+
+The simplest wiki-link is a wiki-link destination enclosed between `[[` and
+`]]`.
+
+```````````````````````````````` example
+[[foo]]
+.
+<p><x-wikilink data-target="foo">foo</x-wikilink></p>
+````````````````````````````````
+
+However, a wiki-link may contain an explicit label, delimited from the
+destination with `|`.
+
+```````````````````````````````` example
+[[foo|bar]]
+.
+<p><x-wikilink data-target="foo">bar</x-wikilink></p>
+````````````````````````````````
+
+A wiki-link destination cannot be empty.
+
+```````````````````````````````` example
+[[]]
+.
+<p>[[]]</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[[|foo]]
+.
+<p>[[|foo]]</p>
+````````````````````````````````
+
+
+The wiki-link destination cannot contain a new line.
+
+```````````````````````````````` example
+[[foo
+bar]]
+.
+<p>[[foo
+bar]]</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[[foo
+bar|baz]]
+.
+<p>[[foo
+bar|baz]]</p>
+````````````````````````````````
+
+The wiki-link destination is rendered verbatim; inline markup in it is not
+recognized.
+
+```````````````````````````````` example
+[[*foo*]]
+.
+<p><x-wikilink data-target="*foo*">*foo*</x-wikilink></p>
+````````````````````````````````
+
+```````````````````````````````` example
+[[foo|![bar](bar.jpg)]]
+.
+<p><x-wikilink data-target="foo"><img src="bar.jpg" alt="bar"></x-wikilink></p>
+````````````````````````````````
+
+With multiple `|` delimiters, only the first one is recognized and the other
+ones are part of the label.
+
+```````````````````````````````` example
+[[foo|bar|baz]]
+.
+<p><x-wikilink data-target="foo">bar|baz</x-wikilink></p>
+````````````````````````````````
+
+However, the delimiter `|` can be escaped with a backslash (`\`).
+
+```````````````````````````````` example
+[[foo\|bar|baz]]
+.
+<p><x-wikilink data-target="foo|bar">baz</x-wikilink></p>
+````````````````````````````````
+
+The label can contain inline elements.
+
+```````````````````````````````` example
+[[foo|*bar*]]
+.
+<p><x-wikilink data-target="foo"><em>bar</em></x-wikilink></p>
+````````````````````````````````
+
+Empty explicit label is the same as using the implicit label; i.e. the verbatim
+destination string is used as the label.
+
+```````````````````````````````` example
+[[foo|]]
+.
+<p><x-wikilink data-target="foo">foo</x-wikilink></p>
+````````````````````````````````
+
+The label can span multiple lines.
+
+```````````````````````````````` example
+[[foo|foo
+bar
+baz]]
+.
+<p><x-wikilink data-target="foo">foo
+bar
+baz</x-wikilink></p>
+````````````````````````````````
+
+Wiki-links have higher priority than links.
+
+```````````````````````````````` example
+[[foo]](foo.jpg)
+.
+<p><x-wikilink data-target="foo">foo</x-wikilink>(foo.jpg)</p>
+````````````````````````````````
+
+```````````````````````````````` example
+[foo]: /url
+
+[[foo]]
+.
+<p><x-wikilink data-target="foo">foo</x-wikilink></p>
+````````````````````````````````
+
+Wiki links can be inlined in tables.
+
+```````````````````````````````` example
+| A                | B   |
+|------------------|-----|
+| [[foo|*bar*]]    | baz |
+.
+<table>
+<thead>
+<tr>
+<th>A</th>
+<th>B</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><x-wikilink data-target="foo"><em>bar</em></x-wikilink></td>
+<td>baz</td>
+</tr>
+</tbody>
+</table>
+````````````````````````````````
+
+Wiki-links are not prioritized over images.
+
+```````````````````````````````` example
+![[foo]](foo.jpg)
+.
+<p><img src="foo.jpg" alt="[foo]"></p>
+````````````````````````````````
+
+Something that may look like a wiki-link at first, but turns out not to be,
+is recognized as a normal link.
+
+```````````````````````````````` example
+[[foo]
+
+[foo]: /url
+.
+<p>[<a href="/url">foo</a></p>
+````````````````````````````````
+
+Escaping the opening `[` escapes only that one character, not the whole `[[`
+opener:
+
+```````````````````````````````` example
+\[[foo]]
+
+[foo]: /url
+.
+<p>[<a href="/url">foo</a>]</p>
+````````````````````````````````
+
+Like with other inline links, the innermost wiki-link is preferred.
+
+```````````````````````````````` example
+[[foo[[bar]]]]
+.
+<p>[[foo<x-wikilink data-target="bar">bar</x-wikilink>]]</p>
+````````````````````````````````
+
+There is a limit of 100 characters for the wiki-link destination.
+
+```````````````````````````````` example
+[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
+[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]
+.
+<p>[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901]]
+[[12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901|foo]]</p>
+````````````````````````````````
+
+100 characters inside a wiki link target works.
+
+```````````````````````````````` example
+[[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890]]
+[[1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890|foo]]
+.
+<p><x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890</x-wikilink>
+<x-wikilink data-target="1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890">foo</x-wikilink></p>
+````````````````````````````````
+
+The limit on link content does not include any characters belonging to a block
+quote, if the label spans multiple lines contained in a block quote.
+
+```````````````````````````````` example
+> [[12345678901234567890123456789012345678901234567890|1234567890
+> 1234567890
+> 1234567890
+> 1234567890
+> 123456789]]
+.
+<blockquote>
+<p><x-wikilink data-target="12345678901234567890123456789012345678901234567890">1234567890
+1234567890
+1234567890
+1234567890
+123456789</x-wikilink></p>
+</blockquote>
+````````````````````````````````

+ 29 - 0
markdown.mod/source.bmx

@@ -0,0 +1,29 @@
+' Copyright (c) 2023 Bruce A Henderson
+' 
+' Permission is hereby granted, free of charge, to any person obtaining a copy
+' of this software and associated documentation files (the "Software"), to deal
+' in the Software without restriction, including without limitation the rights
+' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+' copies of the Software, and to permit persons to whom the Software is
+' furnished to do so, subject to the following conditions:
+' 
+' The above copyright notice and this permission notice shall be included in
+' all copies or substantial portions of the Software.
+' 
+' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+' THE SOFTWARE.
+' 
+SuperStrict
+
+Import "md4c/src/*.h"
+
+Import "md4c/src/entity.c"
+Import "md4c/src/md4c-html.c"
+Import "md4c/src/md4c.c"
+
+Import "glue.c"