
My personal build of CMark ✏️

John MacFarlane <>

Re-added tools/{makespec.py, template.html, template.tex}.

These were inadvertently dropped in the commit that described them as being moved to tools/.


4 files changed, 504 insertions, 0 deletions

Status File Name N° Changes Insertions Deletions
Added tools/makespec.py 165 165 0
Modified tools/ 0 0 0
Added tools/template.html 110 110 0
Added tools/template.tex 229 229 0
diff --git a/tools/makespec.py b/tools/makespec.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+import re
+import sys
+from subprocess import *
+from string import Template
+if len(sys.argv) == 2:
+    specformat = sys.argv[1]
+    if not (specformat in ["html", "markdown"]):
+        sys.stderr.write("Format must be html or markdown\n")
+        exit(1)
+else:
+    sys.stderr.write("Usage: makespec.py [html|markdown]\n")
+    exit(1)
+def toIdentifier(s):
+   return re.sub(r'\s+', '-', re.sub(r'\W+', ' ', s.strip().lower()))
+def parseYaml(yaml):
+    metadata = {}
+    def parseField(match):
+        key = match.group(1)
+        val = match.group(2).strip()
+        if re.match(r'^\'', val):
+            val = val[1:len(val) - 1]
+        metadata[key] = val
+    fieldre = re.compile('^(\w+):(.*)$', re.MULTILINE)
+    re.sub(fieldre, parseField, yaml)
+    return metadata
+def pipe_through_prog(prog, text):
+    p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
+    [result, err] = p1.communicate(input=text.encode('utf-8'))
+    return [p1.returncode, result.decode('utf-8'), err]
+def replaceAnchor(match):
+    refs.append("[{0}]: #{1}".format(match.group(1), match.group(2)))
+    if specformat == "html":
+        return '<a id="{1}" href="#{1}" class="definition">{0}</a>'.format(match.group(1), match.group(2))
+    else:
+        return match.group(0)
+stage = 0
+example = 0
+section = ""
+sections = []
+mdlines = []
+refs = []
+lastnum = []
+finishedMeta = False
+yamllines = []
+with open('spec.txt', 'r', encoding='utf-8') as spec:
+    for ln in spec:
+        if not finishedMeta:
+            yamllines.append(ln)
+            if re.match(r'^\.\.\.$', ln):
+                finishedMeta = True
+        elif re.match(r'^\.$', ln):
+            if stage == 0:
+                example += 1
+                mdlines.append("\n<div class=\"example\" id=\"example-{0}\" data-section=\"{1}\">\n".format(example, section))
+                mdlines.append("<div class=\"examplenum\"><a href=\"#example-{0}\">Example {0}</a>&nbsp;&nbsp;<a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n".format(example))
+                mdlines.append("````````````````````````````````````````````````````````` markdown\n")
+                stage = 1
+            elif stage == 1:
+                mdlines.append("`````````````````````````````````````````````````````````\n\n")
+                mdlines.append("````````````````````````````````````````````````````````` html\n")
+                stage = 2
+            elif stage == 2:
+                mdlines.append("`````````````````````````````````````````````````````````\n\n")
+                mdlines.append("</div>\n")
+                stage = 0
+            else:
+                sys.stderr.out("Encountered unknown stage {0}\n".format(stage))
+                sys.exit(1)
+        else:
+            if stage == 0:
+                match = re.match(r'^(#{1,6}) *(.*)', ln)
+                if match:
+                    section = match.group(2)
+                    lastlevel = len(lastnum)
+                    level = len(match.group(1))
+                    if re.search(r'{-}$', section):
+                        section = re.sub(r' *{-} *$', '', section)
+                        if specformat == 'html':
+                            ln = re.sub(r' *{-} *$', '', ln)
+                        number = ''
+                    else:
+                        if lastlevel == level:
+                            lastnum[level - 1] = lastnum[level - 1] + 1
+                        elif lastlevel < level:
+                            while len(lastnum) < level:
+                                lastnum.append(1)
+                        else: # lastlevel > level
+                            lastnum = lastnum[0:level]
+                            lastnum[level - 1] = lastnum[level - 1] + 1
+                        number = '.'.join([str(x) for x in lastnum])
+                    ident = toIdentifier(section)
+                    ln = re.sub(r' ', ' ' + number + ' ', ln, count=1)
+                    sections.append(dict(level=level,
+                                         contents=section,
+                                         ident=ident,
+                                         number=number))
+                    refs.append("[{0}]: #{1}".format(section, ident))
+                    ln = re.sub(r'# +', '# <a id="{0}"></a> '.format(ident),
+                                ln, count=1)
+                else:
+                    ln = re.sub(r'\[([^]]*)\]\(@([^)]*)\)', replaceAnchor, ln)
+            else:
+                ln = re.sub(r' ', '␣', ln)
+            mdlines.append(ln)
+mdtext = ''.join(mdlines) + '\n\n' + '\n'.join(refs) + '\n'
+yaml = ''.join(yamllines)
+metadata = parseYaml(yaml)
+if specformat == "markdown":
+    sys.stdout.write(yaml + '\n\n' + mdtext)
+elif specformat == "html":
+    with open("template.html", "r", encoding="utf-8") as templatefile:
+        template = Template(templatefile.read())
+    toclines = []
+    for section in sections:
+        indent = '    ' * (section['level'] - 1)
+        toclines.append(indent + '* [' + section['number'] + ' ' +
+                        section['contents'] + '](#' + section['ident'] + ')')
+    toc = '<div id="TOC">\n\n' + '\n'.join(toclines) + '\n\n</div>\n\n'
+    prog = "build/src/cmark"
+    [retcode, result, err] = pipe_through_prog(prog, toc + mdtext)
+    if retcode == 0:
+        result = re.sub(r'␣', '<span class="space"> </span>', result)
+        result = re.sub(r'<h([1-6])><a id="([^\"]*)"><\/a> ',
+                        "<h\\1 id=\"\\2\">", result)
+        # put plural s inside links for better visuals:
+        result = re.sub(r'<\/a>s', "s</a>", result)
+        sys.stdout.write(template.substitute(metadata, body=result))
+        # check for errors:
+        idents = []
+        for ident in re.findall(r'id="([^"]*)"', result):
+            if ident in idents:
+                sys.stderr.write("WARNING: duplicate identifier '" + ident +
+                                 "'\n")
+            else:
+                idents.append(ident)
+        for href in re.findall(r'href="#([^"]*)"', result):
+            if not (href in idents):
+                sys.stderr.write("WARNING: internal link with no anchor '" +
+                                 href + "'\n")
+        reftexts = []
+        for ref in refs:
+            ref = re.sub('].*',']',ref).upper()
+            if ref in reftexts:
+                sys.stderr.write("WARNING: duplicate reference link '" +
+                                 ref + "'\n")
+            else:
+                reftexts.append(ref)
+    else:
+        sys.stderr.write("Error converting markdown version of spec:\n")
+        sys.stderr.write(err)
+        exit(1)
diff --git a/tools/ b/tools/
diff --git a/tools/template.html b/tools/template.html
@@ -0,0 +1,110 @@
+<!DOCTYPE html>
+<meta charset="UTF-8">
+<style type="text/css">
+  body { font-family: Helvetica, arial, freesans, clean, sans-serif;
+    line-height: 1.4;
+    max-width: 48em;
+    margin: auto;
+    color: #333333;
+    background-color: #fff;
+    font-size: 13pt;
+  }
+div#TOC ul { list-style: none; }
+h1 { font-size: 140%; font-weight: bold; border-top: 1px solid gray; padding-top: 0.5em; }
+h2 { font-size: 120%; font-weight: bold; }
+h3 { font-size: 110%; font-weight: bold; }
+h4 { font-size: 100%; font-weight: bold; }
+a.definition { font-weight: bold; }
+span.space { position: relative; }
+span.space:after {
+  content: "·";
+  position: absolute;
+  /* create a mark that indicates a space (trick from D. Greenspan) */
+  top: 0px; bottom: 7px; left: 1px; right: 1px;
+  color: #AAA;
+div.example { overflow: hidden; }
+p { text-align: justify; }
+pre { padding: 0.5em; margin-left: 0; margin-right: 0; margin-top: 0.2em;
+  margin-bottom: 0.5em; font-size: 88%; }
+pre {
+ white-space: pre-wrap;       /* css-3 */
+ white-space: -moz-pre-wrap;  /* Mozilla, since 1999 */
+ white-space: -pre-wrap;      /* Opera 4-6 */
+ white-space: -o-pre-wrap;    /* Opera 7 */
+ word-wrap: break-word;       /* Internet Explorer 5.5+ */
+code { font-family: monospace; background-color: #D3E1E4; }
+pre > code { background-color: transparent; }
+div.example > pre { float:left; width: 48%; }
+div.example > pre:nth-child(2) { clear:left; background-color: #D3E1E4; }
+div.example > pre:nth-child(3) { clear:right; background-color: #C9CaCE; }
+#watermark {
+ position:fixed;
+ bottom:0px;
+ left:0px;
+ padding: 1em;
+ width: 100%;
+ font-size: 120%;
+ opacity:0.7;
+ z-index:99;
+ color: white;
+#watermark a { color: white; }
+div.examplenum { font-size: 82%; text-align: left; }
+a.dingus { color: red; cursor: pointer; }
+a.footnoteRef > sup:before {
+  content: "[";
+a.footnoteRef > sup:after {
+  content: "]";
+a.footnoteRef > sup {
+  vertical-align: baseline;
+  font-size: 100%;
+<script src="//"></script>
+<script type="text/javascript">
+$$(document).ready(function() {
+  $$("div.example").each(function(e) {
+    var t = $$(this).find('code.markdown').text();
+    $$(this).find('a.dingus').click(function(f) {
+      window.open('/dingus.html?text=' +
+        encodeURIComponent(t.replace(/→/g,"\t")));
+    });
+  });
+  $$("code.markdown").dblclick(function(e) {
+    window.open('/dingus.html?text=' +
+      encodeURIComponent($$(this).find('code').text()));
+  });
+<h1 class="title">${title}</h1>
+<div class="version">Version ${version} (${date})</div>
+<div class="authors">
+    <span class="author">${author}</span>
+<div class="license">
+<a rel="license"
+   href=""><img alt="Creative
+   Commons BY-SA" style="border-width:0"
+   src=""
+   /></a><br/><span style="display:none"><span xmlns:dct=""
+   href="" property="dct:title"
+   rel="dct:type">CommonMark Spec</span> by
+   <a xmlns:cc=""
+   href="" property="cc:attributionName"
+   rel="cc:attributionURL">John MacFarlane</a> is licensed under a
+   <a rel="license"
+   href="">Creative
+   Commons Attribution-ShareAlike 4.0 International License</a>.</span>
+<div id="watermark"></div>
diff --git a/tools/template.tex b/tools/template.tex
@@ -0,0 +1,229 @@
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[T1]{fontenc}
+  \usepackage[utf8]{inputenc}
+  \usepackage{eurosym}
+\else % if luatex or xelatex
+  \ifxetex
+    \usepackage{mathspec}
+    \usepackage{xltxtra,xunicode}
+  \else
+    \usepackage{fontspec}
+  \fi
+  \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
+  \newcommand{\euro}{€}
+    \setmainfont{$mainfont$}
+    \setsansfont{$sansfont$}
+    \setmonofont[Mapping=tex-ansi]{$monofont$}
+    \setmathfont(Digits,Latin,Greek){$mathfont$}
+% use upquote if available, for straight quotes in verbatim environments
+% use microtype if available
+% Add ',fontsize=\small' for more characters per line
+% Scale images if necessary, so that they will not overflow the page
+% margins by default, and it is still possible to overwrite the defaults
+% using explicit options in \includegraphics[width, height, ...]{}
+  \usepackage[setpagesize=false, % page size defined by xetex
+              unicode=false, % unicode breaks when used with xetex
+              xetex]{hyperref}
+  \usepackage[unicode=true]{hyperref}
+            bookmarks=true,
+            pdfauthor={$author-meta$},
+            pdftitle={$title-meta$},
+            colorlinks=true,
+            citecolor=$if(citecolor)$$citecolor$$else$blue$endif$,
+            urlcolor=$if(urlcolor)$$urlcolor$$else$blue$endif$,
+            linkcolor=$if(linkcolor)$$linkcolor$$else$magenta$endif$,
+            pdfborder={0 0 0}}
+\urlstyle{same}  % don't use monospace font for urls
+% Make links footnotes instead of hotlinks:
+% avoid problems with \sout in headers with hyperref:
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+\setlength{\emergencystretch}{3em}  % prevent overfull lines
+\VerbatimFootnotes % allows verbatim text in footnotes
+  \usepackage{polyglossia}
+  \setmainlanguage{$mainlang$}
+  \usepackage[$lang$]{babel}
+\titleformat{\chapter}[hang]{\Huge\bfseries}{\thechapter\ }{0pt}{\Huge\bfseries}
+\lhead{\itshape $title$}
+\lfoot{v$version$ ($date$)}
+\title{$title$$if(subtitle)$\\\vspace{0.5em}{\large $subtitle$}$endif$}
+\author{$for(author)$$author$$sep$ \and $endfor$}