cmark
My personal build of CMark ✏️
- Commit
- 4b87bbb3e29c460940fc183f63a3b424ddf90187
- Parent
- 1ce9c8a9a4fd274beeac26ab506ce508745c47ab
- Author
- John MacFarlane <jgm@berkeley.edu>
- Date
Added python version of test runner.
This tests the dynamic library when run without a second argument.
The code makes use of an amended version of the normalization
method from karlcow/markdown-testsuite.
Closes #161.
Diffstat
8 files changed, 284 insertions, 193 deletions
diff --git a/LICENSE b/LICENSE
@@ -53,3 +53,18 @@ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-----
+
+The normalization code in runtests.py was derived from the
+markdowntest project, Copyright 2013 Karl Dubost:
+
+The MIT License (MIT)
+
+Copyright (c) 2013 Karl Dubost
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/runtests.pl b/runtests.pl
@@ -1,176 +0,0 @@
-#!/usr/bin/env perl
-use warnings;
-use strict;
-use Term::ANSIColor;
-use IO::Handle;
-use IPC::Open2;
-
-# Help text printed to STDERR when the required arguments are missing.
-my $usage="runtests.pl SPEC PROGRAM\nSet ANSI_COLORS_DISABLED=1 if you redirect to a file.\nSet PATT='...' to restrict tests to sections matching a regex.\n";
-
-# The first argument names the spec file; whatever remains is the
-# command line of the program under test.
-my $SPEC = shift;
-my @PROG = @ARGV;
-# Optional regex (from the environment) selecting the sections to run.
-my $PATT = $ENV{PATT};
-
-unless (defined $SPEC && @PROG) {
-    print STDERR $usage;
-    exit 1;
-}
-
-# Running totals reported once the whole spec has been processed.
-my ($passed, $failed, $skipped, $errored) = (0, 0, 0, 0);
-
-# Markdown implementations vary on insignificant whitespace.
-# Some leave blanks between block elements, others don't.
-# This function tries to normalize the output so it can be
-# compared with our test.
-#
-# tidy takes one argument, the HTML string to normalize, and
-# returns the normalized string.  Lines that open or continue a
-# <pre> element are copied through untouched (apart from \r
-# removal).  On every other line leading and trailing spaces are
-# stripped, runs of spaces are collapsed, the space before "/>" is
-# dropped, the newline following a closing ">" is removed, and
-# blank lines are skipped.
-sub tidy
-{
-    my ($html) = @_;
-    my $inpre = 0;
-    my $out = "";
-    for my $line (split /^/, $html) {
-        # Find the last opening and closing pre tag on this line, so a
-        # one-line "<pre>...</pre>" does not leave us stuck in
-        # preformatted mode for the rest of the document.
-        my $last_open  = rindex($line, '<pre');
-        my $last_close = rindex($line, '</pre');
-        # A line that opens a <pre>, or continues one without closing
-        # it, is emitted verbatim; a bare closing line is normalized.
-        my $verbatim = $last_open >= 0 || ($inpre && $last_close < 0);
-        if ($last_open >= 0 || $last_close >= 0) {
-            $inpre = $last_open > $last_close ? 1 : 0;
-        }
-        # remove \r to allow mixing linux/windows newlines
-        $line =~ s/\r//g;
-        if ($verbatim) {
-            $out .= $line;
-            next;
-        }
-        # remove leading spaces
-        $line =~ s/^ +//;
-        # remove trailing spaces
-        $line =~ s/ +$//;
-        # collapse consecutive spaces
-        $line =~ s/ {2,}/ /g;
-        # collapse space before /> in tag
-        $line =~ s/ +\/>/\/>/g;
-        $line =~ s/>\n$/>/;
-        # skip blank line
-        next if $line =~ /^$/;
-        $out .= $line;
-    }
-    return $out;
-}
-
-# Run a single spec example through the program under test.
-#
-# Arguments: the markdown source of the example, the expected HTML,
-# and a test name used when reporting a failure.
-# return 0 for passing test, -1 for failing, positive for error
-# (the non-zero wait status of the program).
-sub dotest
-{
-    my ($markdown, $html, $testname) = @_;
-    # We use → to indicate tab and ␣ space in the spec.  Both
-    # substitutions are applied to both texts (the loop variable
-    # aliases each of them in turn).
-    for my $text ($markdown, $html) {
-        $text =~ s/→/\t/g;
-        $text =~ s/␣/ /g;
-    }
-    # Feed the markdown to the program and slurp everything it prints.
-    my $pid = open2(my $out, my $in, @PROG);
-    print $in $markdown;
-    close $in;
-    my $actual = do { local $/; <$out> };
-    $actual = "" unless defined $actual;
-    close $out;
-    waitpid($pid, 0);
-    my $exit_status = $?;
-    $html = tidy($html);
-    $actual = tidy($actual);
-    # Treat the numeric entity for an apostrophe like a literal one.
-    $actual =~ s/&#39;/'/g;
-
-    if ($actual eq $html) {
-        print colored("✓", "green");
-        return 0;
-    }
-
-    print colored("\n✘ $testname", "red");
-    print "\n";
-    print color "cyan";
-    print "=== markdown ===============\n";
-    print $markdown;
-    print "=== expected ===============\n";
-    print $html;
-    print "\n";
-    print "=== got ====================\n";
-    print $actual;
-    print "\n";
-    # Restore the terminal's default colours rather than forcing black,
-    # which is unreadable on dark backgrounds.
-    print color "reset";
-    return $exit_status == 0 ? -1 : $exit_status;
-}
-
-# Parser state while walking the spec:
-#   0 = ordinary text, 1 = inside the markdown of an example,
-#   2 = inside the expected HTML of an example.
-my $stage = 0;
-my $markdown = "";
-my $html = "";
-my $example = 0;
-my $linenum = 0;
-my $exampleline = 0;
-my @secnums = ();
-# Start with an empty heading so that examples appearing before the
-# first section heading can be matched against PATT without warnings.
-my $secheading = "";
-my $testresult;
-
-open(my $specfh, '<', $SPEC) or die "Cannot open spec file '$SPEC': $!\n";
-while (<$specfh>) {
-    $linenum++;
-    if (/^\.$/) {
-        # A lone "." separates markdown, expected HTML and regular text.
-        $stage = ($stage + 1) % 3;
-        if ($stage == 1) {
-            $exampleline = $linenum;
-        }
-        if ($stage == 0) {
-            # The example is complete: run it or count it as skipped.
-            $example++;
-            if (!$PATT || $secheading =~ /$PATT/) {
-                $testresult = dotest($markdown, $html, "Example $example (line $exampleline)");
-                if ($testresult == 0) {
-                    $passed++;
-                } elsif ($testresult == -1) {
-                    $failed++;
-                } else {
-                    $errored++;
-                }
-            } else {
-                $skipped++;
-            }
-            $markdown = "";
-            $html = "";
-        }
-    } elsif ($stage == 0 && $_ =~ /^<!-- END TESTS -->/) {
-        last;
-    } elsif ($stage == 0 && $_ =~ /^(#+) +(.*)/) {
-        # Section heading: keep a dotted section number up to date.
-        my $seclevel = length($1);
-        $secheading = $2;
-        if ($#secnums == $seclevel - 1) {
-            $secnums[$#secnums]++;
-        } elsif ($#secnums > $seclevel - 1) {
-            @secnums = @secnums[0..($seclevel - 1)];
-            $secnums[$#secnums]++;
-        } else {
-            while ($#secnums < $seclevel - 1) {
-                push(@secnums, 1);
-            }
-        }
-        if (!$PATT || $secheading =~ /$PATT/) {
-            print ("\n", join(".", @secnums) . " " . $secheading, " ");
-        }
-    } elsif ($stage == 1) {
-        $markdown .= $_;
-    } elsif ($stage == 2) {
-        $html .= $_;
-    }
-}
-close($specfh);
-
-print "\n";
-print STDERR colored("$passed tests passed, $failed failed, $errored errored, $skipped skipped.", "bold");
-print STDERR "\n";
-# Exit non-zero when any example failed or errored.  The count is capped
-# because exit statuses wrap modulo 256 (256 failures would read as success).
-my $unsuccessful = $failed + $errored;
-exit($unsuccessful > 255 ? 255 : $unsuccessful);
diff --git a/runtests.py b/runtests.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from ctypes import CDLL, c_char_p, c_long
+import sys
+import platform
+from difflib import unified_diff
+from subprocess import *
+import argparse
+from HTMLParser import HTMLParser
+from htmlentitydefs import name2codepoint
+import re
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Run cmark tests.')
+ parser.add_argument('--program', dest='program', nargs='?', default=None,
+ help='program to test')
+ parser.add_argument('--spec', dest='spec', nargs='?', default='spec.txt',
+ help='path to spec')
+ parser.add_argument('--pattern', dest='pattern', nargs='?',
+ default=None, help='limit to sections matching regex pattern')
+ parser.add_argument('--library_dir', dest='library_dir', nargs='?',
+ default=None, help='directory containing dynamic library')
+ args = parser.parse_args(sys.argv[1:])
+
+if not args.program:
+ sysname = platform.system()
+ libname = "libcmark"
+ if sysname == 'Darwin':
+ libname += ".dylib"
+ elif sysname == 'Windows':
+ libname += ".dll"
+ else:
+ libname += ".so"
+ if args and args.library_dir:
+ libpath = args.library_dir + "/" + libname
+ else:
+ libpath = "build/src/" + libname
+ cmark = CDLL(libpath)
+
+ markdown = cmark.cmark_markdown_to_html
+ markdown.restype = c_char_p
+ markdown.argtypes = [c_char_p, c_long]
+
+def md2html(text, prog):
+ if prog:
+ p1 = Popen([prog], stdout=PIPE, stdin=PIPE, stderr=PIPE)
+ [result, err] = p1.communicate(input=text)
+ return [p1.returncode, result, err]
+ else:
+ return [0, markdown(text, len(text)), '']
+
+# Normalization code, adapted from
+# https://github.com/karlcow/markdown-testsuite/
+significant_attrs = ["alt", "href", "src", "title"]
+normalize_whitespace_re = re.compile('\s+')
+class MyHTMLParser(HTMLParser):
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.last = "starttag"
+ self.in_pre = False
+ self.output = u""
+ def handle_data(self, data):
+ if self.in_pre:
+ self.output += data
+ else:
+ data = normalize_whitespace_re.sub(' ', data)
+ data_strip = data.strip()
+ if (self.last == "ref") and data_strip and data[0] == " ":
+ self.output += " "
+ self.data_end_in_space_not_empty = (data[-1] == ' ' and data_strip)
+ self.output += data_strip
+ self.last = "data"
+ def handle_endtag(self, tag):
+ if tag == "pre":
+ self.in_pre = False
+ self.output += "</" + tag + ">"
+ self.last = "endtag"
+ def handle_starttag(self, tag, attrs):
+ if tag == "pre":
+ self.in_pre = True
+ self.output += "<" + tag
+ attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
+ if attrs:
+ attrs.sort()
+ for attr in attrs:
+ self.output += " " + attr[0] + "=" + '"' + attr[1] + '"'
+ self.output += ">"
+ self.last = "starttag"
+ def handle_startendtag(self, tag, attrs):
+ """Ignore closing tag for self-closing void elements."""
+ self.handle_starttag(tag, attrs)
+ def handle_entityref(self, name):
+ self.add_space_from_last_data()
+ try:
+ self.output += unichr(name2codepoint[name])
+ except KeyError:
+ self.output += name
+ self.last = "ref"
+ def handle_charref(self, name):
+ self.add_space_from_last_data()
+ try:
+ if name.startswith("x"):
+ c = unichr(int(name[1:], 16))
+ else:
+ c = unichr(int(name))
+ self.output += c
+ except ValueError:
+ self.output += name
+ self.last = "ref"
+ # Helpers.
+ def add_space_from_last_data(self):
+ """Maintain the space at: `a <span>b</span>`"""
+ if self.last == 'data' and self.data_end_in_space_not_empty:
+ self.output += ' '
+
+def normalize(html):
+ r"""
+ Return normalized form of HTML which igores insignificant output differences.
+ Multiple inner whitespaces to a single space
+ >>> normalize("<p>a \t\nb</p>")
+ u'<p>a b</p>'
+ Surrounding whitespaces are removed:
+ >>> normalize("<p> a</p>")
+ u'<p>a</p>'
+ >>> normalize("<p>a </p>")
+ u'<p>a</p>'
+ TODO: how to deal with the following cases without a full list of the void tags?
+ >>> normalize("<p>a <b>b</b></p>")
+ u'<p>a<b>b</b></p>'
+ >>> normalize("<p><b>b</b> c</p>")
+ u'<p><b>b</b>c</p>'
+ >>> normalize("<p>a <br></p>")
+ u'<p>a<br></p>'
+ `pre` elements preserve whitespace:
+ >>> normalize("<pre>a \t\nb</pre>")
+ u'<pre>a \t\nb</pre>'
+ Self-closing tags:
+ >>> normalize("<p><br /></p>")
+ u'<p><br></p>'
+ References are converted to Unicode:
+ >>> normalize("<p><</p>")
+ u'<p><</p>'
+ >>> normalize("<p><</p>")
+ u'<p><</p>'
+ >>> normalize("<p><</p>")
+ u'<p><</p>'
+ >>> normalize("<p>中</p>")
+ u'<p>\u4e2d</p>'
+ Spaces around entities are kept:
+ >>> normalize("<p>a < b</p>")
+ u'<p>a < b</p>'
+ >>> normalize("<p>a<b</p>")
+ u'<p>a<b</p>'
+ Most attributes are ignored:
+ >>> normalize('<p id="a"></p>')
+ u'<p></p>'
+ Critical attributes are considered and sorted alphabetically:
+ >>> normalize('<a href="a"></a>')
+ u'<a href="a"></a>'
+ >>> normalize('<img src="a" alt="a">')
+ u'<img alt="a" src="a">'
+ """
+ parser = MyHTMLParser()
+ parser.feed(html.decode(encoding='UTF-8'))
+ parser.close()
+ return parser.output
+
+def print_test_header(headertext, example_number, start_line, end_line):
+ print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
+
+def do_test(markdown_lines, expected_html_lines, headertext,
+ example_number, start_line, end_line, prog=None):
+ real_markdown_text = ''.join(markdown_lines).replace('→','\t')
+ [retcode, actual_html, err] = md2html(real_markdown_text, prog)
+ if retcode == 0:
+ actual_html_lines = actual_html.splitlines(True)
+ expected_html = ''.join(expected_html_lines)
+ if normalize(actual_html) == normalize(expected_html):
+ return 'pass'
+ else:
+ print_test_header(headertext, example_number,start_line,end_line)
+ sys.stdout.write(real_markdown_text)
+ for diffline in unified_diff(expected_html_lines, actual_html_lines,
+ "expected HTML", "actual HTML"):
+ sys.stdout.write(diffline)
+ sys.stdout.write('\n')
+ return 'fail'
+ else:
+ print_test_header(example_number,start_line,end_line)
+ print "program returned error code %d" % retcode
+ print(err)
+ return 'error'
+
+def do_tests(specfile, prog, pattern):
+ line_number = 0
+ start_line = 0
+ end_line = 0
+ example_number = 0
+ passed = 0
+ failed = 0
+ errored = 0
+ markdown_lines = []
+ html_lines = []
+ active = True
+ state = 0 # 0 regular text, 1 markdown example, 2 html output
+ headertext = ''
+
+ header_re = re.compile('#+ ')
+ if pattern:
+ pattern_re = re.compile(pattern)
+
+ with open(specfile, 'r') as specf:
+ for line in specf:
+ line_number = line_number + 1
+ if state == 0 and re.match(header_re, line):
+ headertext = header_re.sub('', line).strip()
+ if pattern:
+ if re.search(pattern_re, line):
+ active = True
+ else:
+ active = False
+ if line.strip() == ".":
+ state = (state + 1) % 3
+ if state == 0:
+ example_number = example_number + 1
+ end_line = line_number
+ if active:
+ result = do_test(markdown_lines, html_lines,
+ headertext, example_number,
+ start_line, end_line, prog)
+ if result == 'pass':
+ passed = passed + 1
+ elif result == 'fail':
+ failed = failed + 1
+ else:
+ errored = errored + 1
+ start_line = 0
+ markdown_lines = []
+ html_lines = []
+ elif state == 1:
+ if start_line == 0:
+ start_line = line_number
+ markdown_lines.append(line)
+ elif state == 2:
+ html_lines.append(line)
+ print "%d passed, %d failed, %d errored" % (passed, failed, errored)
+ return (failed == 0 and errored == 0)
+
+if __name__ == "__main__":
+ if do_tests(args.spec, args.program, args.pattern):
+ exit(0)
+ else:
+ exit(1)