cmark

My personal build of CMark ✏️

Commit
d1922eb6f17578774866a13fd5428cdd3bc2280d
Parent
4a7d305d220a4081ac7c106199baa940d838ce67
Author
John MacFarlane <jgm@berkeley.edu>
Date

Updated tests to use python3.

Diffstat

5 files changed, 49 insertions, 46 deletions

Status File Name N° Changes Insertions Deletions
Modified test/CMakeLists.txt 3 2 1
Modified test/cmark.py 10 6 4
Modified test/normalize.py 42 21 21
Modified test/pathological_tests.py 20 10 10
Modified test/spec_tests.py 20 10 10
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -1,6 +1,7 @@
 # To get verbose output: cmake --build build --target "test" -- ARGS='-V'
 
-set(PYTHON python)
+find_package(PythonInterp 3 REQUIRED)
+set(PYTHON ${PYTHON_EXECUTABLE})
 
 if (WIN32)
   file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR)
diff --git a/test/cmark.py b/test/cmark.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from ctypes import CDLL, c_char_p, c_long
@@ -7,11 +7,13 @@ import platform
 
 def pipe_through_prog(prog, text):
     p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
-    [result, err] = p1.communicate(input=text)
-    return [p1.returncode, result, err]
+    [result, err] = p1.communicate(input=text.encode('utf-8'))
+    return [p1.returncode, result.decode('utf-8'), err]
 
 def use_library(lib, text):
-    return [0, lib(text, len(text)), '']
+    textbytes = text.encode('utf-8')
+    textlen = len(textbytes)
+    return [0, lib(textbytes, textlen).decode('utf-8'), '']
 
 class CMark:
     def __init__(self, prog=None, library_dir=None):
diff --git a/test/normalize.py b/test/normalize.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
-from HTMLParser import HTMLParser, HTMLParseError
-from htmlentitydefs import name2codepoint
+from html.parser import HTMLParser, HTMLParseError
+from html.entities import name2codepoint
 import sys
 import re
 import cgi
@@ -14,7 +14,7 @@ class MyHTMLParser(HTMLParser):
         HTMLParser.__init__(self)
         self.last = "starttag"
         self.in_pre = False
-        self.output = u""
+        self.output = ""
         self.last_tag = ""
     def handle_data(self, data):
         after_tag = self.last == "endtag" or self.last == "starttag"
@@ -74,7 +74,7 @@ class MyHTMLParser(HTMLParser):
         self.last = "pi"
     def handle_entityref(self, name):
         try:
-            c = unichr(name2codepoint[name])
+            c = chr(name2codepoint[name])
         except KeyError:
             c = None
         self.output_char(c, '&' + name + ';')
@@ -82,22 +82,22 @@ class MyHTMLParser(HTMLParser):
     def handle_charref(self, name):
         try:
             if name.startswith("x"):
-                c = unichr(int(name[1:], 16))
+                c = chr(int(name[1:], 16))
             else:
-                c = unichr(int(name))
+                c = chr(int(name))
         except ValueError:
                 c = None
         self.output_char(c, '&' + name + ';')
         self.last = "ref"
     # Helpers.
     def output_char(self, c, fallback):
-        if c == u'<':
+        if c == '<':
             self.output += "&lt;"
-        elif c == u'>':
+        elif c == '>':
             self.output += "&gt;"
-        elif c == u'&':
+        elif c == '&':
             self.output += "&amp;"
-        elif c == u'"':
+        elif c == '"':
             self.output += "&quot;"
         elif c == None:
             self.output += fallback
@@ -122,43 +122,43 @@ def normalize_html(html):
     in pre tags):
 
         >>> normalize_html("<p>a  \t b</p>")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
         >>> normalize_html("<p>a  \t\nb</p>")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
     * Whitespace surrounding block-level tags is removed.
 
         >>> normalize_html("<p>a  b</p>")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
         >>> normalize_html(" <p>a  b</p>")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
         >>> normalize_html("<p>a  b</p> ")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
         >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
-        u'<p>a b</p>'
+        '<p>a b</p>'
 
         >>> normalize_html("<i>a  b</i> ")
-        u'<i>a b</i> '
+        '<i>a b</i> '
 
     * Self-closing tags are converted to open tags.
 
         >>> normalize_html("<br />")
-        u'<br>'
+        '<br>'
 
     * Attributes are sorted and lowercased.
 
         >>> normalize_html('<a title="bar" HREF="foo">x</a>')
-        u'<a href="foo" title="bar">x</a>'
+        '<a href="foo" title="bar">x</a>'
 
     * References are converted to unicode, except that '<', '>', '&', and
       '"' are rendered using entities.
 
         >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
-        u'\u2200&amp;&gt;&lt;&quot;'
+        '\u2200&amp;&gt;&lt;&quot;'
 
     """
     html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
@@ -171,7 +171,7 @@ def normalize_html(html):
             if chunk.group(0)[:8] == "<![CDATA":
                 parser.output += chunk.group(0)
             else:
-                parser.feed(chunk.group(0).decode(encoding='UTF-8'))
+                parser.feed(chunk.group(0))
         parser.close()
         return parser.output
     except HTMLParseError as e:
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import re
@@ -30,8 +30,8 @@ pathological = {
                  ((("> " * 50000) + "a"),
                   re.compile("(<blockquote>\n){50000}")),
     "U+0000 in input":
-                 ("abc\0de\0",
-                  re.compile("abc(�)?de(�)?"))
+                 ("abc\u0000de\u0000",
+                  re.compile("abc\ufffd?de\ufffd?"))
     }
 
 whitespace_re = re.compile('/s+/')
@@ -39,24 +39,24 @@ passed = 0
 errored = 0
 failed = 0
 
-print "Testing pathological cases:"
+print("Testing pathological cases:")
 for description in pathological:
-    print description
+    print(description)
     (inp, regex) = pathological[description]
     [rc, actual, err] = cmark.to_html(inp)
     if rc != 0:
         errored += 1
-        print description
-        print "program returned error code %d" % rc
+        print(description)
+        print("program returned error code %d" % rc)
         print(err)
     elif regex.search(actual):
         passed += 1
     else:
-        print description, 'failed'
-        print(actual)
+        print(description, 'failed')
+        print(repr(actual))
         failed += 1
 
-print "%d passed, %d failed, %d errored" % (passed, failed, errored)
+print("%d passed, %d failed, %d errored" % (passed, failed, errored))
 if (failed == 0 and errored == 0):
     exit(0)
 else:
diff --git a/test/spec_tests.py b/test/spec_tests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import sys
@@ -33,7 +33,7 @@ if __name__ == "__main__":
     args = parser.parse_args(sys.argv[1:])
 
 def print_test_header(headertext, example_number, start_line, end_line):
-    print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
+    print("Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext))
 
 def do_test(test, normalize, result_counts):
     [retcode, actual_html, err] = cmark.to_html(test['markdown'])
@@ -43,7 +43,7 @@ def do_test(test, normalize, result_counts):
         if normalize:
             try:
                 passed = normalize_html(actual_html) == normalize_html(expected_html)
-            except UnicodeDecodeError, e:
+            except UnicodeDecodeError as e:
                 unicode_error = e
                 passed = False
         else:
@@ -54,9 +54,9 @@ def do_test(test, normalize, result_counts):
             print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
             sys.stdout.write(test['markdown'])
             if unicode_error:
-                print "Unicode error: " + str(unicode_error)
-                print "Expected: " + repr(expected_html)
-                print "Got:      " + repr(actual_html)
+                print("Unicode error: " + str(unicode_error))
+                print("Expected: " + repr(expected_html))
+                print("Got:      " + repr(actual_html))
             else:
                 expected_html_lines = expected_html.splitlines(True)
                 actual_html_lines = actual_html.splitlines(True)
@@ -67,7 +67,7 @@ def do_test(test, normalize, result_counts):
             result_counts['fail'] += 1
     else:
         print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
-        print "program returned error code %d" % retcode
+        print("program returned error code %d" % retcode)
         print(err)
         result_counts['error'] += 1
 
@@ -114,7 +114,7 @@ def get_tests(specfile):
 
 if __name__ == "__main__":
     if args.debug_normalization:
-        print normalize_html(sys.stdin.read())
+        print(normalize_html(sys.stdin.read()))
         exit(0)
 
     all_tests = get_tests(args.spec)
@@ -124,7 +124,7 @@ if __name__ == "__main__":
         pattern_re = re.compile('.')
     tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ]
     if args.dump_tests:
-        print json.dumps(tests, ensure_ascii=False, indent=2)
+        print(json.dumps(tests, ensure_ascii=False, indent=2))
         exit(0)
     else:
         skipped = len(all_tests) - len(tests)
@@ -132,7 +132,7 @@ if __name__ == "__main__":
         result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
         for test in tests:
             do_test(test, args.normalize, result_counts)
-        print "{pass} passed, {fail} failed, {error} errored, {skip} skipped".format(**result_counts)
+        print("{pass} passed, {fail} failed, {error} errored, {skip} skipped".format(**result_counts))
         if result_counts['fail'] == 0 and result_counts['error'] == 0:
             exit(0)
         else: