cmark

My personal build of CMark ✏️

pathological_tests.py (5931B)

  1 #!/usr/bin/env python3
  2 # -*- coding: utf-8 -*-
  3 
  4 import re
  5 import argparse
  6 import sys
  7 import platform
  8 import itertools
  9 import multiprocessing
 10 import time
 11 from cmark import CMark
 12 
 13 TIMEOUT = 10
 14 
 15 parser = argparse.ArgumentParser(description='Run cmark tests.')
 16 parser.add_argument('--program', dest='program', nargs='?', default=None,
 17         help='program to test')
 18 parser.add_argument('--library-dir', dest='library_dir', nargs='?',
 19         default=None, help='directory containing dynamic library')
 20 args = parser.parse_args(sys.argv[1:])
 21 
 22 allowed_failures = {"many references": True}
 23 
 24 cmark = CMark(prog=args.program, library_dir=args.library_dir)
 25 
 26 def hash_collisions():
 27     REFMAP_SIZE = 16
 28     COUNT = 50000
 29 
 30     def badhash(ref):
 31         h = 0
 32         for c in ref:
 33             a = (h << 6) & 0xFFFFFFFF
 34             b = (h << 16) & 0xFFFFFFFF
 35             h = ord(c) + a + b - h
 36             h = h & 0xFFFFFFFF
 37 
 38         return (h % REFMAP_SIZE) == 0
 39 
 40     keys = ("x%d" % i for i in itertools.count())
 41     collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
 42     bad_key = next(collisions)
 43 
 44     document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
 45 
 46     return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
 47 
 48 
 49 # list of pairs consisting of input and a regex that must match the output.
 50 pathological = {
 51     # note - some pythons have limit of 65535 for {num-matches} in re.
 52     "nested strong emph":
 53                 (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
 54                  re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
 55     "many emph closers with no openers":
 56                  (("a_ " * 65000),
 57                   re.compile("(a[_] ){64999}a_")),
 58     "many emph openers with no closers":
 59                  (("_a " * 65000),
 60                   re.compile("(_a ){64999}_a")),
 61     "many link closers with no openers":
 62                  (("a]" * 65000),
 63                   re.compile("(a\]){65000}")),
 64     "many link openers with no closers":
 65                  (("[a" * 65000),
 66                   re.compile("(\[a){65000}")),
 67     "mismatched openers and closers":
 68                  (("*a_ " * 50000),
 69                   re.compile("([*]a[_] ){49999}[*]a_")),
 70     "openers and closers multiple of 3":
 71                  (("a**b" + ("c* " * 50000)),
 72                   re.compile("a[*][*]b(c[*] ){49999}c[*]")),
 73     "link openers and emph closers":
 74                  (("[ a_" * 50000),
 75                   re.compile("(\[ a_){50000}")),
 76     "pattern [ (]( repeated":
 77                  (("[ (](" * 80000),
 78                   re.compile("(\[ \(\]\(){80000}")),
 79     "hard link/emph case":
 80                  ("**x [a*b**c*](d)",
 81                   re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
 82     "nested brackets":
 83                  (("[" * 50000) + "a" + ("]" * 50000),
 84                   re.compile("\[{50000}a\]{50000}")),
 85     "nested block quotes":
 86                  ((("> " * 50000) + "a"),
 87                   re.compile("(<blockquote>\n){50000}")),
 88     "deeply nested lists":
 89                  ("".join(map(lambda x: ("  " * x + "* a\n"), range(0,1000))),
 90                   re.compile("<ul>\n(<li>a\n<ul>\n){999}<li>a</li>\n</ul>\n(</li>\n</ul>\n){999}")),
 91     "U+0000 in input":
 92                  ("abc\u0000de\u0000",
 93                   re.compile("abc\ufffd?de\ufffd?")),
 94     "backticks":
 95                  ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))),
 96                   re.compile("^<p>[e`]*</p>\n$")),
 97     "unclosed links A":
 98                  ("[a](<b" * 30000,
 99                   re.compile("(\[a\]\(&lt;b){30000}")),
100     "unclosed links B":
101                  ("[a](b" * 30000,
102                   re.compile("(\[a\]\(b){30000}")),
103     "reference collisions": hash_collisions()
104 #    "many references":
105 #                 ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
106 #                  re.compile("(\[0\] ){4999}"))
107     }
108 
# Matches one or more whitespace characters.  (The previous pattern '/s+/'
# was Perl-style delimiter syntax and only matched the literal text "/s.../".)
whitespace_re = re.compile(r'\s+')

# Outcome buckets: each list collects the descriptions of the cases that
# ended with that outcome.
results = {'passed': [], 'errored': [], 'failed': [], 'ignored': []}
112 
def run_pathological_test(description, results):
    """Run one pathological case and record its outcome in ``results``.

    The (input, regex) pair for ``description`` is looked up in
    ``pathological``, the input is rendered with ``cmark.to_html`` and
    ``description`` is appended to exactly one list in ``results``:

    * ``'passed'``  - return code 0 and the regex matches the output;
    * ``'failed'``  - return code 0 but the regex does not match;
    * ``'errored'`` - non-zero return code;
    * ``'ignored'`` - failed or errored, but listed in ``allowed_failures``.

    Args:
        description: key into ``pathological``.
        results: mapping from outcome name to a list-like object supporting
            ``append``; modified in place.
    """
    (inp, regex) = pathological[description]
    [rc, actual, err] = cmark.to_html(inp)
    # Most cases have no entry in allowed_failures; a plain subscript would
    # raise KeyError for them, so a missing entry means "not allowed to fail".
    tolerated = allowed_failures.get(description, False)
    if rc != 0:
        print(description, '[ERRORED (return code %d)]' % rc)
        print(err)
        outcome = 'ignored' if tolerated else 'errored'
    elif regex.search(actual):
        print(description, '[PASSED]')
        outcome = 'passed'
    else:
        print(description, '[FAILED]')
        print(repr(actual))
        outcome = 'ignored' if tolerated else 'failed'
    results[outcome].append(description)
134 
def run_tests():
    """Run every pathological case in its own process and exit with a verdict.

    Each case in ``pathological`` is handed to ``run_pathological_test`` in a
    separate process, which is terminated if it is still running after
    ``TIMEOUT`` seconds.  A timeout, or a worker that exits with a non-zero
    exit code, is recorded as ``'errored'`` (or ``'ignored'`` when the case is
    listed in ``allowed_failures``).  The collected outcomes are merged into
    the module-level ``results`` and a summary is printed.

    Exits the interpreter with status 0 when nothing failed or errored,
    otherwise with status 1.
    """
    print("Testing pathological cases:")
    # A worker process operates on its own copy of ordinary Python objects,
    # so appends made there never reach this process.  Manager-backed lists
    # are proxies the workers can append to and the parent can read back.
    with multiprocessing.Manager() as manager:
        shared = {outcome: manager.list() for outcome in results}
        for description in pathological:
            p = multiprocessing.Process(target=run_pathological_test,
                                        args=(description, shared))
            p.start()
            # wait TIMEOUT seconds or until it finishes
            p.join(TIMEOUT)
            if p.is_alive():
                print(description, '[TIMEOUT]')
                # kill it if still active
                p.terminate()
                p.join()
                broken = True
            elif p.exitcode != 0:
                # The worker died (e.g. uncaught exception) without being
                # able to record an outcome; do not let that pass silently.
                print(description,
                      '[ERRORED (worker exit code %d)]' % p.exitcode)
                broken = True
            else:
                broken = False
            if broken:
                # Cases without an entry are not allowed to fail.
                if allowed_failures.get(description, False):
                    shared['ignored'].append(description)
                else:
                    shared['errored'].append(description)
        # Copy the outcomes into plain lists before the manager shuts down.
        for outcome, names in shared.items():
            results[outcome].extend(names[:])

    passed = len(results['passed'])
    failed = len(results['failed'])
    errored = len(results['errored'])
    ignored = len(results['ignored'])

    print("%d passed, %d failed, %d errored" % (passed, failed, errored))
    if ignored > 0:
        print("Ignoring these allowed failures:")
        for x in results['ignored']:
            print(x)
    # sys.exit rather than the site-provided exit(), which is meant for
    # interactive use and is absent when Python runs with -S.
    sys.exit(0 if failed == 0 and errored == 0 else 1)


if __name__ == "__main__":
    run_tests()