cmark

My personal build of CMark ✏️

Commit
04936d63235a229c30d2cf2cd23ca5a177f0c133
Parent
9d6697f9d37feb644a8ad31b8232f870df9df7f3
Author
John MacFarlane <jgm@berkeley.edu>
Date

Add pathological test for reference collisions (see #220).

This is taken from GitHub's fix: https://github.com/github/cmark-gfm/commit/66a0836dc91e1653f7931e1218446664493da520

Diffstat

1 file changed, 29 insertions, 2 deletions

| Status | File Name | N° Changes | Insertions | Deletions |
|---|---|---|---|---|
| Modified | test/pathological_tests.py | 31 | 29 | 2 |
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
@@ -5,10 +5,13 @@ import re
 import argparse
 import sys
 import platform
+import itertools
 import multiprocessing
 import time
 from cmark import CMark
 
+TIMEOUT = 5  # seconds run_tests waits on each test process (p.join) before printing [TIMEOUT]
+
 parser = argparse.ArgumentParser(description='Run cmark tests.')
 parser.add_argument('--program', dest='program', nargs='?', default=None,
         help='program to test')
@@ -20,6 +23,29 @@ allowed_failures = {"many references": True}
 
 cmark = CMark(prog=args.program, library_dir=args.library_dir)
 
+def hash_collisions():  # Build the "reference collisions" case: returns (markdown input, compiled regex the output must match).
+    REFMAP_SIZE = 16  # modulus used by badhash; presumably the bucket count of cmark's reference map -- TODO confirm
+    COUNT = 50000  # how many badhash-accepted keys are drawn (the first is only referenced; the other COUNT-1 get definitions)
+
+    def badhash(ref):  # True when ref's 32-bit hash value is a multiple of REFMAP_SIZE
+        h = 0
+        for c in ref:
+            a = (h << 6) & 0xFFFFFFFF  # shifted terms are truncated to 32 bits
+            b = (h << 16) & 0xFFFFFFFF
+            h = ord(c) + a + b - h
+            h = h & 0xFFFFFFFF  # wrap the running hash back into 32 bits
+
+        return (h % REFMAP_SIZE) == 0
+
+    keys = ("x%d" % i for i in itertools.count())  # lazy, unbounded stream: x0, x1, x2, ...
+    collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)  # first COUNT keys accepted by badhash
+    bad_key = next(collisions)  # consumed here, so the loop below never emits a definition for it
+
+    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)  # per remaining key: define [key], then a paragraph that is just [bad_key]
+
+    return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))  # expects <p>[bad_key]</p> repeated COUNT-1 times in a row
+
+
 # list of pairs consisting of input and a regex that must match the output.
 pathological = {
     # note - some pythons have limit of 65535 for {num-matches} in re.
@@ -74,6 +100,7 @@ pathological = {
     "unclosed links B":
                  ("[a](b" * 30000,
                   re.compile("(\[a\]\(b){30000}")),
+    "reference collisions": hash_collisions()
 #    "many references":
 #                 ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
 #                  re.compile("(\[0\] ){4999}"))
@@ -111,8 +138,8 @@ def run_tests():
         p = multiprocessing.Process(target=run_pathological_test,
                   args=(description, results,))
         p.start()
-        # wait 4 seconds or until it finishes
-        p.join(4)
+        # wait TIMEOUT seconds or until it finishes
+        p.join(TIMEOUT)
         # kill it if still active
         if p.is_alive():
             print(description, '[TIMEOUT]')