cmark

My personal build of CMark ✏️

Commit
64e1394ae76409f02b00c254f119a64a2d1ce11e
Parent
14fe768690b3948c7c1f67f463eb4620fc5746c9
Author
Yuki Izumi <kivikakk@github.com>
Date

Fix for non-matching entities (#161)

* Add test to illustrate issue
* Provide some test fixes
* Don't neglect CounterClockwiseContourIntegral
* Fix ~10% of cases not matching

  strncmp returns 0 if the first 'len' bytes of cmark_entities[i].entity match s; we check for equal length in the first `if` by checking whether cmark_entities[i].entity[len] == 0, but we neglect the case where cmp == 0 && cmark_entities[i].entity[len] != 0. This should be treated the same as cmp < 0, because strcmp("abc", "abcd") < 0.
* Don't depend on py3.3 in tests

Diffstat

5 files changed, 75 insertions, 3 deletions

Status File Name N° Changes Insertions Deletions
Modified src/entities.inc 2 1 1
Modified src/houdini_html_u.c 2 1 1
Modified test/CMakeLists.txt 4 4 0
Added test/entity_tests.py 68 68 0
Modified tools/make_entities_inc.py 2 1 1
diff --git a/src/entities.inc b/src/entities.inc
@@ -6,7 +6,7 @@ struct cmark_entity_node {
 };
 
 #define CMARK_ENTITY_MIN_LENGTH 2
-#define CMARK_ENTITY_MAX_LENGTH 31
+#define CMARK_ENTITY_MAX_LENGTH 32
 #define CMARK_NUM_ENTITIES 2125
 
 static const struct cmark_entity_node cmark_entities[] = {
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
@@ -16,7 +16,7 @@ static const unsigned char *S_lookup(int i, int low, int hi,
       strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
   if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
     return (const unsigned char *)cmark_entities[i].bytes;
-  } else if (cmp < 0 && i > low) {
+  } else if (cmp <= 0 && i > low) {
     j = i - ((i - low) / 2);
     if (j == i)
       j -= 1;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -60,6 +60,10 @@ IF (PYTHONINTERP_FOUND)
     "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
     )
 
+  add_test(entity_executable
+    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/entity_tests.py"
+    "--library-dir" "${CMAKE_CURRENT_BINARY_DIR}/../src"
+    )
 
 ELSE(PYTHONINTERP_FOUND)
 
diff --git a/test/entity_tests.py b/test/entity_tests.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import re
+import os
+import argparse
+import sys
+import platform
+import html
+from cmark import CMark
+
+def get_entities():
+    regex = r'^{\(unsigned char\*\)"([^"]+)", \{([^}]+)\}'
+    with open(os.path.join(os.path.dirname(__file__), '..', 'src', 'entities.inc')) as f:
+        code = f.read()
+    entities = []
+    for entity, utf8 in re.findall(regex, code, re.MULTILINE):
+        utf8 = bytes(map(int, utf8.split(", ")[:-1])).decode('utf-8')
+        entities.append((entity, utf8))
+    return entities
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Run cmark tests.')
+    parser.add_argument('--program', dest='program', nargs='?', default=None,
+            help='program to test')
+    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
+            default=None, help='directory containing dynamic library')
+    args = parser.parse_args(sys.argv[1:])
+
+cmark = CMark(prog=args.program, library_dir=args.library_dir)
+
+entities = get_entities()
+
+passed = 0
+errored = 0
+failed = 0
+
+exceptions = {
+    'quot': '&quot;',
+    'QUOT': '&quot;',
+
+    # These are broken, but I'm not too worried about them.
+    'nvlt': '&lt;⃒',
+    'nvgt': '&gt;⃒',
+}
+
+print("Testing entities:")
+for entity, utf8 in entities:
+    [rc, actual, err] = cmark.to_html("&{};".format(entity))
+    check = exceptions.get(entity, utf8)
+
+    if rc != 0:
+        errored += 1
+        print(entity, '[ERRORED (return code {})]'.format(rc))
+        print(err)
+    elif check in actual:
+        print(entity, '[PASSED]')
+        passed += 1
+    else:
+        print(entity, '[FAILED]')
+        print(repr(actual))
+        failed += 1
+
+print("{} passed, {} failed, {} errored".format(passed, failed, errored))
+if failed == 0 and errored == 0:
+    exit(0)
+else:
+    exit(1)
diff --git a/tools/make_entities_inc.py b/tools/make_entities_inc.py
@@ -20,7 +20,7 @@ struct cmark_entity_node {
 };
 
 #define CMARK_ENTITY_MIN_LENGTH 2
-#define CMARK_ENTITY_MAX_LENGTH 31""")
+#define CMARK_ENTITY_MAX_LENGTH 32""")
 
 print("#define CMARK_NUM_ENTITIES " + str(len(entities)));