cmark

My personal build of CMark ✏️

Commit
a8d97a098742413d0ffdc3602d1798df6e4f00a1
Parent
1ba48229420ac28152f87d27a77d0980d79ff1a9
Author
John MacFarlane <jgm@berkeley.edu>
Date

Fixed normalization bug, added more doctests for normalization.

* The tests test for removal of whitespace around block-level tags.
* Previously whitespace wasn't removed before an initial block-level tag; this commit fixes that.
* Also revised wording so it's clear that whitespace is removed on both sides of block-level tags.

Closes #246 in a slightly different way.

Diffstat

1 file changed, 17 insertions, 3 deletions

Status File Name N° Changes Insertions Deletions
Modified test/normalize.py 20 17 3
diff --git a/test/normalize.py b/test/normalize.py
@@ -32,7 +32,7 @@ class MyHTMLParser(HTMLParser):
     def handle_endtag(self, tag):
         if tag == "pre":
             self.in_pre = False
-        if self.is_block_tag(tag):
+        elif self.is_block_tag(tag):
             self.output = self.output.rstrip()
         self.output += "</" + tag + ">"
         self.last_tag = tag
@@ -40,6 +40,8 @@ class MyHTMLParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
         if tag == "pre":
             self.in_pre = True
+        if self.is_block_tag(tag):
+            self.output = self.output.rstrip()
         self.output += "<" + tag
         # For now we don't strip out 'extra' attributes, because of
         # raw HTML test cases.
@@ -125,11 +127,23 @@ def normalize_html(html):
         >>> normalize_html("<p>a  \t\nb</p>")
         u'<p>a b</p>'
 
-    * Outer whitespace (outside block-level tags) is removed.
+    * Whitespace surrounding block-level tags is removed.
+
+        >>> normalize_html("<p>a  b</p>")
+        u'<p>a b</p>'
+
+        >>> normalize_html(" <p>a  b</p>")
+        u'<p>a b</p>'
 
-        >>> normalize_html("<p>a  b</p>  ")
+        >>> normalize_html("<p>a  b</p> ")
         u'<p>a b</p>'
 
+        >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
+        u'<p>a b</p>'
+
+        >>> normalize_html("<i>a  b</i> ")
+        u'<i>a b</i> '
+
     * Self-closing tags are converted to open tags.
 
         >>> normalize_html("<br />")