cmark

My personal build of CMark ✏️

Commit: b28c97c9b8af266d4f12deb5febcf28807d9f5c6
Parent: b5f809582e073a3b4cb31a167e03f18145a04249
Author: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Dec 2014 08:14:13 -0800

Added a few more doctests for HTML normalization (#245).

Diffstat

1 file changed, 21 insertions, 1 deletion

Status	File Name	N° Changes	Insertions	Deletions
Modified	test/normalize.py	22	21	1

diff --git a/test/normalize.py b/test/normalize.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from HTMLParser import HTMLParser, HTMLParseError
 from htmlentitydefs import name2codepoint
 import sys
@@ -118,14 +119,33 @@ def normalize_html(html):
     Multiple inner whitespaces are collapsed to a single space (except
     in pre tags):
 
+        >>> normalize_html("<p>a  \t b</p>")
+        u'<p>a b</p>'
+
         >>> normalize_html("<p>a  \t\nb</p>")
         u'<p>a b</p>'
 
     * Outer whitespace (outside block-level tags) is removed.
+
+        >>> normalize_html("<p>a  b</p>  ")
+        u'<p>a b</p>'
+
     * Self-closing tags are converted to open tags.
+
+        >>> normalize_html("<br />")
+        u'<br>'
+
     * Attributes are sorted and lowercased.
+
+        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
+        u'<a href="foo" title="bar">x</a>'
+
     * References are converted to unicode, except that '<', '>', '&', and
-      '&' are rendered using entities.
+      '"' are rendered using entities.
+
+        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
+        u'\u2200&amp;&gt;&lt;&quot;'
+
     """
     html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
     try: