cmark

My personal build of CMark ✏️

Commit
325a1471d2a32bcc1e2d2580b973ff4ba1df85e8
Parent
ca8ef74a8d50fbd76fb0d22fb110e660ef9944a4
Author
John MacFarlane <jgm@berkeley.edu>
Date

Make rendering safe by default.

Removes CMARK_OPT_SAFE from options.

Adds CMARK_OPT_UNSAFE, with the opposite meaning. The new default behavior is to suppress raw HTML and potentially dangerous links. The CMARK_OPT_UNSAFE option has to be set explicitly to prevent this.

-------------------------------------------------------- NOTE: This change will require modifications in bindings for cmark and in most libraries and programs that use cmark. --------------------------------------------------------

Closes #239, #273.

Borrows heavily from @kivikakk's patch in github/cmark-gfm#123.

Diffstat

8 files changed, 32 insertions, 29 deletions

Status File Name N° Changes Insertions Deletions
Modified README.md 12 6 6
Modified api_test/main.c 5 3 2
Modified man/man3/cmark.3 10 5 5
Modified src/cmark.h 10 5 5
Modified src/html.c 12 6 6
Modified src/main.c 6 3 3
Modified test/cmark-fuzz.c 2 1 1
Modified test/cmark.py 4 3 1
diff --git a/README.md b/README.md
@@ -156,14 +156,14 @@ be found in the man pages in the `man` subdirectory.
 Security
 --------
 
-By default, the library will pass through raw HTML and potentially
+By default, the library will scrub raw HTML and potentially
 dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
 
-It is recommended that users either disable this potentially unsafe
-feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
-command-line program), or run the output through an HTML sanitizer
-to protect against
-[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
+To allow these, use the option `CMARK_OPT_UNSAFE` (or
+`--unsafe`) with the command line program. If doing so, we
+recommend you use a HTML sanitizer specific to your needs to
+protect against [XSS
+attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
 
 Contributing
 ------------
diff --git a/api_test/main.c b/api_test/main.c
@@ -177,7 +177,8 @@ static void accessors(test_batch_runner *runner) {
   OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")),
      "set_literal suffix");
 
-  char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
+  char *rendered_html = cmark_render_html(doc,
+		          CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE);
   static const char expected_html[] =
       "<h3>Header</h3>\n"
       "<ol start=\"3\">\n"
@@ -859,7 +860,7 @@ static void test_safe(test_batch_runner *runner) {
                                  "a>\n[link](JAVAscript:alert('hi'))\n![image]("
                                  "file:my.js)\n";
   char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1,
-                                      CMARK_OPT_DEFAULT | CMARK_OPT_SAFE);
+                                      CMARK_OPT_DEFAULT);
   STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted "
                        "-->hi<!-- raw HTML omitted -->\n<a "
                        "href=\"\">link</a>\n<img src=\"\" alt=\"image\" "
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "June 02, 2017" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "March 17, 2019" "LOCAL" "Library Functions Manual"
 .SH
 NAME
 .PP
@@ -721,17 +721,17 @@ Render \f[C]softbreak\f[] elements as hard line breaks.
 .nf
 \fC
 .RS 0n
-#define CMARK_OPT_SAFE (1 << 3)
+#define CMARK_OPT_UNSAFE (1 << 17)
 .RE
 \f[]
 .fi
 
 .PP
-Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
+Render raw HTML and unsafe links (\f[C]javascript:\f[],
 \f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
 \f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
-\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
-HTML comment. Unsafe links are replaced by empty strings.
+\f[C]image/webp\f[] mime types). By default, raw HTML is replaced by a
+placeholder HTML comment. Unsafe links are replaced by empty strings.
 
 .PP
 .nf
diff --git a/src/cmark.h b/src/cmark.h
@@ -552,13 +552,13 @@ char *cmark_render_latex(cmark_node *root, int options, int width);
  */
 #define CMARK_OPT_HARDBREAKS (1 << 2)
 
-/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
+/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
  * `file:`, and `data:`, except for `image/png`, `image/gif`,
- * `image/jpeg`, or `image/webp` mime types).  Raw HTML is replaced
- * by a placeholder HTML comment. Unsafe links are replaced by
- * empty strings.
+ * `image/jpeg`, or `image/webp` mime types).  By default,
+ * raw HTML is replaced by a placeholder HTML comment. Unsafe
+ * links are replaced by empty strings.
  */
-#define CMARK_OPT_SAFE (1 << 3)
+#define CMARK_OPT_UNSAFE (1 << 17)
 
 /** Render `softbreak` elements as spaces.
  */
diff --git a/src/html.c b/src/html.c
@@ -170,7 +170,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
 
   case CMARK_NODE_HTML_BLOCK:
     cr(html);
-    if (options & CMARK_OPT_SAFE) {
+    if (!(options & CMARK_OPT_UNSAFE)) {
       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
     } else {
       cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
@@ -242,7 +242,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     break;
 
   case CMARK_NODE_HTML_INLINE:
-    if (options & CMARK_OPT_SAFE) {
+    if (!(options & CMARK_OPT_UNSAFE)) {
       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
     } else {
       cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
@@ -278,8 +278,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
   case CMARK_NODE_LINK:
     if (entering) {
       cmark_strbuf_puts(html, "<a href=\"");
-      if (!((options & CMARK_OPT_SAFE) &&
-            scan_dangerous_url(&node->as.link.url, 0))) {
+      if ((options & CMARK_OPT_UNSAFE) ||
+            !(scan_dangerous_url(&node->as.link.url, 0))) {
         houdini_escape_href(html, node->as.link.url.data,
                             node->as.link.url.len);
       }
@@ -296,8 +296,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
   case CMARK_NODE_IMAGE:
     if (entering) {
       cmark_strbuf_puts(html, "<img src=\"");
-      if (!((options & CMARK_OPT_SAFE) &&
-            scan_dangerous_url(&node->as.link.url, 0))) {
+      if ((options & CMARK_OPT_UNSAFE) ||
+            !(scan_dangerous_url(&node->as.link.url, 0))) {
         houdini_escape_href(html, node->as.link.url.data,
                             node->as.link.url.len);
       }
diff --git a/src/main.c b/src/main.c
@@ -38,7 +38,7 @@ void print_usage() {
   printf("  --sourcepos      Include source position attribute\n");
   printf("  --hardbreaks     Treat newlines as hard line breaks\n");
   printf("  --nobreaks       Render soft line breaks as spaces\n");
-  printf("  --safe           Suppress raw HTML and dangerous URLs\n");
+  printf("  --unsafe         Render raw HTML and dangerous URLs\n");
   printf("  --smart          Use smart punctuation\n");
   printf("  --validate-utf8  Replace UTF-8 invalid sequences with U+FFFD\n");
   printf("  --help, -h       Print usage information\n");
@@ -112,8 +112,8 @@ int main(int argc, char *argv[]) {
       options |= CMARK_OPT_NOBREAKS;
     } else if (strcmp(argv[i], "--smart") == 0) {
       options |= CMARK_OPT_SMART;
-    } else if (strcmp(argv[i], "--safe") == 0) {
-      options |= CMARK_OPT_SAFE;
+    } else if (strcmp(argv[i], "--unsafe") == 0) {
+      options |= CMARK_OPT_UNSAFE;
     } else if (strcmp(argv[i], "--validate-utf8") == 0) {
       options |= CMARK_OPT_VALIDATE_UTF8;
     } else if ((strcmp(argv[i], "--help") == 0) ||
diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c
@@ -13,7 +13,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
     memcpy(&fuzz_config, data, sizeof(fuzz_config));
 
     /* Mask off valid option bits */
-    fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
+    fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
 
     /* Remainder of input is the markdown */
     const char *markdown = (const char *)(data + sizeof(fuzz_config));
diff --git a/test/cmark.py b/test/cmark.py
@@ -17,7 +17,8 @@ def to_html(lib, text):
     markdown.argtypes = [c_char_p, c_size_t, c_int]
     textbytes = text.encode('utf-8')
     textlen = len(textbytes)
-    result = markdown(textbytes, textlen, 0).decode('utf-8')
+    # 1 << 17 == CMARK_OPT_UNSAFE
+    result = markdown(textbytes, textlen, 1 << 17).decode('utf-8')
     return [0, result, '']
 
 def to_commonmark(lib, text):
@@ -37,6 +38,7 @@ class CMark:
     def __init__(self, prog=None, library_dir=None):
         self.prog = prog
         if prog:
+            prog += ' --unsafe'
             self.to_html = lambda x: pipe_through_prog(prog, x)
             self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
         else: