cmark

My personal build of CMark ✏️

Commit
78e2b78ee1b71bf0b4a3790a72a8f76538980976
Parent
e36d0b941a2cc5a9d74bf91733d3f094c4d60456
Author
Nick Wellnhofer <wellnhofer@aevum.de>
Date

Avoid quadratic output growth with reference links

Keep track of the number of bytes added through expansion of reference links and limit the total to the size of the input document. Always allow a minimum of 100KB.

Unfortunately, cmark has no error handling, so all we can do is to stop expanding reference links without returning an error. This should never be an issue in practice though. The 100KB minimum alone should cover all real-world cases.

See issue #354.

Diffstat

4 files changed, 34 insertions, 1 deletion

Status File Name N° Changes Insertions Deletions
Modified src/blocks.c 14 14 0
Modified src/parser.h 1 1 0
Modified src/references.c 17 16 1
Modified src/references.h 3 3 0
diff --git a/src/blocks.c b/src/blocks.c
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <stdio.h>
+#include <limits.h>
 
 #include "cmark_ctype.h"
 #include "config.h"
@@ -518,6 +519,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
   }
 
   finalize(parser, parser->root);
+
+  // Limit total size of extra content created from reference links to
+  // document size to avoid superlinear growth. Always allow 100KB.
+  if (parser->total_size > 100000)
+    parser->refmap->max_ref_size = parser->total_size;
+  else
+    parser->refmap->max_ref_size = 100000;
+
   process_inlines(parser->mem, parser->root, parser->refmap, parser->options);
 
   cmark_strbuf_free(&parser->content);
@@ -564,6 +573,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   const unsigned char *end = buffer + len;
   static const uint8_t repl[] = {239, 191, 189};
 
+  if (len > UINT_MAX - parser->total_size)
+    parser->total_size = UINT_MAX;
+  else
+    parser->total_size += len;
+
   // Skip UTF-8 BOM if present; see #334
   if (parser->line_number == 0 && parser->column == 0 && len >= 3 &&
       *buffer == 0xEF && *(buffer + 1) == 0xBB &&
diff --git a/src/parser.h b/src/parser.h
@@ -32,6 +32,7 @@ struct cmark_parser {
   cmark_strbuf content;
   int options;
   bool last_buffer_ended_with_cr;
+  unsigned int total_size;
 };
 
 #ifdef __cplusplus
diff --git a/src/references.c b/src/references.c
@@ -62,6 +62,11 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
   ref->age = map->size;
   ref->next = map->refs;
 
+  if (ref->url != NULL)
+    ref->size += strlen((char*)ref->url);
+  if (ref->title != NULL)
+    ref->size += strlen((char*)ref->title);
+
   map->refs = ref;
   map->size++;
 }
@@ -110,6 +115,7 @@ static void sort_references(cmark_reference_map *map) {
 cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
                                         cmark_chunk *label) {
   cmark_reference **ref = NULL;
+  cmark_reference *r = NULL;
   unsigned char *norm;
 
   if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
@@ -128,7 +134,16 @@ cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
   ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *),
                 refsearch);
   map->mem->free(norm);
-  return ref ? ref[0] : NULL;
+
+  if (ref != NULL) {
+    r = ref[0];
+    /* Check for expansion limit */
+    if (map->max_ref_size && r->size > map->max_ref_size - map->ref_size)
+      return NULL;
+    map->ref_size += r->size;
+  }
+
+  return r;
 }
 
 void cmark_reference_map_free(cmark_reference_map *map) {
diff --git a/src/references.h b/src/references.h
@@ -13,6 +13,7 @@ struct cmark_reference {
   unsigned char *url;
   unsigned char *title;
   unsigned int age;
+  unsigned int size;
 };
 
 typedef struct cmark_reference cmark_reference;
@@ -22,6 +23,8 @@ struct cmark_reference_map {
   cmark_reference *refs;
   cmark_reference **sorted;
   unsigned int size;
+  unsigned int ref_size;
+  unsigned int max_ref_size;
 };
 
 typedef struct cmark_reference_map cmark_reference_map;