cmark

My personal build of CMark ✏️

Commit
b237924585e61532ada774bf9e70eadff00666dc
Parent
3acbdf0965859c55fa36c65a4c0e17e92012687c
Author
Nick Wellnhofer <wellnhofer@aevum.de>
Date

Use C string instead of chunk for link URL and title

Store link URLs and titles as zero-terminated C strings instead of cmark_chunks, so that the length is no longer stored. This introduces a few additional strlen computations, but the overhead should be low.

This allows the size of struct cmark_node to be reduced later.

Diffstat

10 files changed, 68 insertions, 81 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 2 1 1
Modified src/commonmark.c 26 10 16
Modified src/html.c 22 12 10
Modified src/inlines.c 57 21 36
Modified src/inlines.h 7 5 2
Modified src/node.c 12 6 6
Modified src/node.h 4 2 2
Modified src/references.c 4 2 2
Modified src/references.h 4 2 2
Modified src/xml.c 11 7 4
diff --git a/api_test/main.c b/api_test/main.c
@@ -915,7 +915,7 @@ static void source_pos(test_batch_runner *runner) {
                       "  </heading>\n"
                       "  <paragraph sourcepos=\"3:1-4:42\">\n"
                       "    <text sourcepos=\"3:1-3:14\" xml:space=\"preserve\">Hello “ </text>\n"
-                      "    <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
+                      "    <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\">\n"
                       "      <text sourcepos=\"3:16-3:36\" xml:space=\"preserve\">http://www.google.com</text>\n"
                       "    </link>\n"
                       "    <softbreak />\n"
diff --git a/src/commonmark.c b/src/commonmark.c
@@ -119,24 +119,22 @@ static int shortest_unused_backtick_sequence(const char *code) {
 }
 
 static bool is_autolink(cmark_node *node) {
-  cmark_chunk *title;
-  cmark_chunk *url;
+  const unsigned char *title;
+  const unsigned char *url;
   cmark_node *link_text;
-  char *realurl;
-  int realurllen;
 
   if (node->type != CMARK_NODE_LINK) {
     return false;
   }
 
-  url = &node->as.link.url;
-  if (url->len == 0 || scan_scheme(url, 0) == 0) {
+  url = node->as.link.url;
+  if (url == NULL || _scan_scheme(url) == 0) {
     return false;
   }
 
-  title = &node->as.link.title;
+  title = node->as.link.title;
   // if it has a title, we can't treat it as an autolink:
-  if (title->len > 0) {
+  if (title && title[0]) {
     return false;
   }
 
@@ -145,15 +143,11 @@ static bool is_autolink(cmark_node *node) {
     return false;
   }
   cmark_consolidate_text_nodes(link_text);
-  realurl = (char *)url->data;
-  realurllen = url->len;
-  if (strncmp(realurl, "mailto:", 7) == 0) {
-    realurl += 7;
-    realurllen -= 7;
+  if (strcmp((const char *)url, "mailto:") == 0) {
+    url += 7;
   }
-  return (realurllen == link_text->as.literal.len &&
-          strncmp(realurl, (char *)link_text->as.literal.data,
-                  link_text->as.literal.len) == 0);
+  return strncmp((const char *)url, (char *)link_text->as.literal.data,
+                 link_text->as.literal.len) == 0;
 }
 
 // if node is a block node, returns node.
diff --git a/src/html.c b/src/html.c
@@ -280,13 +280,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     if (entering) {
       cmark_strbuf_puts(html, "<a href=\"");
       if ((options & CMARK_OPT_UNSAFE) ||
-            !(scan_dangerous_url(&node->as.link.url, 0))) {
-        houdini_escape_href(html, node->as.link.url.data,
-                            node->as.link.url.len);
+            !(_scan_dangerous_url(node->as.link.url))) {
+        houdini_escape_href(html, node->as.link.url,
+                            strlen((char *)node->as.link.url));
       }
-      if (node->as.link.title.len) {
+      if (node->as.link.title) {
         cmark_strbuf_puts(html, "\" title=\"");
-        escape_html(html, node->as.link.title.data, node->as.link.title.len);
+        escape_html(html, node->as.link.title,
+                    strlen((char *)node->as.link.title));
       }
       cmark_strbuf_puts(html, "\">");
     } else {
@@ -298,16 +299,17 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     if (entering) {
       cmark_strbuf_puts(html, "<img src=\"");
       if ((options & CMARK_OPT_UNSAFE) ||
-            !(scan_dangerous_url(&node->as.link.url, 0))) {
-        houdini_escape_href(html, node->as.link.url.data,
-                            node->as.link.url.len);
+            !(_scan_dangerous_url(node->as.link.url))) {
+        houdini_escape_href(html, node->as.link.url,
+                            strlen((char *)node->as.link.url));
       }
       cmark_strbuf_puts(html, "\" alt=\"");
       state->plain = node;
     } else {
-      if (node->as.link.title.len) {
+      if (node->as.link.title) {
         cmark_strbuf_puts(html, "\" title=\"");
-        escape_html(html, node->as.link.title.data, node->as.link.title.len);
+        escape_html(html, node->as.link.title,
+                    strlen((char *)node->as.link.title));
       }
 
       cmark_strbuf_puts(html, "\" />");
diff --git a/src/inlines.c b/src/inlines.c
@@ -117,36 +117,27 @@ static cmark_node *make_str_with_entities(subject *subj,
 
 // Duplicate a chunk by creating a copy of the buffer not by reusing the
 // buffer like cmark_chunk_dup does.
-static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
-  cmark_chunk c;
-  bufsize_t len = src->len;
-
-  c.len = len;
-  c.data = (unsigned char *)mem->calloc(len + 1, 1);
-  c.alloc = 1;
-  if (len)
-    memcpy(c.data, src->data, len);
-  c.data[len] = '\0';
-
-  return c;
+static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) {
+  if (src == NULL) {
+    return NULL;
+  }
+  size_t len = strlen((char *)src);
+  unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1);
+  memcpy(data, src, len + 1);
+  return data;
 }
 
-static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
-                                        int is_email) {
+static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
+                                           int is_email) {
   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 
   cmark_chunk_trim(url);
 
-  if (url->len == 0) {
-    cmark_chunk result = CMARK_CHUNK_EMPTY;
-    return result;
-  }
-
   if (is_email)
     cmark_strbuf_puts(&buf, "mailto:");
 
   houdini_unescape_html_f(&buf, url->data, url->len);
-  return cmark_chunk_buf_detach(&buf);
+  return cmark_strbuf_detach(&buf);
 }
 
 static CMARK_INLINE cmark_node *make_autolink(subject *subj,
@@ -154,7 +145,7 @@ static CMARK_INLINE cmark_node *make_autolink(subject *subj,
                                               cmark_chunk url, int is_email) {
   cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
   link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
-  link->as.link.title = cmark_chunk_literal("");
+  link->as.link.title = NULL;
   link->start_line = link->end_line = subj->line;
   link->start_column = start_column + 1;
   link->end_column = end_column + 1;
@@ -799,29 +790,23 @@ static cmark_node *handle_entity(subject *subj) {
 
 // Clean a URL: remove surrounding whitespace, and remove \ that escape
 // punctuation.
-cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 
   cmark_chunk_trim(url);
 
-  if (url->len == 0) {
-    cmark_chunk result = CMARK_CHUNK_EMPTY;
-    return result;
-  }
-
-    houdini_unescape_html_f(&buf, url->data, url->len);
+  houdini_unescape_html_f(&buf, url->data, url->len);
 
   cmark_strbuf_unescape(&buf);
-  return cmark_chunk_buf_detach(&buf);
+  return cmark_strbuf_detach(&buf);
 }
 
-cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
   cmark_strbuf buf = CMARK_BUF_INIT(mem);
   unsigned char first, last;
 
   if (title->len == 0) {
-    cmark_chunk result = CMARK_CHUNK_EMPTY;
-    return result;
+    return NULL;
   }
 
   first = title->data[0];
@@ -836,7 +821,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
   }
 
   cmark_strbuf_unescape(&buf);
-  return cmark_chunk_buf_detach(&buf);
+  return cmark_strbuf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
@@ -1003,7 +988,7 @@ static cmark_node *handle_close_bracket(subject *subj) {
   bufsize_t sps, n;
   cmark_reference *ref = NULL;
   cmark_chunk url_chunk, title_chunk;
-  cmark_chunk url, title;
+  unsigned char *url, *title;
   bracket *opener;
   cmark_node *inl;
   cmark_chunk raw_label;
@@ -1090,8 +1075,8 @@ static cmark_node *handle_close_bracket(subject *subj) {
   }
 
   if (ref != NULL) { // found
-    url = chunk_clone(subj->mem, &ref->url);
-    title = chunk_clone(subj->mem, &ref->title);
+    url = cmark_strdup(subj->mem, ref->url);
+    title = cmark_strdup(subj->mem, ref->title);
     goto match;
   } else {
     goto noMatch;
diff --git a/src/inlines.h b/src/inlines.h
@@ -1,12 +1,15 @@
 #ifndef CMARK_INLINES_H
 #define CMARK_INLINES_H
 
+#include "chunk.h"
+#include "references.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
-cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
+unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
+unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                          cmark_reference_map *refmap, int options);
diff --git a/src/node.c b/src/node.c
@@ -120,8 +120,8 @@ static void S_free_nodes(cmark_node *e) {
       break;
     case CMARK_NODE_LINK:
     case CMARK_NODE_IMAGE:
-      cmark_chunk_free(NODE_MEM(e), &e->as.link.url);
-      cmark_chunk_free(NODE_MEM(e), &e->as.link.title);
+      NODE_MEM(e)->free(e->as.link.url);
+      NODE_MEM(e)->free(e->as.link.title);
       break;
     case CMARK_NODE_CUSTOM_BLOCK:
     case CMARK_NODE_CUSTOM_INLINE:
@@ -505,7 +505,7 @@ const char *cmark_node_get_url(cmark_node *node) {
   switch (node->type) {
   case CMARK_NODE_LINK:
   case CMARK_NODE_IMAGE:
-    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
+    return node->as.link.url ? (char *)node->as.link.url : "";
   default:
     break;
   }
@@ -521,7 +521,7 @@ int cmark_node_set_url(cmark_node *node, const char *url) {
   switch (node->type) {
   case CMARK_NODE_LINK:
   case CMARK_NODE_IMAGE:
-    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
+    cmark_set_cstr(NODE_MEM(node), &node->as.link.url, url);
     return 1;
   default:
     break;
@@ -538,7 +538,7 @@ const char *cmark_node_get_title(cmark_node *node) {
   switch (node->type) {
   case CMARK_NODE_LINK:
   case CMARK_NODE_IMAGE:
-    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
+    return node->as.link.title ? (char *)node->as.link.title : "";
   default:
     break;
   }
@@ -554,7 +554,7 @@ int cmark_node_set_title(cmark_node *node, const char *title) {
   switch (node->type) {
   case CMARK_NODE_LINK:
   case CMARK_NODE_IMAGE:
-    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
+    cmark_set_cstr(NODE_MEM(node), &node->as.link.title, title);
     return 1;
   default:
     break;
diff --git a/src/node.h b/src/node.h
@@ -37,8 +37,8 @@ typedef struct {
 } cmark_heading;
 
 typedef struct {
-  cmark_chunk url;
-  cmark_chunk title;
+  unsigned char *url;
+  unsigned char *title;
 } cmark_link;
 
 typedef struct {
diff --git a/src/references.c b/src/references.c
@@ -18,8 +18,8 @@ static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
   cmark_mem *mem = map->mem;
   if (ref != NULL) {
     mem->free(ref->label);
-    cmark_chunk_free(mem, &ref->url);
-    cmark_chunk_free(mem, &ref->title);
+    mem->free(ref->url);
+    mem->free(ref->title);
     mem->free(ref);
   }
 }
diff --git a/src/references.h b/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
 struct cmark_reference {
   struct cmark_reference *next;
   unsigned char *label;
-  cmark_chunk url;
-  cmark_chunk title;
+  unsigned char *url;
+  unsigned char *title;
   unsigned int hash;
 };
 
diff --git a/src/xml.c b/src/xml.c
@@ -121,11 +121,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     case CMARK_NODE_LINK:
     case CMARK_NODE_IMAGE:
       cmark_strbuf_puts(xml, " destination=\"");
-      escape_xml(xml, node->as.link.url.data, node->as.link.url.len);
-      cmark_strbuf_putc(xml, '"');
-      cmark_strbuf_puts(xml, " title=\"");
-      escape_xml(xml, node->as.link.title.data, node->as.link.title.len);
+      escape_xml(xml, node->as.link.url, strlen((char *)node->as.link.url));
       cmark_strbuf_putc(xml, '"');
+      if (node->as.link.title) {
+        cmark_strbuf_puts(xml, " title=\"");
+        escape_xml(xml, node->as.link.title,
+                   strlen((char *)node->as.link.title));
+        cmark_strbuf_putc(xml, '"');
+      }
       break;
     default:
       break;