cmark

My personal build of CMark ✏️

Commit
3acbdf0965859c55fa36c65a4c0e17e92012687c
Parent
df7ef9ed7b5f418897df557c9de88eaba2174703
Author
Nick Wellnhofer <wellnhofer@aevum.de>
Date

Use C string instead of chunk for code info and literal

Use zero-terminated C strings instead of cmark_chunks, without storing the length. The length of code literals will be re-added in a later commit. The strlen overhead for code info should be negligible.

Reduces size of struct cmark_node by 8 bytes.

Diffstat

5 files changed, 30 insertions, 24 deletions

Status File Name N° Changes Insertions Deletions
Modified src/blocks.c 20 12 8
Modified src/html.c 11 6 5
Modified src/node.c 12 6 6
Modified src/node.h 4 2 2
Modified src/xml.c 7 4 3
diff --git a/src/blocks.c b/src/blocks.c
@@ -302,11 +302,15 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
       }
       assert(pos < node_content->size);
 
-      cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
-      houdini_unescape_html_f(&tmp, node_content->ptr, pos);
-      cmark_strbuf_trim(&tmp);
-      cmark_strbuf_unescape(&tmp);
-      b->as.code.info = cmark_chunk_buf_detach(&tmp);
+      if (pos == 0) {
+        b->as.code.info = NULL;
+      } else {
+        cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
+        houdini_unescape_html_f(&tmp, node_content->ptr, pos);
+        cmark_strbuf_trim(&tmp);
+        cmark_strbuf_unescape(&tmp);
+        b->as.code.info = cmark_strbuf_detach(&tmp);
+      }
 
       if (node_content->ptr[pos] == '\r')
         pos += 1;
@@ -314,7 +318,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
         pos += 1;
       cmark_strbuf_drop(node_content, pos);
     }
-    b->as.code.literal = cmark_chunk_buf_detach(node_content);
+    b->as.code.literal = cmark_strbuf_detach(node_content);
     break;
 
   case CMARK_NODE_HTML_BLOCK:
@@ -972,7 +976,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->as.code.fence_length = (matched > 255) ? 255 : matched;
       (*container)->as.code.fence_offset =
           (int8_t)(parser->first_nonspace - parser->offset);
-      (*container)->as.code.info = cmark_chunk_literal("");
+      (*container)->as.code.info = NULL;
       S_advance_offset(parser, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
@@ -1074,7 +1078,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->as.code.fence_char = 0;
       (*container)->as.code.fence_length = 0;
       (*container)->as.code.fence_offset = 0;
-      (*container)->as.code.info = cmark_chunk_literal("");
+      (*container)->as.code.info = NULL;
 
     } else {
       break;
diff --git a/src/html.c b/src/html.c
@@ -146,25 +146,26 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
   case CMARK_NODE_CODE_BLOCK:
     cr(html);
 
-    if (node->as.code.info.len == 0) {
+    if (node->as.code.info == NULL || node->as.code.info[0] == 0) {
       cmark_strbuf_puts(html, "<pre");
       S_render_sourcepos(node, html, options);
       cmark_strbuf_puts(html, "><code>");
     } else {
       bufsize_t first_tag = 0;
-      while (first_tag < node->as.code.info.len &&
-             !cmark_isspace(node->as.code.info.data[first_tag])) {
+      while (node->as.code.info[first_tag] &&
+             !cmark_isspace(node->as.code.info[first_tag])) {
         first_tag += 1;
       }
 
       cmark_strbuf_puts(html, "<pre");
       S_render_sourcepos(node, html, options);
       cmark_strbuf_puts(html, "><code class=\"language-");
-      escape_html(html, node->as.code.info.data, first_tag);
+      escape_html(html, node->as.code.info, first_tag);
       cmark_strbuf_puts(html, "\">");
     }
 
-    escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
+    escape_html(html, node->as.code.literal,
+                strlen((char *)node->as.code.literal));
     cmark_strbuf_puts(html, "</code></pre>\n");
     break;
 
diff --git a/src/node.c b/src/node.c
@@ -109,8 +109,8 @@ static void S_free_nodes(cmark_node *e) {
     cmark_strbuf_free(&e->content);
     switch (e->type) {
     case CMARK_NODE_CODE_BLOCK:
-      cmark_chunk_free(NODE_MEM(e), &e->as.code.info);
-      cmark_chunk_free(NODE_MEM(e), &e->as.code.literal);
+      NODE_MEM(e)->free(e->as.code.info);
+      NODE_MEM(e)->free(e->as.code.literal);
       break;
     case CMARK_NODE_TEXT:
     case CMARK_NODE_HTML_INLINE:
@@ -298,7 +298,7 @@ const char *cmark_node_get_literal(cmark_node *node) {
     return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal);
 
   case CMARK_NODE_CODE_BLOCK:
-    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.literal);
+    return (char *)node->as.code.literal;
 
   default:
     break;
@@ -321,7 +321,7 @@ int cmark_node_set_literal(cmark_node *node, const char *content) {
     return 1;
 
   case CMARK_NODE_CODE_BLOCK:
-    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.literal, content);
+    cmark_set_cstr(NODE_MEM(node), &node->as.code.literal, content);
     return 1;
 
   default:
@@ -478,7 +478,7 @@ const char *cmark_node_get_fence_info(cmark_node *node) {
   }
 
   if (node->type == CMARK_NODE_CODE_BLOCK) {
-    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info);
+    return node->as.code.info ? (char *)node->as.code.info : "";
   } else {
     return NULL;
   }
@@ -490,7 +490,7 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) {
   }
 
   if (node->type == CMARK_NODE_CODE_BLOCK) {
-    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info);
+    cmark_set_cstr(NODE_MEM(node), &node->as.code.info, info);
     return 1;
   } else {
     return 0;
diff --git a/src/node.h b/src/node.h
@@ -23,8 +23,8 @@ typedef struct {
 } cmark_list;
 
 typedef struct {
-  cmark_chunk info;
-  cmark_chunk literal;
+  unsigned char *info;
+  unsigned char *literal;
   uint8_t fence_length;
   uint8_t fence_offset;
   unsigned char fence_char;
diff --git a/src/xml.c b/src/xml.c
@@ -95,13 +95,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
       cmark_strbuf_puts(xml, buffer);
       break;
     case CMARK_NODE_CODE_BLOCK:
-      if (node->as.code.info.len > 0) {
+      if (node->as.code.info) {
         cmark_strbuf_puts(xml, " info=\"");
-        escape_xml(xml, node->as.code.info.data, node->as.code.info.len);
+        escape_xml(xml, node->as.code.info, strlen((char *)node->as.code.info));
         cmark_strbuf_putc(xml, '"');
       }
       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
-      escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len);
+      escape_xml(xml, node->as.code.literal,
+                 strlen((char *)node->as.code.literal));
       cmark_strbuf_puts(xml, "</");
       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
       literal = true;