cmark

My personal build of CMark ✏️

Commit
880039601d2bc4baf4e17649a02480876917d0ae
Parent
76478c79d3d13a21871bbed784f75fd5d9e8b1c2
Author
John MacFarlane <jgm@berkeley.edu>
Date

Code span spec changes.

These affect both parsing and writing commonmark.

Diffstat

2 files changed, 44 insertions, 4 deletions

Status File Name N° Changes Insertions Deletions
Modified src/commonmark.c 8 6 2
Modified src/inlines.c 40 38 2
diff --git a/src/commonmark.c b/src/commonmark.c
@@ -167,6 +167,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
   int list_number;
   cmark_delim_type list_delim;
   int numticks;
+  bool extra_spaces;
   int i;
   bool entering = (ev_type == CMARK_EVENT_ENTER);
   const char *info, *code, *title;
@@ -363,14 +364,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
     code = cmark_node_get_literal(node);
     code_len = strlen(code);
     numticks = shortest_unused_backtick_sequence(code);
+    extra_spaces = code_len == 0 ||
+	    code[0] == '`' || code[code_len - 1] == '`' ||
+	    code[0] == ' ' || code[code_len - 1] == ' ';
     for (i = 0; i < numticks; i++) {
       LIT("`");
     }
-    if (code_len == 0 || code[0] == '`') {
+    if (extra_spaces) {
       LIT(" ");
     }
     OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
-    if (code_len == 0 || code[code_len - 1] == '`') {
+    if (extra_spaces) {
       LIT(" ");
     }
     for (i = 0; i < numticks; i++) {
diff --git a/src/inlines.c b/src/inlines.c
@@ -323,6 +323,42 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
   return 0;
 }
 
+// Normalize code-span text in place: drop '\r'; turn '\n' into one space, or
+// drop it when last_char_was_space is set or cmark_isspace() accepts the next
+// byte; finally strip one leading and one trailing space if both are present.
+static void S_normalize_code(cmark_strbuf *s) {
+  bool last_char_was_space = false;
+  bufsize_t r, w; // r: read index, w: write index; w never exceeds r
+
+  for (r = 0, w = 0; r < s->size; ++r) {
+    switch (s->ptr[r]) {
+    case '\r': // dropped; last_char_was_space is left unchanged
+      break;
+    case '\n': // NOTE(review): the test below reads s->ptr[s->size] on the last byte; presumably a NUL terminator - confirm
+      if (!last_char_was_space && !cmark_isspace(s->ptr[r + 1])) {
+        s->ptr[w++] = ' '; // newline becomes a single space
+        last_char_was_space = true;
+      } else {
+        last_char_was_space = false; // newline dropped; flag is cleared even when the last byte written was a space
+      }
+      break;
+    default: // every other byte is copied through unchanged
+      s->ptr[w++] = s->ptr[r];
+      last_char_was_space = (s->ptr[r] == ' ');
+    }
+  }
+
+  // Compacted text is s->ptr[0..w-1]; does it begin and end with a space?
+  if (s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
+    cmark_strbuf_drop(s, 1); // presumably removes the leading byte - verify against cmark_strbuf_drop
+    cmark_strbuf_truncate(s, w - 2); // NOTE(review): w - 2 is -1 when the text is a single space - confirm truncate accepts it
+  } else {
+    cmark_strbuf_truncate(s, w); // cut the buffer to the w bytes written by the loop
+  }
+
+}
+
+
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
 static cmark_node *handle_backticks(subject *subj, int options) {
@@ -338,8 +374,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
 
     cmark_strbuf_set(&buf, subj->input.data + startpos,
                      endpos - startpos - openticks.len);
-    cmark_strbuf_trim(&buf);
-    cmark_strbuf_normalize_whitespace(&buf);
+    S_normalize_code(&buf);
 
     cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
     adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
@@ -347,6 +382,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {
   }
 }
 
+
 // Scan ***, **, or * and return number scanned, or 0.
 // Advances position.
 static int scan_delims(subject *subj, unsigned char c, bool *can_open,