cmark

My personal build of CMark ✏️

Commit
c41bf11bb38ef513fa53f88b2c80afd1504aaeaf
Parent
5c20df20af6be9444f27a8c1bbfa5b027a1fa8d8
Author
John MacFarlane <jgm@berkeley.edu>
Date

Rewrote HTML renderer using cmark_walk.

This version is shorter, more readable, and more regular. It should serve as a template for creating new writers.

Performance is the same. All tests pass.

Diffstat

2 files changed, 180 insertions, 248 deletions

Status File Name N° Changes Insertions Deletions
Modified src/html.c 418 172 246
Modified src/node.c 10 8 2
diff --git a/src/html.c b/src/html.c
@@ -11,9 +11,6 @@
 
 // Functions to convert cmark_nodes to HTML strings.
 
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight);
-
 static void escape_html(strbuf *dest, const unsigned char *source, int length)
 {
 	if (length < 0)
@@ -36,322 +33,251 @@ static inline void cr(strbuf *html)
 		strbuf_putc(html, '\n');
 }
 
-// Convert the inline children of a node to a plain string.
-static void inlines_to_plain_html(strbuf *html, cmark_node* node)
+struct render_state { // state handed to S_render_node through cmark_walk
+	strbuf* html; // buffer the rendered HTML is appended to
+	cmark_node *plain; // image node whose descendants are being emitted as escaped plain text, else NULL
+};
+
+static int
+S_render_node(cmark_node *node, int entering, void *vstate)
 {
-	cmark_node* cur = node->first_child;
+	struct render_state *state = vstate;
+	cmark_node *parent;
+	cmark_node *grandparent;
+	strbuf *html = state->html;
+	char start_header[] = "<h0>";
+	char end_header[] = "</h0>";
+	strbuf *info;
+	bool tight;
 
-	if (cur == NULL) {
-		return;
+	if (state->plain == node) { // back at original node
+		state->plain = NULL;
 	}
 
-	while (true) {
-		switch(cur->type) {
-		case NODE_TEXT:
-		case NODE_INLINE_CODE:
-		case NODE_INLINE_HTML:
-			escape_html(html, cur->as.literal.data, cur->as.literal.len);
+	if (state->plain != NULL) {
+		switch(node->type) {
+		case CMARK_NODE_TEXT:
+		case CMARK_NODE_INLINE_CODE:
+		case CMARK_NODE_INLINE_HTML:
+			escape_html(html, node->as.literal.data,
+				    node->as.literal.len);
 			break;
 
-		case NODE_LINEBREAK:
-		case NODE_SOFTBREAK:
+		case CMARK_NODE_LINEBREAK:
+		case CMARK_NODE_SOFTBREAK:
 			strbuf_putc(html, ' ');
 			break;
 
 		default:
 			break;
 		}
-
-		if (cur->first_child) {
-			cur = cur->first_child;
-			continue;
-		}
-
-	next_sibling:
-		if (cur->next) {
-			cur = cur->next;
-			continue;
-		}
-		cur = cur->parent;
-		if (cur == node) {
-			break;
-		}
-		goto next_sibling;
-	}
-}
-
-
-// Convert a cmark_node to HTML.
-static void node_to_html(strbuf *html, cmark_node *node)
-{
-	cmark_node *cur;
-	char start_header[] = "<h0>";
-	bool tight = false;
-	bool visit_children;
-	strbuf *info;
-
-	if (node == NULL) {
-		return;
+		return 1;
 	}
 
-	cur = node;
-	while (true) {
-		// Only NODE_IMAGE wants to skip its children.
-		visit_children = true;
-
-		switch(cur->type) {
-		case NODE_DOCUMENT:
-			break;
-
-		case NODE_PARAGRAPH:
-			if (!tight) {
-				cr(html);
-				strbuf_puts(html, "<p>");
-			}
-			break;
-
-		case NODE_BLOCK_QUOTE:
+	switch (node->type) {
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
 			cr(html);
 			strbuf_puts(html, "<blockquote>\n");
-			// BLOCK_QUOTE doesn't use any of the 'as' structs,
-			// so the 'list' member can be used to store the
-			// current value of 'tight'.
-			cur->as.list.tight = tight;
-			tight = false;
-			break;
-
-		case NODE_LIST_ITEM:
+		} else {
 			cr(html);
-			strbuf_puts(html, "<li>");
-			break;
+			strbuf_puts(html, "</blockquote>\n");
+		}
+		break;
 
-		case NODE_LIST: {
-			cmark_list *list = &cur->as.list;
-			bool tmp;
+	case CMARK_NODE_LIST: {
+		cmark_list_type list_type = node->as.list.list_type;
+		int start = node->as.list.start;
 
-			// make sure a list starts at the beginning of the line:
+		if (entering) {
 			cr(html);
-
-			if (list->list_type == CMARK_BULLET_LIST) {
+			if (list_type == CMARK_BULLET_LIST) {
 				strbuf_puts(html, "<ul>\n");
 			}
-			else if (list->start == 1) {
+			else if (start == 1) {
 				strbuf_puts(html, "<ol>\n");
 			}
 			else {
 				strbuf_printf(html, "<ol start=\"%d\">\n",
-					      list->start);
+					      start);
 			}
-
-			// Store the current value of 'tight' by swapping.
-			tmp = list->tight;
-			list->tight = tight;
-			tight = tmp;
-			break;
+		} else {
+			strbuf_puts(html,
+				    list_type == CMARK_BULLET_LIST ?
+				    "</ul>\n" : "</ol>\n");
 		}
+		break;
+	}
 
-		case NODE_HEADER:
-			cr(html);
-			start_header[2] = '0' + cur->as.header.level;
-			strbuf_puts(html, start_header);
-			break;
-
-		case NODE_CODE_BLOCK:
-			info = &cur->as.code.info;
-			cr(html);
-
-			if (&cur->as.code.fence_length == 0
-			    || strbuf_len(info) == 0) {
-				strbuf_puts(html, "<pre><code>");
-			}
-			else {
-				int first_tag = strbuf_strchr(info, ' ', 0);
-				if (first_tag < 0)
-					first_tag = strbuf_len(info);
-
-				strbuf_puts(html,
-					    "<pre><code class=\"language-");
-				escape_html(html, info->ptr, first_tag);
-				strbuf_puts(html, "\">");
-			}
-
-			escape_html(html, cur->string_content.ptr, cur->string_content.size);
-			break;
-
-		case NODE_HTML:
+	case CMARK_NODE_LIST_ITEM:
+		if (entering) {
 			cr(html);
-			strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
-			break;
+			strbuf_puts(html, "<li>");
+		} else {
+			strbuf_puts(html, "</li>\n");
+		}
+		break;
 
-		case NODE_HRULE:
+	case CMARK_NODE_HEADER:
+		if (entering) {
 			cr(html);
-			strbuf_puts(html, "<hr />\n");
-			break;
-
-		case NODE_REFERENCE_DEF:
-			break;
-
-		case NODE_TEXT:
-			escape_html(html, cur->as.literal.data, cur->as.literal.len);
-			break;
-
-		case NODE_LINEBREAK:
-			strbuf_puts(html, "<br />\n");
-			break;
-
-		case NODE_SOFTBREAK:
+			start_header[2] = '0' + node->as.header.level;
+			strbuf_puts(html, start_header);
+		} else {
+			end_header[3] = '0' + node->as.header.level;
+			strbuf_puts(html, end_header);
 			strbuf_putc(html, '\n');
-			break;
-
-		case NODE_INLINE_CODE:
-			strbuf_puts(html, "<code>");
-			escape_html(html, cur->as.literal.data, cur->as.literal.len);
-			break;
-
-		case NODE_INLINE_HTML:
-			strbuf_put(html,
-				   cur->as.literal.data,
-				   cur->as.literal.len);
-			break;
+		}
+		break;
 
-		case NODE_LINK:
-			strbuf_puts(html, "<a href=\"");
-			if (cur->as.link.url)
-				escape_href(html, cur->as.link.url, -1);
+	case CMARK_NODE_CODE_BLOCK:
+		info = &node->as.code.info;
+		cr(html);
 
-			if (cur->as.link.title) {
-				strbuf_puts(html, "\" title=\"");
-				escape_html(html, cur->as.link.title, -1);
-			}
+		if (&node->as.code.fence_length == 0
+		    || strbuf_len(info) == 0) {
+			strbuf_puts(html, "<pre><code>");
+		}
+		else {
+			int first_tag = strbuf_strchr(info, ' ', 0);
+			if (first_tag < 0)
+				first_tag = strbuf_len(info);
 
+			strbuf_puts(html, "<pre><code class=\"language-");
+			escape_html(html, info->ptr, first_tag);
 			strbuf_puts(html, "\">");
-			break;
-
-		case NODE_IMAGE:
-			strbuf_puts(html, "<img src=\"");
-			if (cur->as.link.url)
-				escape_href(html, cur->as.link.url, -1);
-
-			strbuf_puts(html, "\" alt=\"");
-			inlines_to_plain_html(html, cur);
-
-			if (cur->as.link.title) {
-				strbuf_puts(html, "\" title=\"");
-				escape_html(html, cur->as.link.title, -1);
-			}
-
-			strbuf_puts(html, "\" />");
-			visit_children = false;
-			break;
+		}
 
-		case NODE_STRONG:
-			strbuf_puts(html, "<strong>");
-			break;
+		escape_html(html, node->string_content.ptr, node->string_content.size);
+		strbuf_puts(html, "</code></pre>\n");
+		break;
 
-		case NODE_EMPH:
-			strbuf_puts(html, "<em>");
-			break;
+	case CMARK_NODE_HTML:
+		cr(html);
+		strbuf_put(html, node->string_content.ptr,
+			   node->string_content.size);
+		break;
 
-		default:
-			assert(false);
-		}
+	case CMARK_NODE_HRULE:
+		cr(html);
+		strbuf_puts(html, "<hr />\n");
+		break;
 
-		if (visit_children && cur->first_child) {
-			cur = cur->first_child;
-			continue;
-		}
+	case CMARK_NODE_REFERENCE_DEF:
+		break;
 
-	next_sibling:
-		tight = finish_node(html, cur, tight);
-		if (cur == node) {
-			break;
-		}
-		if (cur->next) {
-			cur = cur->next;
-			continue;
+	case CMARK_NODE_PARAGRAPH:
+		parent = cmark_node_parent(node);
+		grandparent = cmark_node_parent(parent);
+		if (grandparent != NULL &&
+		    grandparent->type == CMARK_NODE_LIST) {
+			tight = grandparent->as.list.tight;
+		} else {
+			tight = false;
 		}
-		cur = cur->parent;
-		goto next_sibling;
-	}
-}
-
-// Returns the restored value of 'tight'.
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight)
-{
-	char end_header[] = "</h0>\n";
-
-	switch (node->type) {
-	case NODE_PARAGRAPH:
 		if (!tight) {
-			strbuf_puts(html, "</p>\n");
+			if (entering) {
+				cr(html);
+				strbuf_puts(html, "<p>");
+			} else {
+				strbuf_puts(html, "</p>\n");
+			}
 		}
 		break;
 
-	case NODE_BLOCK_QUOTE: {
-		cmark_list *list = &node->as.list;
-		strbuf_puts(html, "</blockquote>\n");
-		// Restore old 'tight' value.
-		tight = list->tight;
-		list->tight = false;
+	case CMARK_NODE_TEXT:
+		escape_html(html, node->as.literal.data,
+			    node->as.literal.len);
 		break;
-	}
 
-	case NODE_LIST_ITEM:
-		strbuf_puts(html, "</li>\n");
+	case CMARK_NODE_LINEBREAK:
+		strbuf_puts(html, "<br />\n");
 		break;
 
-	case NODE_LIST: {
-		cmark_list *list = &node->as.list;
-		bool tmp;
-		strbuf_puts(html,
-			    list->list_type == CMARK_BULLET_LIST ?
-			    "</ul>\n" : "</ol>\n");
-		// Restore old 'tight' value.
-		tmp = tight;
-		tight = list->tight;
-		list->tight = tmp;
+	case CMARK_NODE_SOFTBREAK:
+		strbuf_putc(html, '\n');
 		break;
-	}
 
-	case NODE_HEADER:
-		end_header[3] = '0' + node->as.header.level;
-		strbuf_puts(html, end_header);
+	case CMARK_NODE_INLINE_CODE:
+		strbuf_puts(html, "<code>");
+		escape_html(html, node->as.literal.data, node->as.literal.len);
+		strbuf_puts(html, "</code>");
 		break;
 
-	case NODE_CODE_BLOCK:
-		strbuf_puts(html, "</code></pre>\n");
+	case CMARK_NODE_INLINE_HTML:
+		strbuf_put(html, node->as.literal.data, node->as.literal.len);
 		break;
 
-	case NODE_INLINE_CODE:
-		strbuf_puts(html, "</code>");
+	case CMARK_NODE_STRONG:
+		if (entering) {
+			strbuf_puts(html, "<strong>");
+		} else {
+			strbuf_puts(html, "</strong>");
+		}
 		break;
 
-	case NODE_LINK:
-		strbuf_puts(html, "</a>");
+	case CMARK_NODE_EMPH:
+		if (entering) {
+			strbuf_puts(html, "<em>");
+		} else {
+			strbuf_puts(html, "</em>");
+		}
 		break;
 
-	case NODE_STRONG:
-		strbuf_puts(html, "</strong>");
+	case CMARK_NODE_LINK:
+		if (entering) {
+			strbuf_puts(html, "<a href=\"");
+			if (node->as.link.url)
+				escape_href(html, node->as.link.url, -1);
+
+			if (node->as.link.title) {
+				strbuf_puts(html, "\" title=\"");
+				escape_html(html, node->as.link.title, -1);
+			}
+
+			strbuf_puts(html, "\">");
+		} else {
+			strbuf_puts(html, "</a>");
+		}
 		break;
 
-	case NODE_EMPH:
-		strbuf_puts(html, "</em>");
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			strbuf_puts(html, "<img src=\"");
+			if (node->as.link.url)
+				escape_href(html, node->as.link.url, -1);
+
+			strbuf_puts(html, "\" alt=\"");
+			state->plain = node;
+		} else {
+			if (node->as.link.title) {
+				strbuf_puts(html, "\" title=\"");
+				escape_html(html, node->as.link.title, -1);
+			}
+
+			strbuf_puts(html, "\" />");
+		}
 		break;
 
 	default:
+		assert(false);
 		break;
 	}
 
-	return tight;
+	// strbuf_putc(html, 'x');
+	return 1;
 }
 
 char *cmark_render_html(cmark_node *root)
 {
 	char *result;
 	strbuf html = GH_BUF_INIT;
-	node_to_html(&html, root);
-	result = (char *)strbuf_detach(&html);
-	strbuf_free(&html);
-	return result;
+	struct render_state state = { &html, NULL };
+
+	// Render the tree under root; return the detached HTML string, or
+	// NULL if the walk fails.  The buffer is freed on both paths.
+	result = cmark_walk(root, S_render_node, &state) ?
+		(char *)strbuf_detach(&html) : NULL;
+	strbuf_free(&html);
+	return result;
 }
diff --git a/src/node.c b/src/node.c
@@ -773,6 +773,7 @@ int S_is_leaf_node(cmark_node *current_node)
 	switch (cmark_node_get_type(current_node)) {
 	case CMARK_NODE_HTML:
 	case CMARK_NODE_HRULE:
+	case CMARK_NODE_CODE_BLOCK:
 	case CMARK_NODE_REFERENCE_DEF:
 	case CMARK_NODE_TEXT:
 	case CMARK_NODE_SOFTBREAK:
@@ -815,8 +816,13 @@ int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state)
 				parent = current_node->parent;
 			}
 			if (next) {
-				begin = 1;
-				current_node = next;
+				// don't go past root:
+				if (current_node == root) {
+					return 1;
+				} else {
+					begin = 1;
+					current_node = next;
+				}
 			} else {
 				begin = 0;
 				depth -= 1;