cmark

My personal build of CMark ✏️

Commit
1b6a4ce8ab921ddc98581abd395428e2cadd0c22
Parent
8ba087276c6cae9e1efde656ae973b4f714c88be
Author
John MacFarlane <jgm@berkeley.edu>
Date

Do not distinguish between fenced and indented code in AST.

Use a single CMARK_NODE_CODE_BLOCK tag for both. Distinguish them when needed for parsing by looking at the fence_length attribute, which is 0 for indented blocks.

Diffstat

7 files changed, 68 insertions, 80 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 9 3 6
Modified commonmark.rb 8 2 6
Modified src/blocks.c 88 48 40
Modified src/cmark.h 6 2 4
Modified src/html/html.c 12 5 7
Modified src/node.c 15 6 9
Modified src/print.c 10 2 8
diff --git a/api_test/main.c b/api_test/main.c
@@ -13,8 +13,7 @@ static const cmark_node_type node_types[] = {
 	CMARK_NODE_BLOCK_QUOTE,
 	CMARK_NODE_LIST,
 	CMARK_NODE_LIST_ITEM,
-	CMARK_NODE_FENCED_CODE,
-	CMARK_NODE_INDENTED_CODE,
+	CMARK_NODE_CODE_BLOCK,
 	CMARK_NODE_HTML,
 	CMARK_NODE_PARAGRAPH,
 	CMARK_NODE_HEADER,
@@ -374,8 +373,7 @@ hierarchy(test_batch_runner *runner)
 	int top_level_blocks =
 		(1 << CMARK_NODE_BLOCK_QUOTE) |
 		(1 << CMARK_NODE_LIST) |
-		(1 << CMARK_NODE_FENCED_CODE) |
-		(1 << CMARK_NODE_INDENTED_CODE) |
+		(1 << CMARK_NODE_CODE_BLOCK) |
 		(1 << CMARK_NODE_HTML) |
 		(1 << CMARK_NODE_PARAGRAPH) |
 		(1 << CMARK_NODE_HEADER) |
@@ -396,8 +394,7 @@ hierarchy(test_batch_runner *runner)
 	test_content(runner, CMARK_NODE_BLOCK_QUOTE,   top_level_blocks);
 	test_content(runner, CMARK_NODE_LIST,          list_item_flag);
 	test_content(runner, CMARK_NODE_LIST_ITEM,     top_level_blocks);
-	test_content(runner, CMARK_NODE_FENCED_CODE,   0);
-	test_content(runner, CMARK_NODE_INDENTED_CODE, 0);
+	test_content(runner, CMARK_NODE_CODE_BLOCK ,   0);
 	test_content(runner, CMARK_NODE_HTML,          0);
 	test_content(runner, CMARK_NODE_PARAGRAPH,     all_inlines);
 	test_content(runner, CMARK_NODE_HEADER,        all_inlines);
diff --git a/commonmark.rb b/commonmark.rb
@@ -10,7 +10,7 @@ module CMark
   ffi_lib ['libcmark', 'cmark']
   typedef :pointer, :node
   enum :node_type, [:document, :blockquote, :list, :list_item,
-                    :fenced_code, :indented_code, :html, :paragraph,
+                    :code_block, :html, :paragraph,
                     :header, :hrule, :reference_def,
                     :str, :softbreak, :linebreak, :code, :inline_html,
                     :emph, :strong, :link, :image]
@@ -187,11 +187,7 @@ class Renderer
     self.out(node.children)
   end
 
-  def indented_code(node)
-    self.code_block(node)
-  end
-
-  def fenced_code(node)
+  def code_block(node)
     self.code_block(node)
   end
 
diff --git a/src/blocks.c b/src/blocks.c
@@ -98,8 +98,7 @@ static inline bool accepts_lines(cmark_node_type block_type)
 {
 	return (block_type == NODE_PARAGRAPH ||
 		block_type == NODE_HEADER ||
-		block_type == NODE_INDENTED_CODE ||
-		block_type == NODE_FENCED_CODE);
+		block_type == NODE_CODE_BLOCK);
 }
 
 static void add_line(cmark_node* cmark_node, chunk *ch, int offset)
@@ -194,27 +193,28 @@ static void finalize(cmark_doc_parser *parser, cmark_node* b, int line_number)
 			}
 			break;
 
-		case NODE_INDENTED_CODE:
-			remove_trailing_blank_lines(&b->string_content);
-			strbuf_putc(&b->string_content, '\n');
-			break;
+		case NODE_CODE_BLOCK:
+			if (b->as.code.fence_length == 0) { // indented code
+				remove_trailing_blank_lines(&b->string_content);
+				strbuf_putc(&b->string_content, '\n');
+				break;
+			} else {
 
-		case NODE_FENCED_CODE:
-			// first line of contents becomes info
-			firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
+				// first line of contents becomes info
+				firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
 
-			strbuf_init(&b->as.code.info, 0);
-			houdini_unescape_html_f(
-					&b->as.code.info,
-					b->string_content.ptr,
-					firstlinelen
-					);
+				houdini_unescape_html_f(
+						&b->as.code.info,
+						b->string_content.ptr,
+						firstlinelen
+						);
 
-			strbuf_drop(&b->string_content, firstlinelen + 1);
+				strbuf_drop(&b->string_content, firstlinelen + 1);
 
-			strbuf_trim(&b->as.code.info);
-			strbuf_unescape(&b->as.code.info);
-			break;
+				strbuf_trim(&b->as.code.info);
+				strbuf_unescape(&b->as.code.info);
+				break;
+			}
 
 		case NODE_LIST: // determine tight/loose status
 			b->as.list.tight = true; // tight by default
@@ -537,14 +537,23 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 				all_matched = false;
 			}
 
-		} else if (container->type == NODE_INDENTED_CODE) {
+		} else if (container->type == NODE_CODE_BLOCK) {
 
-			if (indent >= CODE_INDENT) {
-				offset += CODE_INDENT;
-			} else if (blank) {
-				offset = first_nonspace;
+			if (container->as.code.fence_length == 0) { // indented
+				if (indent >= CODE_INDENT) {
+					offset += CODE_INDENT;
+				} else if (blank) {
+					offset = first_nonspace;
+				} else {
+					all_matched = false;
+				}
 			} else {
-				all_matched = false;
+				// skip optional spaces of fence offset
+				i = container->as.code.fence_offset;
+				while (i > 0 && peek_at(&input, offset) == ' ') {
+					offset++;
+					i--;
+				}
 			}
 
 		} else if (container->type == NODE_HEADER) {
@@ -555,15 +564,6 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 				container->last_line_blank = true;
 			}
 
-		} else if (container->type == NODE_FENCED_CODE) {
-
-			// skip optional spaces of fence offset
-			i = container->as.code.fence_offset;
-			while (i > 0 && peek_at(&input, offset) == ' ') {
-				offset++;
-				i--;
-			}
-
 		} else if (container->type == NODE_HTML) {
 
 			if (blank) {
@@ -594,7 +594,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 	}
 
 	// unless last matched container is code cmark_node, try new container starts:
-	while (container->type != NODE_FENCED_CODE && container->type != NODE_INDENTED_CODE &&
+	while (container->type != NODE_CODE_BLOCK &&
 			container->type != NODE_HTML) {
 
 		first_nonspace = offset;
@@ -607,7 +607,11 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 		if (indent >= CODE_INDENT) {
 			if (cur->type != NODE_PARAGRAPH && !blank) {
 				offset += CODE_INDENT;
-				container = add_child(parser, container, NODE_INDENTED_CODE, parser->line_number, offset + 1);
+				container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1);
+				container->as.code.fence_char = 0;
+				container->as.code.fence_length = 0;
+				container->as.code.fence_offset = 0;
+				strbuf_init(&container->as.code.info, 0);
 			} else { // indent > 4 in lazy line
 				break;
 			}
@@ -636,10 +640,11 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 
 		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
 
-			container = add_child(parser, container, NODE_FENCED_CODE, parser->line_number, first_nonspace + 1);
+			container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1);
 			container->as.code.fence_char = peek_at(&input, first_nonspace);
 			container->as.code.fence_length = matched;
 			container->as.code.fence_offset = first_nonspace - offset;
+			strbuf_init(&container->as.code.info, 0);
 			offset = first_nonspace + matched;
 
 		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
@@ -731,7 +736,8 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 	container->last_line_blank = (blank &&
 			container->type != NODE_BLOCK_QUOTE &&
 			container->type != NODE_HEADER &&
-			container->type != NODE_FENCED_CODE &&
+			!(container->type == NODE_CODE_BLOCK && // exempt fenced code only:
+			  container->as.code.fence_length != 0) && // fence_length is 0 for indented code
 			!(container->type == NODE_LIST_ITEM &&
 				container->first_child == NULL &&
 				container->start_line == parser->line_number));
@@ -759,11 +765,13 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 			assert(cur != NULL);
 		}
 
-		if (container->type == NODE_INDENTED_CODE) {
+		if (container->type == NODE_CODE_BLOCK &&
+		    container->as.code.fence_length == 0) {
 
 			add_line(container, &input, offset);
 
-		} else if (container->type == NODE_FENCED_CODE) {
+		} else if (container->type == NODE_CODE_BLOCK &&
+			   container->as.code.fence_length != 0) {
 			matched = 0;
 
 			if (indent <= 3 &&
diff --git a/src/cmark.h b/src/cmark.h
@@ -16,8 +16,7 @@ typedef enum {
     CMARK_NODE_BLOCK_QUOTE,
     CMARK_NODE_LIST,
     CMARK_NODE_LIST_ITEM,
-    CMARK_NODE_FENCED_CODE,
-    CMARK_NODE_INDENTED_CODE,
+    CMARK_NODE_CODE_BLOCK,
     CMARK_NODE_HTML,
     CMARK_NODE_PARAGRAPH,
     CMARK_NODE_HEADER,
@@ -199,8 +198,7 @@ char *cmark_markdown_to_html(const char *text, int len);
   #define NODE_BLOCK_QUOTE          CMARK_NODE_BLOCK_QUOTE
   #define NODE_LIST                 CMARK_NODE_LIST
   #define NODE_LIST_ITEM            CMARK_NODE_LIST_ITEM
-  #define NODE_FENCED_CODE          CMARK_NODE_FENCED_CODE
-  #define NODE_INDENTED_CODE        CMARK_NODE_INDENTED_CODE
+  #define NODE_CODE_BLOCK           CMARK_NODE_CODE_BLOCK
   #define NODE_HTML                 CMARK_NODE_HTML
   #define NODE_PARAGRAPH            CMARK_NODE_PARAGRAPH
   #define NODE_HEADER		    CMARK_NODE_HEADER
diff --git a/src/html/html.c b/src/html/html.c
@@ -88,6 +88,7 @@ static void node_to_html(strbuf *html, cmark_node *node)
 	char start_header[] = "<h0>";
 	bool tight = false;
 	bool visit_children;
+	strbuf *info;
 
 	if (node == NULL) {
 		return;
@@ -155,12 +156,11 @@ static void node_to_html(strbuf *html, cmark_node *node)
 			strbuf_puts(html, start_header);
 			break;
 
-		case NODE_INDENTED_CODE:
-		case NODE_FENCED_CODE: {
-			strbuf *info = &cur->as.code.info;
+		case NODE_CODE_BLOCK:
+			info = &cur->as.code.info;
 			cr(html);
 
-			if (cur->type != NODE_FENCED_CODE
+			if (cur->as.code.fence_length == 0 // field value, not its address; 0 means indented code
 			    || strbuf_len(info) == 0) {
 				strbuf_puts(html, "<pre><code>");
 			}
@@ -177,7 +177,6 @@ static void node_to_html(strbuf *html, cmark_node *node)
 
 			escape_html(html, cur->string_content.ptr, cur->string_content.size);
 			break;
-		}
 
 		case NODE_HTML:
 			cr(html);
@@ -320,8 +319,7 @@ finish_node(strbuf *html, cmark_node *node, bool tight)
 		strbuf_puts(html, end_header);
 		break;
 
-	case NODE_INDENTED_CODE:
-	case NODE_FENCED_CODE:
+	case NODE_CODE_BLOCK:
 		strbuf_puts(html, "</code></pre>\n");
 		break;
 
diff --git a/src/node.c b/src/node.c
@@ -53,8 +53,7 @@ S_type_string(cmark_node *node)
 	case CMARK_NODE_BLOCK_QUOTE:   return "BLOCK_QUOTE";
 	case CMARK_NODE_LIST:          return "LIST";
 	case CMARK_NODE_LIST_ITEM:     return "LIST_ITEM";
-	case CMARK_NODE_FENCED_CODE:   return "FENCED_CODE";
-	case CMARK_NODE_INDENTED_CODE: return "INDENTED_CODE";
+	case CMARK_NODE_CODE_BLOCK:    return "CODE_BLOCK";
 	case CMARK_NODE_HTML:          return "HTML";
 	case CMARK_NODE_PARAGRAPH:     return "PARAGRAPH";
 	case CMARK_NODE_HEADER:	       return "HEADER";
@@ -115,8 +114,7 @@ S_strdup(const char *str) {
 const char*
 cmark_node_get_string_content(cmark_node *node) {
 	switch (node->type) {
-	case NODE_INDENTED_CODE:
-	case NODE_FENCED_CODE:
+	case NODE_CODE_BLOCK:
 	case NODE_HTML:
 		return cmark_strbuf_cstr(&node->string_content);
 
@@ -135,8 +133,7 @@ cmark_node_get_string_content(cmark_node *node) {
 int
 cmark_node_set_string_content(cmark_node *node, const char *content) {
 	switch (node->type) {
-	case NODE_INDENTED_CODE:
-	case NODE_FENCED_CODE:
+	case NODE_CODE_BLOCK:
 	case NODE_HTML:
 		cmark_strbuf_sets(&node->string_content, content);
 		return 1;
@@ -258,7 +255,7 @@ cmark_node_set_list_tight(cmark_node *node, int tight) {
 
 const char*
 cmark_node_get_fence_info(cmark_node *node) {
-	if (node->type == NODE_FENCED_CODE) {
+	if (node->type == NODE_CODE_BLOCK) {
 		return cmark_strbuf_cstr(&node->as.code.info);
 	}
 	else {
@@ -268,7 +265,7 @@ cmark_node_get_fence_info(cmark_node *node) {
 
 int
 cmark_node_set_fence_info(cmark_node *node, const char *info) {
-	if (node->type == NODE_FENCED_CODE) {
+	if (node->type == NODE_CODE_BLOCK) {
 		cmark_strbuf_sets(&node->as.code.info, info);
 		return 1;
 	}
@@ -622,7 +619,7 @@ void cmark_free_nodes(cmark_node *e)
 	while (e != NULL) {
 		strbuf_free(&e->string_content);
 		switch (e->type){
-		case NODE_FENCED_CODE:
+		case NODE_CODE_BLOCK:
 			strbuf_free(&e->as.code.info);
 			break;
 		case NODE_STRING:
diff --git a/src/print.c b/src/print.c
@@ -143,14 +143,8 @@ static void print_blocks(cmark_node* b, int indent)
 		case NODE_HRULE:
 			printf("hrule\n");
 			break;
-		case NODE_INDENTED_CODE:
-			printf("indented_code ");
-			print_str(b->string_content.ptr, -1);
-			putchar('\n');
-			break;
-		case NODE_FENCED_CODE:
-			printf("fenced_code length=%d info=",
-			       b->as.code.fence_length);
+		case NODE_CODE_BLOCK:
+			printf("code block info=");
 			print_str(b->as.code.info.ptr, -1);
 			putchar(' ');
 			print_str(b->string_content.ptr, -1);