cmark

My personal build of CMark ✏️

Commit
4570eb2bff2e1b71fa5b6408abbc69c98ff5ff24
Parent
a71423f6ee1b77d9f79d42599ea00b4ca99f5da0
Author
John MacFarlane <jgm@berkeley.edu>
Date

Revert "Remove distinction btw atx and setext header in AST."

This reverts commit a71423f6ee1b77d9f79d42599ea00b4ca99f5da0.

Not quite sure about this change, so reverting for now. Note that we still have a distinction between fenced and indented code blocks in the AST. These two distinctions seem to stand or fall together.

Diffstat

9 files changed, 68 insertions, 37 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 12 8 4
Modified commonmark.rb 14 11 3
Modified js/lib/blocks.js 20 12 8
Modified js/lib/html-renderer.js 3 2 1
Modified src/blocks.c 23 12 11
Modified src/cmark.h 6 4 2
Modified src/html/html.c 6 4 2
Modified src/node.c 15 10 5
Modified src/print.c 6 5 1
diff --git a/api_test/main.c b/api_test/main.c
@@ -17,7 +17,8 @@ static const cmark_node_type node_types[] = {
 	CMARK_NODE_INDENTED_CODE,
 	CMARK_NODE_HTML,
 	CMARK_NODE_PARAGRAPH,
-	CMARK_NODE_HEADER,
+	CMARK_NODE_ATX_HEADER,
+	CMARK_NODE_SETEXT_HEADER,
 	CMARK_NODE_HRULE,
 	CMARK_NODE_REFERENCE_DEF,
 	CMARK_NODE_STRING,
@@ -47,7 +48,8 @@ constructor(test_batch_runner *runner)
 		       "get_type %d", type);
 
 		switch (node->type) {
-		case CMARK_NODE_HEADER:
+		case CMARK_NODE_ATX_HEADER:
+		case CMARK_NODE_SETEXT_HEADER:
 			INT_EQ(runner, cmark_node_get_header_level(node), 1,
 			       "default header level is 1");
 			node->as.header.level = 1;
@@ -378,7 +380,8 @@ hierarchy(test_batch_runner *runner)
 		(1 << CMARK_NODE_INDENTED_CODE) |
 		(1 << CMARK_NODE_HTML) |
 		(1 << CMARK_NODE_PARAGRAPH) |
-		(1 << CMARK_NODE_HEADER) |
+		(1 << CMARK_NODE_ATX_HEADER) |
+		(1 << CMARK_NODE_SETEXT_HEADER) |
 		(1 << CMARK_NODE_HRULE) |
 		(1 << CMARK_NODE_REFERENCE_DEF);
 	int all_inlines =
@@ -400,7 +403,8 @@ hierarchy(test_batch_runner *runner)
 	test_content(runner, CMARK_NODE_INDENTED_CODE, 0);
 	test_content(runner, CMARK_NODE_HTML,          0);
 	test_content(runner, CMARK_NODE_PARAGRAPH,     all_inlines);
-	test_content(runner, CMARK_NODE_HEADER,        all_inlines);
+	test_content(runner, CMARK_NODE_ATX_HEADER,    all_inlines);
+	test_content(runner, CMARK_NODE_SETEXT_HEADER, all_inlines);
 	test_content(runner, CMARK_NODE_HRULE,         0);
 	test_content(runner, CMARK_NODE_REFERENCE_DEF, 0);
 	test_content(runner, CMARK_NODE_STRING,        0);
diff --git a/commonmark.rb b/commonmark.rb
@@ -11,7 +11,7 @@ module CMark
   typedef :pointer, :node
   enum :node_type, [:document, :blockquote, :list, :list_item,
                     :fenced_code, :indented_code, :html, :paragraph,
-                    :header, :hrule, :reference_def,
+                    :atx_header, :setext_header, :hrule, :reference_def,
                     :str, :softbreak, :linebreak, :code, :inline_html,
                     :emph, :strong, :link, :image]
   enum :list_type, [:no_list, :bullet_list, :ordered_list]
@@ -55,7 +55,7 @@ class Node
       b = CMark::cmark_node_next(b)
     end
     @string_content = CMark::cmark_node_get_string_content(pointer)
-    if @type == :header
+    if @type == :atx_header || @type == :setext_header
       @header_level = CMark::cmark_node_get_header_level(pointer)
     end
     if @type == :list
@@ -195,6 +195,14 @@ class Renderer
     self.code_block(node)
   end
 
+  def setext_header(node)
+    self.header(node)
+  end
+
+  def atx_header(node)
+    self.header(node)
+  end
+
   def reference_def(node)
   end
 
@@ -367,7 +375,7 @@ end
 
 # Capitalize strings in headers
 doc.walk do |node|
-  if node.type == :header
+  if node.type == :setext_header or node.type == :atx_header
     node.walk do |subnode|
       if subnode.type == :str
         subnode.string_content = subnode.string_content.upcase
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
@@ -261,7 +261,8 @@ var incorporateLine = function(ln, line_number) {
             }
             break;
 
-        case 'Header':
+        case 'ATXHeader':
+        case 'SetextHeader':
         case 'HorizontalRule':
             // a header can never contain > 1 line, so fail to match:
             all_matched = false;
@@ -365,7 +366,7 @@ var incorporateLine = function(ln, line_number) {
             // ATX header
             offset = first_nonspace + match[0].length;
             closeUnmatchedBlocks(this);
-            container = this.addChild('Header', line_number, first_nonspace);
+            container = this.addChild('ATXHeader', line_number, first_nonspace);
             container.level = match[0].trim().length; // number of #s
             // remove trailing ###s:
             container.strings =
@@ -395,7 +396,7 @@ var incorporateLine = function(ln, line_number) {
                    ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
             // setext header line
             closeUnmatchedBlocks(this);
-            container.t = 'Header'; // convert Paragraph to SetextHeader
+            container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
             container.level = match[0][0] === '=' ? 1 : 2;
             offset = ln.length;
 
@@ -468,7 +469,7 @@ var incorporateLine = function(ln, line_number) {
         // on an empty list item.
         container.last_line_blank = blank &&
             !(container.t == 'BlockQuote' ||
-              container.t == 'Header' ||
+              container.t == 'SetextHeader' ||
               container.t == 'FencedCode' ||
               (container.t == 'ListItem' &&
                container.children.length === 0 &&
@@ -499,7 +500,8 @@ var incorporateLine = function(ln, line_number) {
             }
             break;
 
-        case 'Header':
+        case 'ATXHeader':
+        case 'SetextHeader':
         case 'HorizontalRule':
             // nothing to do; we already added the contents.
             break;
@@ -510,7 +512,7 @@ var incorporateLine = function(ln, line_number) {
             } else if (blank) {
                 // do nothing
             } else if (container.t != 'HorizontalRule' &&
-                       container.t != 'Header') {
+                       container.t != 'SetextHeader') {
                 // create paragraph container for line
                 container = this.addChild('Paragraph', line_number, first_nonspace);
                 this.addLine(ln, first_nonspace);
@@ -559,7 +561,8 @@ var finalize = function(block, line_number) {
         }
         break;
 
-    case 'Header':
+    case 'ATXHeader':
+    case 'SetextHeader':
     case 'HtmlBlock':
         block.string_content = block.strings.join('\n');
         break;
@@ -629,7 +632,8 @@ var processInlines = function(block) {
         newblock.inline_content =
             this.inlineParser.parse(block.string_content.trim(), this.refmap);
         break;
-    case 'Header':
+    case 'SetextHeader':
+    case 'ATXHeader':
         newblock.inline_content =
             this.inlineParser.parse(block.string_content.trim(), this.refmap);
         newblock.level = block.level;
diff --git a/js/lib/html-renderer.js b/js/lib/html-renderer.js
@@ -103,7 +103,8 @@ var renderBlock = function(block, in_tight_list) {
         return inTags(tag, attr, this.innersep +
                       this.renderBlocks(block.children, block.tight) +
                       this.innersep);
-    case 'Header':
+    case 'ATXHeader':
+    case 'SetextHeader':
         tag = 'h' + block.level;
         return inTags(tag, [], this.renderInlines(block.inline_content));
     case 'IndentedCode':
diff --git a/src/blocks.c b/src/blocks.c
@@ -97,9 +97,9 @@ static inline bool can_contain(cmark_node_type parent_type, cmark_node_type chil
 static inline bool accepts_lines(cmark_node_type block_type)
 {
 	return (block_type == NODE_PARAGRAPH ||
-		block_type == NODE_HEADER ||
-		block_type == NODE_INDENTED_CODE ||
-		block_type == NODE_FENCED_CODE);
+			block_type == NODE_ATX_HEADER ||
+			block_type == NODE_INDENTED_CODE ||
+			block_type == NODE_FENCED_CODE);
 }
 
 static void add_line(cmark_node* cmark_node, chunk *ch, int offset)
@@ -293,7 +293,8 @@ static void process_inlines(cmark_node* cur, reference_map *refmap)
 	while (cur != NULL) {
 		switch (cur->type) {
 			case NODE_PARAGRAPH:
-			case NODE_HEADER:
+			case NODE_ATX_HEADER:
+			case NODE_SETEXT_HEADER:
 				parse_inlines(cur, refmap);
 				break;
 
@@ -547,7 +548,8 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 				all_matched = false;
 			}
 
-		} else if (container->type == NODE_HEADER) {
+		} else if (container->type == NODE_ATX_HEADER ||
+				container->type == NODE_SETEXT_HEADER) {
 
 			// a header can never contain more than one line
 			all_matched = false;
@@ -623,7 +625,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
 
 			offset = first_nonspace + matched;
-			container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1);
+			container = add_child(parser, container, NODE_ATX_HEADER, parser->line_number, offset + 1);
 
 			int hashpos = chunk_strchr(&input, '#', first_nonspace);
 			int level = 0;
@@ -653,7 +655,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 				strbuf_strrchr(&container->string_content, '\n',
 					strbuf_len(&container->string_content) - 2) < 0) {
 
-			container->type = NODE_HEADER;
+			container->type = NODE_SETEXT_HEADER;
 			container->as.header.level = lev;
 			offset = input.len - 1;
 
@@ -730,7 +732,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 	// on an empty list item.
 	container->last_line_blank = (blank &&
 			container->type != NODE_BLOCK_QUOTE &&
-			container->type != NODE_HEADER &&
+			container->type != NODE_SETEXT_HEADER &&
 			container->type != NODE_FENCED_CODE &&
 			!(container->type == NODE_LIST_ITEM &&
 				container->first_child == NULL &&
@@ -789,7 +791,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 
 			// ??? do nothing
 
-		} else if (container->type == NODE_HEADER) {
+		} else if (container->type == NODE_ATX_HEADER) {
 
 			chop_trailing_hashtags(&input);
 			add_line(container, &input, first_nonspace);
@@ -800,8 +802,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer,
 
 			add_line(container, &input, first_nonspace);
 
-		} else if (container->type != NODE_HRULE &&
-			   container->type != NODE_HEADER) {
+		} else if (container->type != NODE_HRULE && container->type != NODE_SETEXT_HEADER) {
 
 			// create paragraph container for line
 			container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
diff --git a/src/cmark.h b/src/cmark.h
@@ -20,7 +20,8 @@ typedef enum {
     CMARK_NODE_INDENTED_CODE,
     CMARK_NODE_HTML,
     CMARK_NODE_PARAGRAPH,
-    CMARK_NODE_HEADER,
+    CMARK_NODE_ATX_HEADER,
+    CMARK_NODE_SETEXT_HEADER,
     CMARK_NODE_HRULE,
     CMARK_NODE_REFERENCE_DEF,
 
@@ -203,7 +204,8 @@ char *cmark_markdown_to_html(const char *text, int len);
   #define NODE_INDENTED_CODE        CMARK_NODE_INDENTED_CODE
   #define NODE_HTML                 CMARK_NODE_HTML
   #define NODE_PARAGRAPH            CMARK_NODE_PARAGRAPH
-  #define NODE_HEADER		    CMARK_NODE_HEADER
+  #define NODE_ATX_HEADER           CMARK_NODE_ATX_HEADER
+  #define NODE_SETEXT_HEADER        CMARK_NODE_SETEXT_HEADER
   #define NODE_HRULE                CMARK_NODE_HRULE
   #define NODE_REFERENCE_DEF        CMARK_NODE_REFERENCE_DEF
   #define NODE_STRING               CMARK_NODE_STRING
diff --git a/src/html/html.c b/src/html/html.c
@@ -149,7 +149,8 @@ static void node_to_html(strbuf *html, cmark_node *node)
 			break;
 		}
 
-		case NODE_HEADER:
+		case NODE_ATX_HEADER:
+		case NODE_SETEXT_HEADER:
 			cr(html);
 			start_header[2] = '0' + cur->as.header.level;
 			strbuf_puts(html, start_header);
@@ -315,7 +316,8 @@ finish_node(strbuf *html, cmark_node *node, bool tight)
 		break;
 	}
 
-	case NODE_HEADER:
+	case NODE_ATX_HEADER:
+	case NODE_SETEXT_HEADER:
 		end_header[3] = '0' + node->as.header.level;
 		strbuf_puts(html, end_header);
 		break;
diff --git a/src/node.c b/src/node.c
@@ -13,7 +13,8 @@ cmark_node_new(cmark_node_type type) {
 	node->type = type;
 
 	switch (node->type) {
-	case CMARK_NODE_HEADER:
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
 		node->as.header.level = 1;
 		break;
 
@@ -57,7 +58,8 @@ S_type_string(cmark_node *node)
 	case CMARK_NODE_INDENTED_CODE: return "INDENTED_CODE";
 	case CMARK_NODE_HTML:          return "HTML";
 	case CMARK_NODE_PARAGRAPH:     return "PARAGRAPH";
-	case CMARK_NODE_HEADER:	       return "HEADER";
+	case CMARK_NODE_ATX_HEADER:    return "ATX_HEADER";
+	case CMARK_NODE_SETEXT_HEADER: return "SETEXT_HEADER";
 	case CMARK_NODE_HRULE:         return "HRULE";
 	case CMARK_NODE_REFERENCE_DEF: return "REFERENCE_DEF";
 	case CMARK_NODE_STRING:        return "STRING";
@@ -157,7 +159,8 @@ cmark_node_set_string_content(cmark_node *node, const char *content) {
 int
 cmark_node_get_header_level(cmark_node *node) {
 	switch (node->type) {
-	case CMARK_NODE_HEADER:
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
 		return node->as.header.level;
 
 	default:
@@ -174,7 +177,8 @@ cmark_node_set_header_level(cmark_node *node, int level) {
 	}
 
 	switch (node->type) {
-	case CMARK_NODE_HEADER:
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
 		node->as.header.level = level;
 		return 1;
 
@@ -389,7 +393,8 @@ S_can_contain(cmark_node *node, cmark_node *child)
 		return child->type == CMARK_NODE_LIST_ITEM;
 
 	case CMARK_NODE_PARAGRAPH:
-	case CMARK_NODE_HEADER:
+	case CMARK_NODE_ATX_HEADER:
+	case CMARK_NODE_SETEXT_HEADER:
 	case CMARK_NODE_EMPH:
 	case CMARK_NODE_STRONG:
 	case CMARK_NODE_LINK:
diff --git a/src/print.c b/src/print.c
@@ -132,7 +132,11 @@ static void print_blocks(cmark_node* b, int indent)
 			}
 			print_blocks(b->first_child, indent + 2);
 			break;
-		case NODE_HEADER:
+		case NODE_ATX_HEADER:
+			printf("atx_header (level=%d)\n", b->as.header.level);
+			print_inlines(b->first_child, indent + 2);
+			break;
+		case NODE_SETEXT_HEADER:
 			printf("setext_header (level=%d)\n", b->as.header.level);
 			print_inlines(b->first_child, indent + 2);
 			break;