cmark

My personal build of CMark ✏️

Commit
0566fa09cf2369cef3ea6b459f3d4fcf3a27d0fc
Parent
96c7df6a8480b78ddc2540dd85877487af358ceb
Author
John MacFarlane <jgm@berkeley.edu>
Date

Added options parameter to renderers.

To keep the API simple and avoid API changes when new options are added, the options parameter is just a long integer used as a bit mask.

Set it by OR-ing together options that are defined as powers of 2: e.g. `CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS`.

Test options using `&`: `if (options & CMARK_OPT_SOURCEPOS)`.

Added `--hardbreaks` and `--sourcepos` command-line options.

Diffstat

7 files changed, 91 insertions, 34 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 12 6 6
Modified src/cmark.c 2 1 1
Modified src/cmark.h 18 15 3
Modified src/html.c 71 54 17
Modified src/main.c 18 13 5
Modified src/man.c 2 1 1
Modified src/xml.c 2 1 1
diff --git a/api_test/main.c b/api_test/main.c
@@ -210,7 +210,7 @@ accessors(test_batch_runner *runner)
 	OK(runner, cmark_node_set_literal(string, "LINK"),
 	   "set_literal string");
 
-	char *rendered_html = cmark_render_html(doc);
+	char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
 	static const char expected_html[] =
 		"<h3>Header</h3>\n"
 		"<ol start=\"3\">\n"
@@ -355,7 +355,7 @@ create_tree(test_batch_runner *runner)
 	OK(runner, cmark_node_append_child(emph, str2), "append3");
 	INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent");
 
-	html = cmark_render_html(doc);
+	html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
 	STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n",
 	       "render_html");
 	free(html);
@@ -386,7 +386,7 @@ create_tree(test_batch_runner *runner)
 
 	cmark_node_unlink(emph);
 
-	html = cmark_render_html(doc);
+	html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
 	STR_EQ(runner, html, "<p>Hello, !</p>\n",
 	       "render_html after shuffling");
 	free(html);
@@ -501,18 +501,18 @@ render_html(test_batch_runner *runner)
 	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
 
 	cmark_node *paragraph = cmark_node_first_child(doc);
-	html = cmark_render_html(paragraph);
+	html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT);
 	STR_EQ(runner, html, "<p>foo <em>bar</em></p>\n",
 	       "render single paragraph");
 	free(html);
 
 	cmark_node *string = cmark_node_first_child(paragraph);
-	html = cmark_render_html(string);
+	html = cmark_render_html(string, CMARK_OPT_DEFAULT);
 	STR_EQ(runner, html, "foo ", "render single inline");
 	free(html);
 
 	cmark_node *emph = cmark_node_next(string);
-	html = cmark_render_html(emph);
+	html = cmark_render_html(emph, CMARK_OPT_DEFAULT);
 	STR_EQ(runner, html, "<em>bar</em>", "render inline with children");
 	free(html);
 
diff --git a/src/cmark.c b/src/cmark.c
@@ -13,7 +13,7 @@ char *cmark_markdown_to_html(const char *text, int len)
 
 	doc = cmark_parse_document(text, len);
 
-	result = cmark_render_html(doc);
+	result = cmark_render_html(doc, CMARK_OPT_DEFAULT);
 	cmark_node_free(doc);
 
 	return result;
diff --git a/src/cmark.h b/src/cmark.h
@@ -416,18 +416,30 @@ cmark_node *cmark_parse_file(FILE *f);
 /** Render a 'node' tree as XML.
  */
 CMARK_EXPORT
-char *cmark_render_xml(cmark_node *root);
+char *cmark_render_xml(cmark_node *root, long options);
 
 /** Render a 'node' tree as an HTML fragment.  It is up to the user
  * to add an appropriate header and footer.
  */
 CMARK_EXPORT
-char *cmark_render_html(cmark_node *root);
+char *cmark_render_html(cmark_node *root, long options);
 
 /** Render a 'node' tree as a groff man page, without the header.
  */
 CMARK_EXPORT
-char *cmark_render_man(cmark_node *root);
+char *cmark_render_man(cmark_node *root, long options);
+
+/** Default writer options.
+ */
+#define CMARK_OPT_DEFAULT 0
+
+/** Include a `data-sourcepos` attribute on all block elements.
+ */
+#define CMARK_OPT_SOURCEPOS 1
+
+/** Render `softbreak` elements as hard line breaks.
+ */
+#define CMARK_OPT_HARDBREAKS 2
 
 /** # AUTHORS
  *
diff --git a/src/html.c b/src/html.c
@@ -38,15 +38,27 @@ struct render_state {
 	cmark_node *plain;
 };
 
+static void
+S_render_sourcepos(cmark_node *node, cmark_strbuf *html, long options) {
+	if (CMARK_OPT_SOURCEPOS & options) {
+		cmark_strbuf_printf(html, " data-sourcepos=\"%d:%d-%d:%d\"",
+				    cmark_node_get_start_line(node),
+				    cmark_node_get_start_column(node),
+				    cmark_node_get_end_line(node),
+				    cmark_node_get_end_column(node));
+	}
+}
+
 static int
-S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate,
+	long options)
 {
 	struct render_state *state = vstate;
 	cmark_node *parent;
 	cmark_node *grandparent;
 	cmark_strbuf *html = state->html;
-	char start_header[] = "<h0>";
-	char end_header[] = "</h0>";
+	char start_header[] = "<h0";
+	char end_header[] = "</h0";
 	bool tight;
 
 	bool entering = (ev_type == CMARK_EVENT_ENTER);
@@ -82,7 +94,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 	case CMARK_NODE_BLOCK_QUOTE:
 		if (entering) {
 			cr(html);
-			cmark_strbuf_puts(html, "<blockquote>\n");
+			cmark_strbuf_puts(html, "<blockquote");
+			S_render_sourcepos(node, html, options);
+			cmark_strbuf_puts(html, ">\n");
 		} else {
 			cr(html);
 			cmark_strbuf_puts(html, "</blockquote>\n");
@@ -96,14 +110,21 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 		if (entering) {
 			cr(html);
 			if (list_type == CMARK_BULLET_LIST) {
-				cmark_strbuf_puts(html, "<ul>\n");
+				cmark_strbuf_puts(html, "<ul");
+				S_render_sourcepos(node, html, options);
+				cmark_strbuf_puts(html, ">\n");
 			}
 			else if (start == 1) {
-				cmark_strbuf_puts(html, "<ol>\n");
+				cmark_strbuf_puts(html, "<ol");
+				S_render_sourcepos(node, html, options);
+				cmark_strbuf_puts(html, ">\n");
 			}
 			else {
-				cmark_strbuf_printf(html, "<ol start=\"%d\">\n",
-					      start);
+				cmark_strbuf_printf(html,
+						    "<ol start=\"%d\"",
+						    start);
+				S_render_sourcepos(node, html, options);
+				cmark_strbuf_puts(html, ">\n");
 			}
 		} else {
 			cmark_strbuf_puts(html,
@@ -116,7 +137,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 	case CMARK_NODE_ITEM:
 		if (entering) {
 			cr(html);
-			cmark_strbuf_puts(html, "<li>");
+			cmark_strbuf_puts(html, "<li");
+			S_render_sourcepos(node, html, options);
+			cmark_strbuf_putc(html, '>');
 		} else {
 			cmark_strbuf_puts(html, "</li>\n");
 		}
@@ -127,10 +150,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 			cr(html);
 			start_header[2] = '0' + node->as.header.level;
 			cmark_strbuf_puts(html, start_header);
+			S_render_sourcepos(node, html, options);
+			cmark_strbuf_putc(html, '>');
 		} else {
 			end_header[3] = '0' + node->as.header.level;
 			cmark_strbuf_puts(html, end_header);
-			cmark_strbuf_putc(html, '\n');
+			cmark_strbuf_puts(html, ">\n");
 		}
 		break;
 
@@ -138,7 +163,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 		cr(html);
 
 		if (!node->as.code.fenced || node->as.code.info.len == 0) {
-			cmark_strbuf_puts(html, "<pre><code>");
+			cmark_strbuf_puts(html, "<pre");
+			S_render_sourcepos(node, html, options);
+			cmark_strbuf_puts(html, "><code>");
 		}
 		else {
 			int first_tag = 0;
@@ -147,7 +174,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 				first_tag += 1;
 			}
 
-			cmark_strbuf_puts(html, "<pre><code class=\"language-");
+			cmark_strbuf_puts(html, "<pre");
+			S_render_sourcepos(node, html, options);
+			cmark_strbuf_puts(html, "><code class=\"language-");
 			escape_html(html, node->as.code.info.data, first_tag);
 			cmark_strbuf_puts(html, "\">");
 		}
@@ -164,7 +193,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 
 	case CMARK_NODE_HRULE:
 		cr(html);
-		cmark_strbuf_puts(html, "<hr />\n");
+		cmark_strbuf_puts(html, "<hr");
+		S_render_sourcepos(node, html, options);
+		cmark_strbuf_puts(html, " />\n");
 		break;
 
 	case CMARK_NODE_PARAGRAPH:
@@ -179,7 +210,9 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 		if (!tight) {
 			if (entering) {
 				cr(html);
-				cmark_strbuf_puts(html, "<p>");
+				cmark_strbuf_puts(html, "<p");
+				S_render_sourcepos(node, html, options);
+				cmark_strbuf_putc(html, '>');
 			} else {
 				cmark_strbuf_puts(html, "</p>\n");
 			}
@@ -196,7 +229,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 		break;
 
 	case CMARK_NODE_SOFTBREAK:
-		cmark_strbuf_putc(html, '\n');
+		if (options & CMARK_OPT_HARDBREAKS) {
+			cmark_strbuf_puts(html, "<br />\n");
+		} else {
+			cmark_strbuf_putc(html, '\n');
+		}
 		break;
 
 	case CMARK_NODE_CODE:
@@ -269,7 +306,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 	return 1;
 }
 
-char *cmark_render_html(cmark_node *root)
+char *cmark_render_html(cmark_node *root, long options)
 {
 	char *result;
 	cmark_strbuf html = GH_BUF_INIT;
@@ -280,7 +317,7 @@ char *cmark_render_html(cmark_node *root)
 
 	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
 		cur = cmark_iter_get_node(iter);
-		S_render_node(cur, ev_type, &state);
+		S_render_node(cur, ev_type, &state, options);
 	}
 	result = (char *)cmark_strbuf_detach(&html);
 
diff --git a/src/main.c b/src/main.c
@@ -19,22 +19,25 @@ void print_usage()
 	printf("Usage:   cmark [FILE*]\n");
 	printf("Options:\n");
 	printf("  --to, -t FORMAT  Specify output format (html, xml, man)\n");
+	printf("  --sourcepos      Include source position attribute\n");
+	printf("  --hardbreaks     Treat newlines as hard line breaks\n");
 	printf("  --help, -h       Print usage information\n");
 	printf("  --version        Print version\n");
 }
 
-static void print_document(cmark_node *document, writer_format writer)
+static void print_document(cmark_node *document, writer_format writer,
+			   long options)
 {
 	char *result;
 	switch (writer) {
 	case FORMAT_HTML:
-		result = cmark_render_html(document);
+		result = cmark_render_html(document, options);
 		break;
 	case FORMAT_XML:
-		result = cmark_render_xml(document);
+		result = cmark_render_xml(document, options);
 		break;
 	case FORMAT_MAN:
-		result = cmark_render_man(document);
+		result = cmark_render_man(document, options);
 		break;
 	default:
 		fprintf(stderr, "Unknown format %d\n", writer);
@@ -53,6 +56,7 @@ int main(int argc, char *argv[])
 	size_t bytes;
 	cmark_node *document;
 	writer_format writer = FORMAT_HTML;
+	long options = CMARK_OPT_DEFAULT;
 
 	parser = cmark_parser_new();
 	files = (int *)malloc(argc * sizeof(*files));
@@ -62,6 +66,10 @@ int main(int argc, char *argv[])
 			printf("cmark %s", CMARK_VERSION);
 			printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
 			exit(0);
+		} else if (strcmp(argv[i], "--sourcepos") == 0) {
+			options |= CMARK_OPT_SOURCEPOS;
+		} else if (strcmp(argv[i], "--hardbreaks") == 0) {
+			options |= CMARK_OPT_HARDBREAKS;
 		} else if ((strcmp(argv[i], "--help") == 0) ||
 			   (strcmp(argv[i], "-h") == 0)) {
 			print_usage();
@@ -130,7 +138,7 @@ int main(int argc, char *argv[])
 	cmark_parser_free(parser);
 
 	start_timer();
-	print_document(document, writer);
+	print_document(document, writer, options);
 	end_timer("print_document");
 
 	start_timer();
diff --git a/src/man.c b/src/man.c
@@ -226,7 +226,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 	return 1;
 }
 
-char *cmark_render_man(cmark_node *root)
+char *cmark_render_man(cmark_node *root, long options)
 {
 	char *result;
 	cmark_strbuf man = GH_BUF_INIT;
diff --git a/src/xml.c b/src/xml.c
@@ -120,7 +120,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
 	return 1;
 }
 
-char *cmark_render_xml(cmark_node *root)
+char *cmark_render_xml(cmark_node *root, long options)
 {
 	char *result;
 	cmark_strbuf xml = GH_BUF_INIT;