cmark

My personal build of CMark ✏️

Commit
f4afff1ce6c59a9a6ad7a5d370aab902a8cdb4c9
Parent
3d46c2b594c1230cebb89c48c86b8a80aee43553
Author
John MacFarlane <jgm@berkeley.edu>
Date

Added commonmark renderer.

This is still incomplete. (See TODOs in the source.)

Diffstat

5 files changed, 356 insertions, 2 deletions

Status File Name N° Changes Insertions Deletions
Modified man/man3/cmark.3 8 7 1
Modified src/CMakeLists.txt 1 1 0
Modified src/cmark.h 5 5 0
Added src/commonmark.c 336 336 0
Modified src/main.c 8 7 1
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "March 15, 2015" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "March 18, 2015" "LOCAL" "Library Functions Manual"
 .SH
 NAME
 .PP
@@ -474,6 +474,12 @@ to add an appropriate header and footer.
 Render a \f[I]node\f[] tree as a groff man page, without the header.
 
 .PP
+\fIchar *\f[] \fBcmark_render_commonmark\f[](\fIcmark_node *root\f[], \fIint options\f[])
+
+.PP
+Render a \f[I]node\f[] tree as a commonmark document.
+
+.PP
 .nf
 \fC
 .RS 0n
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -32,6 +32,7 @@ set(LIBRARY_SOURCES
   man.c
   xml.c
   html.c
+  commonmark.c
   html_unescape.gperf
   houdini_href_e.c
   houdini_html_e.c
diff --git a/src/cmark.h b/src/cmark.h
@@ -481,6 +481,11 @@ char *cmark_render_html(cmark_node *root, int options);
 CMARK_EXPORT
 char *cmark_render_man(cmark_node *root, int options);
 
+/** Render a 'node' tree as a commonmark document.
+ */
+CMARK_EXPORT
+char *cmark_render_commonmark(cmark_node *root, int options);
+
 /** Default writer options.
  */
 #define CMARK_OPT_DEFAULT 0
diff --git a/src/commonmark.c b/src/commonmark.c
@@ -0,0 +1,336 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "utf8.h"
+
+// Functions to convert cmark_nodes to commonmark strings.
+
+// Mutable state shared by the rendering helpers while a document is
+// being written out.
+struct render_state {
+	// Output produced so far.
+	cmark_strbuf* buffer;
+	// Text written at the start of each output line (grows and shrinks
+	// as block quotes and list items are entered and left).
+	cmark_strbuf* prefix;
+	// Column reached on the line currently being written.
+	int column;
+	// Column after which lines are wrapped; wrapping only happens
+	// when this is greater than zero.
+	int width;
+	// Pending line breaks requested via cr() (1) or blankline() (2);
+	// they are flushed by the next call to out().
+	int need_cr;
+	// Offset in 'buffer' of the last space at which the current line
+	// may be broken; 0 means there is no such position.
+	int last_breakable;
+	// True while nothing has been written on the current line yet.
+	bool begin_line;
+};
+
+/* Ask for the next output to begin on a new line. A stronger request
+ * that is already pending (for instance a blank line) is left alone. */
+static inline void cr(struct render_state *state)
+{
+	const int wanted = 1;
+
+	state->need_cr = (state->need_cr < wanted) ? wanted : state->need_cr;
+}
+
+/* Ask for a blank line before the next output, i.e. raise the number of
+ * pending line breaks to at least two. */
+static inline void blankline(struct render_state *state)
+{
+	const int wanted = 2;
+
+	state->need_cr = (state->need_cr < wanted) ? wanted : state->need_cr;
+}
+
+/* Report whether code point 'c' has to be preceded by a backslash when it
+ * is written as literal text. 'd' is the byte that follows 'c' in the
+ * source and settles the context-dependent cases:
+ *   - '&' is escaped only when it could start an entity or a numeric
+ *     character reference (next byte is a letter or '#');
+ *   - '!' is escaped only when it could start an image (next byte '[').
+ * Returns true when an escape is required. */
+static inline bool needs_escaping(int32_t c, unsigned char d)
+{
+	// TODO escape potential list markers at beginning of line
+	// (add param)
+	switch (c) {
+	case '*':
+	case '_':
+	case '[':
+	case ']':
+	case '<':
+	case '>':
+	case '\\':
+	case '`':	// would otherwise open or close a code span
+		return true;
+	case '&':
+		return isalpha(d) != 0 || d == '#';
+	case '!':
+		return d == '[';
+	default:
+		return false;
+	}
+}
+
+/* Append the contents of 'str' to the output buffer.
+ *
+ * Line breaks requested earlier through cr()/blankline() are flushed
+ * first. The current prefix is written before the first character of
+ * every line.
+ *
+ * state:  renderer state; its buffer, column and wrapping fields are
+ *         updated.
+ * str:    text to write.
+ * wrap:   when true, a space is a possible break point, runs of spaces
+ *         collapse to one, and spaces at the start of a line are dropped.
+ *         A line that grows past state->width is broken at the last such
+ *         space.
+ * escape: when true, characters for which needs_escaping() holds are
+ *         preceded by a backslash.
+ */
+static inline void out(struct render_state *state,
+		       cmark_chunk str,
+		       bool wrap,
+		       bool escape)
+{
+	unsigned char* source = str.data;
+	int length = str.len;
+	unsigned char nextc;
+	int32_t c;
+	int i = 0;
+	int len;
+	cmark_chunk remainder = cmark_chunk_literal("");
+	int k = state->buffer->size - 1;
+
+	// Flush pending line breaks. Newlines already at the end of the
+	// buffer count towards the request instead of being duplicated.
+	while (state->need_cr) {
+		if (k < 0 || state->buffer->ptr[k] == '\n') {
+			k -= 1;
+		} else {
+			cmark_strbuf_putc(state->buffer, '\n');
+			if (state->need_cr > 1) {
+				cmark_strbuf_put(state->buffer, state->prefix->ptr,
+						 state->prefix->size);
+			}
+		}
+		state->column = 0;
+		state->begin_line = true;
+		state->need_cr -= 1;
+	}
+
+	while (i < length) {
+		len = utf8proc_iterate(source + i, length - i, &c);
+		if (len <= 0) {
+			// A non-positive length would stall or rewind this
+			// loop, so give up on the rest of the string.
+			return;
+		}
+		// Look at the following byte without reading past the chunk.
+		nextc = (i + len < length) ? source[i + len] : '\0';
+
+		// A wrappable space at the start of a line is dropped below
+		// and leaves begin_line set, so the prefix must wait for the
+		// first character that is really written; otherwise it would
+		// be emitted twice.
+		if (state->begin_line && !(c == 32 && wrap)) {
+			cmark_strbuf_put(state->buffer, state->prefix->ptr,
+					 state->prefix->size);
+			// note: this assumes prefix is ascii:
+			state->column = state->prefix->size;
+		}
+
+		if (c == 32 && wrap) {
+			if (!state->begin_line) {
+				cmark_strbuf_putc(state->buffer, ' ');
+				state->column += 1;
+				state->last_breakable = state->buffer->size -
+					1;
+				// skip following spaces, staying inside the chunk
+				while (i + 1 < length && source[i + 1] == ' ') {
+					i++;
+				}
+			}
+
+		} else if (c == 10) {
+			cmark_strbuf_putc(state->buffer, '\n');
+			state->column = 0;
+			state->begin_line = true;
+			state->last_breakable = 0;
+		} else if (escape && needs_escaping(c, nextc)) {
+			cmark_strbuf_putc(state->buffer, '\\');
+			utf8proc_encode_char(c, state->buffer);
+			state->column += 2;
+			state->begin_line = false;
+		} else {
+			utf8proc_encode_char(c, state->buffer);
+			state->column += 1;
+			state->begin_line = false;
+		}
+
+		// If adding the character went beyond width, look for an
+		// earlier place where the line could be broken:
+		if (state->width > 0 &&
+		    state->column > state->width &&
+		    !state->begin_line &&
+		    state->last_breakable > 0) {
+
+			// copy from last_breakable to remainder
+			cmark_chunk_set_cstr(&remainder, (char *) state->buffer->ptr + state->last_breakable + 1);
+			// truncate at last_breakable
+			cmark_strbuf_truncate(state->buffer, state->last_breakable);
+			// add newline, prefix, and remainder
+			cmark_strbuf_putc(state->buffer, '\n');
+			cmark_strbuf_put(state->buffer, state->prefix->ptr,
+					 state->prefix->size);
+			cmark_strbuf_put(state->buffer, remainder.data, remainder.len);
+			state->column = state->prefix->size + remainder.len;
+			cmark_chunk_free(&remainder);
+			state->last_breakable = 0;
+			state->begin_line = false;
+		}
+
+		i += len;
+	}
+}
+
+/* Write the C string 's' through out() with escaping turned off; 'wrap'
+ * is passed on unchanged. Used for markup the renderer itself produces. */
+static void lit(struct render_state *state, char *s, bool wrap)
+{
+	out(state, cmark_chunk_literal(s), wrap, false);
+}
+
+
+/* Size of the scratch buffer that holds a formatted ordered-list marker. */
+enum { LISTMARKER_SIZE = 20 };
+
+/* Format the marker of ordered-list item 'node' (for example "3.  ")
+ * into 'marker', a buffer of 'size' bytes with size > 0. The number is
+ * the list's start value plus the count of preceding siblings. Single
+ * digits are padded with two spaces so their content lines up with that
+ * of two-digit items. Returns the length of the stored marker. */
+static int
+S_ordered_marker(cmark_node *node, char *marker, size_t size)
+{
+	int list_number = cmark_node_get_list_start(node->parent);
+	cmark_node *tmp = node;
+
+	while (tmp->prev) {
+		tmp = tmp->prev;
+		list_number += 1;
+	}
+
+	marker[0] = '\0';
+	snprintf(marker, size, "%d.%s", list_number,
+		 list_number < 10 ? "  " : " ");
+	// Measure what was actually stored so the caller's prefix
+	// bookkeeping always matches the emitted text.
+	return (int)strlen(marker);
+}
+
+/* Write the commonmark for one iterator event.
+ *
+ * node:    node the event refers to.
+ * ev_type: CMARK_EVENT_ENTER when the node is being entered; any other
+ *          value is treated as leaving it.
+ * state:   renderer state, updated in place.
+ *
+ * Node types that check 'entering' write their opening syntax on entry
+ * and their closing syntax (or trailing break request) otherwise.
+ * Always returns 1.
+ */
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type,
+              struct render_state *state)
+{
+	char listmarker[LISTMARKER_SIZE];
+	int marker_width;
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+
+	switch (node->type) {
+	case CMARK_NODE_DOCUMENT:
+		if (!entering) {
+			cmark_strbuf_putc(state->buffer, '\n');
+		}
+		break;
+
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
+			lit(state, "> ", false);
+			// every further line of the quote starts with "> "
+			cmark_strbuf_puts(state->prefix, "> ");
+		} else {
+			cmark_strbuf_truncate(state->prefix, state->prefix->size - 2);
+			blankline(state);
+		}
+		break;
+
+	case CMARK_NODE_LIST:
+		break;
+
+	case CMARK_NODE_ITEM:
+		if (entering) {
+			if (cmark_node_get_list_type(node->parent) ==
+			    CMARK_BULLET_LIST) {
+				lit(state, "- ", false);
+				cmark_strbuf_puts(state->prefix, "  ");
+			} else {
+				// Emit the item's real number so the list's
+				// start value survives the round trip.
+				marker_width = S_ordered_marker(node, listmarker,
+								sizeof(listmarker));
+				lit(state, listmarker, false);
+				// continuation lines are indented by the
+				// width of the marker
+				for (int i = 0; i < marker_width; i++) {
+					cmark_strbuf_putc(state->prefix, ' ');
+				}
+			}
+		} else {
+			// Remove exactly the indentation added on entry.
+			marker_width = 2;
+			if (cmark_node_get_list_type(node->parent) !=
+			    CMARK_BULLET_LIST) {
+				marker_width = S_ordered_marker(node, listmarker,
+								sizeof(listmarker));
+			}
+			cmark_strbuf_truncate(state->prefix,
+					      state->prefix->size - marker_width);
+			cr(state);
+		}
+		break;
+
+	case CMARK_NODE_HEADER:
+		if (entering) {
+			for (int i = cmark_node_get_header_level(node); i > 0; i--) {
+				lit(state, "#", false);
+			}
+			lit(state, " ", false);
+			// TODO set a "nowrap" variable in state, and refer to this in out()
+		} else {
+			blankline(state);
+		}
+		break;
+
+	case CMARK_NODE_CODE_BLOCK:
+		blankline(state);
+		// TODO variable number of ticks
+		lit(state, "```", false);
+		cr(state);
+		// TODO info string
+		// TODO use indented form if no info string?
+		// Code is literal: a backslash added inside the fence would
+		// show up in the rendered code, so escaping stays off.
+		out(state, node->as.code.literal, false, false);
+		cr(state);
+		lit(state, "```", false);
+		blankline(state);
+		break;
+
+	case CMARK_NODE_HTML:
+		blankline(state);
+		// NOTE(review): block HTML is read through as.code.literal
+		// while inline HTML below uses as.literal -- confirm this is
+		// the member the parser fills in for HTML blocks.
+		out(state, node->as.code.literal, false, false);
+		blankline(state);
+		break;
+
+	case CMARK_NODE_HRULE:
+		blankline(state);
+		lit(state, "-----", false);
+		blankline(state);
+		break;
+
+	case CMARK_NODE_PARAGRAPH:
+		if (!entering) {
+			blankline(state);
+		}
+		break;
+
+	case CMARK_NODE_TEXT:
+		out(state, node->as.literal, true, true);
+		break;
+
+	case CMARK_NODE_LINEBREAK:
+		// hard break: backslash at the end of the line
+		lit(state, "\\", false);
+		cr(state);
+		break;
+
+	case CMARK_NODE_SOFTBREAK:
+		lit(state, " ", true);
+		break;
+
+	case CMARK_NODE_CODE:
+		// TODO variable number of ticks
+		lit(state, "`", false);
+		out(state, node->as.literal, true, false);
+		lit(state, "`", false);
+		break;
+
+	case CMARK_NODE_INLINE_HTML:
+		out(state, node->as.literal, true, false);
+		break;
+
+	case CMARK_NODE_STRONG:
+		// the same delimiter opens and closes
+		lit(state, "**", false);
+		break;
+
+	case CMARK_NODE_EMPH:
+		// the same delimiter opens and closes
+		lit(state, "*", false);
+		break;
+
+	case CMARK_NODE_LINK:
+		if (entering) {
+			lit(state, "[", false);
+		} else {
+			lit(state, "](", false);
+			out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, true);
+			// TODO title
+			lit(state, ")", false);
+		}
+		break;
+
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			lit(state, "![", false);
+		} else {
+			lit(state, "](", false);
+			out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, true);
+			// TODO title
+			lit(state, ")", false);
+		}
+		break;
+
+	default:
+		assert(false);
+		break;
+	}
+
+	return 1;
+}
+
+/* Render the tree rooted at 'root' as commonmark text.
+ *
+ * Walks the tree with an iterator, passes every event to S_render_node()
+ * and returns the string detached from the output buffer. Lines are
+ * wrapped at 65 columns. 'options' is not consulted yet.
+ */
+char *cmark_render_commonmark(cmark_node *root, int options)
+{
+	cmark_strbuf commonmark = GH_BUF_INIT;
+	cmark_strbuf prefix = GH_BUF_INIT;
+	struct render_state state;
+	cmark_iter *iter = cmark_iter_new(root);
+	cmark_event_type ev_type;
+	char *result;
+
+	(void)options; // avoid warning about unused parameters
+
+	state.buffer = &commonmark;
+	state.prefix = &prefix;
+	state.column = 0;
+	state.width = 65;
+	state.need_cr = 0;
+	state.last_breakable = 0;
+	state.begin_line = true;
+
+	for (;;) {
+		ev_type = cmark_iter_next(iter);
+		if (ev_type == CMARK_EVENT_DONE) {
+			break;
+		}
+		S_render_node(cmark_iter_get_node(iter), ev_type, &state);
+	}
+
+	result = (char *)cmark_strbuf_detach(&commonmark);
+
+	cmark_strbuf_free(&prefix);
+	cmark_iter_free(iter);
+	return result;
+}
diff --git a/src/main.c b/src/main.c
@@ -17,13 +17,14 @@ typedef enum {
 	FORMAT_HTML,
 	FORMAT_XML,
 	FORMAT_MAN,
+	FORMAT_COMMONMARK
 } writer_format;
 
 void print_usage()
 {
 	printf("Usage:   cmark [FILE*]\n");
 	printf("Options:\n");
-	printf("  --to, -t FORMAT  Specify output format (html, xml, man)\n");
+	printf("  --to, -t FORMAT  Specify output format (html, xml, man, commonmark)\n");
 	printf("  --sourcepos      Include source position attribute\n");
 	printf("  --hardbreaks     Treat newlines as hard line breaks\n");
 	printf("  --smart          Use smart punctuation\n");
@@ -46,6 +47,9 @@ static void print_document(cmark_node *document, writer_format writer,
 	case FORMAT_MAN:
 		result = cmark_render_man(document, options);
 		break;
+	case FORMAT_COMMONMARK:
+		result = cmark_render_commonmark(document, options);
+		break;
 	default:
 		fprintf(stderr, "Unknown format %d\n", writer);
 		exit(1);
@@ -98,6 +102,8 @@ int main(int argc, char *argv[])
 					writer = FORMAT_HTML;
 				} else if (strcmp(argv[i], "xml") == 0) {
 					writer = FORMAT_XML;
+				} else if (strcmp(argv[i], "commonmark") == 0) {
+					writer = FORMAT_COMMONMARK;
 				} else {
 					fprintf(stderr,
 					        "Unknown format %s\n", argv[i]);