cmark

My personal build of CMark ✏️

Commit
59cc3c9323dc0b7aa1fd5817e12884ef925461d4
Parent
a3030f985a973b3b835645313fdad1a8a72ff432
Author
John MacFarlane <jgm@berkeley.edu>
Date

Added cmark_render_man (man page writer).

cmark: Replaced `--man` and `--ast` with generic `--to` option.

Diffstat

6 files changed, 304 insertions, 19 deletions

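| Status   | File Name          | N° Changes | Insertions | Deletions |
|----------|--------------------|------------|------------|-----------|
| Modified | man/man1/cmark.1   | 7          | 4          | 3         |
| Modified | man/man3/cmark.3   | 4          | 4          | 0         |
| Modified | src/CMakeLists.txt | 1          | 1          | 0         |
| Modified | src/cmark.h        | 5          | 5          | 0         |
| Modified | src/main.c         | 61         | 45         | 16        |
| Added    | src/man.c          | 245        | 245        | 0         |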
diff --git a/man/man1/cmark.1 b/man/man1/cmark.1
@@ -13,14 +13,15 @@ acts as a pipe, reading from
 \fRstdin\fR
 or from the specified files and writing to
 \fRstdout\fR.
-It converts Markdown formatted plain text to HTML, using the conventions
+It converts Markdown formatted plain text to HTML (or groff man or
+an abstract representation of the AST), using the conventions
 described in the CommonMark spec.
 If multiple files are specified, the contents of the files are simply
 concatenated before parsing.
 .SH "OPTIONS"
 .TP 12n
-\-\--ast
-Print an abstract syntax tree instead of HTML.
+\-\-to, \-t \f[I]FORMAT\f[]
+Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]).
 .TP 12n
 \-\-help
 Print usage information.
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
@@ -292,6 +292,10 @@ typedef enum {
 
 .PP
 
+\fIchar *\fR \fBcmark_render_man\fR(\fIcmark_node *root\fR)
+
+.PP
+
 .SH AUTHORS
 
 John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ set(LIBRARY_SOURCES
   utf8.c
   buffer.c
   references.c
+  man.c
   html.c
   html_unescape.gperf
   houdini_href_e.c
diff --git a/src/cmark.h b/src/cmark.h
@@ -346,6 +346,11 @@ char *cmark_render_ast(cmark_node *root);
 CMARK_EXPORT
 char *cmark_render_html(cmark_node *root);
 
+/**
+ */
+CMARK_EXPORT
+char *cmark_render_man(cmark_node *root);
+
 /** .SH AUTHORS
  *
  * John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/main.c b/src/main.c
@@ -7,22 +7,38 @@
 #include "debug.h"
 #include "bench.h"
 
+typedef enum {
+	FORMAT_NONE,
+	FORMAT_HTML,
+	FORMAT_MAN,
+	FORMAT_AST
+} writer_format;
+
 void print_usage()
 {
 	printf("Usage:   cmark [FILE*]\n");
-	printf("Options: --help, -h    Print usage information\n");
-	printf("         --ast         Print AST instead of HTML\n");
-	printf("         --version     Print version\n");
+	printf("Options:\n");
+	printf("  --to, -t FORMAT  Specify output format (html, man, ast)\n");
+	printf("  --help, -h       Print usage information\n");
+	printf("  --version        Print version\n");
 }
 
-static void print_document(cmark_node *document, bool ast)
+static void print_document(cmark_node *document, writer_format writer)
 {
 	char *result;
-	if (ast) {
+	switch (writer) {
+	case FORMAT_AST:
 		result = cmark_render_ast(document);
-	} else {
-
+		break;
+	case FORMAT_HTML:
 		result = cmark_render_html(document);
+		break;
+	case FORMAT_MAN:
+		result = cmark_render_man(document);
+		break;
+	default:
+		fprintf(stderr, "Unknown format %d\n", writer);
+		exit(1);
 	}
 	printf("%s", result);
 	free(result);
@@ -31,12 +47,12 @@ static void print_document(cmark_node *document, bool ast)
 int main(int argc, char *argv[])
 {
 	int i, numfps = 0;
-	bool ast = false;
 	int *files;
 	char buffer[4096];
 	cmark_parser *parser;
 	size_t bytes;
 	cmark_node *document;
+	writer_format writer = FORMAT_HTML;
 
 	parser = cmark_parser_new();
 	files = (int *)malloc(argc * sizeof(*files));
@@ -50,8 +66,26 @@ int main(int argc, char *argv[])
 			   (strcmp(argv[i], "-h") == 0)) {
 			print_usage();
 			exit(0);
-		} else if (strcmp(argv[i], "--ast") == 0) {
-			ast = true;
+		} else if ((strcmp(argv[i], "-t") == 0) ||
+			   (strcmp(argv[i], "--to") == 0)) {
+			i += 1;
+			if (i < argc) {
+				if (strcmp(argv[i], "man") == 0) {
+					writer = FORMAT_MAN;
+				} else if (strcmp(argv[i], "html") == 0) {
+					writer = FORMAT_HTML;
+				} else if (strcmp(argv[i], "ast") == 0) {
+					writer = FORMAT_AST;
+				} else {
+					fprintf(stderr,
+						"Unknown format %s\n", argv[i]);
+					exit(1);
+				}
+			} else {
+				fprintf(stderr, "No argument provided for %s\n",
+					argv[i - 1]);
+				exit(1);
+			}
 		} else if (*argv[i] == '-') {
 			print_usage();
 			exit(1);
@@ -81,11 +115,6 @@ int main(int argc, char *argv[])
 	}
 
 	if (numfps == 0) {
-		/*
-		document = cmark_parse_file(stdin);
-		print_document(document, ast);
-		exit(0);
-		*/
 
 		while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
 			cmark_parser_feed(parser, buffer, bytes);
@@ -101,7 +130,7 @@ int main(int argc, char *argv[])
 	cmark_parser_free(parser);
 
 	start_timer();
-	print_document(document, ast);
+	print_document(document, writer);
 	end_timer("print_document");
 
 	start_timer();
diff --git a/src/man.c b/src/man.c
@@ -0,0 +1,245 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+
+// Functions to convert cmark_nodes to groff man strings.
+
+static void escape_man(strbuf *dest, const unsigned char *source, int length)
+{
+	int i;
+	unsigned char c;
+
+	for (i = 0; i < length; i++) {
+		c = source[i];
+		if (c == '.' && i == 0) {
+			strbuf_puts(dest, "\\&.");
+		} else if (c == '\'' && i == 0) {
+			strbuf_puts(dest, "\\&'");
+		} else if (c == '-') {
+			strbuf_puts(dest, "\\-");
+		} else if (c == '\\') {
+			strbuf_puts(dest, "\\e");
+		} else {
+			strbuf_putc(dest, source[i]);
+		}
+	}
+}
+
+static inline void cr(strbuf *man)
+{
+	if (man->size && man->ptr[man->size - 1] != '\n')
+		strbuf_putc(man, '\n');
+}
+
+struct render_state {
+	strbuf* man;
+	cmark_node *plain;
+};
+
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+{
+	struct render_state *state = vstate;
+	cmark_node *tmp;
+	strbuf *man = state->man;
+	int list_number;
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+
+	if (state->plain == node) { // back at original node
+		state->plain = NULL;
+	}
+
+	if (state->plain != NULL) {
+		switch(node->type) {
+		case CMARK_NODE_TEXT:
+		case CMARK_NODE_INLINE_CODE:
+		case CMARK_NODE_INLINE_HTML:
+			escape_man(man, node->as.literal.data,
+				    node->as.literal.len);
+			break;
+
+		case CMARK_NODE_LINEBREAK:
+		case CMARK_NODE_SOFTBREAK:
+			strbuf_putc(man, ' ');
+			break;
+
+		default:
+			break;
+		}
+		return 1;
+	}
+
+	switch (node->type) {
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man, ".RS");
+			cr(man);
+		} else {
+			cr(man);
+			strbuf_puts(man, ".RE");
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_LIST:
+		break;
+
+	case CMARK_NODE_LIST_ITEM:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man, ".IP ");
+			if (cmark_node_get_list_type(node->parent) ==
+			    CMARK_BULLET_LIST) {
+				strbuf_puts(man, "\\[bu] 2");
+			} else {
+				list_number = cmark_node_get_list_start(node->parent);
+				tmp = node;
+				while (tmp->prev) {
+					tmp = tmp->prev;
+					list_number += 1;
+				}
+				strbuf_printf(man, "\"%d.\" 4", list_number);
+			}
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_HEADER:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man,
+				    cmark_node_get_header_level(node) == 1 ?
+				    ".SH" : ".SS");
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_CODE_BLOCK:
+		cr(man);
+		strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
+		escape_man(man, node->string_content.ptr,
+			   node->string_content.size);
+		cr(man);
+		strbuf_puts(man, "\\f[]\n.fi");
+		cr(man);
+		break;
+
+	case CMARK_NODE_HTML:
+		break;
+
+	case CMARK_NODE_HRULE:
+		cr(man);
+		strbuf_puts(man, ".PP\n  *  *  *  *  *");
+		cr(man);
+		break;
+
+	case CMARK_NODE_PARAGRAPH:
+		if (entering) {
+			// no blank line if first paragraph in list:
+			if (node->parent &&
+			    node->parent->type == CMARK_NODE_LIST_ITEM &&
+			    node->prev == NULL) {
+				// no blank line or .PP
+			} else {
+				cr(man);
+				strbuf_puts(man, ".PP\n");
+			}
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_TEXT:
+		escape_man(man, node->as.literal.data,
+			    node->as.literal.len);
+		break;
+
+	case CMARK_NODE_LINEBREAK:
+		strbuf_puts(man, ".PD 0\n.P\n.PD");
+		cr(man);
+		break;
+
+	case CMARK_NODE_SOFTBREAK:
+		strbuf_putc(man, '\n');
+		break;
+
+	case CMARK_NODE_INLINE_CODE:
+		strbuf_puts(man, "\\f[C]");
+		escape_man(man, node->as.literal.data, node->as.literal.len);
+		strbuf_puts(man, "\\f[]");
+		break;
+
+	case CMARK_NODE_INLINE_HTML:
+		break;
+
+	case CMARK_NODE_STRONG:
+		if (entering) {
+			strbuf_puts(man, "\\f[B]");
+		} else {
+			strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_EMPH:
+		if (entering) {
+			strbuf_puts(man, "\\f[I]");
+		} else {
+			strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_LINK:
+		if (!entering) {
+			strbuf_printf(man, " (%s)",
+				      cmark_node_get_url(node));
+		}
+		break;
+
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			strbuf_puts(man, "[IMAGE: ");
+			state->plain = node;
+		} else {
+			strbuf_puts(man, "]");
+		}
+		break;
+
+	default:
+		assert(false);
+		break;
+	}
+
+	// strbuf_putc(man, 'x');
+	return 1;
+}
+
+char *cmark_render_man(cmark_node *root)
+{
+	char *result;
+	strbuf man = GH_BUF_INIT;
+	struct render_state state = { &man, NULL };
+	cmark_node *cur;
+	cmark_event_type ev_type;
+	cmark_iter *iter = cmark_iter_new(root);
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		S_render_node(cur, ev_type, &state);
+	}
+	result = (char *)strbuf_detach(&man);
+
+	cmark_iter_free(iter);
+	strbuf_free(&man);
+	return result;
+}