cmark

My personal build of CMark ✏️

Commit
52c591d75433b16cf32f4fae319ccb60b20f6ae7
Parent
a29c16c5e283fb50ecd318477072687caf987d4a
Author
John MacFarlane <jgm@berkeley.edu>
Date

cmark: Add function & option to normalize text nodes.

So, instead of

<text>Hi</text> <text>&amp;</text> <text>lo</text>

we get

<text>Hi&amp;lo</text>

* Added exported `cmark_consolidate_text_nodes` function.
* Added `CMARK_OPT_NORMALIZE` to options.
* Added optional normalization in XML writer.
* Added `--normalize` option to command-line program.
* Updated man page.

Diffstat

5 files changed, 57 insertions, 0 deletions

Status File Name N° Changes Insertions Deletions
Modified man/man1/cmark.1 9 9 0
Modified src/cmark.h 9 9 0
Modified src/iterator.c 31 31 0
Modified src/main.c 3 3 0
Modified src/xml.c 5 5 0
diff --git a/man/man1/cmark.1 b/man/man1/cmark.1
@@ -23,6 +23,15 @@ concatenated before parsing.
 \-\-to, \-t \f[I]FORMAT\f[]
 Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]).
 .TP 12n
+\-\-sourcepos
+Include source position attribute.
+.TP 12n
+\-\-hardbreaks
+Treat newlines as hard line breaks.
+.TP 12n
+\-\-normalize
+Consolidate adjacent text nodes.
+.TP 12n
 \-\-help
 Print usage information.
 .TP 12n
diff --git a/src/cmark.h b/src/cmark.h
@@ -356,6 +356,11 @@ cmark_node_prepend_child(cmark_node *node, cmark_node *child);
 CMARK_EXPORT int
 cmark_node_append_child(cmark_node *node, cmark_node *child);
 
+/** Consolidates adjacent text nodes.
+ */
+CMARK_EXPORT void
+cmark_consolidate_text_nodes(cmark_node *root);
+
 /**
  * ## Parsing
  *
@@ -441,6 +446,10 @@ char *cmark_render_man(cmark_node *root, long options);
  */
 #define CMARK_OPT_HARDBREAKS 2
 
+/** Normalize tree by consolidating adjacent text nodes.
+ */
+#define CMARK_OPT_NORMALIZE 4
+
 /** # AUTHORS
  *
  * John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/iterator.c b/src/iterator.c
@@ -84,3 +84,34 @@ cmark_iter_get_node(cmark_iter *iter)
 
 	return cur;
 }
+
+
+void cmark_consolidate_text_nodes(cmark_node *root)
+{
+	cmark_iter *iter = cmark_iter_new(root);
+	cmark_strbuf buf = GH_BUF_INIT;
+	cmark_event_type ev_type;
+	cmark_node *cur, *tmp, *next;
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		if (ev_type == CMARK_EVENT_ENTER &&
+		    cur->type == CMARK_NODE_TEXT &&
+		    cur->next &&
+		    cur->next->type == CMARK_NODE_TEXT) {
+			cmark_strbuf_clear(&buf);
+			cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+			tmp = cur->next;
+			while (tmp && tmp->type == CMARK_NODE_TEXT) {
+				cmark_iter_get_node(iter); // advance pointer
+				cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+				next = tmp->next;
+				cmark_node_free(tmp);
+				tmp = next;
+			}
+			cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf));
+		}
+	}
+
+	cmark_iter_free(iter);
+}
diff --git a/src/main.c b/src/main.c
@@ -26,6 +26,7 @@ void print_usage()
 	printf("  --to, -t FORMAT  Specify output format (html, xml, man)\n");
 	printf("  --sourcepos      Include source position attribute\n");
 	printf("  --hardbreaks     Treat newlines as hard line breaks\n");
+	printf("  --normalize      Consolidate adjacent text nodes\n");
 	printf("  --help, -h       Print usage information\n");
 	printf("  --version        Print version\n");
 }
@@ -79,6 +80,8 @@ int main(int argc, char *argv[])
 			options |= CMARK_OPT_SOURCEPOS;
 		} else if (strcmp(argv[i], "--hardbreaks") == 0) {
 			options |= CMARK_OPT_HARDBREAKS;
+		} else if (strcmp(argv[i], "--normalize") == 0) {
+			options |= CMARK_OPT_NORMALIZE;
 		} else if ((strcmp(argv[i], "--help") == 0) ||
 		           (strcmp(argv[i], "-h") == 0)) {
 			print_usage();
diff --git a/src/xml.c b/src/xml.c
@@ -123,6 +123,11 @@ char *cmark_render_xml(cmark_node *root, long options)
 	cmark_event_type ev_type;
 	cmark_node *cur;
 	struct render_state state = { &xml, 0 };
+
+	if (options & CMARK_OPT_NORMALIZE) {
+		cmark_consolidate_text_nodes(root);
+	}
+
 	cmark_iter *iter = cmark_iter_new(root);
 
 	cmark_strbuf_puts(state.xml,