cmark

My personal build of CMark ✏️

Commit
a3030f985a973b3b835645313fdad1a8a72ff432
Parent
8d6efe632aaeb1831c86b27e120a344c5e3ee7d4
Author
John MacFarlane <jgm@berkeley.edu>
Date

Added iterator interface to API, removed cmark_walk.

* Added `iterator.c`, `iterator.h`.
* Removed `cmark_walk`.
* Replaced `cmark_walk` with iterator in HTML renderer.
* Replaced API test for `cmark_walk` with simple iterator test.

Diffstat

8 files changed, 205 insertions, 133 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 35 16 19
Modified man/man3/cmark.3 50 29 21
Modified src/CMakeLists.txt 2 2 0
Modified src/cmark.h 60 39 21
Modified src/html.c 18 12 6
Added src/iterator.c 86 86 0
Added src/iterator.h 21 21 0
Modified src/node.c 66 0 66
diff --git a/api_test/main.c b/api_test/main.c
@@ -293,27 +293,24 @@ node_check(test_batch_runner *runner) {
 	cmark_node_free(doc);
 }
 
-static int
-S_handler(cmark_node *node, cmark_event_type ev_type, void *state)
-{
-	int *textnodes = state;
-	if (ev_type == CMARK_EVENT_ENTER) {
-		if (node->type == CMARK_NODE_TEXT) {
-			*textnodes += 1;
-		}
-	}
-	return 1;
-}
-
 static void
-walk(test_batch_runner *runner) {
-	// Construct an incomplete tree.
+iterator(test_batch_runner *runner) {
 	cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10);
-	int textnodes = 0;
-	INT_EQ(runner, cmark_walk(doc, S_handler, &textnodes), 1,
-	       "walk succeeds");
-	INT_EQ(runner, textnodes, 3, "walk correctly counts text nodes");
+	int parnodes = 0;
+	cmark_event_type ev_type;
+	cmark_iter *iter = cmark_iter_new(doc);
+	cmark_node *cur;
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		if (cur->type == CMARK_NODE_PARAGRAPH &&
+		    ev_type == CMARK_EVENT_ENTER) {
+			parnodes += 1;
+		}
+	}
+	INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs");
 
+	cmark_iter_free(iter);
 	cmark_node_free(doc);
 }
 
@@ -627,7 +624,7 @@ int main() {
 	constructor(runner);
 	accessors(runner);
 	node_check(runner);
-	walk(runner);
+	iterator(runner);
 	create_tree(runner);
 	hierarchy(runner);
 	parser(runner);
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
@@ -4,7 +4,9 @@
 .B cmark
 \- CommonMark parsing, manipulating, and rendering
 
-.SH SIMPLE INTERFACE
+.SH DESCRIPTION
+
+.SS Simple Interface
 
 .nf
 \f[C]
@@ -24,7 +26,7 @@ Convert \fItext\fR (assumed to be a UTF-8 encoded string with length
 \fIlen\fR from CommonMark Markdown to HTML, returning a null-terminated,
 UTF-8-encoded string.
 
-.SH NODE STRUCTURE
+.SS Node Structure
 
 .nf
 \f[C]
@@ -95,7 +97,7 @@ typedef enum {
 .PP
 
 
-.SH CREATING AND DESTROYING NODES
+.SS Creating and Destroying Nodes
 
 \fIcmark_node*\fR \fBcmark_node_new\fR(\fIcmark_node_type type\fR)
 
@@ -109,7 +111,7 @@ typedef enum {
 
 .PP
 
-.SH TREE TRAVERSAL
+.SS Tree Traversal
 
 \fIcmark_node*\fR \fBcmark_node_previous\fR(\fIcmark_node *node\fR)
 
@@ -128,7 +130,26 @@ typedef enum {
 .PP
 
 
-.SH ACCESSORS
+.SS Iterator
+
+\fIcmark_iter*\fR \fBcmark_iter_new\fR(\fIcmark_node *root\fR)
+
+.PP
+
+\fIvoid\fR \fBcmark_iter_free\fR(\fIcmark_iter *iter\fR)
+
+.PP
+
+\fIcmark_event_type\fR \fBcmark_iter_next\fR(\fIcmark_iter *iter\fR)
+
+.PP
+
+\fIcmark_node*\fR \fBcmark_iter_get_node\fR(\fIcmark_iter *iter\fR)
+
+.PP
+
+
+.SS Accessors
 
 \fIcmark_node_type\fR \fBcmark_node_get_type\fR(\fIcmark_node *node\fR)
 
@@ -211,7 +232,7 @@ typedef enum {
 .PP
 
 
-.SH TREE MANIPULATION
+.SS Tree Manipulation
 
 \fIvoid\fR \fBcmark_node_unlink\fR(\fIcmark_node *node\fR)
 
@@ -234,7 +255,7 @@ typedef enum {
 .PP
 
 
-.SH PARSING
+.SS Parsing
 
 \fIcmark_parser *\fR \fBcmark_parser_new\fR(\fI\fR)
 
@@ -261,7 +282,7 @@ typedef enum {
 .PP
 
 
-.SH RENDERING
+.SS Rendering
 
 \fIchar *\fR \fBcmark_render_ast\fR(\fIcmark_node *root\fR)
 
@@ -271,19 +292,6 @@ typedef enum {
 
 .PP
 
-\fIint\fR \fBcmark_walk\fR(\fIcmark_node *root\fR, \fIcmark_node_handler handler\fR, \fIvoid *state\fR)
-
-.PP
-Walks the tree starting from root, applying handler to each node.
-Nodes that can have children are visited twice, once on the way in
-and once on the way out.  handler is a function that takes a node
-pointer, a cmark_event_type,
-and a pointer to a state structure that can be consulted and
-updated by the handler.  The handler should return 1 on success,
-0 on failure.  cmark_walk returns 1 if it traversed the entire
-tree, 0 if it quit early in response to a 0 status from the
-handler.
-
 .SH AUTHORS
 
 John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -5,6 +5,7 @@ set(HEADERS
   parser.h
   buffer.h
   node.h
+  iterator.h
   chunk.h
   references.h
   debug.h
@@ -18,6 +19,7 @@ set(HEADERS
 set(LIBRARY_SOURCES
   cmark.c
   node.c
+  iterator.c
   blocks.c
   inlines.c
   print.c
diff --git a/src/cmark.h b/src/cmark.h
@@ -14,7 +14,9 @@ extern "C" {
  * \- CommonMark parsing, manipulating, and rendering
  */
 
-/** .SH SIMPLE INTERFACE
+/** .SH DESCRIPTION
+ *
+ * .SS Simple Interface
  */
 
 /** Current version of library.
@@ -28,7 +30,7 @@ extern "C" {
 CMARK_EXPORT
 char *cmark_markdown_to_html(const char *text, int len);
 
-/** .SH NODE STRUCTURE
+/** .SS Node Structure
  */
 
 /**
@@ -84,6 +86,7 @@ typedef enum {
 
 typedef struct cmark_node cmark_node;
 typedef struct cmark_parser cmark_parser;
+typedef struct cmark_iter cmark_iter;
 
 typedef enum {
 	CMARK_EVENT_DONE,
@@ -95,7 +98,7 @@ typedef int (*cmark_node_handler)(cmark_node *node, cmark_event_type ev_type,
 				  void *state);
 
 /**
- * .SH CREATING AND DESTROYING NODES
+ * .SS Creating and Destroying Nodes
  */
 
 /**
@@ -109,7 +112,7 @@ CMARK_EXPORT void
 cmark_node_free(cmark_node *node);
 
 /**
- * .SH TREE TRAVERSAL
+ * .SS Tree Traversal
  */
 CMARK_EXPORT cmark_node*
 cmark_node_next(cmark_node *node);
@@ -135,7 +138,35 @@ CMARK_EXPORT cmark_node*
 cmark_node_last_child(cmark_node *node);
 
 /**
- * .SH ACCESSORS
+ * .SS Iterator
+ */
+
+/**
+ */
+CMARK_EXPORT
+cmark_iter*
+cmark_iter_new(cmark_node *root);
+
+/**
+ */
+CMARK_EXPORT
+void
+cmark_iter_free(cmark_iter *iter);
+
+/**
+ */
+CMARK_EXPORT
+cmark_event_type
+cmark_iter_next(cmark_iter *iter);
+
+/**
+ */
+CMARK_EXPORT
+cmark_node*
+cmark_iter_get_node(cmark_iter *iter);
+
+/**
+ * .SS Accessors
  */
 
 /**
@@ -239,7 +270,7 @@ CMARK_EXPORT int
 cmark_node_get_end_line(cmark_node *node);
 
 /**
- * .SH TREE MANIPULATION
+ * .SS Tree Manipulation
  */
 
 /**
@@ -268,7 +299,7 @@ CMARK_EXPORT int
 cmark_node_append_child(cmark_node *node, cmark_node *child);
 
 /**
- * .SH PARSING
+ * .SS Parsing
  */
 
 /**
@@ -302,7 +333,7 @@ CMARK_EXPORT
 cmark_node *cmark_parse_file(FILE *f);
 
 /**
- * .SH RENDERING
+ * .SS Rendering
  */
 
 /**
@@ -315,19 +346,6 @@ char *cmark_render_ast(cmark_node *root);
 CMARK_EXPORT
 char *cmark_render_html(cmark_node *root);
 
-/** Walks the tree starting from root, applying handler to each node.
- * Nodes that can have children are visited twice, once on the way in
- * and once on the way out.  handler is a function that takes a node
- * pointer, a cmark_event_type,
- * and a pointer to a state structure that can be consulted and
- * updated by the handler.  The handler should return 1 on success,
- * 0 on failure.  cmark_walk returns 1 if it traversed the entire
- * tree, 0 if it quit early in response to a 0 status from the
- * handler.
- */
-CMARK_EXPORT
-int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state);
-
 /** .SH AUTHORS
  *
  * John MacFarlane, Vicent Marti,  Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/html.c b/src/html.c
@@ -271,12 +271,18 @@ char *cmark_render_html(cmark_node *root)
 {
 	char *result;
 	strbuf html = GH_BUF_INIT;
+	cmark_event_type ev_type;
+	cmark_node *cur;
 	struct render_state state = { &html, NULL };
-	if (cmark_walk(root, S_render_node, &state)) {
-		result = (char *)strbuf_detach(&html);
-		strbuf_free(&html);
-		return result;
-	} else {
-		return NULL;
+	cmark_iter *iter = cmark_iter_new(root);
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		S_render_node(cur, ev_type, &state);
 	}
+	result = (char *)strbuf_detach(&html);
+
+	cmark_iter_free(iter);
+	strbuf_free(&html);
+	return result;
 }
diff --git a/src/iterator.c b/src/iterator.c
@@ -0,0 +1,86 @@
+#include <stdlib.h>
+
+#include "config.h"
+#include "node.h"
+#include "cmark.h"
+#include "iterator.h"
+
+cmark_iter*
+cmark_iter_new(cmark_node *root)
+{
+	cmark_iter *iter = (cmark_iter*)malloc(sizeof(cmark_iter));
+	if (iter == NULL) {
+		return NULL;
+	} else {
+		iter->root = root;
+		iter->current = root;
+		iter->event_type = CMARK_EVENT_ENTER;
+		return iter;
+	}
+}
+
+void
+cmark_iter_free(cmark_iter *iter)
+{
+	free(iter);
+}
+
+cmark_event_type
+cmark_iter_next(cmark_iter *iter)
+{
+	return iter->event_type;
+}
+
+int S_is_leaf(cmark_node *node)
+{
+	switch (cmark_node_get_type(node)) {
+	case CMARK_NODE_HTML:
+	case CMARK_NODE_HRULE:
+	case CMARK_NODE_CODE_BLOCK:
+	case CMARK_NODE_TEXT:
+	case CMARK_NODE_SOFTBREAK:
+	case CMARK_NODE_LINEBREAK:
+	case CMARK_NODE_INLINE_CODE:
+	case CMARK_NODE_INLINE_HTML:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+cmark_node*
+cmark_iter_get_node(cmark_iter *iter)
+{
+	/* we'll return current */
+	cmark_node *cur = iter->current;
+
+	if (cur == NULL || iter->event_type == CMARK_EVENT_DONE) {
+		return NULL;
+	}
+
+	/* roll forward to next item, setting both fields */
+	if (iter->event_type == CMARK_EVENT_ENTER && !S_is_leaf(cur)) {
+		if (cur->first_child == NULL) {
+			/* stay on this node but exit */
+			iter->event_type = CMARK_EVENT_EXIT;
+		} else {
+			iter->current = cur->first_child;
+			iter->event_type = CMARK_EVENT_ENTER;
+		}
+	} else if (cur == iter->root) {
+		/* don't move past root */
+		iter->event_type = CMARK_EVENT_DONE;
+		iter->current = NULL;
+	} else if (cur->next) {
+		iter->event_type = CMARK_EVENT_ENTER;
+		iter->current = cur->next;
+	} else if (cur->parent) {
+		iter->event_type = CMARK_EVENT_EXIT;
+		iter->current = cur->parent;
+	} else {
+		iter->event_type = CMARK_EVENT_DONE;
+		iter->current = NULL;
+	}
+
+	return cur;
+}
diff --git a/src/iterator.h b/src/iterator.h
@@ -0,0 +1,21 @@
+#ifndef CMARK_ITERATOR_H
+#define CMARK_ITERATOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "cmark.h"
+#include "node.h"
+
+struct cmark_iter {
+	cmark_node       *current;
+	cmark_node       *root;
+	cmark_event_type event_type;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/node.c b/src/node.c
@@ -766,69 +766,3 @@ cmark_node_check(cmark_node *node, FILE *out)
 
 	return errors;
 }
-
-int S_is_leaf_node(cmark_node *current_node)
-{
-	switch (cmark_node_get_type(current_node)) {
-	case CMARK_NODE_HTML:
-	case CMARK_NODE_HRULE:
-	case CMARK_NODE_CODE_BLOCK:
-	case CMARK_NODE_TEXT:
-	case CMARK_NODE_SOFTBREAK:
-	case CMARK_NODE_LINEBREAK:
-	case CMARK_NODE_INLINE_CODE:
-	case CMARK_NODE_INLINE_HTML:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state)
-{
-	int ev_type = CMARK_EVENT_ENTER;
-	cmark_node *current_node = root;
-	int depth = 0;
-	cmark_node *next, *parent, *first_child;
-
-	while (current_node != NULL && depth >= 0) {
-
-		next = current_node->next;
-		parent = current_node->parent;
-
-		if (!handler(current_node, ev_type, state)) {
-			return 0;
-		}
-
-		if (ev_type == CMARK_EVENT_ENTER &&
-		    !S_is_leaf_node(current_node)) {
-			first_child = current_node->first_child;
-			if (first_child == NULL) {
-				ev_type = CMARK_EVENT_EXIT; // stay on this node
-			} else {
-				depth += 1;
-				current_node = first_child;
-			}
-		} else {
-			if (current_node) {
-				next = current_node->next;
-				parent = current_node->parent;
-			}
-			if (next) {
-				// don't go past root:
-				if (current_node == root) {
-					ev_type = CMARK_EVENT_DONE;
-					return 1;
-				} else {
-					ev_type = CMARK_EVENT_ENTER;
-					current_node = next;
-				}
-			} else {
-				ev_type = CMARK_EVENT_EXIT;
-				depth -= 1;
-				current_node = parent;
-			}
-		}
-	}
-	return 1;
-}