cmark

My personal build of CMark ✏️

Commit
bfe4d148098717f1603cbe12ba1cf306db09ce3e
Parent
4221ca8f33e2f3fc96e34de26418c04747888e76
Author
John MacFarlane <jgm@berkeley.edu>
Date

Merge pull request #277 from nwellnhof/iterator

Rework iterators

Diffstat

4 files changed, 152 insertions, 74 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 41 41 0
Modified src/cmark.h 52 35 17
Modified src/iterator.c 123 69 54
Modified src/iterator.h 10 7 3
diff --git a/api_test/main.c b/api_test/main.c
@@ -320,6 +320,46 @@ iterator(test_batch_runner *runner) {
 }
 
 static void
+iterator_delete(test_batch_runner *runner) {
+	static const char md[] =
+		"a *b* c\n"
+		"\n"
+		"* item1\n"
+		"* item2\n"
+		"\n"
+		"a `b` c\n"
+		"\n"
+		"* item1\n"
+		"* item2\n";
+	cmark_node *doc  = cmark_parse_document(md, sizeof(md) - 1);
+	cmark_iter *iter = cmark_iter_new(doc);
+	cmark_event_type ev_type;
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cmark_node *node = cmark_iter_get_node(iter);
+		// Delete list, emph, and code nodes.
+		if ((ev_type == CMARK_EVENT_EXIT &&
+		     node->type == CMARK_NODE_LIST) ||
+		    (ev_type == CMARK_EVENT_EXIT &&
+		     node->type == CMARK_NODE_EMPH) ||
+		    (ev_type == CMARK_EVENT_ENTER &&
+		     node->type == CMARK_NODE_CODE)) {
+			cmark_node_free(node);
+		}
+	}
+
+	char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
+	static const char expected[] =
+		"<p>a  c</p>\n"
+		"<p>a  c</p>\n";
+	STR_EQ(runner, html, expected, "iterate and delete nodes");
+
+	free(html);
+	cmark_iter_free(iter);
+	cmark_node_free(doc);
+}
+
+static void
 create_tree(test_batch_runner *runner)
 {
 	char *html;
@@ -630,6 +670,7 @@ int main() {
 	accessors(runner);
 	node_check(runner);
 	iterator(runner);
+	iterator_delete(runner);
 	create_tree(runner);
 	hierarchy(runner);
 	parser(runner);
diff --git a/src/cmark.h b/src/cmark.h
@@ -83,6 +83,7 @@ typedef struct cmark_parser cmark_parser;
 typedef struct cmark_iter cmark_iter;
 
 typedef enum {
+	CMARK_EVENT_NONE,
 	CMARK_EVENT_DONE,
 	CMARK_EVENT_ENTER,
 	CMARK_EVENT_EXIT
@@ -164,13 +165,24 @@ cmark_node_last_child(cmark_node *node);
  *         cmark_iter_free(iter);
  *     }
  *
- * Note that if you delete the current node, its first child, or its
- * next sibling, the iterator may point to a nonexistent note.
- * Use 'cmark_iter_reset' to set its pointer to the next node that
- * should be traversed.
+ * Iterators will never return `EXIT` events for leaf nodes, which are nodes
+ * of type:
+ *
+ * * CMARK_NODE_HTML
+ * * CMARK_NODE_HRULE
+ * * CMARK_NODE_CODE_BLOCK
+ * * CMARK_NODE_TEXT
+ * * CMARK_NODE_SOFTBREAK
+ * * CMARK_NODE_LINEBREAK
+ * * CMARK_NODE_CODE
+ * * CMARK_NODE_INLINE_HTML
+ *
+ * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
+ * leaf nodes.
  */
 
-/** Creates a new iterator starting at 'root'.
+/** Creates a new iterator starting at 'root'.  The current node and event
+ * type are undefined until `cmark_iter_next` is called for the first time.
  */
 CMARK_EXPORT
 cmark_iter*
@@ -182,28 +194,34 @@ CMARK_EXPORT
 void
 cmark_iter_free(cmark_iter *iter);
 
-/** Resets the iterator so that the current node is 'current' and
-    the event type is 'event_type'.  Use this to resume after destructively
-    modifying the tree structure.
- */
-CMARK_EXPORT
-void
-cmark_iter_reset(cmark_iter *iter, cmark_node *current,
-                 cmark_event_type event_type);
-
-/** Returns the event type (`CMARK_EVENT_ENTER`, `CMARK_EVENT_EXIT`,
- * or `CMARK_EVENT_DONE`) for the next node.
+/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
+ * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
  */
 CMARK_EXPORT
 cmark_event_type
 cmark_iter_next(cmark_iter *iter);
 
-/** Returns the next node in the sequence described above.
+/** Returns the current node.
  */
 CMARK_EXPORT
 cmark_node*
 cmark_iter_get_node(cmark_iter *iter);
 
+/** Returns the current event type.
+ */
+CMARK_EXPORT
+cmark_event_type
+cmark_iter_get_event_type(cmark_iter *iter);
+
+/** Resets the iterator so that the current node is 'current' and
+ * the event type is 'event_type'.  The new current node must be a
+ * descendant of the root node or the root node itself.
+ */
+CMARK_EXPORT
+void
+cmark_iter_reset(cmark_iter *iter, cmark_node *current,
+                 cmark_event_type event_type);
+
 /**
  * ## Accessors
  */
diff --git a/src/iterator.c b/src/iterator.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include <stdlib.h>
 
 #include "config.h"
@@ -5,18 +6,32 @@
 #include "cmark.h"
 #include "iterator.h"
 
+static const int S_leaf_mask =
+	(1 << CMARK_NODE_HTML)        |
+	(1 << CMARK_NODE_HRULE)       |
+	(1 << CMARK_NODE_CODE_BLOCK)  |
+	(1 << CMARK_NODE_TEXT)        |
+	(1 << CMARK_NODE_SOFTBREAK)   |
+	(1 << CMARK_NODE_LINEBREAK)   |
+	(1 << CMARK_NODE_CODE)        |
+	(1 << CMARK_NODE_INLINE_HTML);
+
 cmark_iter*
 cmark_iter_new(cmark_node *root)
 {
+	if (root == NULL) {
+		return NULL;
+	}
 	cmark_iter *iter = (cmark_iter*)malloc(sizeof(cmark_iter));
 	if (iter == NULL) {
 		return NULL;
-	} else {
-		iter->root = root;
-		iter->current = root;
-		iter->event_type = CMARK_EVENT_ENTER;
-		return iter;
 	}
+	iter->root         = root;
+	iter->cur.ev_type  = CMARK_EVENT_NONE;
+	iter->cur.node     = NULL;
+	iter->next.ev_type = CMARK_EVENT_ENTER;
+	iter->next.node    = root;
+	return iter;
 }
 
 void
@@ -25,72 +40,72 @@ cmark_iter_free(cmark_iter *iter)
 	free(iter);
 }
 
-cmark_event_type
-cmark_iter_next(cmark_iter *iter)
+static bool
+S_is_leaf(cmark_node *node)
 {
-	return iter->event_type;
+	return (1 << node->type) & S_leaf_mask;
 }
 
-int S_is_leaf(cmark_node *node)
+cmark_event_type
+cmark_iter_next(cmark_iter *iter)
 {
-	switch (cmark_node_get_type(node)) {
-	case CMARK_NODE_HTML:
-	case CMARK_NODE_HRULE:
-	case CMARK_NODE_CODE_BLOCK:
-	case CMARK_NODE_TEXT:
-	case CMARK_NODE_SOFTBREAK:
-	case CMARK_NODE_LINEBREAK:
-	case CMARK_NODE_CODE:
-	case CMARK_NODE_INLINE_HTML:
-		return 1;
-	default:
-		return 0;
+	cmark_event_type  ev_type = iter->next.ev_type;
+	cmark_node       *node    = iter->next.node;
+
+	iter->cur.ev_type = ev_type;
+	iter->cur.node    = node;
+
+	if (ev_type == CMARK_EVENT_DONE) {
+		return ev_type;
 	}
+
+	/* roll forward to next item, setting both fields */
+	if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
+		if (node->first_child == NULL) {
+			/* stay on this node but exit */
+			iter->next.ev_type = CMARK_EVENT_EXIT;
+		} else {
+			iter->next.ev_type = CMARK_EVENT_ENTER;
+			iter->next.node    = node->first_child;
+		}
+	} else if (node == iter->root) {
+		/* don't move past root */
+		iter->next.ev_type = CMARK_EVENT_DONE;
+		iter->next.node    = NULL;
+	} else if (node->next) {
+		iter->next.ev_type = CMARK_EVENT_ENTER;
+		iter->next.node    = node->next;
+	} else if (node->parent) {
+		iter->next.ev_type = CMARK_EVENT_EXIT;
+		iter->next.node    = node->parent;
+	} else {
+		assert(false);
+		iter->next.ev_type = CMARK_EVENT_DONE;
+		iter->next.node    = NULL;
+	}
+
+	return ev_type;
 }
 
 void
 cmark_iter_reset(cmark_iter *iter, cmark_node *current,
                  cmark_event_type event_type)
 {
-	iter->event_type = event_type;
-	iter->current = current;
+	iter->next.ev_type = event_type;
+	iter->next.node    = current;
+	cmark_iter_next(iter);
 }
 
 cmark_node*
 cmark_iter_get_node(cmark_iter *iter)
 {
-	/* we'll return current */
-	cmark_node *cur = iter->current;
-
-	if (cur == NULL || iter->event_type == CMARK_EVENT_DONE) {
-		return NULL;
-	}
-
-	/* roll forward to next item, setting both fields */
-	if (iter->event_type == CMARK_EVENT_ENTER && !S_is_leaf(cur)) {
-		if (cur->first_child == NULL) {
-			/* stay on this node but exit */
-			iter->event_type = CMARK_EVENT_EXIT;
-		} else {
-			iter->current = cur->first_child;
-			iter->event_type = CMARK_EVENT_ENTER;
-		}
-	} else if (cur == iter->root) {
-		/* don't move past root */
-		iter->event_type = CMARK_EVENT_DONE;
-		iter->current = NULL;
-	} else if (cur->next) {
-		iter->event_type = CMARK_EVENT_ENTER;
-		iter->current = cur->next;
-	} else if (cur->parent) {
-		iter->event_type = CMARK_EVENT_EXIT;
-		iter->current = cur->parent;
-	} else {
-		iter->event_type = CMARK_EVENT_DONE;
-		iter->current = NULL;
-	}
+	return iter->cur.node;
+}
 
-	return cur;
+cmark_event_type
+cmark_iter_get_event_type(cmark_iter *iter)
+{
+	return iter->cur.ev_type;
 }
 
 
diff --git a/src/iterator.h b/src/iterator.h
@@ -6,12 +6,16 @@ extern "C" {
 #endif
 
 #include "cmark.h"
-#include "node.h"
+
+typedef struct {
+	cmark_event_type  ev_type;
+	cmark_node       *node;
+} cmark_iter_state;
 
 struct cmark_iter {
-	cmark_node       *current;
 	cmark_node       *root;
-	cmark_event_type event_type;
+	cmark_iter_state  cur;
+	cmark_iter_state  next;
 };
 
 #ifdef __cplusplus