cmark

My personal build of CMark ✏️

Commit: 8f523b0149f250f733d80357320f92ffbe2a8b8f
Parent: 694fa11266741aa061477aaca627e0445ba20723
Author: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 Nov 2014 23:07:28 -0800

Expose lower-level parsing API.

The new functions cmark_new_doc_parser, cmark_free_doc_parser, cmark_process_line, and cmark_finish allow you to feed lines one by one (possibly from several files) to the parser and call finish when you're done.

This is now used in main for multiple files.

Diffstat

5 files changed, 262 insertions, 184 deletions

Status	File Name	N° Changes	Insertions	Deletions
Modified	src/blocks.c	337	181	156
Modified	src/buffer.c	2	1	1
Modified	src/buffer.h	2	2	0
Modified	src/cmark.h	35	32	3
Modified	src/main.c	70	46	24

diff --git a/src/blocks.c b/src/blocks.c
@@ -10,13 +10,10 @@
 #include "inlines.h"
 #include "html/houdini.h"
 #include "buffer.h"
-#include "bench.h"
+#include "debug.h"
 
 #define peek_at(i, n) (i)->data[n]
 
-static void incorporate_line(strbuf *ln, int line_number, node_block** curptr);
-static void finalize(node_block* b, int line_number);
-
 static node_block* make_block(int tag, int start_line, int start_column)
 {
 	node_block* e;
@@ -44,18 +41,42 @@ static node_block* make_document()
 	return e;
 }
 
+cmark_doc_parser *cmark_new_doc_parser()
+{
+	cmark_doc_parser *parser = (cmark_doc_parser*)malloc(sizeof(cmark_doc_parser));
+	node_block *document = make_document();
+	strbuf *line = (strbuf*)malloc(sizeof(strbuf));
+	cmark_strbuf_init(line, 256);
+
+	parser->head = document;
+	parser->current = document;
+	parser->line_number = 0;
+	parser->curline = line;
+
+	return parser;
+}
+
+void cmark_free_doc_parser(cmark_doc_parser *parser)
+{
+	cmark_strbuf_free(parser->curline);
+	free(parser->curline);
+	free(parser);
+}
+
+static void finalize(node_block* b, int line_number);
+
 // Returns true if line has only space characters, else false.
 static bool is_blank(strbuf *s, int offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
-		case '\n':
-			return true;
-		case ' ':
-			offset++;
-			break;
-		default:
-			return false;
+			case '\n':
+				return true;
+			case ' ':
+				offset++;
+				break;
+			default:
+				return false;
 		}
 	}
 
@@ -65,17 +86,17 @@ static bool is_blank(strbuf *s, int offset)
 static inline bool can_contain(int parent_type, int child_type)
 {
 	return ( parent_type == BLOCK_DOCUMENT ||
-		 parent_type == BLOCK_BQUOTE ||
-		 parent_type == BLOCK_LIST_ITEM ||
-		 (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) );
+			parent_type == BLOCK_BQUOTE ||
+			parent_type == BLOCK_LIST_ITEM ||
+			(parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) );
 }
 
 static inline bool accepts_lines(int block_type)
 {
 	return (block_type == BLOCK_PARAGRAPH ||
-		block_type == BLOCK_ATX_HEADER ||
-		block_type == BLOCK_INDENTED_CODE ||
-		block_type == BLOCK_FENCED_CODE);
+			block_type == BLOCK_ATX_HEADER ||
+			block_type == BLOCK_INDENTED_CODE ||
+			block_type == BLOCK_FENCED_CODE);
 }
 
 static void add_line(node_block* node_block, chunk *ch, int offset)
@@ -158,77 +179,77 @@ static void finalize(node_block* b, int line_number)
 	}
 
 	switch (b->tag) {
-	case BLOCK_PARAGRAPH:
-		pos = 0;
-		while (strbuf_at(&b->string_content, 0) == '[' &&
-		       (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
+		case BLOCK_PARAGRAPH:
+			pos = 0;
+			while (strbuf_at(&b->string_content, 0) == '[' &&
+					(pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
 
-			strbuf_drop(&b->string_content, pos);
-		}
-		if (is_blank(&b->string_content, 0)) {
-			b->tag = BLOCK_REFERENCE_DEF;
-		}
-		break;
-
-	case BLOCK_INDENTED_CODE:
-		remove_trailing_blank_lines(&b->string_content);
-		strbuf_putc(&b->string_content, '\n');
-		break;
-
-	case BLOCK_FENCED_CODE:
-		// first line of contents becomes info
-		firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
-
-		strbuf_init(&b->as.code.info, 0);
-		houdini_unescape_html_f(
-			&b->as.code.info,
-			b->string_content.ptr,
-			firstlinelen
-			);
-
-		strbuf_drop(&b->string_content, firstlinelen + 1);
-
-		strbuf_trim(&b->as.code.info);
-		strbuf_unescape(&b->as.code.info);
-		break;
-
-	case BLOCK_LIST: // determine tight/loose status
-		b->as.list.tight = true; // tight by default
-		item = b->children;
-
-		while (item) {
-			// check for non-final non-empty list item ending with blank line:
-			if (item->last_line_blank && item->next) {
-				b->as.list.tight = false;
-				break;
+				strbuf_drop(&b->string_content, pos);
 			}
-			// recurse into children of list item, to see if there are
-			// spaces between them:
-			subitem = item->children;
-			while (subitem) {
-				if (ends_with_blank_line(subitem) &&
-				    (item->next || subitem->next)) {
+			if (is_blank(&b->string_content, 0)) {
+				b->tag = BLOCK_REFERENCE_DEF;
+			}
+			break;
+
+		case BLOCK_INDENTED_CODE:
+			remove_trailing_blank_lines(&b->string_content);
+			strbuf_putc(&b->string_content, '\n');
+			break;
+
+		case BLOCK_FENCED_CODE:
+			// first line of contents becomes info
+			firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
+
+			strbuf_init(&b->as.code.info, 0);
+			houdini_unescape_html_f(
+					&b->as.code.info,
+					b->string_content.ptr,
+					firstlinelen
+					);
+
+			strbuf_drop(&b->string_content, firstlinelen + 1);
+
+			strbuf_trim(&b->as.code.info);
+			strbuf_unescape(&b->as.code.info);
+			break;
+
+		case BLOCK_LIST: // determine tight/loose status
+			b->as.list.tight = true; // tight by default
+			item = b->children;
+
+			while (item) {
+				// check for non-final non-empty list item ending with blank line:
+				if (item->last_line_blank && item->next) {
 					b->as.list.tight = false;
 					break;
 				}
-				subitem = subitem->next;
-			}
-			if (!(b->as.list.tight)) {
-				break;
+				// recurse into children of list item, to see if there are
+				// spaces between them:
+				subitem = item->children;
+				while (subitem) {
+					if (ends_with_blank_line(subitem) &&
+							(item->next || subitem->next)) {
+						b->as.list.tight = false;
+						break;
+					}
+					subitem = subitem->next;
+				}
+				if (!(b->as.list.tight)) {
+					break;
+				}
+				item = item->next;
 			}
-			item = item->next;
-		}
 
-		break;
+			break;
 
-	default:
-		break;
+		default:
+			break;
 	}
 }
 
 // Add a node_block as child of another.  Return pointer to child.
 static node_block* add_child(node_block* parent,
-			     int block_type, int start_line, int start_column)
+		int block_type, int start_line, int start_column)
 {
 	assert(parent);
 
@@ -269,14 +290,14 @@ static void process_inlines(node_block* cur, reference_map *refmap)
 
 	while (cur != NULL) {
 		switch (cur->tag) {
-		case BLOCK_PARAGRAPH:
-		case BLOCK_ATX_HEADER:
-		case BLOCK_SETEXT_HEADER:
-			cur->inline_content = parse_inlines(&cur->string_content, refmap);
-			break;
+			case BLOCK_PARAGRAPH:
+			case BLOCK_ATX_HEADER:
+			case BLOCK_SETEXT_HEADER:
+				cur->inline_content = parse_inlines(&cur->string_content, refmap);
+				break;
 
-		default:
-			break;
+			default:
+				break;
 		}
 
 		if (cur->children) {
@@ -373,14 +394,13 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
 static int lists_match(struct ListData *list_data, struct ListData *item_data)
 {
 	return (list_data->list_type == item_data->list_type &&
-		list_data->delimiter == item_data->delimiter &&
-		// list_data->marker_offset == item_data.marker_offset &&
-		list_data->bullet_char == item_data->bullet_char);
+			list_data->delimiter == item_data->delimiter &&
+			// list_data->marker_offset == item_data.marker_offset &&
+			list_data->bullet_char == item_data->bullet_char);
 }
 
 static node_block *finalize_document(node_block *document, int linenum)
 {
-	start_timer();
 	while (document != document->top) {
 		finalize(document, linenum);
 		document = document->parent;
@@ -388,56 +408,46 @@ static node_block *finalize_document(node_block *document, int linenum)
 
 	finalize(document, linenum);
 	process_inlines(document, document->as.document.refmap);
-	end_timer("finalize_document");
 
 	return document;
 }
 
 extern node_block *cmark_parse_file(FILE *f)
 {
-	strbuf line = GH_BUF_INIT;
 	unsigned char buffer[4096];
-	int linenum = 1;
-	node_block *document = make_document();
+	cmark_doc_parser *parser = cmark_new_doc_parser();
+	size_t offset;
+	node_block *document;
 
-	start_timer();
 	while (fgets((char *)buffer, sizeof(buffer), f)) {
-		utf8proc_detab(&line, buffer, strlen((char *)buffer));
-		incorporate_line(&line, linenum, &document);
-		strbuf_clear(&line);
-		linenum++;
+		offset = strlen((char *)buffer);
+		cmark_process_line(parser, buffer, offset);
 	}
-	end_timer("incorporate_line(s)");
 
-	strbuf_free(&line);
-	return finalize_document(document, linenum);
+	document = cmark_finish(parser);
+	cmark_free_doc_parser(parser);
+	return document;
 }
 
 extern node_block *cmark_parse_document(const unsigned char *buffer, size_t len)
 {
-	strbuf line = GH_BUF_INIT;
 	int linenum = 1;
 	const unsigned char *end = buffer + len;
-	node_block *document = make_document();
+	size_t offset;
+	cmark_doc_parser *parser = cmark_new_doc_parser();
+	node_block *document;
 
 	while (buffer < end) {
 		const unsigned char *eol = memchr(buffer, '\n', end - buffer);
-
-		if (!eol) {
-			utf8proc_detab(&line, buffer, end - buffer);
-			buffer = end;
-		} else {
-			utf8proc_detab(&line, buffer, (eol - buffer) + 1);
-			buffer += (eol - buffer) + 1;
-		}
-
-		incorporate_line(&line, linenum, &document);
-		strbuf_clear(&line);
+		offset = eol ? (eol - buffer) + 1 : eol - buffer;
+		cmark_process_line(parser, buffer, offset);
+		buffer += offset;
 		linenum++;
 	}
 
-	strbuf_free(&line);
-	return finalize_document(document, linenum);
+	document = cmark_finish(parser);
+	cmark_free_doc_parser(parser);
+	return document;
 }
 
 static void chop_trailing_hashtags(chunk *ch)
@@ -458,8 +468,8 @@ static void chop_trailing_hashtags(chunk *ch)
 	}
 }
 
-// Process one line at a time, modifying a node_block.
-static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
+void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer,
+		 size_t bytes)
 {
 	node_block* last_matched_container;
 	int offset = 0;
@@ -469,22 +479,27 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 	struct ListData * data = NULL;
 	bool all_matched = true;
 	node_block* container;
-	node_block* cur = *curptr;
+	node_block* cur = parser->current;
 	bool blank = false;
 	int first_nonspace;
 	int indent;
 	chunk input;
 
+	utf8proc_detab(parser->curline, buffer, bytes);
+
 	// Add a newline to the end if not present:
-	if (line->ptr[line->size - 1] != '\n') {
-		strbuf_putc(line, '\n');
+	// TODO this breaks abstraction:
+	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
+		strbuf_putc(parser->curline, '\n');
 	}
-	input.data = line->ptr;
-	input.len = line->size;
+	input.data = parser->curline->ptr;
+	input.len = parser->curline->size;
 
 	// container starts at the document root.
 	container = cur->top;
 
+	parser->line_number++;
+
 	// for each containing node_block, try to parse the associated line start.
 	// bail out on failure:  container will point to the last matching node_block.
 
@@ -512,7 +527,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 		} else if (container->tag == BLOCK_LIST_ITEM) {
 
 			if (indent >= container->as.list.marker_offset +
-			    container->as.list.padding) {
+					container->as.list.padding) {
 				offset += container->as.list.marker_offset +
 					container->as.list.padding;
 			} else if (blank) {
@@ -532,7 +547,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 			}
 
 		} else if (container->tag == BLOCK_ATX_HEADER ||
-			   container->tag == BLOCK_SETEXT_HEADER) {
+				container->tag == BLOCK_SETEXT_HEADER) {
 
 			// a header can never contain more than one line
 			all_matched = false;
@@ -571,12 +586,12 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 	// check to see if we've hit 2nd blank line, break out of list:
 	if (blank && container->last_line_blank) {
-		break_out_of_lists(&container, line_number);
+		break_out_of_lists(&container, parser->line_number);
 	}
 
 	// unless last matched container is code node_block, try new container starts:
 	while (container->tag != BLOCK_FENCED_CODE && container->tag != BLOCK_INDENTED_CODE &&
-	       container->tag != BLOCK_HTML) {
+			container->tag != BLOCK_HTML) {
 
 		first_nonspace = offset;
 		while (peek_at(&input, first_nonspace) == ' ')
@@ -588,7 +603,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 		if (indent >= CODE_INDENT) {
 			if (cur->tag != BLOCK_PARAGRAPH && !blank) {
 				offset += CODE_INDENT;
-				container = add_child(container, BLOCK_INDENTED_CODE, line_number, offset + 1);
+				container = add_child(container, BLOCK_INDENTED_CODE, parser->line_number, offset + 1);
 			} else { // indent > 4 in lazy line
 				break;
 			}
@@ -599,12 +614,12 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 			// optional following character
 			if (peek_at(&input, offset) == ' ')
 				offset++;
-			container = add_child(container, BLOCK_BQUOTE, line_number, offset + 1);
+			container = add_child(container, BLOCK_BQUOTE, parser->line_number, offset + 1);
 
 		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
 
 			offset = first_nonspace + matched;
-			container = add_child(container, BLOCK_ATX_HEADER, line_number, offset + 1);
+			container = add_child(container, BLOCK_ATX_HEADER, parser->line_number, offset + 1);
 
 			int hashpos = chunk_strchr(&input, '#', first_nonspace);
 			int level = 0;
@@ -617,7 +632,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
 
-			container = add_child(container, BLOCK_FENCED_CODE, line_number, first_nonspace + 1);
+			container = add_child(container, BLOCK_FENCED_CODE, parser->line_number, first_nonspace + 1);
 			container->as.code.fence_char = peek_at(&input, first_nonspace);
 			container->as.code.fence_length = matched;
 			container->as.code.fence_offset = first_nonspace - offset;
@@ -625,25 +640,25 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
 
-			container = add_child(container, BLOCK_HTML, line_number, first_nonspace + 1);
+			container = add_child(container, BLOCK_HTML, parser->line_number, first_nonspace + 1);
 			// note, we don't adjust offset because the tag is part of the text
 
 		} else if (container->tag == BLOCK_PARAGRAPH &&
-			   (lev = scan_setext_header_line(&input, first_nonspace)) &&
-			   // check that there is only one line in the paragraph:
-			   strbuf_strrchr(&container->string_content, '\n',
-					  strbuf_len(&container->string_content) - 2) < 0) {
+				(lev = scan_setext_header_line(&input, first_nonspace)) &&
+				// check that there is only one line in the paragraph:
+				strbuf_strrchr(&container->string_content, '\n',
+					strbuf_len(&container->string_content) - 2) < 0) {
 
 			container->tag = BLOCK_SETEXT_HEADER;
 			container->as.header.level = lev;
 			offset = input.len - 1;
 
 		} else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) &&
-			   (matched = scan_hrule(&input, first_nonspace))) {
+				(matched = scan_hrule(&input, first_nonspace))) {
 
 			// it's only now that we know the line is not part of a setext header:
-			container = add_child(container, BLOCK_HRULE, line_number, first_nonspace + 1);
-			finalize(container, line_number);
+			container = add_child(container, BLOCK_HRULE, parser->line_number, first_nonspace + 1);
+			finalize(container, parser->line_number);
 			container = container->parent;
 			offset = input.len - 1;
 
@@ -672,16 +687,16 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 			data->marker_offset = indent;
 
 			if (container->tag != BLOCK_LIST ||
-			    !lists_match(&container->as.list, data)) {
-				container = add_child(container, BLOCK_LIST, line_number,
-						      first_nonspace + 1);
+					!lists_match(&container->as.list, data)) {
+				container = add_child(container, BLOCK_LIST, parser->line_number,
+						first_nonspace + 1);
 
 				memcpy(&container->as.list, data, sizeof(*data));
 			}
 
 			// add the list item
-			container = add_child(container, BLOCK_LIST_ITEM, line_number,
-					      first_nonspace + 1);
+			container = add_child(container, BLOCK_LIST_ITEM, parser->line_number,
+					first_nonspace + 1);
 			/* TODO: static */
 			memcpy(&container->as.list, data, sizeof(*data));
 			free(data);
@@ -710,11 +725,11 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 	// lists or breaking out of lists.  we also don't set last_line_blank
 	// on an empty list item.
 	container->last_line_blank = (blank &&
-				      container->tag != BLOCK_BQUOTE &&
-				      container->tag != BLOCK_FENCED_CODE &&
-				      !(container->tag == BLOCK_LIST_ITEM &&
-					container->children == NULL &&
-					container->start_line == line_number));
+			container->tag != BLOCK_BQUOTE &&
+			container->tag != BLOCK_FENCED_CODE &&
+			!(container->tag == BLOCK_LIST_ITEM &&
+				container->children == NULL &&
+				container->start_line == parser->line_number));
 
 	node_block *cont = container;
 	while (cont->parent) {
@@ -723,10 +738,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 	}
 
 	if (cur != last_matched_container &&
-	    container == last_matched_container &&
-	    !blank &&
-	    cur->tag == BLOCK_PARAGRAPH &&
-	    strbuf_len(&cur->string_content) > 0) {
+			container == last_matched_container &&
+			!blank &&
+			cur->tag == BLOCK_PARAGRAPH &&
+			strbuf_len(&cur->string_content) > 0) {
 
 		add_line(cur, &input, offset);
 
@@ -734,7 +749,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 		// finalize any blocks that were not matched and set cur to container:
 		while (cur != last_matched_container) {
-			finalize(cur, line_number);
+			finalize(cur, parser->line_number);
 			cur = cur->parent;
 			assert(cur != NULL);
 		}
@@ -747,7 +762,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 			matched = 0;
 
 			if (indent <= 3 &&
-			    peek_at(&input, first_nonspace) == container->as.code.fence_char) {
+					peek_at(&input, first_nonspace) == container->as.code.fence_char) {
 				int fence_len = scan_close_code_fence(&input, first_nonspace);
 				if (fence_len > container->as.code.fence_length)
 					matched = 1;
@@ -755,7 +770,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 			if (matched) {
 				// if closing fence, don't add line to container; instead, close it:
-				finalize(container, line_number);
+				finalize(container, parser->line_number);
 				container = container->parent; // back up to parent
 			} else {
 				add_line(container, &input, offset);
@@ -773,7 +788,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 
 			chop_trailing_hashtags(&input);
 			add_line(container, &input, first_nonspace);
-			finalize(container, line_number);
+			finalize(container, parser->line_number);
 			container = container->parent;
 
 		} else if (accepts_lines(container->tag)) {
@@ -783,13 +798,23 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
 		} else if (container->tag != BLOCK_HRULE && container->tag != BLOCK_SETEXT_HEADER) {
 
 			// create paragraph container for line
-			container = add_child(container, BLOCK_PARAGRAPH, line_number, first_nonspace + 1);
+			container = add_child(container, BLOCK_PARAGRAPH, parser->line_number, first_nonspace + 1);
 			add_line(container, &input, first_nonspace);
 
 		} else {
 			assert(false);
 		}
 
-		*curptr = container;
+		parser->current = container;
 	}
+	strbuf_clear(parser->curline);
+
 }
+
+node_block *cmark_finish(cmark_doc_parser *parser)
+{
+	finalize_document(parser->current, parser->line_number);
+	strbuf_free(parser->curline);
+	return parser->head;
+}
+

diff --git a/src/buffer.c b/src/buffer.c
@@ -205,7 +205,7 @@ int cmark_strbuf_printf(strbuf *buf, const char *format, ...)
 	return r;
 }
 
-static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
+inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf)
 {
 	return (char *)buf->ptr;
 }

diff --git a/src/buffer.h b/src/buffer.h
@@ -81,6 +81,8 @@ void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize);
 CMARK_EXPORT
 unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
 CMARK_EXPORT
+inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf);
+CMARK_EXPORT
 void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf);
 
 #define cmark_strbuf_at(buf, n) ((buf)->ptr[n])

diff --git a/src/cmark.h b/src/cmark.h
@@ -111,6 +111,15 @@ struct cmark_node_block {
 
 typedef struct cmark_node_block cmark_node_block;
 
+struct cmark_doc_parser {
+	cmark_node_block* head;
+	cmark_node_block* current;
+	int line_number;
+	cmark_strbuf *curline;
+};
+
+typedef struct cmark_doc_parser cmark_doc_parser;
+
 CMARK_EXPORT
 void cmark_free_blocks(cmark_node_block *e);
 
@@ -148,6 +157,21 @@ cmark_node_inl* cmark_make_simple(int t);
 #define cmark_make_strong(contents) cmark_make_inlines(INL_STRONG, contents)
 
 CMARK_EXPORT
+cmark_doc_parser *cmark_new_doc_parser();
+
+CMARK_EXPORT
+void cmark_free_doc_parser(cmark_doc_parser *parser);
+
+CMARK_EXPORT
+cmark_node_block *cmark_finish(cmark_doc_parser *parser);
+
+CMARK_EXPORT
+void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer, size_t bytes);
+
+CMARK_EXPORT
+cmark_node_block *cmark_finish(cmark_doc_parser *parser);
+
+CMARK_EXPORT
 cmark_node_block *cmark_parse_document(const unsigned char *buffer, size_t len);
 
 CMARK_EXPORT
@@ -203,9 +227,14 @@ unsigned char *cmark_markdown_to_html(unsigned char *text, int len);
   #define make_softbreak            cmark_make_softbreak
   #define make_emph                 cmark_make_emph
   #define make_strong               cmark_make_strong
-  #define make_simple              cmark_make_simple
-  #define make_simple              cmark_make_simple
-  #define make_simple              cmark_make_simple
+  #define make_simple               cmark_make_simple
+  #define make_simple               cmark_make_simple
+  #define make_simple               cmark_make_simple
+  #define doc_parser                cmark_doc_parser
+  #define new_doc_parser            cmark_new_doc_parser
+  #define free_doc_parser           cmark_free_doc_parser
+  #define process_line              cmark_process_line
+  #define finish                    cmark_finish
 #endif
 
 #ifdef __cplusplus

diff --git a/src/main.c b/src/main.c
@@ -4,6 +4,7 @@
 #include <errno.h>
 #include "cmark.h"
 #include "buffer.h"
+#include "debug.h"
 #include "bench.h"
 
 void print_usage()
@@ -27,23 +28,17 @@ static void print_document(node_block *document, bool ast)
 	}
 }
 
-void parse_and_render(node_block *document, FILE *fp, bool ast)
-{
-	document = cmark_parse_file(fp);
-	start_timer();
-	print_document(document, ast);
-	end_timer("print_document");
-	start_timer();
-	cmark_free_blocks(document);
-	end_timer("free_blocks");
-}
-
 int main(int argc, char *argv[])
 {
 	int i, numfps = 0;
 	bool ast = false;
 	int files[argc];
-	node_block *document = NULL;
+	unsigned char buffer[4096];
+	cmark_doc_parser *parser;
+	size_t offset;
+	node_block *document;
+
+	parser = cmark_new_doc_parser();
 
 	for (i = 1; i < argc; i++) {
 		if (strcmp(argv[i], "--version") == 0) {
@@ -64,22 +59,49 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	if (numfps == 0) {
-		parse_and_render(document, stdin, ast);
-	} else {
-		for (i = 0; i < numfps; i++) {
-			FILE *fp = fopen(argv[files[i]], "r");
+	for (i = 0; i < numfps; i++) {
+		FILE *fp = fopen(argv[files[i]], "r");
+		if (fp == NULL) {
+			fprintf(stderr, "Error opening file %s: %s\n",
+				argv[files[i]], strerror(errno));
+			exit(1);
+		}
 
-			if (fp == NULL) {
-				fprintf(stderr, "Error opening file %s: %s\n",
-					argv[files[i]], strerror(errno));
-				exit(1);
-			}
+		start_timer();
+		while (fgets((char *)buffer, sizeof(buffer), fp)) {
+			offset = strlen((char *)buffer);
+			cmark_process_line(parser, buffer, offset);
+		}
+		end_timer("processing lines");
 
-			parse_and_render(document, fp, ast);
-			fclose(fp);
+		fclose(fp);
+	}
+
+	if (numfps == 0) {
+		/*
+		document = cmark_parse_file(stdin);
+		print_document(document, ast);
+		exit(0);
+		*/
+
+		while (fgets((char *)buffer, sizeof(buffer), stdin)) {
+			offset = strlen((char *)buffer);
+			cmark_process_line(parser, buffer, offset);
 		}
 	}
 
+	start_timer();
+	document = cmark_finish(parser);
+	end_timer("finishing document");
+	cmark_free_doc_parser(parser);
+
+	start_timer();
+	print_document(document, ast);
+	end_timer("print_document");
+
+	start_timer();
+	cmark_free_blocks(document);
+	end_timer("free_blocks");
+
 	return 0;
 }