cmark

My personal build of CMark ✏️

Commit
376f81ab8aa017ab01040e10d393d7682674562d
Parent
982ba5a528111dbf647e90f412498e315fcf432c
Author
John MacFarlane <jgm@berkeley.edu>
Date

Added an options parameter to cmark_parse_document and cmark_parse_file.

The options parameter was also added to some non-exported functions in src/blocks.c and src/inlines.c.

Diffstat

9 files changed, 51 insertions, 27 deletions

Status File Name N° Changes Insertions Deletions
Modified api_test/main.c 10 6 4
Modified man/man3/cmark.3 6 3 3
Modified src/blocks.c 17 9 8
Modified src/cmark.c 2 1 1
Modified src/cmark.h 4 2 2
Modified src/inlines.c 34 27 7
Modified src/inlines.h 2 1 1
Modified src/main.c 2 1 1
Modified src/parser.h 1 1 0
diff --git a/api_test/main.c b/api_test/main.c
@@ -122,7 +122,7 @@ accessors(test_batch_runner *runner)
 		"\n"
 		"[link](url 'title')\n";
 
-	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
+	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT);
 
 	// Getters
 
@@ -308,7 +308,7 @@ node_check(test_batch_runner *runner) {
 
 static void
 iterator(test_batch_runner *runner) {
-	cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10);
+	cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10, CMARK_OPT_DEFAULT);
 	int parnodes = 0;
 	cmark_event_type ev_type;
 	cmark_iter *iter = cmark_iter_new(doc);
@@ -339,7 +339,8 @@ iterator_delete(test_batch_runner *runner) {
 		"\n"
 		"* item1\n"
 		"* item2\n";
-	cmark_node *doc  = cmark_parse_document(md, sizeof(md) - 1);
+	cmark_node *doc  = cmark_parse_document(md, sizeof(md) - 1,
+						CMARK_OPT_DEFAULT);
 	cmark_iter *iter = cmark_iter_new(doc);
 	cmark_event_type ev_type;
 
@@ -544,7 +545,8 @@ render_html(test_batch_runner *runner)
 		"foo *bar*\n"
 		"\n"
 		"paragraph 2\n";
-	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
+	cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1,
+		CMARK_OPT_DEFAULT);
 
 	cmark_node *paragraph = cmark_node_first_child(doc);
 	html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT);
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "February 14, 2015" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "February 15, 2015" "LOCAL" "Library Functions Manual"
 .SH
 NAME
 .PP
@@ -437,14 +437,14 @@ Feeds a string of length \f[I]len\f[] to \f[I]parser\f[]\&.
 Finish parsing and return a pointer to a tree of nodes.
 
 .PP
-\fIcmark_node *\f[] \fBcmark_parse_document\f[](\fIconst char *buffer\f[], \fIsize_t len\f[])
+\fIcmark_node *\f[] \fBcmark_parse_document\f[](\fIconst char *buffer\f[], \fIsize_t len\f[], \fIlong options\f[])
 
 .PP
 Parse a CommonMark document in \f[I]buffer\f[] of length \f[I]len\f[]\&.
 Returns a pointer to a tree of nodes.
 
 .PP
-\fIcmark_node *\f[] \fBcmark_parse_file\f[](\fIFILE *f\f[])
+\fIcmark_node *\f[] \fBcmark_parse_file\f[](\fIFILE *f\f[], \fIlong options\f[])
 
 .PP
 Parse a CommonMark document in file \f[I]f\f[], returning a pointer to
diff --git a/src/blocks.c b/src/blocks.c
@@ -50,7 +50,7 @@ static cmark_node* make_document()
 	return e;
 }
 
-cmark_parser *cmark_parser_new()
+cmark_parser *cmark_parser_new(long options)
 {
 	cmark_parser *parser = (cmark_parser*)malloc(sizeof(cmark_parser));
 	cmark_node *document = make_document();
@@ -66,6 +66,7 @@ cmark_parser *cmark_parser_new()
 	parser->curline = line;
 	parser->last_line_length = 0;
 	parser->linebuf = buf;
+	parser->options = options;
 
 	return parser;
 }
@@ -316,7 +317,7 @@ static cmark_node* add_child(cmark_parser *parser, cmark_node* parent,
 
 // Walk through cmark_node and all children, recursively, parsing
 // string content into inline content where appropriate.
-static void process_inlines(cmark_node* root, cmark_reference_map *refmap)
+static void process_inlines(cmark_node* root, cmark_reference_map *refmap, long options)
 {
 	cmark_iter *iter = cmark_iter_new(root);
 	cmark_node *cur;
@@ -327,7 +328,7 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap)
 		if (ev_type == CMARK_EVENT_ENTER) {
 			if (cur->type == NODE_PARAGRAPH ||
 			    cur->type == NODE_HEADER) {
-				cmark_parse_inlines(cur, refmap);
+				cmark_parse_inlines(cur, refmap, options);
 			}
 		}
 	}
@@ -416,15 +417,15 @@ static cmark_node *finalize_document(cmark_parser *parser)
 	}
 
 	finalize(parser, parser->root);
-	process_inlines(parser->root, parser->refmap);
+	process_inlines(parser->root, parser->refmap, parser->options);
 
 	return parser->root;
 }
 
-cmark_node *cmark_parse_file(FILE *f)
+cmark_node *cmark_parse_file(FILE *f, long options)
 {
 	unsigned char buffer[4096];
-	cmark_parser *parser = cmark_parser_new();
+	cmark_parser *parser = cmark_parser_new(options);
 	size_t bytes;
 	cmark_node *document;
 
@@ -441,9 +442,9 @@ cmark_node *cmark_parse_file(FILE *f)
 	return document;
 }
 
-cmark_node *cmark_parse_document(const char *buffer, size_t len)
+cmark_node *cmark_parse_document(const char *buffer, size_t len, long options)
 {
-	cmark_parser *parser = cmark_parser_new();
+	cmark_parser *parser = cmark_parser_new(options);
 	cmark_node *document;
 
 	S_parser_feed(parser, (const unsigned char *)buffer, len, true);
diff --git a/src/cmark.c b/src/cmark.c
@@ -14,7 +14,7 @@ char *cmark_markdown_to_html(const char *text, int len)
 	cmark_node *doc;
 	char *result;
 
-	doc = cmark_parse_document(text, len);
+	doc = cmark_parse_document(text, len, CMARK_OPT_DEFAULT);
 
 	result = cmark_render_html(doc, CMARK_OPT_DEFAULT);
 	cmark_node_free(doc);
diff --git a/src/cmark.h b/src/cmark.h
@@ -452,13 +452,13 @@ cmark_node *cmark_parser_finish(cmark_parser *parser);
  * Returns a pointer to a tree of nodes.
  */
 CMARK_EXPORT
-cmark_node *cmark_parse_document(const char *buffer, size_t len);
+cmark_node *cmark_parse_document(const char *buffer, size_t len, long options);
 
 /** Parse a CommonMark document in file 'f', returning a pointer to
  * a tree of nodes.
  */
 CMARK_EXPORT
-cmark_node *cmark_parse_file(FILE *f);
+cmark_node *cmark_parse_file(FILE *f, long options);
 
 /**
  * ## Rendering
diff --git a/src/inlines.c b/src/inlines.c
@@ -44,11 +44,11 @@ typedef struct {
 static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
 
-static int parse_inline(subject* subj, cmark_node * parent);
+static int parse_inline(subject* subj, cmark_node * parent, long options);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj);
+static int subject_find_special_char(subject *subj, long options);
 
 static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
@@ -843,7 +843,7 @@ static cmark_node* handle_newline(subject *subj)
 	}
 }
 
-static int subject_find_special_char(subject *subj)
+static int subject_find_special_char(subject *subj, long options)
 {
 	// "\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
@@ -865,6 +865,26 @@ static int subject_find_special_char(subject *subj)
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 	};
 
+	// " ' . -
+	static const char SMART_PUNCT_TABLE[] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	};
+
 	int n = subj->pos + 1;
 
 	while (n < subj->input.len) {
@@ -878,7 +898,7 @@ static int subject_find_special_char(subject *subj)
 
 // Parse an inline, advancing subject, and add it as a child of parent.
 // Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, cmark_node * parent)
+static int parse_inline(subject* subj, cmark_node * parent, long options)
 {
 	cmark_node* new_inl = NULL;
 	cmark_chunk contents;
@@ -927,7 +947,7 @@ static int parse_inline(subject* subj, cmark_node * parent)
 		}
 		break;
 	default:
-		endpos = subject_find_special_char(subj);
+		endpos = subject_find_special_char(subj, options);
 		contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
 		subj->pos = endpos;
 
@@ -946,12 +966,12 @@ static int parse_inline(subject* subj, cmark_node * parent)
 }
 
 // Parse inlines from parent's string_content, adding as children of parent.
-extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, long options)
 {
 	subject subj;
 	subject_from_buf(&subj, &parent->string_content, refmap);
 
-	while (!is_eof(&subj) && parse_inline(&subj, parent)) ;
+	while (!is_eof(&subj) && parse_inline(&subj, parent, options)) ;
 
 	process_emphasis(&subj, NULL);
 }
diff --git a/src/inlines.h b/src/inlines.h
@@ -8,7 +8,7 @@ extern "C" {
 unsigned char *cmark_clean_url(cmark_chunk *url);
 unsigned char *cmark_clean_title(cmark_chunk *title);
 
-void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, long options);
 
 int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap);
 
diff --git a/src/main.c b/src/main.c
@@ -69,7 +69,6 @@ int main(int argc, char *argv[])
 	_setmode(_fileno(stdout), _O_BINARY);
 #endif
 
-	parser = cmark_parser_new();
 	files = (int *)malloc(argc * sizeof(*files));
 
 	for (i = 1; i < argc; i++) {
@@ -117,6 +116,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	parser = cmark_parser_new(options);
 	for (i = 0; i < numfps; i++) {
 		FILE *fp = fopen(argv[files[i]], "r");
 		if (fp == NULL) {
diff --git a/src/parser.h b/src/parser.h
@@ -19,6 +19,7 @@ struct cmark_parser {
 	cmark_strbuf *curline;
 	int last_line_length;
 	cmark_strbuf *linebuf;
+	long options;
 };
 
 #ifdef __cplusplus