cmark

My personal build of CMark ✏️

Commit
ee82af08672810bc03769b2fb5a5767627d30d36
Parent
657061c630cf493aa74ce4a09aebc869813b6916
Author
John MacFarlane <jgm@berkeley.edu>
Date

Merge branch 'master' of https://github.com/btrask/cmark into btrask-master

Conflicts: src/blocks.c

Diffstat

5 files changed, 337 insertions, 175 deletions

Status File Name N° Changes Insertions Deletions
Modified src/blocks.c 95 69 26
Modified src/inlines.c 13 7 6
Modified src/scanners.c 360 237 123
Modified src/scanners.re 26 13 13
Modified test/spec_tests.py 18 11 7
diff --git a/src/blocks.c b/src/blocks.c
@@ -89,6 +89,7 @@ static bool is_blank(cmark_strbuf *s, int offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
+		case '\r':
 		case '\n':
 			return true;
 		case ' ':
@@ -126,9 +127,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
 static void remove_trailing_blank_lines(cmark_strbuf *ln)
 {
 	int i;
+	unsigned char c;
 
 	for (i = ln->size - 1; i >= 0; --i) {
-		unsigned char c = ln->ptr[i];
+		c = ln->ptr[i];
 
 		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
 			break;
@@ -139,9 +141,16 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
 		return;
 	}
 
-	i = cmark_strbuf_strchr(ln, '\n', i);
-	if (i >= 0)
+
+	for(; i < ln->size; ++i) {
+		c = ln->ptr[i];
+
+		if (c != '\r' && c != '\n')
+			continue;
+
 		cmark_strbuf_truncate(ln, i);
+		break;
+	}
 }
 
 // Check to see if a node ends with a blank line, descending
@@ -185,7 +194,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
 static cmark_node*
 finalize(cmark_parser *parser, cmark_node* b)
 {
-	int firstlinelen;
 	int pos;
 	cmark_node* item;
 	cmark_node* subitem;
@@ -204,9 +212,11 @@ finalize(cmark_parser *parser, cmark_node* b)
 	           (b->type == NODE_CODE_BLOCK && b->as.code.fenced) ||
 	           (b->type == NODE_HEADER && b->as.header.setext)) {
 		b->end_line = parser->line_number;
-		b->end_column = parser->curline->size -
-		                (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
-		                 1 : 0);
+		b->end_column = parser->curline->size;
+		if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n')
+			b->end_column--;
+		if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r')
+			b->end_column--;
 	} else {
 		b->end_line = parser->line_number - 1;
 		b->end_column = parser->last_line_length;
@@ -232,19 +242,28 @@ finalize(cmark_parser *parser, cmark_node* b)
 		} else {
 
 			// first line of contents becomes info
-			firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0);
+			for (pos = 0; pos < b->string_content.size; ++pos) {
+				if (b->string_content.ptr[pos] == '\r' ||
+				    b->string_content.ptr[pos] == '\n')
+					break;
+			}
+			assert(pos < b->string_content.size);
 
 			cmark_strbuf tmp = GH_BUF_INIT;
 			houdini_unescape_html_f(
 			    &tmp,
 			    b->string_content.ptr,
-			    firstlinelen
+			    pos
 			);
 			cmark_strbuf_trim(&tmp);
 			cmark_strbuf_unescape(&tmp);
 			b->as.code.info = cmark_chunk_buf_detach(&tmp);
 
-			cmark_strbuf_drop(&b->string_content, firstlinelen + 1);
+			if (b->string_content.ptr[pos] == '\r')
+				pos += 1;
+			if (b->string_content.ptr[pos] == '\n')
+				pos += 1;
+			cmark_strbuf_drop(&b->string_content, pos);
 		}
 		b->as.code.literal = cmark_chunk_buf_detach(&b->string_content);
 		break;
@@ -467,13 +486,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 	const unsigned char *end = buffer + len;
 
 	while (buffer < end) {
-		const unsigned char *eol
-		    = (const unsigned char *)memchr(buffer, '\n',
-		                                    end - buffer);
+		const unsigned char *eol;
 		size_t line_len;
 
+		for (eol = buffer; eol < end; ++eol) {
+			if (*eol == '\r' || *eol == '\n')
+				break;
+		}
+		if (eol >= end)
+			eol = NULL;
+
 		if (eol) {
-			line_len = eol + 1 - buffer;
+			if (eol < end && *eol == '\r')
+				eol++;
+			if (eol < end && *eol == '\n')
+				eol++;
+			line_len = eol - buffer;
 		} else if (eof) {
 			line_len = end - buffer;
 		} else {
@@ -534,9 +562,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 	// Add a newline to the end if not present:
 	// TODO this breaks abstraction:
-	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
-		cmark_strbuf_putc(parser->curline, '\n');
+	if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\n') {
+		cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
+	}
+	if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\r') {
+		cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
 	}
+	cmark_strbuf_putc(parser->curline, '\n');
 	input.data = parser->curline->ptr;
 	input.len = parser->curline->size;
 
@@ -557,7 +589,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		}
 
 		indent = first_nonspace - offset;
-		blank = peek_at(&input, first_nonspace) == '\n';
+		blank = peek_at(&input, first_nonspace) == '\n' ||
+		        peek_at(&input, first_nonspace) == '\r';
 
 		if (container->type == NODE_BLOCK_QUOTE) {
 			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
@@ -659,7 +692,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 		indent = first_nonspace - offset;
 		indented = indent >= CODE_INDENT;
-		blank = peek_at(&input, first_nonspace) == '\n';
+		blank = peek_at(&input, first_nonspace) == '\n' ||
+		        peek_at(&input, first_nonspace) == '\r';
 
 		if (indented && !maybe_lazy && !blank) {
 				offset += CODE_INDENT;
@@ -712,8 +746,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			   container->type == NODE_PARAGRAPH &&
 		           (lev = scan_setext_header_line(&input, first_nonspace)) &&
 		           // check that there is only one line in the paragraph:
-		           cmark_strbuf_strrchr(&container->string_content, '\n',
-		                                cmark_strbuf_len(&container->string_content) - 2) < 0) {
+		           (cmark_strbuf_strrchr(&container->string_content, '\n',
+		                                 cmark_strbuf_len(&container->string_content) - 2) < 0 &&
+		           cmark_strbuf_strrchr(&container->string_content, '\r',
+		                                cmark_strbuf_len(&container->string_content) - 2) < 0)) {
 
 			container->type = NODE_HEADER;
 			container->as.header.level = lev;
@@ -739,7 +775,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 				i++;
 			}
 			// i = number of spaces after marker, up to 5
-			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
+			if (i >= 5 || i < 1 ||
+			    peek_at(&input, offset) == '\n' ||
+			    peek_at(&input, offset) == '\r') {
 				data->padding = matched + 1;
 				if (i > 0) {
 					offset += 1;
@@ -787,7 +825,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		first_nonspace++;
 
 	indent = first_nonspace - offset;
-	blank = peek_at(&input, first_nonspace) == '\n';
+	blank = peek_at(&input, first_nonspace) == '\n' ||
+	        peek_at(&input, first_nonspace) == '\r';
 
 	if (blank && container->last_child) {
 		container->last_child->last_line_blank = true;
@@ -855,10 +894,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		parser->current = container;
 	}
 finished:
-	parser->last_line_length = parser->curline->size -
-	                           (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
-	                            1 : 0);
-	;
+	parser->last_line_length = parser->curline->size;
+	if (parser->last_line_length &&
+	    parser->curline->ptr[parser->last_line_length - 1] == '\n')
+		parser->last_line_length--;
+	if (parser->last_line_length &&
+	    parser->curline->ptr[parser->last_line_length - 1] == '\r')
+		parser->last_line_length--;
+
 	cmark_strbuf_clear(parser->curline);
 
 }
diff --git a/src/inlines.c b/src/inlines.c
@@ -583,7 +583,7 @@ static cmark_node* handle_backslash(subject *subj)
 	if (cmark_ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
 		advance(subj);
 		return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
-	} else if (nextchar == '\n') {
+	} else if (nextchar == '\r' || nextchar == '\n') {
 		advance(subj);
 		return make_linebreak();
 	} else {
@@ -939,9 +939,9 @@ static cmark_node* handle_newline(subject *subj)
 
 static int subject_find_special_char(subject *subj, int options)
 {
-	// "\n\\`&_*[]<!"
+	// "\r\n\\`&_*[]<!"
 	static const int8_t SPECIAL_CHARS[256] = {
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -1006,6 +1006,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
 		return 0;
 	}
 	switch(c) {
+	case '\r':
 	case '\n':
 		new_inl = handle_newline(subj);
 		break;
@@ -1057,7 +1058,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
 		subj->pos = endpos;
 
 		// if we're at a newline, strip trailing spaces.
-		if (peek_char(subj) == '\n') {
+		if (peek_char(subj) == '\r' || peek_char(subj) == '\n') {
 			cmark_chunk_rtrim(&contents);
 		}
 
@@ -1087,7 +1088,7 @@ static void spnl(subject* subj)
 	bool seen_newline = false;
 	while (peek_char(subj) == ' ' ||
 	       (!seen_newline &&
-	        (seen_newline = peek_char(subj) == '\n'))) {
+	        (seen_newline = peek_char(subj) == '\r' || peek_char(subj) == '\n'))) {
 		advance(subj);
 	}
 }
@@ -1145,7 +1146,7 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
 	while (peek_char(&subj) == ' ') {
 		advance(&subj);
 	}
-	if (peek_char(&subj) == '\n') {
+	if (peek_char(&subj) == '\r' || peek_char(&subj) == '\n') {
 		advance(&subj);
 	} else if (peek_char(&subj) != 0) {
 		return 0;
diff --git a/src/scanners.c b/src/scanners.c
@@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p)
 	unsigned int yyaccept = 0;
 	static const unsigned char yybm[] = {
 		  0,  64,  64,  64,  64,  64,  64,  64, 
-		 64,  64,   8,  64,  64,  64,  64,  64, 
+		 64,  64,   8,  64,  64,   8,  64,  64, 
 		 64,  64,  64,  64,  64,  64,  64,  64, 
 		 64,  64,  64,  64,  64,  64,  64,  64, 
 		 72, 112, 112, 112, 112, 112, 112, 112, 
@@ -13286,21 +13286,23 @@ int _scan_link_url(const unsigned char *p)
 		112, 112, 112, 112, 112, 112, 112, 112, 
 	};
 	yych = *p;
-	if (yych <= '(') {
-		if (yych <= 0x1F) {
+	if (yych <= '\'') {
+		if (yych <= '\f') {
 			if (yych == '\n') goto yy1589;
 			goto yy1597;
 		} else {
+			if (yych <= '\r') goto yy1591;
+			if (yych <= 0x1F) goto yy1597;
 			if (yych <= ' ') goto yy1591;
-			if (yych <= '\'') goto yy1593;
-			goto yy1596;
+			goto yy1593;
 		}
 	} else {
-		if (yych <= '<') {
+		if (yych <= ';') {
+			if (yych <= '(') goto yy1596;
 			if (yych <= ')') goto yy1597;
-			if (yych <= ';') goto yy1593;
-			goto yy1592;
+			goto yy1593;
 		} else {
+			if (yych <= '<') goto yy1592;
 			if (yych == '\\') goto yy1594;
 			goto yy1593;
 		}
@@ -13339,13 +13341,18 @@ yy1592:
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1588;
-		if (yych == '\n') goto yy1588;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1588;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1588;
+			if (yych <= '\f') goto yy1612;
+			goto yy1588;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13459,13 +13466,18 @@ yy1605:
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1588;
-		if (yych == '\n') goto yy1588;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1588;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1588;
+			if (yych <= '\f') goto yy1612;
+			goto yy1588;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13482,44 +13494,53 @@ yy1608:
 yy1609:
 	++p;
 	yych = *p;
-	if (yych <= '>') {
-		if (yych <= ' ') {
+	if (yych <= '=') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych == '\n') goto yy1600;
 			goto yy1612;
 		} else {
-			if (yych <= '/') goto yy1605;
-			if (yych <= '9') goto yy1612;
-			if (yych <= '=') goto yy1605;
-			goto yy1622;
+			if (yych <= ' ') {
+				if (yych <= '\r') goto yy1600;
+				goto yy1612;
+			} else {
+				if (yych <= '/') goto yy1605;
+				if (yych <= '9') goto yy1612;
+				goto yy1605;
+			}
 		}
 	} else {
-		if (yych <= '\\') {
+		if (yych <= '[') {
+			if (yych <= '>') goto yy1622;
 			if (yych <= '@') goto yy1605;
 			if (yych <= 'Z') goto yy1612;
-			if (yych <= '[') goto yy1605;
-			goto yy1623;
+			goto yy1605;
 		} else {
-			if (yych <= '`') goto yy1605;
-			if (yych <= 'z') goto yy1612;
-			if (yych <= '~') goto yy1605;
-			goto yy1612;
+			if (yych <= '`') {
+				if (yych <= '\\') goto yy1623;
+				goto yy1605;
+			} else {
+				if (yych <= 'z') goto yy1612;
+				if (yych <= '~') goto yy1605;
+				goto yy1612;
+			}
 		}
 	}
 yy1610:
 	++p;
 	yych = *p;
-	if (yych <= ')') {
-		if (yych <= '\n') {
+	if (yych <= '(') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
-			if (yych >= '\n') goto yy1600;
+			if (yych == '\n') goto yy1600;
 		} else {
+			if (yych <= '\r') goto yy1600;
 			if (yych <= ' ') goto yy1612;
 			if (yych <= '\'') goto yy1610;
-			if (yych >= ')') goto yy1605;
 		}
 	} else {
 		if (yych <= '=') {
+			if (yych <= ')') goto yy1605;
 			if (yych == '<') goto yy1598;
 			goto yy1610;
 		} else {
@@ -13545,11 +13566,12 @@ yy1615:
 	if (yybm[0+yych] & 128) {
 		goto yy1615;
 	}
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x00) goto yy1600;
-		if (yych <= '\t') goto yy1612;
-		goto yy1600;
+		if (yych == '\n') goto yy1600;
+		goto yy1612;
 	} else {
+		if (yych <= '\r') goto yy1600;
 		if (yych != '>') goto yy1612;
 	}
 	yyaccept = 2;
@@ -13570,46 +13592,56 @@ yy1618:
 yy1619:
 	++p;
 	yych = *p;
-	if (yych <= '>') {
-		if (yych <= ' ') {
+	if (yych <= '=') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych == '\n') goto yy1600;
 			goto yy1612;
 		} else {
-			if (yych <= '/') goto yy1610;
-			if (yych <= '9') goto yy1612;
-			if (yych <= '=') goto yy1610;
+			if (yych <= ' ') {
+				if (yych <= '\r') goto yy1600;
+				goto yy1612;
+			} else {
+				if (yych <= '/') goto yy1610;
+				if (yych <= '9') goto yy1612;
+				goto yy1610;
+			}
 		}
 	} else {
-		if (yych <= '\\') {
+		if (yych <= '[') {
+			if (yych <= '>') goto yy1620;
 			if (yych <= '@') goto yy1610;
 			if (yych <= 'Z') goto yy1612;
-			if (yych <= '[') goto yy1610;
-			goto yy1621;
+			goto yy1610;
 		} else {
-			if (yych <= '`') goto yy1610;
-			if (yych <= 'z') goto yy1612;
-			if (yych <= '~') goto yy1610;
-			goto yy1612;
+			if (yych <= '`') {
+				if (yych <= '\\') goto yy1621;
+				goto yy1610;
+			} else {
+				if (yych <= 'z') goto yy1612;
+				if (yych <= '~') goto yy1610;
+				goto yy1612;
+			}
 		}
 	}
 yy1620:
 	yyaccept = 2;
 	marker = ++p;
 	yych = *p;
-	if (yych <= ')') {
-		if (yych <= '\n') {
+	if (yych <= '(') {
+		if (yych <= '\f') {
 			if (yych <= 0x00) goto yy1608;
-			if (yych <= '\t') goto yy1612;
-			goto yy1608;
+			if (yych == '\n') goto yy1608;
+			goto yy1612;
 		} else {
+			if (yych <= '\r') goto yy1608;
 			if (yych <= ' ') goto yy1612;
 			if (yych <= '\'') goto yy1610;
-			if (yych <= '(') goto yy1612;
-			goto yy1605;
+			goto yy1612;
 		}
 	} else {
 		if (yych <= '=') {
+			if (yych <= ')') goto yy1605;
 			if (yych == '<') goto yy1598;
 			goto yy1610;
 		} else {
@@ -13621,22 +13653,23 @@ yy1620:
 yy1621:
 	++p;
 	yych = *p;
-	if (yych <= '(') {
+	if (yych <= '\'') {
 		if (yych <= '\n') {
 			if (yych <= 0x00) goto yy1600;
 			if (yych <= '\t') goto yy1612;
 			goto yy1600;
 		} else {
+			if (yych == '\r') goto yy1600;
 			if (yych <= ' ') goto yy1612;
-			if (yych <= '\'') goto yy1610;
-			goto yy1612;
+			goto yy1610;
 		}
 	} else {
-		if (yych <= '>') {
+		if (yych <= '=') {
+			if (yych <= '(') goto yy1612;
 			if (yych <= ')') goto yy1605;
-			if (yych <= '=') goto yy1610;
-			goto yy1620;
+			goto yy1610;
 		} else {
+			if (yych <= '>') goto yy1620;
 			if (yych == '\\') goto yy1619;
 			goto yy1610;
 		}
@@ -13648,13 +13681,18 @@ yy1622:
 	if (yybm[0+yych] & 32) {
 		goto yy1605;
 	}
-	if (yych <= '\'') {
-		if (yych <= 0x00) goto yy1608;
-		if (yych == '\n') goto yy1608;
-		goto yy1612;
+	if (yych <= '\r') {
+		if (yych <= '\t') {
+			if (yych <= 0x00) goto yy1608;
+			goto yy1612;
+		} else {
+			if (yych <= '\n') goto yy1608;
+			if (yych <= '\f') goto yy1612;
+			goto yy1608;
+		}
 	} else {
 		if (yych <= ')') {
-			if (yych <= '(') goto yy1610;
+			if (yych == '(') goto yy1610;
 			goto yy1612;
 		} else {
 			if (yych <= '=') goto yy1602;
@@ -13666,22 +13704,23 @@ yy1623:
 	yyaccept = 0;
 	marker = ++p;
 	yych = *p;
-	if (yych <= '(') {
+	if (yych <= '\'') {
 		if (yych <= '\n') {
 			if (yych <= 0x00) goto yy1588;
 			if (yych <= '\t') goto yy1612;
 			goto yy1588;
 		} else {
+			if (yych == '\r') goto yy1588;
 			if (yych <= ' ') goto yy1612;
-			if (yych <= '\'') goto yy1605;
-			goto yy1610;
+			goto yy1605;
 		}
 	} else {
-		if (yych <= '>') {
+		if (yych <= '=') {
+			if (yych <= '(') goto yy1610;
 			if (yych <= ')') goto yy1612;
-			if (yych <= '=') goto yy1605;
-			goto yy1622;
+			goto yy1605;
 		} else {
+			if (yych <= '>') goto yy1622;
 			if (yych == '\\') goto yy1609;
 			goto yy1605;
 		}
@@ -14006,9 +14045,14 @@ yy1664:
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych == '#') goto yy1670;
-	goto yy1663;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1663;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych == '#') goto yy1670;
+		goto yy1663;
+	}
 yy1665:
 	yych = *++p;
 	goto yy1663;
@@ -14028,8 +14072,12 @@ yy1670:
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych == '#') goto yy1672;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych == '#') goto yy1672;
+	}
 yy1671:
 	p = marker;
 	goto yy1663;
@@ -14038,31 +14086,47 @@ yy1672:
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	yych = *++p;
 	if (yybm[0+yych] & 128) {
 		goto yy1668;
 	}
-	if (yych == '\n') goto yy1666;
-	if (yych != '#') goto yy1671;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1666;
+		goto yy1671;
+	} else {
+		if (yych <= '\r') goto yy1666;
+		if (yych != '#') goto yy1671;
+	}
 	++p;
 	if (yybm[0+(yych = *p)] & 128) {
 		goto yy1668;
 	}
 	if (yych == '\n') goto yy1666;
+	if (yych == '\r') goto yy1666;
 	goto yy1671;
 }
 
 }
 
-// Match sexext header line.  Return 1 for level-1 header,
+// Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
 int _scan_setext_header_line(const unsigned char *p)
 {
@@ -14119,17 +14183,27 @@ yy1679:
 	if (yybm[0+yych] & 128) {
 		goto yy1693;
 	}
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1678;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1691;
+		goto yy1678;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1678;
+	}
 yy1680:
 	yych = *(marker = ++p);
 	if (yybm[0+yych] & 32) {
 		goto yy1682;
 	}
-	if (yych == '\n') goto yy1685;
-	if (yych == '-') goto yy1687;
-	goto yy1678;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1685;
+		goto yy1678;
+	} else {
+		if (yych <= '\r') goto yy1685;
+		if (yych == '-') goto yy1687;
+		goto yy1678;
+	}
 yy1681:
 	yych = *++p;
 	goto yy1678;
@@ -14140,6 +14214,7 @@ yy1682:
 		goto yy1682;
 	}
 	if (yych == '\n') goto yy1685;
+	if (yych == '\r') goto yy1685;
 yy1684:
 	p = marker;
 	goto yy1678;
@@ -14152,15 +14227,24 @@ yy1687:
 	if (yybm[0+yych] & 32) {
 		goto yy1682;
 	}
-	if (yych == '\n') goto yy1685;
-	if (yych == '-') goto yy1687;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1685;
+		goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1685;
+		if (yych == '-') goto yy1687;
+		goto yy1684;
+	}
 yy1689:
 	++p;
 	yych = *p;
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych != '\n') goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1684;
+	}
 yy1691:
 	++p;
 	{ return 1; }
@@ -14170,9 +14254,14 @@ yy1693:
 	if (yybm[0+yych] & 128) {
 		goto yy1693;
 	}
-	if (yych == '\n') goto yy1691;
-	if (yych == ' ') goto yy1689;
-	goto yy1684;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1691;
+		goto yy1684;
+	} else {
+		if (yych <= '\r') goto yy1691;
+		if (yych == ' ') goto yy1689;
+		goto yy1684;
+	}
 }
 
 }
@@ -14278,17 +14367,21 @@ yy1707:
 	if (yybm[0+yych] & 16) {
 		goto yy1707;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1709;
-	if (yych <= '\n') goto yy1711;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1711;
+	} else {
+		if (yych == '\r') goto yy1711;
+		goto yy1704;
+	}
 yy1709:
 	++p;
 	yych = *p;
 	if (yybm[0+yych] & 32) {
 		goto yy1709;
 	}
-	if (yych != '\n') goto yy1704;
+	if (yych == '\n') goto yy1711;
+	if (yych != '\r') goto yy1704;
 yy1711:
 	++p;
 	{ return (p - start); }
@@ -14308,17 +14401,22 @@ yy1717:
 	if (yybm[0+yych] & 64) {
 		goto yy1717;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1719;
-	if (yych <= '\n') goto yy1721;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1721;
+	} else {
+		if (yych == '\r') goto yy1721;
+		goto yy1704;
+	}
 yy1719:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1704;
 		if (yych <= '\t') goto yy1719;
+		if (yych >= '\v') goto yy1704;
 	} else {
+		if (yych <= '\r') goto yy1721;
 		if (yych == ' ') goto yy1719;
 		goto yy1704;
 	}
@@ -14341,17 +14439,22 @@ yy1727:
 	if (yybm[0+yych] & 128) {
 		goto yy1727;
 	}
-	if (yych <= 0x08) goto yy1704;
-	if (yych <= '\t') goto yy1729;
-	if (yych <= '\n') goto yy1731;
-	goto yy1704;
+	if (yych <= '\n') {
+		if (yych <= 0x08) goto yy1704;
+		if (yych >= '\n') goto yy1731;
+	} else {
+		if (yych == '\r') goto yy1731;
+		goto yy1704;
+	}
 yy1729:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1704;
 		if (yych <= '\t') goto yy1729;
+		if (yych >= '\v') goto yy1704;
 	} else {
+		if (yych <= '\r') goto yy1731;
 		if (yych == ' ') goto yy1729;
 		goto yy1704;
 	}
@@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p)
 	unsigned char yych;
 	static const unsigned char yybm[] = {
 		  0, 160, 160, 160, 160, 160, 160, 160, 
-		160, 160,   0, 160, 160, 160, 160, 160, 
+		160, 160,   0, 160, 160,   0, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
 		160, 160, 160, 160, 160, 160, 160, 160, 
@@ -14565,16 +14668,22 @@ yy1762:
 	if (yybm[0+yych] & 64) {
 		goto yy1764;
 	}
-	if (yych == '\n') goto yy1766;
-	if (yych == '~') goto yy1762;
-	goto yy1761;
+	if (yych <= '\f') {
+		if (yych == '\n') goto yy1766;
+		goto yy1761;
+	} else {
+		if (yych <= '\r') goto yy1766;
+		if (yych == '~') goto yy1762;
+		goto yy1761;
+	}
 yy1764:
 	++p;
 	yych = *p;
 	if (yybm[0+yych] & 64) {
 		goto yy1764;
 	}
-	if (yych != '\n') goto yy1761;
+	if (yych == '\n') goto yy1766;
+	if (yych != '\r') goto yy1761;
 yy1766:
 	++p;
 	p = marker;
@@ -14592,19 +14701,24 @@ yy1769:
 	if (yybm[0+yych] & 128) {
 		goto yy1769;
 	}
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1761;
-		if (yych >= '\n') goto yy1773;
+		if (yych <= '\t') goto yy1771;
+		if (yych <= '\n') goto yy1773;
+		goto yy1761;
 	} else {
+		if (yych <= '\r') goto yy1773;
 		if (yych != ' ') goto yy1761;
 	}
 yy1771:
 	++p;
 	yych = *p;
-	if (yych <= '\n') {
+	if (yych <= '\f') {
 		if (yych <= 0x08) goto yy1761;
 		if (yych <= '\t') goto yy1771;
+		if (yych >= '\v') goto yy1761;
 	} else {
+		if (yych <= '\r') goto yy1773;
 		if (yych == ' ') goto yy1771;
 		goto yy1761;
 	}
diff --git a/src/scanners.re b/src/scanners.re
@@ -140,8 +140,8 @@ int _scan_link_url(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
-  [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
+  [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
   .? { return 0; }
 */
 }
@@ -177,19 +177,19 @@ int _scan_atx_header_start(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [#]{1,6} ([ ]+|[\n])  { return (p - start); }
+  [#]{1,6} ([ ]+|[\r\n])  { return (p - start); }
   .? { return 0; }
 */
 }
 
-// Match sexext header line.  Return 1 for level-1 header,
+// Match setext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
 int _scan_setext_header_line(const unsigned char *p)
 {
   const unsigned char *marker = NULL;
 /*!re2c
-  [=]+ [ ]* [\n] { return 1; }
-  [-]+ [ ]* [\n] { return 2; }
+  [=]+ [ ]* [\r\n] { return 1; }
+  [-]+ [ ]* [\r\n] { return 2; }
   .? { return 0; }
 */
 }
@@ -202,9 +202,9 @@ int _scan_hrule(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
-  ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
-  ([-][ ]*){3,} [ \t]* [\n] { return (p - start); }
+  ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+  ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+  ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
   .? { return 0; }
 */
 }
@@ -215,8 +215,8 @@ int _scan_open_code_fence(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
-  [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
+  [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); }
+  [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); }
   .?                        { return 0; }
 */
 }
@@ -227,8 +227,8 @@ int _scan_close_code_fence(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  [`]{3,} / [ \t]*[\n] { return (p - start); }
-  [~]{3,} / [ \t]*[\n] { return (p - start); }
+  [`]{3,} / [ \t]*[\r\n] { return (p - start); }
+  [~]{3,} / [ \t]*[\r\n] { return (p - start); }
   .? { return 0; }
 */
 }
diff --git a/test/spec_tests.py b/test/spec_tests.py
@@ -36,7 +36,7 @@ def print_test_header(headertext, example_number, start_line, end_line):
     print("Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext))
 
 def do_test(test, normalize, result_counts):
-    [retcode, actual_html, err] = cmark.to_html(test['markdown'])
+    [retcode, actual_html, err] = cmark.to_html(re.sub(r"\n", "\r\n", test['markdown']))
     if retcode == 0:
         expected_html = test['html']
         unicode_error = None
@@ -52,17 +52,21 @@ def do_test(test, normalize, result_counts):
             result_counts['pass'] += 1
         else:
             print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
-            sys.stdout.write(test['markdown'])
+            print("Orig: "+repr(test['markdown']))
+            print("Conv: "+repr(re.sub(r"\n", "\r\n", test['markdown'])))
+#            sys.stdout.write(test['markdown'])
             if unicode_error:
                 print("Unicode error: " + str(unicode_error))
                 print("Expected: " + repr(expected_html))
                 print("Got:      " + repr(actual_html))
             else:
-                expected_html_lines = expected_html.splitlines(True)
-                actual_html_lines = actual_html.splitlines(True)
-                for diffline in unified_diff(expected_html_lines, actual_html_lines,
-                                "expected HTML", "actual HTML"):
-                    sys.stdout.write(diffline)
+                print("Expected: " + repr(expected_html))
+                print("Got:      " + repr(actual_html))
+#                expected_html_lines = expected_html.splitlines(True)
+#                actual_html_lines = actual_html.splitlines(True)
+#                for diffline in unified_diff(expected_html_lines, actual_html_lines,
+#                                "expected HTML", "actual HTML"):
+#                    sys.stdout.write(diffline)
             sys.stdout.write('\n')
             result_counts['fail'] += 1
     else: