cmark

My personal build of CMark ✏️

Commit
359fb5b47365abaebb1b76ae52aeb27efdd39ae7
Parent
7cb92c313c31fc808dded8f476f956447e912fe3
Author
John MacFarlane <jgm@berkeley.edu>
Date

Handle buffer split across a CRLF line ending (closes #117).

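Adds an internal field, last_buffer_ended_with_cr, to the parser struct to record whether the previously fed buffer ended with a CR, so that a NL at the start of the next buffer can be skipped.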

Diffstat

2 files changed, 11 insertions, 1 deletion

Status File Name N° Changes Insertions Deletions
Modified src/blocks.c 11 10 1
Modified src/parser.h 1 1 0
diff --git a/src/blocks.c b/src/blocks.c
@@ -90,6 +90,7 @@ cmark_parser *cmark_parser_new(int options) {
   parser->last_line_length = 0;
   parser->linebuf = buf;
   parser->options = options;
+  parser->last_buffer_ended_with_cr = false;
 
   return parser;
 }
@@ -506,6 +507,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   const unsigned char *end = buffer + len;
   static const uint8_t repl[] = {239, 191, 189};
 
+  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
+    // skip NL if last buffer ended with CR ; see #117
+    buffer++;
+  }
+  parser->last_buffer_ended_with_cr = false;
   while (buffer < end) {
     const unsigned char *eol;
     bufsize_t chunk_len;
@@ -546,8 +552,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
 
     buffer += chunk_len;
     // skip over line ending characters:
-    if (buffer < end && *buffer == '\r')
+    if (buffer < end && *buffer == '\r') {
       buffer++;
+      if (buffer == end)
+	parser->last_buffer_ended_with_cr = true;
+    }
     if (buffer < end && *buffer == '\n')
       buffer++;
   }
diff --git a/src/parser.h b/src/parser.h
@@ -27,6 +27,7 @@ struct cmark_parser {
   bufsize_t last_line_length;
   cmark_strbuf *linebuf;
   int options;
+  bool last_buffer_ended_with_cr;
 };
 
 #ifdef __cplusplus