cmark
My personal build of CMark ✏️
git clone: git://git.pablopie.xyz/cmark
Commit
4fbe344df43ed7f60a3d3a53981088334cb709fc
Parent
c4c1d59ca29aceb1c5919908ac97d9476264fd96
Author
Nick Wellnhofer <wellnhofer@aevum.de >
Date
Sat, 31 Dec 2016 02:25:16 +0100
Change types for source map offsets (#174)
* Improve strbuf guarantees
Introduce BUFSIZE_MAX macro and make sure that the strbuf implementation
can handle strings up to this size.
* Abort early if document size exceeds internal limit
* Change types for source map offsets
Switch to size_t for the public API, making the public headers
C89-compatible again.
Switch to bufsize_t internally, reducing memory usage and improving
performance on 32-bit platforms.
* Make parser return NULL on internal index overflow
Make S_parser_feed set an error and ignore subsequent chunks if the
total input document size exceeds an internal limit. Make
cmark_parser_finish return NULL if an error was encountered. Add
public API functions to retrieve error code and error message.
strbuf overflow in renderers and OOM in parser or renderers still
cause an abort.
Diffstat
13 files changed, 187 insertions, 44 deletions
diff --git a/api_test/main.c b/api_test/main.c
@@ -5,6 +5,7 @@
#define CMARK_NO_SHORT_NAMES
#include "cmark.h"
#include "node.h"
+#include "parser.h"
#include "harness.h"
#include "cplusplus.h"
@@ -883,6 +884,41 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
cmark_node_free(document);
}
+static cmark_node *S_parse_with_fake_total(bufsize_t fake_total,
+ const char *str,
+ cmark_err_type *err) {
+ cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
+ parser->total_bytes = fake_total;
+ cmark_parser_feed(parser, str, strlen(str));
+ cmark_node *doc = cmark_parser_finish(parser);
+ *err = cmark_parser_get_error(parser);
+ cmark_parser_free(parser);
+ return doc;
+}
+
+static void test_bufsize_overflow(test_batch_runner *runner) {
+ cmark_node *doc;
+ cmark_err_type err;
+
+ doc = S_parse_with_fake_total(BUFSIZE_MAX, "a", &err);
+ OK(runner, doc == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails");
+ INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
+ "parse 1 byte after BUFSIZE_MAX bytes error code");
+
+ doc = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &err);
+ OK(runner, doc == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails");
+ INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
+ "parse 10 byte after BUFSIZE_MAX-9 bytes error code");
+
+ doc = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &err);
+ OK(runner, doc != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes");
+ cmark_node_free(doc);
+
+ doc = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &err);
+ OK(runner, doc != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes");
+ cmark_node_free(doc);
+}
+
int main() {
int retval;
test_batch_runner *runner = test_batch_runner_new();
@@ -908,6 +944,7 @@ int main() {
test_cplusplus(runner);
test_safe(runner);
test_feed_across_line_ending(runner);
+ test_bufsize_overflow(runner);
test_print_summary(runner);
retval = test_ok(runner) ? 0 : 1;
diff --git a/src/blocks.c b/src/blocks.c
@@ -96,6 +96,8 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
parser->current = document;
+ parser->error_code = CMARK_ERR_NONE;
+ parser->total_bytes = 0;
parser->line_number = 0;
parser->line_offset = 0;
parser->offset = 0;
@@ -550,6 +552,20 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
const unsigned char *skipped;
static const uint8_t repl[] = {239, 191, 189};
+ if (parser->error_code) {
+ return;
+ }
+
+ // Limit maximum document size to BUFSIZE_MAX. This makes sure that we
+ // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the
+ // public API doesn't have an error reporting mechanism, so all we can
+ // do is to abort.
+ if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) {
+ parser->error_code = CMARK_ERR_INPUT_TOO_LARGE;
+ return;
+ }
+ parser->total_bytes += (bufsize_t)len;
+
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
// skip NL if last buffer ended with CR ; see #117
buffer++;
@@ -1266,14 +1282,19 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
cmark_strbuf_clear(&parser->linebuf);
}
+ cmark_strbuf_clear(&parser->curline);
+
+ if (parser->error_code) {
+ cmark_node_free(parser->root);
+ return NULL;
+ }
+
finalize_document(parser);
if (parser->options & CMARK_OPT_NORMALIZE) {
cmark_consolidate_text_nodes(parser->root);
}
- cmark_strbuf_free(&parser->curline);
-
#if CMARK_DEBUG_NODES
if (cmark_node_check(parser->root, stderr)) {
abort();
@@ -1287,3 +1308,26 @@ cmark_parser_get_first_source_extent(cmark_parser *parser)
{
return parser->source_map->head;
}
+
+cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
+ return parser->error_code;
+}
+
+const char *cmark_parser_get_error_message(cmark_parser *parser) {
+ const char *str = NULL;
+
+ switch (parser->error_code) {
+ case CMARK_ERR_OUT_OF_MEMORY:
+ str = "Out of memory";
+ break;
+ case CMARK_ERR_INPUT_TOO_LARGE:
+ str = "Input too large";
+ break;
+ default:
+ str = "Unknown error";
+ break;
+ }
+
+ return str;
+}
+
diff --git a/src/buffer.c b/src/buffer.c
@@ -33,6 +33,11 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
}
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
+ // Safety check for overflow.
+ if (add > BUFSIZE_MAX - buf->size) {
+ fprintf(stderr, "Internal cmark_strbuf overflow");
+ abort();
+ }
cmark_strbuf_grow(buf, buf->size + add);
}
@@ -42,18 +47,25 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
if (target_size < buf->asize)
return;
- if (target_size > (bufsize_t)(INT32_MAX / 2))
- abort();
-
- /* Oversize the buffer by 50% to guarantee amortized linear time
- * complexity on append operations. */
- bufsize_t new_size = target_size + target_size / 2;
- new_size += 1;
- new_size = (new_size + 7) & ~7;
+ // Oversize the buffer by 50% to guarantee amortized linear time
+ // complexity on append operations.
+ bufsize_t add = target_size / 2;
+ // Account for terminating NUL byte.
+ add += 1;
+ // Round up to multiple of eight.
+ add = (add + 7) & ~7;
+
+ // Check for overflow but allow an additional NUL byte.
+ if (target_size + add > BUFSIZE_MAX + 1) {
+ target_size = BUFSIZE_MAX + 1;
+ }
+ else {
+ target_size += add;
+ }
buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
- new_size);
- buf->asize = new_size;
+ target_size);
+ buf->asize = target_size;
}
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
diff --git a/src/cmark.h b/src/cmark.h
@@ -2,7 +2,6 @@
#define CMARK_H
#include <stdio.h>
-#include <stdint.h>
#include <cmark_export.h>
#include <cmark_version.h>
@@ -23,7 +22,7 @@ extern "C" {
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
- * to free the returned buffer.
+ * to free the returned buffer. Returns NULL on error.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
@@ -99,6 +98,12 @@ typedef enum {
CMARK_PAREN_DELIM
} cmark_delim_type;
+typedef enum {
+ CMARK_ERR_NONE,
+ CMARK_ERR_OUT_OF_MEMORY,
+ CMARK_ERR_INPUT_TOO_LARGE
+} cmark_err_type;
+
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
@@ -489,12 +494,22 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
+/** Return the error code after a failed operation.
+ */
+CMARK_EXPORT
+cmark_err_type cmark_parser_get_error(cmark_parser *parser);
+
+/** Return the error code after a failed operation.
+ */
+CMARK_EXPORT
+const char *cmark_parser_get_error_message(cmark_parser *parser);
+
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
-/** Finish parsing and return a pointer to a tree of nodes.
+/** Finish parsing and return a pointer to a tree of nodes or NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
@@ -507,7 +522,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
- * when it is no longer needed.
+ * when it is no longer needed. Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
@@ -515,22 +530,23 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
+ * Returns NULL on error.
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
-/**
+/**
* ## Source map API
*/
/* Return the index, in bytes, of the start of this extent */
CMARK_EXPORT
-uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
+size_t cmark_source_extent_get_start(cmark_source_extent *extent);
-/* Return the index, in bytes, of the stop of this extent. This
+/* Return the index, in bytes, of the stop of this extent. This
* index is not included in the extent*/
CMARK_EXPORT
-uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
/* Return the extent immediately following 'extent' */
CMARK_EXPORT
diff --git a/src/inlines.c b/src/inlines.c
@@ -1230,7 +1230,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
// Parse inlines from parent's string_content, adding as children of parent.
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length) {
+ cmark_source_map *source_map, bufsize_t total_length) {
subject subj;
subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
bufsize_t initial_len = subj.input.len;
diff --git a/src/inlines.h b/src/inlines.h
@@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length);
+ cmark_source_map *source_map, bufsize_t total_length);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
cmark_reference_map *refmap,
diff --git a/src/source_map.c b/src/source_map.c
@@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self)
}
cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
assert (start <= stop);
assert (!self->tail || self->tail->stop <= start);
@@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
cmark_source_extent *
source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
- uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
+ bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
{
if (start == stop)
return previous;
@@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
cmark_source_extent *
source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
- cmark_node *node, uint64_t total_length)
+ cmark_node *node, bufsize_t total_length)
{
cmark_source_extent *next_extent = extent->next;
cmark_source_extent *res;
@@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
}
cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
+source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop,
cmark_node *node, cmark_extent_type type)
{
if (!self->next_cursor) {
@@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
return self->cursor;
} else if (start + self->cursor_offset < self->next_cursor->start) {
- uint64_t new_start = self->next_cursor->start - self->cursor_offset;
+ bufsize_t new_start = self->next_cursor->start - self->cursor_offset;
self->cursor = source_map_insert_extent(self,
self->cursor,
@@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) {
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
- printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
- cmark_node_get_type_string(tmp->node),
+ printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop,
+ cmark_node_get_type_string(tmp->node),
cmark_source_extent_get_type_string(tmp),
(void *) tmp->node);
}
}
bool
-source_map_check(cmark_source_map *self, uint64_t total_length)
+source_map_check(cmark_source_map *self, bufsize_t total_length)
{
- uint64_t last_stop = 0;
+ bufsize_t last_stop = 0;
cmark_source_extent *tmp;
for (tmp = self->head; tmp; tmp = tmp->next) {
@@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, uint64_t total_length)
}
-uint64_t
+size_t
cmark_source_extent_get_start(cmark_source_extent *extent)
{
return extent->start;
}
-uint64_t
+size_t
cmark_source_extent_get_stop(cmark_source_extent *extent)
{
return extent->stop;
diff --git a/src/source_map.h b/src/source_map.h
@@ -3,6 +3,7 @@
#include "cmark.h"
#include "config.h"
+#include "buffer.h"
typedef struct _cmark_source_map
{
@@ -10,14 +11,14 @@ typedef struct _cmark_source_map
cmark_source_extent *tail;
cmark_source_extent *cursor;
cmark_source_extent *next_cursor;
- uint64_t cursor_offset;
+ bufsize_t cursor_offset;
cmark_mem *mem;
} cmark_source_map;
struct cmark_source_extent
{
- uint64_t start;
- uint64_t stop;
+ bufsize_t start;
+ bufsize_t stop;
struct cmark_source_extent *next;
struct cmark_source_extent *prev;
cmark_node *node;
@@ -29,20 +30,20 @@ cmark_source_map * source_map_new (cmark_mem *mem);
void source_map_free (cmark_source_map *self);
bool source_map_check (cmark_source_map *self,
- uint64_t total_length);
+ bufsize_t total_length);
void source_map_pretty_print (cmark_source_map *self);
cmark_source_extent * source_map_append_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
cmark_source_extent *previous,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
@@ -52,11 +53,11 @@ cmark_source_extent * source_map_free_extent (cmark_source_map *self,
cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
cmark_source_extent *extent,
cmark_node *node,
- uint64_t total_length);
+ bufsize_t total_length);
cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
+ bufsize_t start,
+ bufsize_t stop,
cmark_node *node,
cmark_extent_type type);
diff --git a/test/cmark.py b/test/cmark.py
@@ -30,6 +30,8 @@ def to_commonmark(lib, text):
render_commonmark.restype = c_char_p
render_commonmark.argtypes = [c_void_p, c_int, c_int]
node = parse_document(textbytes, textlen, 0)
+ if node is None:
+ raise Exception("parse_document failed")
result = render_commonmark(node, 0, 0).decode('utf-8')
return [0, result, '']