cmark

My personal build of CMark ✏️

Commit
60b6962db0b0488667180e11cc6cfb1cec1b41ea
Parent
cc50a3aba3e34dc58ca819a65b907871e2ea6fd9
Author
John MacFarlane <jgm@berkeley.edu>
Date

Revert "Change types for source map offsets (#174)"

This reverts commit 4fbe344df43ed7f60a3d3a53981088334cb709fc.

Diffstat

13 files changed, 44 insertions, 187 deletions

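| Status   | File Name        | N° Changes | Insertions | Deletions |
|----------|------------------|------------|------------|-----------|
| Modified | api_test/main.c  | 37         | 0          | 37        |
| Modified | src/blocks.c     | 48         | 2          | 46        |
| Modified | src/buffer.c     | 32         | 10         | 22        |
| Modified | src/buffer.h     | 20         | 0          | 20        |
| Modified | src/cmark.c      | 3          | 0          | 3         |
| Modified | src/cmark.h      | 32         | 8          | 24        |
| Modified | src/inlines.c    | 2          | 1          | 1         |
| Modified | src/inlines.h    | 2          | 1          | 1         |
| Modified | src/main.c       | 5          | 0          | 5         |
| Modified | src/parser.h     | 3          | 0          | 3         |
| Modified | src/source_map.c | 22         | 11         | 11        |
| Modified | src/source_map.h | 23         | 11         | 12        |
| Modified | test/cmark.py    | 2          | 0          | 2         |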
diff --git a/api_test/main.c b/api_test/main.c
@@ -5,7 +5,6 @@
 #define CMARK_NO_SHORT_NAMES
 #include "cmark.h"
 #include "node.h"
-#include "parser.h"
 
 #include "harness.h"
 #include "cplusplus.h"
@@ -884,41 +883,6 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
   cmark_node_free(document);
 }
 
-static cmark_node *S_parse_with_fake_total(bufsize_t fake_total,
-                                           const char *str,
-                                           cmark_err_type *err) {
-  cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
-  parser->total_bytes = fake_total;
-  cmark_parser_feed(parser, str, strlen(str));
-  cmark_node *doc = cmark_parser_finish(parser);
-  *err = cmark_parser_get_error(parser);
-  cmark_parser_free(parser);
-  return doc;
-}
-
-static void test_bufsize_overflow(test_batch_runner *runner) {
-  cmark_node *doc;
-  cmark_err_type err;
-
-  doc = S_parse_with_fake_total(BUFSIZE_MAX, "a", &err);
-  OK(runner, doc == NULL, "parse 1 byte after BUFSIZE_MAX bytes fails");
-  INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
-         "parse 1 byte after BUFSIZE_MAX bytes error code");
-
-  doc = S_parse_with_fake_total(BUFSIZE_MAX - 9, "0123456789", &err);
-  OK(runner, doc == NULL, "parse 10 byte after BUFSIZE_MAX-9 bytes fails");
-  INT_EQ(runner, err, CMARK_ERR_INPUT_TOO_LARGE,
-         "parse 10 byte after BUFSIZE_MAX-9 bytes error code");
-
-  doc = S_parse_with_fake_total(BUFSIZE_MAX - 1, "a", &err);
-  OK(runner, doc != NULL, "parse 1 byte after BUFSIZE_MAX-1 bytes");
-  cmark_node_free(doc);
-
-  doc = S_parse_with_fake_total(BUFSIZE_MAX - 10, "0123456789", &err);
-  OK(runner, doc != NULL, "parse 10 byte after BUFSIZE_MAX-10 bytes");
-  cmark_node_free(doc);
-}
-
 int main() {
   int retval;
   test_batch_runner *runner = test_batch_runner_new();
@@ -944,7 +908,6 @@ int main() {
   test_cplusplus(runner);
   test_safe(runner);
   test_feed_across_line_ending(runner);
-  test_bufsize_overflow(runner);
 
   test_print_summary(runner);
   retval = test_ok(runner) ? 0 : 1;
diff --git a/src/blocks.c b/src/blocks.c
@@ -96,8 +96,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
   parser->refmap = cmark_reference_map_new(mem);
   parser->root = document;
   parser->current = document;
-  parser->error_code = CMARK_ERR_NONE;
-  parser->total_bytes = 0;
   parser->line_number = 0;
   parser->line_offset = 0;
   parser->offset = 0;
@@ -552,20 +550,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   const unsigned char *skipped;
   static const uint8_t repl[] = {239, 191, 189};
 
-  if (parser->error_code) {
-    return;
-  }
-
-  // Limit maximum document size to BUFSIZE_MAX. This makes sure that we
-  // never create strbufs larger than BUFSIZE_MAX. Unfortunately, the
-  // public API doesn't have an error reporting mechanism, so all we can
-  // do is to abort.
-  if (len > (size_t)(BUFSIZE_MAX - parser->total_bytes)) {
-    parser->error_code = CMARK_ERR_INPUT_TOO_LARGE;
-    return;
-  }
-  parser->total_bytes += (bufsize_t)len;
-
   if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
     // skip NL if last buffer ended with CR ; see #117
     buffer++;
@@ -1282,19 +1266,14 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
     cmark_strbuf_clear(&parser->linebuf);
   }
 
-  cmark_strbuf_clear(&parser->curline);
-
-  if (parser->error_code) {
-    cmark_node_free(parser->root);
-    return NULL;
-  }
-
   finalize_document(parser);
 
   if (parser->options & CMARK_OPT_NORMALIZE) {
     cmark_consolidate_text_nodes(parser->root);
   }
 
+  cmark_strbuf_free(&parser->curline);
+
 #if CMARK_DEBUG_NODES
   if (cmark_node_check(parser->root, stderr)) {
     abort();
@@ -1308,26 +1287,3 @@ cmark_parser_get_first_source_extent(cmark_parser *parser)
 {
   return parser->source_map->head;
 }
-
-cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
-  return parser->error_code;
-}
-
-const char *cmark_parser_get_error_message(cmark_parser *parser) {
-  const char *str = NULL;
-
-  switch (parser->error_code) {
-    case CMARK_ERR_OUT_OF_MEMORY:
-      str = "Out of memory";
-      break;
-    case CMARK_ERR_INPUT_TOO_LARGE:
-      str = "Input too large";
-      break;
-    default:
-      str = "Unknown error";
-      break;
-  }
-
-  return str;
-}
-
diff --git a/src/buffer.c b/src/buffer.c
@@ -33,11 +33,6 @@ void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
 }
 
 static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
-  // Safety check for overflow.
-  if (add > BUFSIZE_MAX - buf->size) {
-    fprintf(stderr, "Internal cmark_strbuf overflow");
-    abort();
-  }
   cmark_strbuf_grow(buf, buf->size + add);
 }
 
@@ -47,25 +42,18 @@ void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
   if (target_size < buf->asize)
     return;
 
-  // Oversize the buffer by 50% to guarantee amortized linear time
-  // complexity on append operations.
-  bufsize_t add = target_size / 2;
-  // Account for terminating NUL byte.
-  add += 1;
-  // Round up to multiple of eight.
-  add = (add + 7) & ~7;
-
-  // Check for overflow but allow an additional NUL byte.
-  if (target_size + add > BUFSIZE_MAX + 1) {
-    target_size = BUFSIZE_MAX + 1;
-  }
-  else {
-    target_size += add;
-  }
+  if (target_size > (bufsize_t)(INT32_MAX / 2))
+    abort();
+
+  /* Oversize the buffer by 50% to guarantee amortized linear time
+   * complexity on append operations. */
+  bufsize_t new_size = target_size + target_size / 2;
+  new_size += 1;
+  new_size = (new_size + 7) & ~7;
 
   buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
-                                                target_size);
-  buf->asize = target_size;
+                                                new_size);
+  buf->asize = new_size;
 }
 
 bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
diff --git a/src/buffer.h b/src/buffer.h
@@ -13,28 +13,8 @@
 extern "C" {
 #endif
 
-#ifndef CMARK_HUGE_DOCS
-
-// Maximum strbuf size without terminating NUL byte.
-#define BUFSIZE_MAX (INT32_MAX - 1)
-
 typedef int32_t bufsize_t;
 
-#else // CMARK_HUGE_DOCS
-
-// This is an untested proof of concept of how to handle multi-gigabyte
-// documents on 64-bit platforms at the expense of internal struct sizes.
-
-#ifdef PTRDIFF_MAX
-  #define BUFSIZE_MAX (PTRDIFF_MAX - 1)
-#else
-  #define BUFSIZE_MAX (ptrdiff_t)((size_t)-1 / 2)
-#endif
-
-typedef ptrdiff_t bufsize_t;
-
-#endif // CMARK_HUGE_DOCS
-
 typedef struct {
   cmark_mem *mem;
   unsigned char *ptr;
diff --git a/src/cmark.c b/src/cmark.c
@@ -36,9 +36,6 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) {
   char *result;
 
   doc = cmark_parse_document(text, len, options);
-  if (doc == NULL) {
-    return NULL;
-  }
 
   result = cmark_render_html(doc, options);
   cmark_node_free(doc);
diff --git a/src/cmark.h b/src/cmark.h
@@ -2,6 +2,7 @@
 #define CMARK_H
 
 #include <stdio.h>
+#include <stdint.h>
 #include <cmark_export.h>
 #include <cmark_version.h>
 
@@ -22,7 +23,7 @@ extern "C" {
 /** Convert 'text' (assumed to be a UTF-8 encoded string with length
  * 'len') from CommonMark Markdown to HTML, returning a null-terminated,
  * UTF-8-encoded string. It is the caller's responsibility
- * to free the returned buffer. Returns NULL on error.
+ * to free the returned buffer.
  */
 CMARK_EXPORT
 char *cmark_markdown_to_html(const char *text, size_t len, int options);
@@ -98,12 +99,6 @@ typedef enum {
   CMARK_PAREN_DELIM
 } cmark_delim_type;
 
-typedef enum {
-  CMARK_ERR_NONE,
-  CMARK_ERR_OUT_OF_MEMORY,
-  CMARK_ERR_INPUT_TOO_LARGE
-} cmark_err_type;
-
 typedef struct cmark_node cmark_node;
 typedef struct cmark_parser cmark_parser;
 typedef struct cmark_iter cmark_iter;
@@ -494,22 +489,12 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
 CMARK_EXPORT
 void cmark_parser_free(cmark_parser *parser);
 
-/** Return the error code after a failed operation.
- */
-CMARK_EXPORT
-cmark_err_type cmark_parser_get_error(cmark_parser *parser);
-
-/** Return the error code after a failed operation.
- */
-CMARK_EXPORT
-const char *cmark_parser_get_error_message(cmark_parser *parser);
-
 /** Feeds a string of length 'len' to 'parser'.
  */
 CMARK_EXPORT
 void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
 
-/** Finish parsing and return a pointer to a tree of nodes or NULL on error.
+/** Finish parsing and return a pointer to a tree of nodes.
  */
 CMARK_EXPORT
 cmark_node *cmark_parser_finish(cmark_parser *parser);
@@ -522,7 +507,7 @@ cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
 /** Parse a CommonMark document in 'buffer' of length 'len'.
  * Returns a pointer to a tree of nodes.  The memory allocated for
  * the node tree should be released using 'cmark_node_free'
- * when it is no longer needed.  Returns NULL on error.
+ * when it is no longer needed.
  */
 CMARK_EXPORT
 cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
@@ -530,23 +515,22 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
 /** Parse a CommonMark document in file 'f', returning a pointer to
  * a tree of nodes.  The memory allocated for the node tree should be
  * released using 'cmark_node_free' when it is no longer needed.
- * Returns NULL on error.
  */
 CMARK_EXPORT
 cmark_node *cmark_parse_file(FILE *f, int options);
 
-/**
+/** 
  * ## Source map API
  */
 
 /* Return the index, in bytes, of the start of this extent */
 CMARK_EXPORT
-size_t cmark_source_extent_get_start(cmark_source_extent *extent);
+uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
 
-/* Return the index, in bytes, of the stop of this extent. This
+/* Return the index, in bytes, of the stop of this extent. This 
  * index is not included in the extent*/
 CMARK_EXPORT
-size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
 
 /* Return the extent immediately following 'extent' */
 CMARK_EXPORT
diff --git a/src/inlines.c b/src/inlines.c
@@ -1229,7 +1229,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
 // Parse inlines from parent's string_content, adding as children of parent.
 extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                                 cmark_reference_map *refmap, int options,
-                                cmark_source_map *source_map, bufsize_t total_length) {
+                                cmark_source_map *source_map, uint64_t total_length) {
   subject subj;
   subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
   bufsize_t initial_len = subj.input.len;
diff --git a/src/inlines.h b/src/inlines.h
@@ -14,7 +14,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                          cmark_reference_map *refmap, int options,
-                         cmark_source_map *source_map, bufsize_t total_length);
+                         cmark_source_map *source_map, uint64_t total_length);
 
 bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
                                        cmark_reference_map *refmap,
diff --git a/src/main.c b/src/main.c
@@ -181,11 +181,6 @@ int main(int argc, char *argv[]) {
   document = cmark_parser_finish(parser);
   cmark_parser_free(parser);
 
-  if (document == NULL) {
-    fprintf(stderr, "%s", cmark_parser_get_error_message(parser));
-    exit(1);
-  }
-
   print_document(document, writer, options, width);
 
   cmark_node_free(document);
diff --git a/src/parser.h b/src/parser.h
@@ -2,7 +2,6 @@
 #define CMARK_AST_H
 
 #include <stdio.h>
-#include "cmark.h"
 #include "node.h"
 #include "buffer.h"
 #include "memory.h"
@@ -19,8 +18,6 @@ struct cmark_parser {
   struct cmark_reference_map *refmap;
   struct cmark_node *root;
   struct cmark_node *current;
-  cmark_err_type error_code;
-  bufsize_t total_bytes;
   int line_number;
   bufsize_t offset;
   bufsize_t column;
diff --git a/src/source_map.c b/src/source_map.c
@@ -19,7 +19,7 @@ source_map_free(cmark_source_map *self)
 }
 
 cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
+source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
 {
   assert (start <= stop);
   assert (!self->tail || self->tail->stop <= start);
@@ -46,7 +46,7 @@ source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop
 
 cmark_source_extent *
 source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
-                         bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type)
+                         uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
 {
   if (start == stop)
     return previous;
@@ -101,7 +101,7 @@ source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
 
 cmark_source_extent *
 source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
-                         cmark_node *node, bufsize_t total_length)
+                         cmark_node *node, uint64_t total_length)
 {
   cmark_source_extent *next_extent = extent->next;
   cmark_source_extent *res;
@@ -135,7 +135,7 @@ source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
 }
 
 cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop,
+source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
                          cmark_node *node, cmark_extent_type type)
 {
   if (!self->next_cursor) {
@@ -154,7 +154,7 @@ source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop
 
     return self->cursor;
   } else if (start + self->cursor_offset < self->next_cursor->start) {
-    bufsize_t new_start = self->next_cursor->start - self->cursor_offset;
+    uint64_t new_start = self->next_cursor->start - self->cursor_offset;
 
     self->cursor = source_map_insert_extent(self,
                                             self->cursor,
@@ -196,17 +196,17 @@ source_map_pretty_print(cmark_source_map *self) {
   cmark_source_extent *tmp;
 
   for (tmp = self->head; tmp; tmp = tmp->next) {
-    printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop,
-						cmark_node_get_type_string(tmp->node),
+    printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
+						cmark_node_get_type_string(tmp->node), 
             cmark_source_extent_get_type_string(tmp),
             (void *) tmp->node);
   }
 }
 
 bool
-source_map_check(cmark_source_map *self, bufsize_t total_length)
+source_map_check(cmark_source_map *self, uint64_t total_length)
 {
-  bufsize_t last_stop = 0;
+  uint64_t last_stop = 0;
   cmark_source_extent *tmp;
 
   for (tmp = self->head; tmp; tmp = tmp->next) {
@@ -224,13 +224,13 @@ source_map_check(cmark_source_map *self, bufsize_t total_length)
 }
 
 
-size_t
+uint64_t
 cmark_source_extent_get_start(cmark_source_extent *extent)
 {
   return extent->start;
 }
 
-size_t
+uint64_t
 cmark_source_extent_get_stop(cmark_source_extent *extent)
 {
   return extent->stop;
diff --git a/src/source_map.h b/src/source_map.h
@@ -3,7 +3,6 @@
 
 #include "cmark.h"
 #include "config.h"
-#include "buffer.h"
 
 typedef struct _cmark_source_map
 {
@@ -11,14 +10,14 @@ typedef struct _cmark_source_map
   cmark_source_extent *tail;
   cmark_source_extent *cursor;
   cmark_source_extent *next_cursor;
-  bufsize_t cursor_offset;
+  uint64_t cursor_offset;
   cmark_mem *mem;
 } cmark_source_map;
 
 struct cmark_source_extent
 {
-  bufsize_t start;
-  bufsize_t stop;
+  uint64_t start;
+  uint64_t stop;
   struct cmark_source_extent *next;
   struct cmark_source_extent *prev;
   cmark_node *node;
@@ -30,20 +29,20 @@ cmark_source_map    * source_map_new          (cmark_mem *mem);
 void                  source_map_free         (cmark_source_map *self);
 
 bool                  source_map_check        (cmark_source_map *self,
-                                               bufsize_t total_length);
+                                               uint64_t total_length);
 
 void                  source_map_pretty_print (cmark_source_map *self);
 
 cmark_source_extent * source_map_append_extent(cmark_source_map *self,
-                                               bufsize_t start,
-                                               bufsize_t stop,
+                                               uint64_t start,
+                                               uint64_t stop,
                                                cmark_node *node,
                                                cmark_extent_type type);
 
 cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
                                                cmark_source_extent *previous,
-                                               bufsize_t start,
-                                               bufsize_t stop,
+                                               uint64_t start,
+                                               uint64_t stop,
                                                cmark_node *node,
                                                cmark_extent_type type);
 
@@ -53,11 +52,11 @@ cmark_source_extent * source_map_free_extent  (cmark_source_map *self,
 cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
                                                cmark_source_extent *extent,
                                                cmark_node *node,
-                                               bufsize_t total_length);
+                                               uint64_t total_length);
 
 cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
-                                               bufsize_t start,
-                                               bufsize_t stop,
+                                               uint64_t start,
+                                               uint64_t stop,
                                                cmark_node *node,
                                                cmark_extent_type type);
 
diff --git a/test/cmark.py b/test/cmark.py
@@ -30,8 +30,6 @@ def to_commonmark(lib, text):
     render_commonmark.restype = c_char_p
     render_commonmark.argtypes = [c_void_p, c_int, c_int]
     node = parse_document(textbytes, textlen, 0)
-    if node is None:
-      raise Exception("parse_document failed")
     result = render_commonmark(node, 0, 0).decode('utf-8')
     return [0, result, '']