cmark

My personal build of CMark ✏️

Commit
5d67ee2a1c44ac3d793e6b43addf3c9221f2f31f
Parent
f97750517aa62066c1feab178262b32e370f22ce
Author
John MacFarlane <jgm@berkeley.edu>
Date

Merge branch 'MathieuDuponchelle-refactor-S_processLine'

Diffstat

1 file changed, 280 insertions, 219 deletions

| Status | File Name | N° Changes | Insertions | Deletions |
|---|---|---|---|---|
| Modified | src/blocks.c | 499 | 280 | 219 |
diff --git a/src/blocks.c b/src/blocks.c
@@ -613,255 +613,284 @@ static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
   }
 }
 
-static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
-                           bufsize_t bytes) {
-  cmark_node *last_matched_container;
+static bool S_last_child_is_open(cmark_node *container) {
+  return container->last_child && container->last_child->open;
+}
+
+static bool S_parse_block_quote(cmark_parser *parser,
+                                cmark_chunk *input)
+{
+  bool res = false;
   bufsize_t matched = 0;
-  int lev = 0;
-  int i;
-  cmark_list *data = NULL;
-  bool all_matched = true;
-  cmark_node *container;
-  bool indented;
-  cmark_chunk input;
-  bool maybe_lazy;
-  char c;
-  bool save_partially_consumed_tab;
-  int save_offset;
-  int save_column;
 
-  if (parser->options & CMARK_OPT_VALIDATE_UTF8) {
-    cmark_utf8proc_check(parser->curline, buffer, bytes);
-  } else {
-    cmark_strbuf_put(parser->curline, buffer, bytes);
-  }
-  // ensure line ends with a newline:
-  if (bytes == 0 || !S_is_line_end_char(parser->curline->ptr[bytes - 1])) {
-    cmark_strbuf_putc(parser->curline, '\n');
-  }
-  parser->offset = 0;
-  parser->column = 0;
-  parser->blank = false;
+  matched =
+      parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
+  if (matched) {
+    char c;
 
-  input.data = parser->curline->ptr;
-  input.len = parser->curline->size;
+    S_advance_offset(parser, input, parser->indent + 1, true);
+    c = peek_at(input, parser->offset);
 
-  // container starts at the document root.
-  container = parser->root;
+    if (c == ' ' || c == '\t')
+      S_advance_offset(parser, input, 1, true);
 
-  parser->line_number++;
+    res = true;
+  }
+  return res;
+}
 
-  // for each containing node, try to parse the associated line start.
-  // bail out on failure:  container will point to the last matching node.
+static bool S_parse_node_item(cmark_parser *parser,
+                              cmark_chunk *input,
+                              cmark_node *container)
+{
+  bool res = false;
+
+  if (parser->indent >=
+      container->as.list.marker_offset + container->as.list.padding) {
+    S_advance_offset(parser, input, container->as.list.marker_offset +
+                                         container->as.list.padding,
+                     true);
+    res = true;
+  } else if (parser->blank && container->first_child != NULL) {
+    // if container->first_child is NULL, then the opening line
+    // of the list item was blank after the list marker; in this
+    // case, we are done with the list item.
+    S_advance_offset(parser, input,
+                     parser->first_nonspace - parser->offset, false);
+    res = true;
+  }
+  return res;
+}
 
-  while (container->last_child && container->last_child->open) {
-    container = container->last_child;
+static bool S_parse_code_block(cmark_parser *parser,
+                               cmark_chunk *input,
+                               cmark_node *container,
+                               bool *should_continue)
+{
+  bool res = false;
+
+  if (!container->as.code.fenced) { // indented
+    if (parser->indent >= CODE_INDENT) {
+      S_advance_offset(parser, input, CODE_INDENT, true);
+      res = true;
+    } else if (parser->blank) {
+      S_advance_offset(parser, input,
+                       parser->first_nonspace - parser->offset, false);
+      res = true;
+    }
+  } else { // fenced
+    bufsize_t matched = 0;
 
-    S_find_first_nonspace(parser, &input);
+    if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) ==
+                                container->as.code.fence_char)) {
+      matched = scan_close_code_fence(input, parser->first_nonspace);
+    }
 
-    if (container->type == CMARK_NODE_BLOCK_QUOTE) {
-      matched =
-          parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>';
-      if (matched) {
-        S_advance_offset(parser, &input, parser->indent + 1, true);
-        c = peek_at(&input, parser->offset);
-	if (c == ' ' || c == '\t') {
-          S_advance_offset(parser, &input, 1, true);
-	}
-      } else {
-        all_matched = false;
-      }
+    if (matched >= container->as.code.fence_length) {
+      // closing fence - and since we're at
+      // the end of a line, we can stop processing it:
+      *should_continue = false;
+      S_advance_offset(parser, input, matched, false);
+      parser->current = finalize(parser, container);
+    } else {
+      // skip opt. spaces of fence parser->offset
+      char c;
+      int i = container->as.code.fence_offset;
 
-    } else if (container->type == CMARK_NODE_ITEM) {
-      if (parser->indent >=
-          container->as.list.marker_offset + container->as.list.padding) {
-        S_advance_offset(parser, &input, container->as.list.marker_offset +
-                                             container->as.list.padding,
-                         true);
-      } else if (parser->blank && container->first_child != NULL) {
-        // if container->first_child is NULL, then the opening line
-        // of the list item was blank after the list marker; in this
-        // case, we are done with the list item.
-        S_advance_offset(parser, &input,
-                         parser->first_nonspace - parser->offset, false);
-      } else {
-        all_matched = false;
+      while (i > 0 && (c = peek_at(input, parser->offset)) && (c == ' ' || c == '\t')) {
+        S_advance_offset(parser, input, 1, true);
+        i--;
       }
+      res = true;
+    }
+  }
 
-    } else if (container->type == CMARK_NODE_CODE_BLOCK) {
-
-      if (!container->as.code.fenced) { // indented
-        if (parser->indent >= CODE_INDENT) {
-          S_advance_offset(parser, &input, CODE_INDENT, true);
-        } else if (parser->blank) {
-          S_advance_offset(parser, &input,
-                           parser->first_nonspace - parser->offset, false);
-        } else {
-          all_matched = false;
-        }
-      } else { // fenced
-        matched = 0;
-        if (parser->indent <= 3 && (peek_at(&input, parser->first_nonspace) ==
-                                    container->as.code.fence_char)) {
-          matched = scan_close_code_fence(&input, parser->first_nonspace);
-        }
-        if (matched >= container->as.code.fence_length) {
-          // closing fence - and since we're at
-          // the end of a line, we can return:
-          all_matched = false;
-          S_advance_offset(parser, &input, matched, false);
-          parser->current = finalize(parser, container);
-          goto finished;
-        } else {
-          // skip opt. spaces of fence parser->offset
-          i = container->as.code.fence_offset;
-          while (i > 0 && (c = peek_at(&input, parser->offset)) && (c == ' ' || c == '\t')) {
-            S_advance_offset(parser, &input, 1, true);
-            i--;
-          }
-        }
-      }
-    } else if (container->type == CMARK_NODE_HEADING) {
+  return res;
+}
 
-      // a heading can never contain more than one line
-      all_matched = false;
+static bool S_parse_html_block(cmark_parser *parser,
+                               cmark_node *container)
+{
+  bool res = false;
+  int html_block_type = container->as.html_block_type;
+
+  assert(html_block_type >= 1 && html_block_type <= 7);
+  switch (html_block_type) {
+    case 1:
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+      // these types of blocks can accept blanks
+      res = true;
+      break;
+    case 6:
+    case 7:
+      res = !parser->blank;
+      break;
+  }
 
-    } else if (container->type == CMARK_NODE_HTML_BLOCK) {
+  return res;
+}
 
-      switch (container->as.html_block_type) {
-      case 1:
-      case 2:
-      case 3:
-      case 4:
-      case 5:
-        // these types of blocks can accept blanks
+// for each containing node, tries to parse the associated line start.
+// bails out on failure:  container will point to the last matching node.
+static bool S_try_parse_line_start(cmark_parser *parser,
+                                   cmark_chunk *input,
+                                   cmark_node **container,
+                                   bool *all_matched)
+{
+  bool should_continue = true;
+  *all_matched = false;
+  cmark_node_type cont_type;
+
+  while (S_last_child_is_open(*container)) {
+    *container = (*container)->last_child;
+    cont_type = (*container)->type;
+
+    S_find_first_nonspace(parser, input);
+
+    switch (cont_type) {
+      case CMARK_NODE_BLOCK_QUOTE:
+        if (!S_parse_block_quote(parser, input))
+          goto done;
         break;
-      case 6:
-      case 7:
-        if (parser->blank) {
-          all_matched = false;
-        }
+      case CMARK_NODE_ITEM:
+        if (!S_parse_node_item(parser, input, *container))
+          goto done;
+        break;
+      case CMARK_NODE_CODE_BLOCK:
+        if (!S_parse_code_block(parser, input, *container, &should_continue))
+          goto done;
+        break;
+      case CMARK_NODE_HEADING:
+        // a heading can never contain more than one line
+        goto done;
+      case CMARK_NODE_HTML_BLOCK:
+        if (!S_parse_html_block(parser, *container))
+          goto done;
+        break;
+      case CMARK_NODE_PARAGRAPH:
+        if (parser->blank)
+          goto done;
         break;
       default:
-        fprintf(stderr, "Error (%s:%d): Unknown HTML block type %d\n", __FILE__,
-                __LINE__, container->as.html_block_type);
-        exit(1);
-      }
-
-    } else if (container->type == CMARK_NODE_PARAGRAPH) {
-
-      if (parser->blank) {
-        all_matched = false;
-      }
-    }
-
-    if (!all_matched) {
-      container = container->parent; // back up to last matching node
-      break;
+        break;
     }
   }
 
-  last_matched_container = container;
+  *all_matched = true;
 
-  // check to see if we've hit 2nd blank line, break out of list:
-  if (parser->blank && container->last_line_blank) {
-    break_out_of_lists(parser, &container);
+done:
+  if (!*all_matched) {
+    *container = (*container)->parent; // back up to last matching node
   }
+  return should_continue;
+}
 
-  maybe_lazy = parser->current->type == CMARK_NODE_PARAGRAPH;
-  // try new container starts:
-  while (container->type != CMARK_NODE_CODE_BLOCK &&
-         container->type != CMARK_NODE_HTML_BLOCK) {
+static void try_new_container_starts(cmark_parser *parser,
+                                     cmark_node **container,
+                                     cmark_chunk *input,
+                                     bool all_matched)
+{
+  bool indented;
+  cmark_list *data = NULL;
+  bool maybe_lazy = parser->current->type == CMARK_NODE_PARAGRAPH;
+  cmark_node_type cont_type = (*container)->type;
+  bufsize_t matched = 0;
+  int lev = 0;
+  char c;
+  bool save_partially_consumed_tab;
+  int save_offset;
+  int save_column;
+
+  while (cont_type != CMARK_NODE_CODE_BLOCK &&
+         cont_type != CMARK_NODE_HTML_BLOCK) {
 
-    S_find_first_nonspace(parser, &input);
+    S_find_first_nonspace(parser, input);
     indented = parser->indent >= CODE_INDENT;
 
-    if (!indented && peek_at(&input, parser->first_nonspace) == '>') {
+    if (!indented && peek_at(input, parser->first_nonspace) == '>') {
 
-      S_advance_offset(parser, &input,
+      S_advance_offset(parser, input,
                        parser->first_nonspace + 1 - parser->offset, false);
       // optional following character
-      c = peek_at(&input, parser->offset);
+      c = peek_at(input, parser->offset);
       if (c == ' ' || c == '\t') {
-        S_advance_offset(parser, &input, 1, true);
+        S_advance_offset(parser, input, 1, true);
       }
-      container = add_child(parser, container, CMARK_NODE_BLOCK_QUOTE,
+      *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
                             parser->offset + 1);
 
     } else if (!indented && (matched = scan_atx_heading_start(
-                                 &input, parser->first_nonspace))) {
+                                 input, parser->first_nonspace))) {
+      bufsize_t hashpos;
+      int level = 0;
 
-      S_advance_offset(parser, &input,
+      S_advance_offset(parser, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
-      container =
-          add_child(parser, container, CMARK_NODE_HEADING, parser->offset + 1);
+      *container =
+          add_child(parser, *container, CMARK_NODE_HEADING, parser->offset + 1);
 
-      bufsize_t hashpos =
-          cmark_chunk_strchr(&input, '#', parser->first_nonspace);
-      int level = 0;
+      hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
 
-      while (peek_at(&input, hashpos) == '#') {
+      while (peek_at(input, hashpos) == '#') {
         level++;
         hashpos++;
       }
-      container->as.heading.level = level;
-      container->as.heading.setext = false;
 
-    } else if (!indented && (matched = scan_open_code_fence(
-                                 &input, parser->first_nonspace))) {
+      (*container)->as.heading.level = level;
+      (*container)->as.heading.setext = false;
 
-      container = add_child(parser, container, CMARK_NODE_CODE_BLOCK,
+    } else if (!indented && (matched = scan_open_code_fence(
+                                 input, parser->first_nonspace))) {
+      *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
                             parser->first_nonspace + 1);
-      container->as.code.fenced = true;
-      container->as.code.fence_char = peek_at(&input, parser->first_nonspace);
-      container->as.code.fence_length = matched;
-      container->as.code.fence_offset =
+      (*container)->as.code.fenced = true;
+      (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace);
+      (*container)->as.code.fence_length = matched;
+      (*container)->as.code.fence_offset =
           (int8_t)(parser->first_nonspace - parser->offset);
-      container->as.code.info = cmark_chunk_literal("");
-      S_advance_offset(parser, &input,
+      (*container)->as.code.info = cmark_chunk_literal("");
+      S_advance_offset(parser, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
 
     } else if (!indented && ((matched = scan_html_block_start(
-                                  &input, parser->first_nonspace)) ||
-                             (container->type != CMARK_NODE_PARAGRAPH &&
+                                  input, parser->first_nonspace)) ||
+                             (cont_type != CMARK_NODE_PARAGRAPH &&
                               (matched = scan_html_block_start_7(
-                                   &input, parser->first_nonspace))))) {
-
-      container = add_child(parser, container, CMARK_NODE_HTML_BLOCK,
+                                   input, parser->first_nonspace))))) {
+      *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK,
                             parser->first_nonspace + 1);
-      container->as.html_block_type = matched;
+      (*container)->as.html_block_type = matched;
       // note, we don't adjust parser->offset because the tag is part of the
       // text
-
-    } else if (!indented && container->type == CMARK_NODE_PARAGRAPH &&
+    } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
                (lev =
-                    scan_setext_heading_line(&input, parser->first_nonspace))) {
-
-      container->type = CMARK_NODE_HEADING;
-      container->as.heading.level = lev;
-      container->as.heading.setext = true;
-      S_advance_offset(parser, &input, input.len - 1 - parser->offset, false);
-
+                    scan_setext_heading_line(input, parser->first_nonspace))) {
+      (*container)->type = CMARK_NODE_HEADING;
+      (*container)->as.heading.level = lev;
+      (*container)->as.heading.setext = true;
+      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
     } else if (!indented &&
-               !(container->type == CMARK_NODE_PARAGRAPH && !all_matched) &&
+               !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
                (matched =
-                    scan_thematic_break(&input, parser->first_nonspace))) {
-
+                    scan_thematic_break(input, parser->first_nonspace))) {
       // it's only now that we know the line is not part of a setext heading:
-      container = add_child(parser, container, CMARK_NODE_THEMATIC_BREAK,
+      *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
                             parser->first_nonspace + 1);
-      S_advance_offset(parser, &input, input.len - 1 - parser->offset, false);
-
+      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
     } else if ((matched =
-                    parse_list_marker(&input, parser->first_nonspace, &data)) &&
-               (!indented || container->type == CMARK_NODE_LIST)) {
+                    parse_list_marker(input, parser->first_nonspace, &data)) &&
+               (!indented || cont_type == CMARK_NODE_LIST)) {
       // Note that we can have new list items starting with >= 4
       // spaces indent, as long as the list container is still open.
+      int i = 0;
 
       // compute padding:
-      S_advance_offset(parser, &input,
+      S_advance_offset(parser, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
 
@@ -870,9 +899,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
       save_column = parser->column;
 
       while (parser->column - save_column <= 5 &&
-		(c = peek_at(&input, parser->offset)) &&
+		(c = peek_at(input, parser->offset)) &&
 		(c == ' ' || c == '\t')) {
-        S_advance_offset(parser, &input, 1, true);
+        S_advance_offset(parser, input, 1, true);
       }
 
       i = parser->column - save_column;
@@ -882,7 +911,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
 	parser->column = save_column;
 	parser->partially_consumed_tab = save_partially_consumed_tab;
         if (i > 0) {
-          S_advance_offset(parser, &input, 1, true);
+          S_advance_offset(parser, input, 1, true);
         }
       } else {
         data->padding = matched + i;
@@ -893,50 +922,92 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
 
       data->marker_offset = parser->indent;
 
-      if (container->type != CMARK_NODE_LIST ||
-          !lists_match(&container->as.list, data)) {
-        container = add_child(parser, container, CMARK_NODE_LIST,
+      if (cont_type != CMARK_NODE_LIST ||
+          !lists_match(&((*container)->as.list), data)) {
+        *container = add_child(parser, *container, CMARK_NODE_LIST,
                               parser->first_nonspace + 1);
 
-        memcpy(&container->as.list, data, sizeof(*data));
+        memcpy(&((*container)->as.list), data, sizeof(*data));
       }
 
       // add the list item
-      container = add_child(parser, container, CMARK_NODE_ITEM,
+      *container = add_child(parser, *container, CMARK_NODE_ITEM,
                             parser->first_nonspace + 1);
       /* TODO: static */
-      memcpy(&container->as.list, data, sizeof(*data));
+      memcpy(&((*container)->as.list), data, sizeof(*data));
       free(data);
-
     } else if (indented && !maybe_lazy && !parser->blank) {
-      S_advance_offset(parser, &input, CODE_INDENT, true);
-      container = add_child(parser, container, CMARK_NODE_CODE_BLOCK,
+      S_advance_offset(parser, input, CODE_INDENT, true);
+      *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
                             parser->offset + 1);
-      container->as.code.fenced = false;
-      container->as.code.fence_char = 0;
-      container->as.code.fence_length = 0;
-      container->as.code.fence_offset = 0;
-      container->as.code.info = cmark_chunk_literal("");
+      (*container)->as.code.fenced = false;
+      (*container)->as.code.fence_char = 0;
+      (*container)->as.code.fence_length = 0;
+      (*container)->as.code.fence_offset = 0;
+      (*container)->as.code.info = cmark_chunk_literal("");
 
     } else {
       break;
     }
 
-    if (accepts_lines(container->type)) {
+    if (accepts_lines((*container)->type)) {
       // if it's a line container, it can't contain other containers
       break;
     }
+
+    cont_type = (*container)->type;
     maybe_lazy = false;
   }
+}
+
+static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
+                           bufsize_t bytes) {
+  cmark_node *last_matched_container;
+  bool all_matched = true;
+  cmark_node *container, *tmp;
+  cmark_chunk input;
+
+  if (parser->options & CMARK_OPT_VALIDATE_UTF8) {
+    cmark_utf8proc_check(parser->curline, buffer, bytes);
+  } else {
+    cmark_strbuf_put(parser->curline, buffer, bytes);
+  }
+
+  // ensure line ends with a newline:
+  if (bytes == 0 || !S_is_line_end_char(parser->curline->ptr[bytes - 1])) {
+    cmark_strbuf_putc(parser->curline, '\n');
+  }
+
+  parser->offset = 0;
+  parser->column = 0;
+  parser->blank = false;
+
+  input.data = parser->curline->ptr;
+  input.len = parser->curline->size;
+
+  // container starts at the document root.
+  container = parser->root;
+
+  parser->line_number++;
+
+  if (!S_try_parse_line_start(parser, &input, &container, &all_matched))
+    goto finished;
+
+  last_matched_container = container;
+
+  // check to see if we've hit 2nd blank line, break out of list:
+  if (parser->blank && container->last_line_blank)
+    break_out_of_lists(parser, &container);
+
+  try_new_container_starts(parser, &container, &input, all_matched);
 
   // what remains at parser->offset is a text line.  add the text to the
   // appropriate container.
 
   S_find_first_nonspace(parser, &input);
 
-  if (parser->blank && container->last_child) {
+  if (parser->blank && container->last_child)
     container->last_child->last_line_blank = true;
-  }
 
   // block quote lines are never blank as they start with >
   // and we don't count blanks in fenced code for purposes of tight/loose
@@ -951,21 +1022,18 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
        !(container->type == CMARK_NODE_ITEM && container->first_child == NULL &&
          container->start_line == parser->line_number));
 
-  cmark_node *cont = container;
-  while (cont->parent) {
-    cont->parent->last_line_blank = false;
-    cont = cont->parent;
+  tmp = container;
+  while (tmp->parent) {
+    tmp->parent->last_line_blank = false;
+    tmp = tmp->parent;
   }
 
   if (parser->current != last_matched_container &&
       container == last_matched_container && !parser->blank &&
       parser->current->type == CMARK_NODE_PARAGRAPH &&
       cmark_strbuf_len(&parser->current->string_content) > 0) {
-
     add_line(parser->current, &input, parser);
-
   } else { // not a lazy continuation
-
     // finalize any blocks that were not matched and set cur to container:
     while (parser->current != last_matched_container) {
       parser->current = finalize(parser, parser->current);
@@ -973,11 +1041,8 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
     }
 
     if (container->type == CMARK_NODE_CODE_BLOCK) {
-
       add_line(container, &input, parser);
-
     } else if (container->type == CMARK_NODE_HTML_BLOCK) {
-
       add_line(container, &input, parser);
 
       int matches_end_condition;
@@ -1016,20 +1081,15 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
         container = finalize(parser, container);
         assert(parser->current != NULL);
       }
-
     } else if (parser->blank) {
-
       // ??? do nothing
-
     } else if (accepts_lines(container->type)) {
-
       if (container->type == CMARK_NODE_HEADING &&
           container->as.heading.setext == false) {
         chop_trailing_hashtags(&input);
       }
       S_advance_offset(parser, &input, parser->first_nonspace - parser->offset, false);
       add_line(container, &input, parser);
-
     } else {
       // create paragraph container for line
       container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
@@ -1040,6 +1100,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
 
     parser->current = container;
   }
+
 finished:
   parser->last_line_length = input.len;
   if (parser->last_line_length &&