- Commit
- 5d67ee2a1c44ac3d793e6b43addf3c9221f2f31f
- Parent
- f97750517aa62066c1feab178262b32e370f22ce
- Author
- John MacFarlane <jgm@berkeley.edu>
- Date
Merge branch 'MathieuDuponchelle-refactor-S_processLine'
My personal build of CMark ✏️
Merge branch 'MathieuDuponchelle-refactor-S_processLine'
1 file changed, 280 insertions, 219 deletions
Status | File Name | N° Changes | Insertions | Deletions |
Modified | src/blocks.c | 499 | 280 | 219 |
diff --git a/src/blocks.c b/src/blocks.c @@ -613,255 +613,284 @@ static void S_advance_offset(cmark_parser *parser, cmark_chunk *input, } } -static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - bufsize_t bytes) { - cmark_node *last_matched_container; +static bool S_last_child_is_open(cmark_node *container) { + return container->last_child && container->last_child->open; +} + +static bool S_parse_block_quote(cmark_parser *parser, + cmark_chunk *input) +{ + bool res = false; bufsize_t matched = 0; - int lev = 0; - int i; - cmark_list *data = NULL; - bool all_matched = true; - cmark_node *container; - bool indented; - cmark_chunk input; - bool maybe_lazy; - char c; - bool save_partially_consumed_tab; - int save_offset; - int save_column; - if (parser->options & CMARK_OPT_VALIDATE_UTF8) { - cmark_utf8proc_check(parser->curline, buffer, bytes); - } else { - cmark_strbuf_put(parser->curline, buffer, bytes); - } - // ensure line ends with a newline: - if (bytes == 0 || !S_is_line_end_char(parser->curline->ptr[bytes - 1])) { - cmark_strbuf_putc(parser->curline, '\n'); - } - parser->offset = 0; - parser->column = 0; - parser->blank = false; + matched = + parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>'; + if (matched) { + char c; - input.data = parser->curline->ptr; - input.len = parser->curline->size; + S_advance_offset(parser, input, parser->indent + 1, true); + c = peek_at(input, parser->offset); - // container starts at the document root. - container = parser->root; + if (c == ' ' || c == '\t') + S_advance_offset(parser, input, 1, true); - parser->line_number++; + res = true; + } + return res; +} - // for each containing node, try to parse the associated line start. - // bail out on failure: container will point to the last matching node. +static bool S_parse_node_item(cmark_parser *parser, + cmark_chunk *input, + cmark_node *container) +{ + bool res = false; + + if (parser->indent >= + container->as.list.marker_offset + container->as.list.padding) { + S_advance_offset(parser, input, container->as.list.marker_offset + + container->as.list.padding, + true); + res = true; + } else if (parser->blank && container->first_child != NULL) { + // if container->first_child is NULL, then the opening line + // of the list item was blank after the list marker; in this + // case, we are done with the list item. + S_advance_offset(parser, input, + parser->first_nonspace - parser->offset, false); + res = true; + } + return res; +} - while (container->last_child && container->last_child->open) { - container = container->last_child; +static bool S_parse_code_block(cmark_parser *parser, + cmark_chunk *input, + cmark_node *container, + bool *should_continue) +{ + bool res = false; + + if (!container->as.code.fenced) { // indented + if (parser->indent >= CODE_INDENT) { + S_advance_offset(parser, input, CODE_INDENT, true); + res = true; + } else if (parser->blank) { + S_advance_offset(parser, input, + parser->first_nonspace - parser->offset, false); + res = true; + } + } else { // fenced + bufsize_t matched = 0; - S_find_first_nonspace(parser, &input); + if (parser->indent <= 3 && (peek_at(input, parser->first_nonspace) == + container->as.code.fence_char)) { + matched = scan_close_code_fence(input, parser->first_nonspace); + } - if (container->type == CMARK_NODE_BLOCK_QUOTE) { - matched = - parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>'; - if (matched) { - S_advance_offset(parser, &input, parser->indent + 1, true); - c = peek_at(&input, parser->offset); - if (c == ' ' || c == '\t') { - S_advance_offset(parser, &input, 1, true); - } - } else { - all_matched = false; - } + if (matched >= container->as.code.fence_length) { + // closing fence - and since we're at + // the end of a line, we can stop processing it: + *should_continue = false; + S_advance_offset(parser, input, matched, false); + parser->current = finalize(parser, container); + } else { + // skip opt. spaces of fence parser->offset + char c; + int i = container->as.code.fence_offset; - } else if (container->type == CMARK_NODE_ITEM) { - if (parser->indent >= - container->as.list.marker_offset + container->as.list.padding) { - S_advance_offset(parser, &input, container->as.list.marker_offset + - container->as.list.padding, - true); - } else if (parser->blank && container->first_child != NULL) { - // if container->first_child is NULL, then the opening line - // of the list item was blank after the list marker; in this - // case, we are done with the list item. - S_advance_offset(parser, &input, - parser->first_nonspace - parser->offset, false); - } else { - all_matched = false; + while (i > 0 && (c = peek_at(input, parser->offset)) && (c == ' ' || c == '\t')) { + S_advance_offset(parser, input, 1, true); + i--; } + res = true; + } + } - } else if (container->type == CMARK_NODE_CODE_BLOCK) { - - if (!container->as.code.fenced) { // indented - if (parser->indent >= CODE_INDENT) { - S_advance_offset(parser, &input, CODE_INDENT, true); - } else if (parser->blank) { - S_advance_offset(parser, &input, - parser->first_nonspace - parser->offset, false); - } else { - all_matched = false; - } - } else { // fenced - matched = 0; - if (parser->indent <= 3 && (peek_at(&input, parser->first_nonspace) == - container->as.code.fence_char)) { - matched = scan_close_code_fence(&input, parser->first_nonspace); - } - if (matched >= container->as.code.fence_length) { - // closing fence - and since we're at - // the end of a line, we can return: - all_matched = false; - S_advance_offset(parser, &input, matched, false); - parser->current = finalize(parser, container); - goto finished; - } else { - // skip opt. spaces of fence parser->offset - i = container->as.code.fence_offset; - while (i > 0 && (c = peek_at(&input, parser->offset)) && (c == ' ' || c == '\t')) { - S_advance_offset(parser, &input, 1, true); - i--; - } - } - } - } else if (container->type == CMARK_NODE_HEADING) { + return res; +} - // a heading can never contain more than one line - all_matched = false; +static bool S_parse_html_block(cmark_parser *parser, + cmark_node *container) +{ + bool res = false; + int html_block_type = container->as.html_block_type; + + assert(html_block_type >= 1 && html_block_type <= 7); + switch (html_block_type) { + case 1: + case 2: + case 3: + case 4: + case 5: + // these types of blocks can accept blanks + res = true; + break; + case 6: + case 7: + res = !parser->blank; + break; + } - } else if (container->type == CMARK_NODE_HTML_BLOCK) { + return res; +} - switch (container->as.html_block_type) { - case 1: - case 2: - case 3: - case 4: - case 5: - // these types of blocks can accept blanks +// for each containing node, tries to parse the associated line start. +// bails out on failure: container will point to the last matching node. +static bool S_try_parse_line_start(cmark_parser *parser, + cmark_chunk *input, + cmark_node **container, + bool *all_matched) +{ + bool should_continue = true; + *all_matched = false; + cmark_node_type cont_type; + + while (S_last_child_is_open(*container)) { + *container = (*container)->last_child; + cont_type = (*container)->type; + + S_find_first_nonspace(parser, input); + + switch (cont_type) { + case CMARK_NODE_BLOCK_QUOTE: + if (!S_parse_block_quote(parser, input)) + goto done; break; - case 6: - case 7: - if (parser->blank) { - all_matched = false; - } + case CMARK_NODE_ITEM: + if (!S_parse_node_item(parser, input, *container)) + goto done; + break; + case CMARK_NODE_CODE_BLOCK: + if (!S_parse_code_block(parser, input, *container, &should_continue)) + goto done; + break; + case CMARK_NODE_HEADING: + // a heading can never contain more than one line + goto done; + case CMARK_NODE_HTML_BLOCK: + if (!S_parse_html_block(parser, *container)) + goto done; + break; + case CMARK_NODE_PARAGRAPH: + if (parser->blank) + goto done; break; default: - fprintf(stderr, "Error (%s:%d): Unknown HTML block type %d\n", __FILE__, - __LINE__, container->as.html_block_type); - exit(1); - } - - } else if (container->type == CMARK_NODE_PARAGRAPH) { - - if (parser->blank) { - all_matched = false; - } - } - - if (!all_matched) { - container = container->parent; // back up to last matching node - break; + break; } } - last_matched_container = container; + *all_matched = true; - // check to see if we've hit 2nd blank line, break out of list: - if (parser->blank && container->last_line_blank) { - break_out_of_lists(parser, &container); +done: + if (!*all_matched) { + *container = (*container)->parent; // back up to last matching node } + return should_continue; +} - maybe_lazy = parser->current->type == CMARK_NODE_PARAGRAPH; - // try new container starts: - while (container->type != CMARK_NODE_CODE_BLOCK && - container->type != CMARK_NODE_HTML_BLOCK) { +static void try_new_container_starts(cmark_parser *parser, + cmark_node **container, + cmark_chunk *input, + bool all_matched) +{ + bool indented; + cmark_list *data = NULL; + bool maybe_lazy = parser->current->type == CMARK_NODE_PARAGRAPH; + cmark_node_type cont_type = (*container)->type; + bufsize_t matched = 0; + int lev = 0; + char c; + bool save_partially_consumed_tab; + int save_offset; + int save_column; + + while (cont_type != CMARK_NODE_CODE_BLOCK && + cont_type != CMARK_NODE_HTML_BLOCK) { - S_find_first_nonspace(parser, &input); + S_find_first_nonspace(parser, input); indented = parser->indent >= CODE_INDENT; - if (!indented && peek_at(&input, parser->first_nonspace) == '>') { + if (!indented && peek_at(input, parser->first_nonspace) == '>') { - S_advance_offset(parser, &input, + S_advance_offset(parser, input, parser->first_nonspace + 1 - parser->offset, false); // optional following character - c = peek_at(&input, parser->offset); + c = peek_at(input, parser->offset); if (c == ' ' || c == '\t') { - S_advance_offset(parser, &input, 1, true); + S_advance_offset(parser, input, 1, true); } - container = add_child(parser, container, CMARK_NODE_BLOCK_QUOTE, + *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE, parser->offset + 1); } else if (!indented && (matched = scan_atx_heading_start( - &input, parser->first_nonspace))) { + input, parser->first_nonspace))) { + bufsize_t hashpos; + int level = 0; - S_advance_offset(parser, &input, + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); - container = - add_child(parser, container, CMARK_NODE_HEADING, parser->offset + 1); + *container = + add_child(parser, *container, CMARK_NODE_HEADING, parser->offset + 1); - bufsize_t hashpos = - cmark_chunk_strchr(&input, '#', parser->first_nonspace); - int level = 0; + hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace); - while (peek_at(&input, hashpos) == '#') { + while (peek_at(input, hashpos) == '#') { level++; hashpos++; } - container->as.heading.level = level; - container->as.heading.setext = false; - } else if (!indented && (matched = scan_open_code_fence( - &input, parser->first_nonspace))) { + (*container)->as.heading.level = level; + (*container)->as.heading.setext = false; - container = add_child(parser, container, CMARK_NODE_CODE_BLOCK, + } else if (!indented && (matched = scan_open_code_fence( + input, parser->first_nonspace))) { + *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK, parser->first_nonspace + 1); - container->as.code.fenced = true; - container->as.code.fence_char = peek_at(&input, parser->first_nonspace); - container->as.code.fence_length = matched; - container->as.code.fence_offset = + (*container)->as.code.fenced = true; + (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace); + (*container)->as.code.fence_length = matched; + (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); - container->as.code.info = cmark_chunk_literal(""); - S_advance_offset(parser, &input, + (*container)->as.code.info = cmark_chunk_literal(""); + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); } else if (!indented && ((matched = scan_html_block_start( - &input, parser->first_nonspace)) || - (container->type != CMARK_NODE_PARAGRAPH && + input, parser->first_nonspace)) || + (cont_type != CMARK_NODE_PARAGRAPH && (matched = scan_html_block_start_7( - &input, parser->first_nonspace))))) { - - container = add_child(parser, container, CMARK_NODE_HTML_BLOCK, + input, parser->first_nonspace))))) { + *container = add_child(parser, *container, CMARK_NODE_HTML_BLOCK, parser->first_nonspace + 1); - container->as.html_block_type = matched; + (*container)->as.html_block_type = matched; // note, we don't adjust parser->offset because the tag is part of the // text - - } else if (!indented && container->type == CMARK_NODE_PARAGRAPH && + } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH && (lev = - scan_setext_heading_line(&input, parser->first_nonspace))) { - - container->type = CMARK_NODE_HEADING; - container->as.heading.level = lev; - container->as.heading.setext = true; - S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); - + scan_setext_heading_line(input, parser->first_nonspace))) { + (*container)->type = CMARK_NODE_HEADING; + (*container)->as.heading.level = lev; + (*container)->as.heading.setext = true; + S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && - !(container->type == CMARK_NODE_PARAGRAPH && !all_matched) && + !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) && (matched = - scan_thematic_break(&input, parser->first_nonspace))) { - + scan_thematic_break(input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext heading: - container = add_child(parser, container, CMARK_NODE_THEMATIC_BREAK, + *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); - S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); - + S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if ((matched = - parse_list_marker(&input, parser->first_nonspace, &data)) && - (!indented || container->type == CMARK_NODE_LIST)) { + parse_list_marker(input, parser->first_nonspace, &data)) && + (!indented || cont_type == CMARK_NODE_LIST)) { // Note that we can have new list items starting with >= 4 // spaces indent, as long as the list container is still open. + int i = 0; // compute padding: - S_advance_offset(parser, &input, + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); @@ -870,9 +899,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, save_column = parser->column; while (parser->column - save_column <= 5 && - (c = peek_at(&input, parser->offset)) && + (c = peek_at(input, parser->offset)) && (c == ' ' || c == '\t')) { - S_advance_offset(parser, &input, 1, true); + S_advance_offset(parser, input, 1, true); } i = parser->column - save_column; @@ -882,7 +911,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, parser->column = save_column; parser->partially_consumed_tab = save_partially_consumed_tab; if (i > 0) { - S_advance_offset(parser, &input, 1, true); + S_advance_offset(parser, input, 1, true); } } else { data->padding = matched + i; @@ -893,50 +922,92 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, data->marker_offset = parser->indent; - if (container->type != CMARK_NODE_LIST || - !lists_match(&container->as.list, data)) { - container = add_child(parser, container, CMARK_NODE_LIST, + if (cont_type != CMARK_NODE_LIST || + !lists_match(&((*container)->as.list), data)) { + *container = add_child(parser, *container, CMARK_NODE_LIST, parser->first_nonspace + 1); - memcpy(&container->as.list, data, sizeof(*data)); + memcpy(&((*container)->as.list), data, sizeof(*data)); } // add the list item - container = add_child(parser, container, CMARK_NODE_ITEM, + *container = add_child(parser, *container, CMARK_NODE_ITEM, parser->first_nonspace + 1); /* TODO: static */ - memcpy(&container->as.list, data, sizeof(*data)); + memcpy(&((*container)->as.list), data, sizeof(*data)); free(data); - } else if (indented && !maybe_lazy && !parser->blank) { - S_advance_offset(parser, &input, CODE_INDENT, true); - container = add_child(parser, container, CMARK_NODE_CODE_BLOCK, + S_advance_offset(parser, input, CODE_INDENT, true); + *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK, parser->offset + 1); - container->as.code.fenced = false; - container->as.code.fence_char = 0; - container->as.code.fence_length = 0; - container->as.code.fence_offset = 0; - container->as.code.info = cmark_chunk_literal(""); + (*container)->as.code.fenced = false; + (*container)->as.code.fence_char = 0; + (*container)->as.code.fence_length = 0; + (*container)->as.code.fence_offset = 0; + (*container)->as.code.info = cmark_chunk_literal(""); } else { break; } - if (accepts_lines(container->type)) { + if (accepts_lines((*container)->type)) { // if it's a line container, it can't contain other containers break; } + + cont_type = (*container)->type; maybe_lazy = false; } +} + +static void S_process_line(cmark_parser *parser, const unsigned char *buffer, + bufsize_t bytes) { + cmark_node *last_matched_container; + bool all_matched = true; + cmark_node *container, *tmp; + cmark_chunk input; + + if (parser->options & CMARK_OPT_VALIDATE_UTF8) { + cmark_utf8proc_check(parser->curline, buffer, bytes); + } else { + cmark_strbuf_put(parser->curline, buffer, bytes); + } + + // ensure line ends with a newline: + if (bytes == 0 || !S_is_line_end_char(parser->curline->ptr[bytes - 1])) { + cmark_strbuf_putc(parser->curline, '\n'); + } + + parser->offset = 0; + parser->column = 0; + parser->blank = false; + + input.data = parser->curline->ptr; + input.len = parser->curline->size; + + // container starts at the document root. + container = parser->root; + + parser->line_number++; + + if (!S_try_parse_line_start(parser, &input, &container, &all_matched)) + goto finished; + + last_matched_container = container; + + // check to see if we've hit 2nd blank line, break out of list: + if (parser->blank && container->last_line_blank) + break_out_of_lists(parser, &container); + + try_new_container_starts(parser, &container, &input, all_matched); // what remains at parser->offset is a text line. add the text to the // appropriate container. S_find_first_nonspace(parser, &input); - if (parser->blank && container->last_child) { + if (parser->blank && container->last_child) container->last_child->last_line_blank = true; - } // block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose @@ -951,21 +1022,18 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, !(container->type == CMARK_NODE_ITEM && container->first_child == NULL && container->start_line == parser->line_number)); - cmark_node *cont = container; - while (cont->parent) { - cont->parent->last_line_blank = false; - cont = cont->parent; + tmp = container; + while (tmp->parent) { + tmp->parent->last_line_blank = false; + tmp = tmp->parent; } if (parser->current != last_matched_container && container == last_matched_container && !parser->blank && parser->current->type == CMARK_NODE_PARAGRAPH && cmark_strbuf_len(&parser->current->string_content) > 0) { - add_line(parser->current, &input, parser); - } else { // not a lazy continuation - // finalize any blocks that were not matched and set cur to container: while (parser->current != last_matched_container) { parser->current = finalize(parser, parser->current); @@ -973,11 +1041,8 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, } if (container->type == CMARK_NODE_CODE_BLOCK) { - add_line(container, &input, parser); - } else if (container->type == CMARK_NODE_HTML_BLOCK) { - add_line(container, &input, parser); int matches_end_condition; @@ -1016,20 +1081,15 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, container = finalize(parser, container); assert(parser->current != NULL); } - } else if (parser->blank) { - // ??? do nothing - } else if (accepts_lines(container->type)) { - if (container->type == CMARK_NODE_HEADING && container->as.heading.setext == false) { chop_trailing_hashtags(&input); } S_advance_offset(parser, &input, parser->first_nonspace - parser->offset, false); add_line(container, &input, parser); - } else { // create paragraph container for line container = add_child(parser, container, CMARK_NODE_PARAGRAPH, @@ -1040,6 +1100,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, parser->current = container; } + finished: parser->last_line_length = input.len; if (parser->last_line_length &&