cmark
My personal build of CMark ✏️
commonmark.c (12853B)
1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <stdint.h> 5 #include <assert.h> 6 7 #include "config.h" 8 #include "cmark.h" 9 #include "node.h" 10 #include "buffer.h" 11 #include "utf8.h" 12 #include "scanners.h" 13 #include "render.h" 14 15 #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 16 #define LIT(s) renderer->out(renderer, s, false, LITERAL) 17 #define CR() renderer->cr(renderer) 18 #define BLANKLINE() renderer->blankline(renderer) 19 #define ENCODED_SIZE 20 20 #define LISTMARKER_SIZE 20 21 22 // Functions to convert cmark_nodes to commonmark strings. 23 24 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, 25 int32_t c, unsigned char nextc) { 26 bool needs_escaping = false; 27 bool follows_digit = 28 renderer->buffer->size > 0 && 29 cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); 30 char encoded[ENCODED_SIZE]; 31 int options = renderer->options; 32 33 needs_escaping = 34 c < 0x80 && escape != LITERAL && 35 ((escape == NORMAL && 36 (c < 0x20 || 37 c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || 38 c == '>' || c == '\\' || c == '`' || c == '!' || 39 (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || 40 ((CMARK_OPT_SMART & options) && 41 ((c == '-' && nextc == '-') || 42 (c == '.' && nextc == '.') || 43 c == '"' || c == '\'')) || 44 (renderer->begin_content && (c == '-' || c == '+' || c == '=') && 45 // begin_content doesn't get set to false til we've passed digits 46 // at the beginning of line, so... 47 !follows_digit) || 48 (renderer->begin_content && (c == '.' || c == ')') && follows_digit && 49 (nextc == 0 || cmark_isspace(nextc))))) || 50 (escape == URL && 51 (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || 52 c == ')' || c == '(')) || 53 (escape == TITLE && 54 (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); 55 56 if (needs_escaping) { 57 if (escape == URL && cmark_isspace(c)) { 58 // use percent encoding for spaces 59 snprintf(encoded, ENCODED_SIZE, "%%%2X", c); 60 cmark_strbuf_puts(renderer->buffer, encoded); 61 renderer->column += 3; 62 } else if (cmark_ispunct(c)) { 63 cmark_render_ascii(renderer, "\\"); 64 cmark_render_code_point(renderer, c); 65 } else { // render as entity 66 snprintf(encoded, ENCODED_SIZE, "&#%d;", c); 67 cmark_strbuf_puts(renderer->buffer, encoded); 68 renderer->column += strlen(encoded); 69 } 70 } else { 71 cmark_render_code_point(renderer, c); 72 } 73 } 74 75 static int longest_backtick_sequence(const char *code) { 76 int longest = 0; 77 int current = 0; 78 size_t i = 0; 79 size_t code_len = strlen(code); 80 while (i <= code_len) { 81 if (code[i] == '`') { 82 current++; 83 } else { 84 if (current > longest) { 85 longest = current; 86 } 87 current = 0; 88 } 89 i++; 90 } 91 return longest; 92 } 93 94 static int shortest_unused_backtick_sequence(const char *code) { 95 // note: if the shortest sequence is >= 32, this returns 32 96 // so as not to overflow the bit array. 97 uint32_t used = 1; 98 int current = 0; 99 size_t i = 0; 100 size_t code_len = strlen(code); 101 while (i <= code_len) { 102 if (code[i] == '`') { 103 current++; 104 } else { 105 if (current > 0 && current < 32) { 106 used |= (1U << current); 107 } 108 current = 0; 109 } 110 i++; 111 } 112 // return number of first bit that is 0: 113 i = 0; 114 while (i < 32 && used & 1) { 115 used = used >> 1; 116 i++; 117 } 118 return (int)i; 119 } 120 121 static bool is_autolink(cmark_node *node) { 122 const unsigned char *title; 123 const unsigned char *url; 124 cmark_node *link_text; 125 126 if (node->type != CMARK_NODE_LINK) { 127 return false; 128 } 129 130 url = node->as.link.url; 131 if (url == NULL || _scan_scheme(url) == 0) { 132 return false; 133 } 134 135 title = node->as.link.title; 136 // if it has a title, we can't treat it as an autolink: 137 if (title && title[0]) { 138 return false; 139 } 140 141 link_text = node->first_child; 142 if (link_text == NULL) { 143 return false; 144 } 145 cmark_consolidate_text_nodes(link_text); 146 if (strncmp((const char *)url, "mailto:", 7) == 0) { 147 url += 7; 148 } 149 return link_text->data != NULL && 150 strcmp((const char *)url, (char *)link_text->data) == 0; 151 } 152 153 // if node is a block node, returns node. 154 // otherwise returns first block-level node that is an ancestor of node. 155 // if there is no block-level ancestor, returns NULL. 156 static cmark_node *get_containing_block(cmark_node *node) { 157 while (node) { 158 if (node->type >= CMARK_NODE_FIRST_BLOCK && 159 node->type <= CMARK_NODE_LAST_BLOCK) { 160 return node; 161 } else { 162 node = node->parent; 163 } 164 } 165 return NULL; 166 } 167 168 static int S_render_node(cmark_renderer *renderer, cmark_node *node, 169 cmark_event_type ev_type, int options) { 170 cmark_node *tmp; 171 int list_number; 172 cmark_delim_type list_delim; 173 size_t numticks; 174 bool extra_spaces; 175 size_t i; 176 bool entering = (ev_type == CMARK_EVENT_ENTER); 177 const char *info, *code, *title; 178 char fencechar[2] = {'\0', '\0'}; 179 size_t code_len; 180 char listmarker[LISTMARKER_SIZE]; 181 const char *emph_delim; 182 bool first_in_list_item; 183 bufsize_t marker_width; 184 bool has_nonspace; 185 bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && 186 !(CMARK_OPT_HARDBREAKS & options); 187 188 // Don't adjust tight list status til we've started the list. 189 // Otherwise we loose the blank line between a paragraph and 190 // a following list. 191 if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { 192 tmp = get_containing_block(node); 193 renderer->in_tight_list_item = 194 tmp && // tmp might be NULL if there is no containing block 195 ((tmp->type == CMARK_NODE_ITEM && 196 cmark_node_get_list_tight(tmp->parent)) || 197 (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && 198 cmark_node_get_list_tight(tmp->parent->parent))); 199 } 200 201 switch (node->type) { 202 case CMARK_NODE_DOCUMENT: 203 break; 204 205 case CMARK_NODE_BLOCK_QUOTE: 206 if (entering) { 207 LIT("> "); 208 renderer->begin_content = true; 209 cmark_strbuf_puts(renderer->prefix, "> "); 210 } else { 211 cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); 212 BLANKLINE(); 213 } 214 break; 215 216 case CMARK_NODE_LIST: 217 if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { 218 // this ensures that a following indented code block or list will be 219 // inteprereted correctly. 220 CR(); 221 LIT("<!-- end list -->"); 222 BLANKLINE(); 223 } 224 break; 225 226 case CMARK_NODE_ITEM: 227 if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 228 marker_width = 4; 229 } else { 230 list_number = cmark_node_get_list_start(node->parent); 231 list_delim = cmark_node_get_list_delim(node->parent); 232 tmp = node; 233 while (tmp->prev) { 234 tmp = tmp->prev; 235 list_number += 1; 236 } 237 // we ensure a width of at least 4 so 238 // we get nice transition from single digits 239 // to double 240 snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, 241 list_delim == CMARK_PAREN_DELIM ? ")" : ".", 242 list_number < 10 ? " " : " "); 243 marker_width = strlen(listmarker); 244 } 245 if (entering) { 246 if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { 247 LIT(" - "); 248 renderer->begin_content = true; 249 } else { 250 LIT(listmarker); 251 renderer->begin_content = true; 252 } 253 for (i = marker_width; i--;) { 254 cmark_strbuf_putc(renderer->prefix, ' '); 255 } 256 } else { 257 cmark_strbuf_truncate(renderer->prefix, 258 renderer->prefix->size - marker_width); 259 CR(); 260 } 261 break; 262 263 case CMARK_NODE_HEADING: 264 if (entering) { 265 for (i = cmark_node_get_heading_level(node); i > 0; i--) { 266 LIT("#"); 267 } 268 LIT(" "); 269 renderer->begin_content = true; 270 renderer->no_linebreaks = true; 271 } else { 272 renderer->no_linebreaks = false; 273 BLANKLINE(); 274 } 275 break; 276 277 case CMARK_NODE_CODE_BLOCK: 278 279 first_in_list_item = node->prev == NULL && node->parent && 280 node->parent->type == CMARK_NODE_ITEM; 281 282 if (!first_in_list_item) { 283 BLANKLINE(); 284 } 285 info = cmark_node_get_fence_info(node); 286 fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; 287 code = cmark_node_get_literal(node); 288 289 numticks = longest_backtick_sequence(code) + 1; 290 if (numticks < 3) { 291 numticks = 3; 292 } 293 for (i = 0; i < numticks; i++) { 294 LIT(fencechar); 295 } 296 LIT(" "); 297 OUT(info, false, LITERAL); 298 CR(); 299 OUT(cmark_node_get_literal(node), false, LITERAL); 300 CR(); 301 for (i = 0; i < numticks; i++) { 302 LIT(fencechar); 303 } 304 305 BLANKLINE(); 306 break; 307 308 case CMARK_NODE_HTML_BLOCK: 309 BLANKLINE(); 310 OUT(cmark_node_get_literal(node), false, LITERAL); 311 BLANKLINE(); 312 break; 313 314 case CMARK_NODE_CUSTOM_BLOCK: 315 BLANKLINE(); 316 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 317 false, LITERAL); 318 BLANKLINE(); 319 break; 320 321 case CMARK_NODE_THEMATIC_BREAK: 322 BLANKLINE(); 323 LIT("-----"); 324 BLANKLINE(); 325 break; 326 327 case CMARK_NODE_PARAGRAPH: 328 if (!entering) { 329 BLANKLINE(); 330 } 331 break; 332 333 case CMARK_NODE_TEXT: 334 OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 335 break; 336 337 case CMARK_NODE_LINEBREAK: 338 if (!(CMARK_OPT_HARDBREAKS & options)) { 339 LIT(" "); 340 } 341 CR(); 342 break; 343 344 case CMARK_NODE_SOFTBREAK: 345 if (CMARK_OPT_HARDBREAKS & options) { 346 LIT(" "); 347 CR(); 348 } else if (!renderer->no_linebreaks && renderer->width == 0 && 349 !(CMARK_OPT_HARDBREAKS & options) && 350 !(CMARK_OPT_NOBREAKS & options)) { 351 CR(); 352 } else { 353 OUT(" ", allow_wrap, LITERAL); 354 } 355 break; 356 357 case CMARK_NODE_CODE: 358 code = cmark_node_get_literal(node); 359 code_len = strlen(code); 360 numticks = shortest_unused_backtick_sequence(code); 361 has_nonspace = false; 362 for (i=0; i < code_len; i++) { 363 if (code[i] != ' ') { 364 has_nonspace = true; 365 break; 366 } 367 } 368 extra_spaces = code_len == 0 || 369 code[0] == '`' || code[code_len - 1] == '`' || 370 (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); 371 for (i = 0; i < numticks; i++) { 372 LIT("`"); 373 } 374 if (extra_spaces) { 375 LIT(" "); 376 } 377 OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); 378 if (extra_spaces) { 379 LIT(" "); 380 } 381 for (i = 0; i < numticks; i++) { 382 LIT("`"); 383 } 384 break; 385 386 case CMARK_NODE_HTML_INLINE: 387 OUT(cmark_node_get_literal(node), false, LITERAL); 388 break; 389 390 case CMARK_NODE_CUSTOM_INLINE: 391 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 392 false, LITERAL); 393 break; 394 395 case CMARK_NODE_STRONG: 396 if (entering) { 397 LIT("**"); 398 } else { 399 LIT("**"); 400 } 401 break; 402 403 case CMARK_NODE_EMPH: 404 // If we have EMPH(EMPH(x)), we need to use *_x_* 405 // because **x** is STRONG(x): 406 if (node->parent && node->parent->type == CMARK_NODE_EMPH && 407 node->next == NULL && node->prev == NULL) { 408 emph_delim = "_"; 409 } else { 410 emph_delim = "*"; 411 } 412 if (entering) { 413 LIT(emph_delim); 414 } else { 415 LIT(emph_delim); 416 } 417 break; 418 419 case CMARK_NODE_LINK: 420 if (is_autolink(node)) { 421 if (entering) { 422 LIT("<"); 423 if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { 424 LIT((const char *)cmark_node_get_url(node) + 7); 425 } else { 426 LIT((const char *)cmark_node_get_url(node)); 427 } 428 LIT(">"); 429 // return signal to skip contents of node... 430 return 0; 431 } 432 } else { 433 if (entering) { 434 LIT("["); 435 } else { 436 LIT("]("); 437 OUT(cmark_node_get_url(node), false, URL); 438 title = cmark_node_get_title(node); 439 if (strlen(title) > 0) { 440 LIT(" \""); 441 OUT(title, false, TITLE); 442 LIT("\""); 443 } 444 LIT(")"); 445 } 446 } 447 break; 448 449 case CMARK_NODE_IMAGE: 450 if (entering) { 451 LIT("!["); 452 } else { 453 LIT("]("); 454 OUT(cmark_node_get_url(node), false, URL); 455 title = cmark_node_get_title(node); 456 if (strlen(title) > 0) { 457 OUT(" \"", allow_wrap, LITERAL); 458 OUT(title, false, TITLE); 459 LIT("\""); 460 } 461 LIT(")"); 462 } 463 break; 464 465 default: 466 assert(false); 467 break; 468 } 469 470 return 1; 471 } 472 473 char *cmark_render_commonmark(cmark_node *root, int options, int width) { 474 if (options & CMARK_OPT_HARDBREAKS) { 475 // disable breaking on width, since it has 476 // a different meaning with OPT_HARDBREAKS 477 width = 0; 478 } 479 return cmark_render(root, options, width, outc, S_render_node); 480 }