cmark
My personal build of CMark ✏️
inlines.c (42610B)
1 #include <stdlib.h> 2 #include <string.h> 3 #include <stdio.h> 4 5 #include "cmark_ctype.h" 6 #include "config.h" 7 #include "node.h" 8 #include "parser.h" 9 #include "references.h" 10 #include "cmark.h" 11 #include "houdini.h" 12 #include "utf8.h" 13 #include "scanners.h" 14 #include "inlines.h" 15 16 static const char *EMDASH = "\xE2\x80\x94"; 17 static const char *ENDASH = "\xE2\x80\x93"; 18 static const char *ELLIPSES = "\xE2\x80\xA6"; 19 static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C"; 20 static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; 21 static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98"; 22 static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; 23 24 // Macros for creating various kinds of simple. 25 #define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK) 26 #define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK) 27 #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) 28 #define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG) 29 30 #define MAXBACKTICKS 1000 31 32 typedef struct delimiter { 33 struct delimiter *previous; 34 struct delimiter *next; 35 cmark_node *inl_text; 36 bufsize_t length; 37 unsigned char delim_char; 38 bool can_open; 39 bool can_close; 40 } delimiter; 41 42 typedef struct bracket { 43 struct bracket *previous; 44 struct delimiter *previous_delimiter; 45 cmark_node *inl_text; 46 bufsize_t position; 47 bool image; 48 bool active; 49 bool bracket_after; 50 } bracket; 51 52 typedef struct { 53 cmark_mem *mem; 54 cmark_chunk input; 55 int line; 56 bufsize_t pos; 57 int block_offset; 58 int column_offset; 59 cmark_reference_map *refmap; 60 delimiter *last_delim; 61 bracket *last_bracket; 62 bufsize_t backticks[MAXBACKTICKS + 1]; 63 bool scanned_for_backticks; 64 } subject; 65 66 static CMARK_INLINE bool S_is_line_end_char(char c) { 67 return (c == '\n' || c == '\r'); 68 } 69 70 static delimiter *S_insert_emph(subject *subj, delimiter *opener, 71 delimiter *closer); 72 73 static int parse_inline(subject *subj, cmark_node *parent, int options); 74 75 static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, 76 cmark_chunk *chunk, cmark_reference_map *refmap); 77 static bufsize_t subject_find_special_char(subject *subj, int options); 78 79 // Create an inline with a literal string value. 80 static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, 81 int start_column, int end_column) { 82 cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); 83 e->mem = subj->mem; 84 e->type = (uint16_t)t; 85 e->start_line = e->end_line = subj->line; 86 // columns are 1 based. 87 e->start_column = start_column + 1 + subj->column_offset + subj->block_offset; 88 e->end_column = end_column + 1 + subj->column_offset + subj->block_offset; 89 return e; 90 } 91 92 // Create an inline with no value. 93 static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) { 94 cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); 95 e->mem = mem; 96 e->type = t; 97 return e; 98 } 99 100 static cmark_node *make_str(subject *subj, int sc, int ec, cmark_chunk s) { 101 cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec); 102 e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + 1); 103 if (s.data != NULL) { 104 memcpy(e->data, s.data, s.len); 105 } 106 e->data[s.len] = 0; 107 e->len = s.len; 108 return e; 109 } 110 111 static cmark_node *make_str_from_buf(subject *subj, int sc, int ec, 112 cmark_strbuf *buf) { 113 cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec); 114 e->len = buf->size; 115 e->data = cmark_strbuf_detach(buf); 116 return e; 117 } 118 119 // Like make_str, but parses entities. 120 static cmark_node *make_str_with_entities(subject *subj, 121 int start_column, int end_column, 122 cmark_chunk *content) { 123 cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem); 124 125 if (houdini_unescape_html(&unescaped, content->data, content->len)) { 126 return make_str_from_buf(subj, start_column, end_column, &unescaped); 127 } else { 128 return make_str(subj, start_column, end_column, *content); 129 } 130 } 131 132 // Like cmark_node_append_child but without costly sanity checks. 133 // Assumes that child was newly created. 134 static void append_child(cmark_node *node, cmark_node *child) { 135 cmark_node *old_last_child = node->last_child; 136 137 child->next = NULL; 138 child->prev = old_last_child; 139 child->parent = node; 140 node->last_child = child; 141 142 if (old_last_child) { 143 old_last_child->next = child; 144 } else { 145 // Also set first_child if node previously had no children. 146 node->first_child = child; 147 } 148 } 149 150 // Duplicate a chunk by creating a copy of the buffer not by reusing the 151 // buffer like cmark_chunk_dup does. 152 static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) { 153 if (src == NULL) { 154 return NULL; 155 } 156 size_t len = strlen((char *)src); 157 unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1); 158 memcpy(data, src, len + 1); 159 return data; 160 } 161 162 static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, 163 int is_email) { 164 cmark_strbuf buf = CMARK_BUF_INIT(mem); 165 166 cmark_chunk_trim(url); 167 168 if (is_email) 169 cmark_strbuf_puts(&buf, "mailto:"); 170 171 houdini_unescape_html_f(&buf, url->data, url->len); 172 return cmark_strbuf_detach(&buf); 173 } 174 175 static CMARK_INLINE cmark_node *make_autolink(subject *subj, 176 int start_column, int end_column, 177 cmark_chunk url, int is_email) { 178 cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK); 179 link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email); 180 link->as.link.title = NULL; 181 link->start_line = link->end_line = subj->line; 182 link->start_column = start_column + 1; 183 link->end_column = end_column + 1; 184 append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); 185 return link; 186 } 187 188 static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, 189 cmark_chunk *chunk, cmark_reference_map *refmap) { 190 int i; 191 e->mem = mem; 192 e->input = *chunk; 193 e->line = line_number; 194 e->pos = 0; 195 e->block_offset = block_offset; 196 e->column_offset = 0; 197 e->refmap = refmap; 198 e->last_delim = NULL; 199 e->last_bracket = NULL; 200 for (i = 0; i <= MAXBACKTICKS; i++) { 201 e->backticks[i] = 0; 202 } 203 e->scanned_for_backticks = false; 204 } 205 206 static CMARK_INLINE int isbacktick(int c) { return (c == '`'); } 207 208 static CMARK_INLINE unsigned char peek_char(subject *subj) { 209 // NULL bytes should have been stripped out by now. If they're 210 // present, it's a programming error: 211 assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0)); 212 return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; 213 } 214 215 static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) { 216 return subj->input.data[pos]; 217 } 218 219 // Return true if there are more characters in the subject. 220 static CMARK_INLINE int is_eof(subject *subj) { 221 return (subj->pos >= subj->input.len); 222 } 223 224 // Advance the subject. Doesn't check for eof. 225 #define advance(subj) (subj)->pos += 1 226 227 static CMARK_INLINE bool skip_spaces(subject *subj) { 228 bool skipped = false; 229 while (peek_char(subj) == ' ' || peek_char(subj) == '\t') { 230 advance(subj); 231 skipped = true; 232 } 233 return skipped; 234 } 235 236 static CMARK_INLINE bool skip_line_end(subject *subj) { 237 bool seen_line_end_char = false; 238 if (peek_char(subj) == '\r') { 239 advance(subj); 240 seen_line_end_char = true; 241 } 242 if (peek_char(subj) == '\n') { 243 advance(subj); 244 seen_line_end_char = true; 245 } 246 return seen_line_end_char || is_eof(subj); 247 } 248 249 // Take characters while a predicate holds, and return a string. 250 static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) { 251 unsigned char c; 252 bufsize_t startpos = subj->pos; 253 bufsize_t len = 0; 254 255 while ((c = peek_char(subj)) && (*f)(c)) { 256 advance(subj); 257 len++; 258 } 259 260 return cmark_chunk_dup(&subj->input, startpos, len); 261 } 262 263 // Return the number of newlines in a given span of text in a subject. If 264 // the number is greater than zero, also return the number of characters 265 // between the last newline and the end of the span in `since_newline`. 266 static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { 267 int nls = 0; 268 int since_nl = 0; 269 270 while (len--) { 271 if (subj->input.data[from++] == '\n') { 272 ++nls; 273 since_nl = 0; 274 } else { 275 ++since_nl; 276 } 277 } 278 279 if (!nls) 280 return 0; 281 282 *since_newline = since_nl; 283 return nls; 284 } 285 286 // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and 287 // `column_offset` according to the number of newlines in a just-matched span 288 // of text in `subj`. 289 static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) { 290 if (!(options & CMARK_OPT_SOURCEPOS)) { 291 return; 292 } 293 294 int since_newline; 295 int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); 296 if (newlines) { 297 subj->line += newlines; 298 node->end_line += newlines; 299 node->end_column = since_newline; 300 subj->column_offset = -subj->pos + since_newline + extra; 301 } 302 } 303 304 // Try to process a backtick code span that began with a 305 // span of ticks of length openticklength length (already 306 // parsed). Return 0 if you don't find matching closing 307 // backticks, otherwise return the position in the subject 308 // after the closing backticks. 309 static bufsize_t scan_to_closing_backticks(subject *subj, 310 bufsize_t openticklength) { 311 312 bool found = false; 313 if (openticklength > MAXBACKTICKS) { 314 // we limit backtick string length because of the array subj->backticks: 315 return 0; 316 } 317 if (subj->scanned_for_backticks && 318 subj->backticks[openticklength] <= subj->pos) { 319 // return if we already know there's no closer 320 return 0; 321 } 322 while (!found) { 323 // read non backticks 324 unsigned char c; 325 while ((c = peek_char(subj)) && c != '`') { 326 advance(subj); 327 } 328 if (is_eof(subj)) { 329 break; 330 } 331 bufsize_t numticks = 0; 332 while (peek_char(subj) == '`') { 333 advance(subj); 334 numticks++; 335 } 336 // store position of ender 337 if (numticks <= MAXBACKTICKS) { 338 subj->backticks[numticks] = subj->pos - numticks; 339 } 340 if (numticks == openticklength) { 341 return (subj->pos); 342 } 343 } 344 // got through whole input without finding closer 345 subj->scanned_for_backticks = true; 346 return 0; 347 } 348 349 // Destructively modify string, converting newlines to 350 // spaces, then removing a single leading + trailing space, 351 // unless the code span consists entirely of space characters. 352 static void S_normalize_code(cmark_strbuf *s) { 353 bufsize_t r, w; 354 bool contains_nonspace = false; 355 356 for (r = 0, w = 0; r < s->size; ++r) { 357 switch (s->ptr[r]) { 358 case '\r': 359 if (s->ptr[r + 1] != '\n') { 360 s->ptr[w++] = ' '; 361 } 362 break; 363 case '\n': 364 s->ptr[w++] = ' '; 365 break; 366 default: 367 s->ptr[w++] = s->ptr[r]; 368 } 369 if (s->ptr[r] != ' ') { 370 contains_nonspace = true; 371 } 372 } 373 374 // begins and ends with space? 375 if (contains_nonspace && 376 s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') { 377 cmark_strbuf_drop(s, 1); 378 cmark_strbuf_truncate(s, w - 2); 379 } else { 380 cmark_strbuf_truncate(s, w); 381 } 382 383 } 384 385 386 // Parse backtick code section or raw backticks, return an inline. 387 // Assumes that the subject has a backtick at the current position. 388 static cmark_node *handle_backticks(subject *subj, int options) { 389 cmark_chunk openticks = take_while(subj, isbacktick); 390 bufsize_t startpos = subj->pos; 391 bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); 392 393 if (endpos == 0) { // not found 394 subj->pos = startpos; // rewind 395 return make_str(subj, subj->pos, subj->pos, openticks); 396 } else { 397 cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); 398 399 cmark_strbuf_set(&buf, subj->input.data + startpos, 400 endpos - startpos - openticks.len); 401 S_normalize_code(&buf); 402 403 cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos, 404 endpos - openticks.len - 1); 405 node->len = buf.size; 406 node->data = cmark_strbuf_detach(&buf); 407 adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options); 408 return node; 409 } 410 } 411 412 413 // Scan ***, **, or * and return number scanned, or 0. 414 // Advances position. 415 static int scan_delims(subject *subj, unsigned char c, bool *can_open, 416 bool *can_close) { 417 int numdelims = 0; 418 bufsize_t before_char_pos; 419 int32_t after_char = 0; 420 int32_t before_char = 0; 421 int len; 422 bool left_flanking, right_flanking; 423 424 if (subj->pos == 0) { 425 before_char = 10; 426 } else { 427 before_char_pos = subj->pos - 1; 428 // walk back to the beginning of the UTF_8 sequence: 429 while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) { 430 before_char_pos -= 1; 431 } 432 len = cmark_utf8proc_iterate(subj->input.data + before_char_pos, 433 subj->pos - before_char_pos, &before_char); 434 if (len == -1) { 435 before_char = 10; 436 } 437 } 438 439 if (c == '\'' || c == '"') { 440 numdelims++; 441 advance(subj); // limit to 1 delim for quotes 442 } else { 443 while (peek_char(subj) == c) { 444 numdelims++; 445 advance(subj); 446 } 447 } 448 449 len = cmark_utf8proc_iterate(subj->input.data + subj->pos, 450 subj->input.len - subj->pos, &after_char); 451 if (len == -1) { 452 after_char = 10; 453 } 454 left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && 455 (!cmark_utf8proc_is_punctuation(after_char) || 456 cmark_utf8proc_is_space(before_char) || 457 cmark_utf8proc_is_punctuation(before_char)); 458 right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) && 459 (!cmark_utf8proc_is_punctuation(before_char) || 460 cmark_utf8proc_is_space(after_char) || 461 cmark_utf8proc_is_punctuation(after_char)); 462 if (c == '_') { 463 *can_open = left_flanking && 464 (!right_flanking || cmark_utf8proc_is_punctuation(before_char)); 465 *can_close = right_flanking && 466 (!left_flanking || cmark_utf8proc_is_punctuation(after_char)); 467 } else if (c == '\'' || c == '"') { 468 *can_open = left_flanking && 469 (!right_flanking || before_char == '(' || before_char == '[') && 470 before_char != ']' && before_char != ')'; 471 *can_close = right_flanking; 472 } else { 473 *can_open = left_flanking; 474 *can_close = right_flanking; 475 } 476 return numdelims; 477 } 478 479 /* 480 static void print_delimiters(subject *subj) 481 { 482 delimiter *delim; 483 delim = subj->last_delim; 484 while (delim != NULL) { 485 printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n", 486 (void*)delim, delim->delim_char, 487 delim->can_open, delim->can_close, 488 (void*)delim->next, (void*)delim->previous); 489 delim = delim->previous; 490 } 491 } 492 */ 493 494 static void remove_delimiter(subject *subj, delimiter *delim) { 495 if (delim == NULL) 496 return; 497 if (delim->next == NULL) { 498 // end of list: 499 assert(delim == subj->last_delim); 500 subj->last_delim = delim->previous; 501 } else { 502 delim->next->previous = delim->previous; 503 } 504 if (delim->previous != NULL) { 505 delim->previous->next = delim->next; 506 } 507 subj->mem->free(delim); 508 } 509 510 static void pop_bracket(subject *subj) { 511 bracket *b; 512 if (subj->last_bracket == NULL) 513 return; 514 b = subj->last_bracket; 515 subj->last_bracket = subj->last_bracket->previous; 516 subj->mem->free(b); 517 } 518 519 static void push_delimiter(subject *subj, unsigned char c, bool can_open, 520 bool can_close, cmark_node *inl_text) { 521 delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter)); 522 delim->delim_char = c; 523 delim->can_open = can_open; 524 delim->can_close = can_close; 525 delim->inl_text = inl_text; 526 delim->length = inl_text->len; 527 delim->previous = subj->last_delim; 528 delim->next = NULL; 529 if (delim->previous != NULL) { 530 delim->previous->next = delim; 531 } 532 subj->last_delim = delim; 533 } 534 535 static void push_bracket(subject *subj, bool image, cmark_node *inl_text) { 536 bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket)); 537 if (subj->last_bracket != NULL) { 538 subj->last_bracket->bracket_after = true; 539 } 540 b->image = image; 541 b->active = true; 542 b->inl_text = inl_text; 543 b->previous = subj->last_bracket; 544 b->previous_delimiter = subj->last_delim; 545 b->position = subj->pos; 546 b->bracket_after = false; 547 subj->last_bracket = b; 548 } 549 550 // Assumes the subject has a c at the current position. 551 static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { 552 bufsize_t numdelims; 553 cmark_node *inl_text; 554 bool can_open, can_close; 555 cmark_chunk contents; 556 557 numdelims = scan_delims(subj, c, &can_open, &can_close); 558 559 if (c == '\'' && smart) { 560 contents = cmark_chunk_literal(RIGHTSINGLEQUOTE); 561 } else if (c == '"' && smart) { 562 contents = 563 cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE); 564 } else { 565 contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims); 566 } 567 568 inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents); 569 570 if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) { 571 push_delimiter(subj, c, can_open, can_close, inl_text); 572 } 573 574 return inl_text; 575 } 576 577 // Assumes we have a hyphen at the current position. 578 static cmark_node *handle_hyphen(subject *subj, bool smart) { 579 int startpos = subj->pos; 580 581 advance(subj); 582 583 if (!smart || peek_char(subj) != '-') { 584 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-")); 585 } 586 587 while (smart && peek_char(subj) == '-') { 588 advance(subj); 589 } 590 591 int numhyphens = subj->pos - startpos; 592 int en_count = 0; 593 int em_count = 0; 594 int i; 595 cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); 596 597 if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes 598 em_count = numhyphens / 3; 599 } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes 600 en_count = numhyphens / 2; 601 } else if (numhyphens % 3 == 2) { // use one en dash at end 602 en_count = 1; 603 em_count = (numhyphens - 2) / 3; 604 } else { // use two en dashes at the end 605 en_count = 2; 606 em_count = (numhyphens - 4) / 3; 607 } 608 609 for (i = em_count; i > 0; i--) { 610 cmark_strbuf_puts(&buf, EMDASH); 611 } 612 613 for (i = en_count; i > 0; i--) { 614 cmark_strbuf_puts(&buf, ENDASH); 615 } 616 617 return make_str_from_buf(subj, startpos, subj->pos - 1, &buf); 618 } 619 620 // Assumes we have a period at the current position. 621 static cmark_node *handle_period(subject *subj, bool smart) { 622 advance(subj); 623 if (smart && peek_char(subj) == '.') { 624 advance(subj); 625 if (peek_char(subj) == '.') { 626 advance(subj); 627 return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES)); 628 } else { 629 return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("..")); 630 } 631 } else { 632 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal(".")); 633 } 634 } 635 636 static void process_emphasis(subject *subj, delimiter *stack_bottom) { 637 delimiter *closer = subj->last_delim; 638 delimiter *opener; 639 delimiter *old_closer; 640 bool opener_found; 641 int openers_bottom_index = 0; 642 delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom, 643 stack_bottom, stack_bottom, stack_bottom}; 644 645 // move back to first relevant delim. 646 while (closer != NULL && closer->previous != stack_bottom) { 647 closer = closer->previous; 648 } 649 650 // now move forward, looking for closers, and handling each 651 while (closer != NULL) { 652 if (closer->can_close) { 653 switch (closer->delim_char) { 654 case '"': 655 openers_bottom_index = 0; 656 break; 657 case '\'': 658 openers_bottom_index = 1; 659 break; 660 case '_': 661 openers_bottom_index = 2; 662 break; 663 case '*': 664 openers_bottom_index = 3 + (closer->length % 3); 665 break; 666 default: 667 assert(false); 668 } 669 670 // Now look backwards for first matching opener: 671 opener = closer->previous; 672 opener_found = false; 673 while (opener != NULL && opener != openers_bottom[openers_bottom_index]) { 674 if (opener->can_open && opener->delim_char == closer->delim_char) { 675 // interior closer of size 2 can't match opener of size 1 676 // or of size 1 can't match 2 677 if (!(closer->can_open || opener->can_close) || 678 closer->length % 3 == 0 || 679 (opener->length + closer->length) % 3 != 0) { 680 opener_found = true; 681 break; 682 } 683 } 684 opener = opener->previous; 685 } 686 old_closer = closer; 687 if (closer->delim_char == '*' || closer->delim_char == '_') { 688 if (opener_found) { 689 closer = S_insert_emph(subj, opener, closer); 690 } else { 691 closer = closer->next; 692 } 693 } else if (closer->delim_char == '\'') { 694 cmark_node_set_literal(closer->inl_text, RIGHTSINGLEQUOTE); 695 if (opener_found) { 696 cmark_node_set_literal(opener->inl_text, LEFTSINGLEQUOTE); 697 } 698 closer = closer->next; 699 } else if (closer->delim_char == '"') { 700 cmark_node_set_literal(closer->inl_text, RIGHTDOUBLEQUOTE); 701 if (opener_found) { 702 cmark_node_set_literal(opener->inl_text, LEFTDOUBLEQUOTE); 703 } 704 closer = closer->next; 705 } 706 if (!opener_found) { 707 // set lower bound for future searches for openers 708 openers_bottom[openers_bottom_index] = old_closer->previous; 709 if (!old_closer->can_open) { 710 // we can remove a closer that can't be an 711 // opener, once we've seen there's no 712 // matching opener: 713 remove_delimiter(subj, old_closer); 714 } 715 } 716 } else { 717 closer = closer->next; 718 } 719 } 720 // free all delimiters in list until stack_bottom: 721 while (subj->last_delim != NULL && subj->last_delim != stack_bottom) { 722 remove_delimiter(subj, subj->last_delim); 723 } 724 } 725 726 static delimiter *S_insert_emph(subject *subj, delimiter *opener, 727 delimiter *closer) { 728 delimiter *delim, *tmp_delim; 729 bufsize_t use_delims; 730 cmark_node *opener_inl = opener->inl_text; 731 cmark_node *closer_inl = closer->inl_text; 732 bufsize_t opener_num_chars = opener_inl->len; 733 bufsize_t closer_num_chars = closer_inl->len; 734 cmark_node *tmp, *tmpnext, *emph; 735 736 // calculate the actual number of characters used from this closer 737 use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1; 738 739 // remove used characters from associated inlines. 740 opener_num_chars -= use_delims; 741 closer_num_chars -= use_delims; 742 opener_inl->len = opener_num_chars; 743 opener_inl->data[opener_num_chars] = 0; 744 closer_inl->len = closer_num_chars; 745 closer_inl->data[closer_num_chars] = 0; 746 747 // free delimiters between opener and closer 748 delim = closer->previous; 749 while (delim != NULL && delim != opener) { 750 tmp_delim = delim->previous; 751 remove_delimiter(subj, delim); 752 delim = tmp_delim; 753 } 754 755 // create new emph or strong, and splice it in to our inlines 756 // between the opener and closer 757 emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem); 758 759 tmp = opener_inl->next; 760 while (tmp && tmp != closer_inl) { 761 tmpnext = tmp->next; 762 cmark_node_unlink(tmp); 763 append_child(emph, tmp); 764 tmp = tmpnext; 765 } 766 cmark_node_insert_after(opener_inl, emph); 767 768 emph->start_line = opener_inl->start_line; 769 emph->end_line = closer_inl->end_line; 770 emph->start_column = opener_inl->start_column; 771 emph->end_column = closer_inl->end_column; 772 773 // if opener has 0 characters, remove it and its associated inline 774 if (opener_num_chars == 0) { 775 cmark_node_free(opener_inl); 776 remove_delimiter(subj, opener); 777 } 778 779 // if closer has 0 characters, remove it and its associated inline 780 if (closer_num_chars == 0) { 781 // remove empty closer inline 782 cmark_node_free(closer_inl); 783 // remove closer from list 784 tmp_delim = closer->next; 785 remove_delimiter(subj, closer); 786 closer = tmp_delim; 787 } 788 789 return closer; 790 } 791 792 // Parse backslash-escape or just a backslash, returning an inline. 793 static cmark_node *handle_backslash(subject *subj) { 794 advance(subj); 795 unsigned char nextchar = peek_char(subj); 796 if (cmark_ispunct( 797 nextchar)) { // only ascii symbols and newline can be escaped 798 advance(subj); 799 return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); 800 } else if (!is_eof(subj) && skip_line_end(subj)) { 801 return make_linebreak(subj->mem); 802 } else { 803 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\")); 804 } 805 } 806 807 // Parse an entity or a regular "&" string. 808 // Assumes the subject has an '&' character at the current position. 809 static cmark_node *handle_entity(subject *subj) { 810 cmark_strbuf ent = CMARK_BUF_INIT(subj->mem); 811 bufsize_t len; 812 813 advance(subj); 814 815 len = houdini_unescape_ent(&ent, subj->input.data + subj->pos, 816 subj->input.len - subj->pos); 817 818 if (len <= 0) 819 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&")); 820 821 subj->pos += len; 822 return make_str_from_buf(subj, subj->pos - 1 - len, subj->pos - 1, &ent); 823 } 824 825 // Clean a URL: remove surrounding whitespace, and remove \ that escape 826 // punctuation. 827 unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { 828 cmark_strbuf buf = CMARK_BUF_INIT(mem); 829 830 cmark_chunk_trim(url); 831 832 houdini_unescape_html_f(&buf, url->data, url->len); 833 834 cmark_strbuf_unescape(&buf); 835 return cmark_strbuf_detach(&buf); 836 } 837 838 unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { 839 cmark_strbuf buf = CMARK_BUF_INIT(mem); 840 unsigned char first, last; 841 842 if (title->len == 0) { 843 return NULL; 844 } 845 846 first = title->data[0]; 847 last = title->data[title->len - 1]; 848 849 // remove surrounding quotes if any: 850 if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || 851 (first == '"' && last == '"')) { 852 houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); 853 } else { 854 houdini_unescape_html_f(&buf, title->data, title->len); 855 } 856 857 cmark_strbuf_unescape(&buf); 858 return cmark_strbuf_detach(&buf); 859 } 860 861 // Parse an autolink or HTML tag. 862 // Assumes the subject has a '<' character at the current position. 863 static cmark_node *handle_pointy_brace(subject *subj, int options) { 864 bufsize_t matchlen = 0; 865 cmark_chunk contents; 866 867 advance(subj); // advance past first < 868 869 // first try to match a URL autolink 870 matchlen = scan_autolink_uri(&subj->input, subj->pos); 871 if (matchlen > 0) { 872 contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); 873 subj->pos += matchlen; 874 875 return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0); 876 } 877 878 // next try to match an email autolink 879 matchlen = scan_autolink_email(&subj->input, subj->pos); 880 if (matchlen > 0) { 881 contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); 882 subj->pos += matchlen; 883 884 return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1); 885 } 886 887 // finally, try to match an html tag 888 matchlen = scan_html_tag(&subj->input, subj->pos); 889 if (matchlen > 0) { 890 const unsigned char *src = subj->input.data + subj->pos - 1; 891 bufsize_t len = matchlen + 1; 892 subj->pos += matchlen; 893 cmark_node *node = make_literal(subj, CMARK_NODE_HTML_INLINE, 894 subj->pos - matchlen - 1, subj->pos - 1); 895 node->data = (unsigned char *)subj->mem->realloc(NULL, len + 1); 896 memcpy(node->data, src, len); 897 node->data[len] = 0; 898 node->len = len; 899 adjust_subj_node_newlines(subj, node, matchlen, 1, options); 900 return node; 901 } 902 903 // if nothing matches, just return the opening <: 904 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<")); 905 } 906 907 // Parse a link label. Returns 1 if successful. 908 // Note: unescaped brackets are not allowed in labels. 909 // The label begins with `[` and ends with the first `]` character 910 // encountered. Backticks in labels do not start code spans. 911 static int link_label(subject *subj, cmark_chunk *raw_label) { 912 bufsize_t startpos = subj->pos; 913 int length = 0; 914 unsigned char c; 915 916 // advance past [ 917 if (peek_char(subj) == '[') { 918 advance(subj); 919 } else { 920 return 0; 921 } 922 923 while ((c = peek_char(subj)) && c != '[' && c != ']') { 924 if (c == '\\') { 925 advance(subj); 926 length++; 927 if (cmark_ispunct(peek_char(subj))) { 928 advance(subj); 929 length++; 930 } 931 } else { 932 advance(subj); 933 length++; 934 } 935 if (length > MAX_LINK_LABEL_LENGTH) { 936 goto noMatch; 937 } 938 } 939 940 if (c == ']') { // match found 941 *raw_label = 942 cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); 943 cmark_chunk_trim(raw_label); 944 advance(subj); // advance past ] 945 return 1; 946 } 947 948 noMatch: 949 subj->pos = startpos; // rewind 950 return 0; 951 } 952 953 static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, 954 cmark_chunk *output) { 955 bufsize_t i = offset; 956 size_t nb_p = 0; 957 958 while (i < input->len) { 959 if (input->data[i] == '\\' && 960 i + 1 < input-> len && 961 cmark_ispunct(input->data[i+1])) 962 i += 2; 963 else if (input->data[i] == '(') { 964 ++nb_p; 965 ++i; 966 if (nb_p > 32) 967 return -1; 968 } else if (input->data[i] == ')') { 969 if (nb_p == 0) 970 break; 971 --nb_p; 972 ++i; 973 } else if (cmark_isspace(input->data[i])) { 974 if (i == offset) { 975 return -1; 976 } 977 break; 978 } else { 979 ++i; 980 } 981 } 982 983 if (i >= input->len || nb_p != 0) 984 return -1; 985 986 { 987 cmark_chunk result = {input->data + offset, i - offset}; 988 *output = result; 989 } 990 return i - offset; 991 } 992 993 static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, 994 cmark_chunk *output) { 995 bufsize_t i = offset; 996 997 if (i < input->len && input->data[i] == '<') { 998 ++i; 999 while (i < input->len) { 1000 if (input->data[i] == '>') { 1001 ++i; 1002 break; 1003 } else if (input->data[i] == '\\') 1004 i += 2; 1005 else if (input->data[i] == '\n' || input->data[i] == '<') 1006 return -1; 1007 else 1008 ++i; 1009 } 1010 } else { 1011 return manual_scan_link_url_2(input, offset, output); 1012 } 1013 1014 if (i >= input->len) 1015 return -1; 1016 1017 { 1018 cmark_chunk result = {input->data + offset + 1, i - 2 - offset}; 1019 *output = result; 1020 } 1021 return i - offset; 1022 } 1023 1024 // Return a link, an image, or a literal close bracket. 1025 static cmark_node *handle_close_bracket(subject *subj) { 1026 bufsize_t initial_pos, after_link_text_pos; 1027 bufsize_t endurl, starttitle, endtitle, endall; 1028 bufsize_t sps, n; 1029 cmark_reference *ref = NULL; 1030 cmark_chunk url_chunk, title_chunk; 1031 unsigned char *url, *title; 1032 bracket *opener; 1033 cmark_node *inl; 1034 cmark_chunk raw_label; 1035 int found_label; 1036 cmark_node *tmp, *tmpnext; 1037 bool is_image; 1038 1039 advance(subj); // advance past ] 1040 initial_pos = subj->pos; 1041 1042 // get last [ or ![ 1043 opener = subj->last_bracket; 1044 1045 if (opener == NULL) { 1046 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); 1047 } 1048 1049 if (!opener->active) { 1050 // take delimiter off stack 1051 pop_bracket(subj); 1052 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); 1053 } 1054 1055 // If we got here, we matched a potential link/image text. 1056 // Now we check to see if it's a link/image. 1057 is_image = opener->image; 1058 1059 after_link_text_pos = subj->pos; 1060 1061 // First, look for an inline link. 1062 if (peek_char(subj) == '(' && 1063 ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && 1064 ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, 1065 &url_chunk)) > -1)) { 1066 1067 // try to parse an explicit link: 1068 endurl = subj->pos + 1 + sps + n; 1069 starttitle = endurl + scan_spacechars(&subj->input, endurl); 1070 1071 // ensure there are spaces btw url and title 1072 endtitle = (starttitle == endurl) 1073 ? starttitle 1074 : starttitle + scan_link_title(&subj->input, starttitle); 1075 1076 endall = endtitle + scan_spacechars(&subj->input, endtitle); 1077 1078 if (peek_at(subj, endall) == ')') { 1079 subj->pos = endall + 1; 1080 1081 title_chunk = 1082 cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); 1083 url = cmark_clean_url(subj->mem, &url_chunk); 1084 title = cmark_clean_title(subj->mem, &title_chunk); 1085 cmark_chunk_free(&url_chunk); 1086 cmark_chunk_free(&title_chunk); 1087 goto match; 1088 1089 } else { 1090 // it could still be a shortcut reference link 1091 subj->pos = after_link_text_pos; 1092 } 1093 } 1094 1095 // Next, look for a following [link label] that matches in refmap. 1096 // skip spaces 1097 raw_label = cmark_chunk_literal(""); 1098 found_label = link_label(subj, &raw_label); 1099 if (!found_label) { 1100 // If we have a shortcut reference link, back up 1101 // to before the spacse we skipped. 1102 subj->pos = initial_pos; 1103 } 1104 1105 if ((!found_label || raw_label.len == 0) && !opener->bracket_after) { 1106 cmark_chunk_free(&raw_label); 1107 raw_label = cmark_chunk_dup(&subj->input, opener->position, 1108 initial_pos - opener->position - 1); 1109 found_label = true; 1110 } 1111 1112 if (found_label) { 1113 ref = cmark_reference_lookup(subj->refmap, &raw_label); 1114 cmark_chunk_free(&raw_label); 1115 } 1116 1117 if (ref != NULL) { // found 1118 url = cmark_strdup(subj->mem, ref->url); 1119 title = cmark_strdup(subj->mem, ref->title); 1120 goto match; 1121 } else { 1122 goto noMatch; 1123 } 1124 1125 noMatch: 1126 // If we fall through to here, it means we didn't match a link: 1127 pop_bracket(subj); // remove this opener from delimiter list 1128 subj->pos = initial_pos; 1129 return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]")); 1130 1131 match: 1132 inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); 1133 inl->as.link.url = url; 1134 inl->as.link.title = title; 1135 inl->start_line = inl->end_line = subj->line; 1136 inl->start_column = opener->inl_text->start_column; 1137 inl->end_column = subj->pos + subj->column_offset + subj->block_offset; 1138 cmark_node_insert_before(opener->inl_text, inl); 1139 // Add link text: 1140 tmp = opener->inl_text->next; 1141 while (tmp) { 1142 tmpnext = tmp->next; 1143 cmark_node_unlink(tmp); 1144 append_child(inl, tmp); 1145 tmp = tmpnext; 1146 } 1147 1148 // Free the bracket [: 1149 cmark_node_free(opener->inl_text); 1150 1151 process_emphasis(subj, opener->previous_delimiter); 1152 pop_bracket(subj); 1153 1154 // Now, if we have a link, we also want to deactivate earlier link 1155 // delimiters. (This code can be removed if we decide to allow links 1156 // inside links.) 1157 if (!is_image) { 1158 opener = subj->last_bracket; 1159 while (opener != NULL) { 1160 if (!opener->image) { 1161 if (!opener->active) { 1162 break; 1163 } else { 1164 opener->active = false; 1165 } 1166 } 1167 opener = opener->previous; 1168 } 1169 } 1170 1171 return NULL; 1172 } 1173 1174 // Parse a hard or soft linebreak, returning an inline. 1175 // Assumes the subject has a cr or newline at the current position. 1176 static cmark_node *handle_newline(subject *subj) { 1177 bufsize_t nlpos = subj->pos; 1178 // skip over cr, crlf, or lf: 1179 if (peek_at(subj, subj->pos) == '\r') { 1180 advance(subj); 1181 } 1182 if (peek_at(subj, subj->pos) == '\n') { 1183 advance(subj); 1184 } 1185 ++subj->line; 1186 subj->column_offset = -subj->pos; 1187 // skip spaces at beginning of line 1188 skip_spaces(subj); 1189 if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && 1190 peek_at(subj, nlpos - 2) == ' ') { 1191 return make_linebreak(subj->mem); 1192 } else { 1193 return make_softbreak(subj->mem); 1194 } 1195 } 1196 1197 static bufsize_t subject_find_special_char(subject *subj, int options) { 1198 // "\r\n\\`&_*[]<!" 1199 static const int8_t SPECIAL_CHARS[256] = { 1200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1201 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1202 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1203 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1204 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1206 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 1211 1212 // " ' . - 1213 static const char SMART_PUNCT_CHARS[] = { 1214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1215 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1222 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1223 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1224 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1225 }; 1226 1227 bufsize_t n = subj->pos + 1; 1228 1229 while (n < subj->input.len) { 1230 if (SPECIAL_CHARS[subj->input.data[n]]) 1231 return n; 1232 if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) 1233 return n; 1234 n++; 1235 } 1236 1237 return subj->input.len; 1238 } 1239 1240 // Parse an inline, advancing subject, and add it as a child of parent. 1241 // Return 0 if no inline can be parsed, 1 otherwise. 1242 static int parse_inline(subject *subj, cmark_node *parent, int options) { 1243 cmark_node *new_inl = NULL; 1244 cmark_chunk contents; 1245 unsigned char c; 1246 bufsize_t startpos, endpos; 1247 c = peek_char(subj); 1248 if (c == 0) { 1249 return 0; 1250 } 1251 switch (c) { 1252 case '\r': 1253 case '\n': 1254 new_inl = handle_newline(subj); 1255 break; 1256 case '`': 1257 new_inl = handle_backticks(subj, options); 1258 break; 1259 case '\\': 1260 new_inl = handle_backslash(subj); 1261 break; 1262 case '&': 1263 new_inl = handle_entity(subj); 1264 break; 1265 case '<': 1266 new_inl = handle_pointy_brace(subj, options); 1267 break; 1268 case '*': 1269 case '_': 1270 case '\'': 1271 case '"': 1272 new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); 1273 break; 1274 case '-': 1275 new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); 1276 break; 1277 case '.': 1278 new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0); 1279 break; 1280 case '[': 1281 advance(subj); 1282 new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("[")); 1283 push_bracket(subj, false, new_inl); 1284 break; 1285 case ']': 1286 new_inl = handle_close_bracket(subj); 1287 break; 1288 case '!': 1289 advance(subj); 1290 if (peek_char(subj) == '[') { 1291 advance(subj); 1292 new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("![")); 1293 push_bracket(subj, true, new_inl); 1294 } else { 1295 new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!")); 1296 } 1297 break; 1298 default: 1299 endpos = subject_find_special_char(subj, options); 1300 contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); 1301 startpos = subj->pos; 1302 subj->pos = endpos; 1303 1304 // if we're at a newline, strip trailing spaces. 1305 if (S_is_line_end_char(peek_char(subj))) { 1306 cmark_chunk_rtrim(&contents); 1307 } 1308 1309 new_inl = make_str(subj, startpos, endpos - 1, contents); 1310 } 1311 if (new_inl != NULL) { 1312 append_child(parent, new_inl); 1313 } 1314 1315 return 1; 1316 } 1317 1318 // Parse inlines from parent's string_content, adding as children of parent. 1319 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, 1320 cmark_reference_map *refmap, int options) { 1321 subject subj; 1322 cmark_chunk content = {parent->data, parent->len}; 1323 subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap); 1324 cmark_chunk_rtrim(&subj.input); 1325 1326 while (!is_eof(&subj) && parse_inline(&subj, parent, options)) 1327 ; 1328 1329 process_emphasis(&subj, NULL); 1330 // free bracket and delim stack 1331 while (subj.last_delim) { 1332 remove_delimiter(&subj, subj.last_delim); 1333 } 1334 while (subj.last_bracket) { 1335 pop_bracket(&subj); 1336 } 1337 } 1338 1339 // Parse zero or more space characters, including at most one newline. 1340 static void spnl(subject *subj) { 1341 skip_spaces(subj); 1342 if (skip_line_end(subj)) { 1343 skip_spaces(subj); 1344 } 1345 } 1346 1347 // Parse reference. Assumes string begins with '[' character. 1348 // Modify refmap if a reference is encountered. 1349 // Return 0 if no reference found, otherwise position of subject 1350 // after reference is parsed. 1351 bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, 1352 cmark_reference_map *refmap) { 1353 subject subj; 1354 1355 cmark_chunk lab; 1356 cmark_chunk url; 1357 cmark_chunk title; 1358 1359 bufsize_t matchlen = 0; 1360 bufsize_t beforetitle; 1361 1362 subject_from_buf(mem, -1, 0, &subj, input, NULL); 1363 1364 // parse label: 1365 if (!link_label(&subj, &lab) || lab.len == 0) 1366 return 0; 1367 1368 // colon: 1369 if (peek_char(&subj) == ':') { 1370 advance(&subj); 1371 } else { 1372 return 0; 1373 } 1374 1375 // parse link url: 1376 spnl(&subj); 1377 if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) { 1378 subj.pos += matchlen; 1379 } else { 1380 return 0; 1381 } 1382 1383 // parse optional link_title 1384 beforetitle = subj.pos; 1385 spnl(&subj); 1386 matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos); 1387 if (matchlen) { 1388 title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); 1389 subj.pos += matchlen; 1390 } else { 1391 subj.pos = beforetitle; 1392 title = cmark_chunk_literal(""); 1393 } 1394 1395 // parse final spaces and newline: 1396 skip_spaces(&subj); 1397 if (!skip_line_end(&subj)) { 1398 if (matchlen) { // try rewinding before title 1399 subj.pos = beforetitle; 1400 skip_spaces(&subj); 1401 if (!skip_line_end(&subj)) { 1402 return 0; 1403 } 1404 } else { 1405 return 0; 1406 } 1407 } 1408 // insert reference into refmap 1409 cmark_reference_create(refmap, &lab, &url, &title); 1410 return subj.pos; 1411 }