cmark

My personal build of CMark ✏️

inlines.c (42610B)

   1 #include <stdlib.h>
   2 #include <string.h>
   3 #include <stdio.h>
   4 
   5 #include "cmark_ctype.h"
   6 #include "config.h"
   7 #include "node.h"
   8 #include "parser.h"
   9 #include "references.h"
  10 #include "cmark.h"
  11 #include "houdini.h"
  12 #include "utf8.h"
  13 #include "scanners.h"
  14 #include "inlines.h"
  15 
// UTF-8 encodings of the typographic replacement characters used when
// hyphens, periods and quotes are rewritten.
static const char *EMDASH = "\xE2\x80\x94";           // U+2014 EM DASH
static const char *ENDASH = "\xE2\x80\x93";           // U+2013 EN DASH
static const char *ELLIPSES = "\xE2\x80\xA6";         // U+2026 HORIZONTAL ELLIPSIS
static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C LEFT DOUBLE QUOTATION MARK
static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D RIGHT DOUBLE QUOTATION MARK
static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018 LEFT SINGLE QUOTATION MARK
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019 RIGHT SINGLE QUOTATION MARK
  23 
// Macros for creating the various kinds of simple (value-less) inline
// nodes.  Each one expands to a make_simple() call with the matching
// node type.
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run that is tracked; the subject's `backticks` array
// is declared with MAXBACKTICKS + 1 entries and indexed by run length.
#define MAXBACKTICKS 1000
  31 
// One entry of the delimiter list kept in subject.last_delim.  Entries
// are doubly linked; subject.last_delim points at the newest one.
typedef struct delimiter {
  struct delimiter *previous; // older neighbour, or NULL
  struct delimiter *next;     // newer neighbour, or NULL for the newest entry
  cmark_node *inl_text;       // text node holding the delimiter characters
  bufsize_t length;           // inl_text->len at the time the entry was pushed
  unsigned char delim_char;   // the delimiter byte, e.g. '*', '_', '\'' or '"'
  bool can_open;              // may act as an opener
  bool can_close;             // may act as a closer
} delimiter;
  41 
// One entry of the bracket stack kept in subject.last_bracket.
typedef struct bracket {
  struct bracket *previous;             // entry below this one, or NULL
  struct delimiter *previous_delimiter; // subject.last_delim when this was pushed
  cmark_node *inl_text;                 // text node passed to push_bracket
  bufsize_t position;                   // subject.pos when this was pushed
  bool image;                           // flag supplied by the caller of push_bracket
  bool active;                          // set to true on push
  bool bracket_after;                   // set once another bracket is pushed on top
} bracket;
  51 
// Parser state for one run of inline content.
typedef struct {
  cmark_mem *mem;              // allocator used for nodes, delimiters and brackets
  cmark_chunk input;           // the text being parsed
  int line;                    // line number used for source positions
  bufsize_t pos;               // current byte offset into `input`
  int block_offset;            // added to every column computed by make_literal
  int column_offset;           // added to every column computed by make_literal;
                               // rewritten by adjust_subj_node_newlines
  cmark_reference_map *refmap; // reference map supplied to subject_from_buf
  delimiter *last_delim;       // newest entry of the delimiter list, or NULL
  bracket *last_bracket;       // top of the bracket stack, or NULL
  // backticks[n] records the start offset of the most recently scanned
  // run of exactly n backticks (see scan_to_closing_backticks).
  bufsize_t backticks[MAXBACKTICKS + 1];
  // Set once a backtick scan has reached the end of the input without
  // finding a closer.
  bool scanned_for_backticks;
} subject;
  65 
  66 static CMARK_INLINE bool S_is_line_end_char(char c) {
  67   return (c == '\n' || c == '\r');
  68 }
  69 
// Forward declarations of file-local (static) functions.

// Wraps the inlines between `opener` and `closer` in an emph/strong node.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer);

static int parse_inline(subject *subj, cmark_node *parent, int options);

// Initialises the subject `e` for parsing `chunk`.
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
                             cmark_chunk *chunk, cmark_reference_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);
  78 
  79 // Create an inline with a literal string value.
  80 static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
  81                                              int start_column, int end_column) {
  82   cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
  83   e->mem = subj->mem;
  84   e->type = (uint16_t)t;
  85   e->start_line = e->end_line = subj->line;
  86   // columns are 1 based.
  87   e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
  88   e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
  89   return e;
  90 }
  91 
  92 // Create an inline with no value.
  93 static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
  94   cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
  95   e->mem = mem;
  96   e->type = t;
  97   return e;
  98 }
  99 
 100 static cmark_node *make_str(subject *subj, int sc, int ec, cmark_chunk s) {
 101   cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
 102   e->data = (unsigned char *)subj->mem->realloc(NULL, s.len + 1);
 103   if (s.data != NULL) {
 104     memcpy(e->data, s.data, s.len);
 105   }
 106   e->data[s.len] = 0;
 107   e->len = s.len;
 108   return e;
 109 }
 110 
 111 static cmark_node *make_str_from_buf(subject *subj, int sc, int ec,
 112                                      cmark_strbuf *buf) {
 113   cmark_node *e = make_literal(subj, CMARK_NODE_TEXT, sc, ec);
 114   e->len = buf->size;
 115   e->data = cmark_strbuf_detach(buf);
 116   return e;
 117 }
 118 
 119 // Like make_str, but parses entities.
 120 static cmark_node *make_str_with_entities(subject *subj,
 121                                           int start_column, int end_column,
 122                                           cmark_chunk *content) {
 123   cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
 124 
 125   if (houdini_unescape_html(&unescaped, content->data, content->len)) {
 126     return make_str_from_buf(subj, start_column, end_column, &unescaped);
 127   } else {
 128     return make_str(subj, start_column, end_column, *content);
 129   }
 130 }
 131 
 132 // Like cmark_node_append_child but without costly sanity checks.
 133 // Assumes that child was newly created.
 134 static void append_child(cmark_node *node, cmark_node *child) {
 135   cmark_node *old_last_child = node->last_child;
 136 
 137   child->next = NULL;
 138   child->prev = old_last_child;
 139   child->parent = node;
 140   node->last_child = child;
 141 
 142   if (old_last_child) {
 143     old_last_child->next = child;
 144   } else {
 145     // Also set first_child if node previously had no children.
 146     node->first_child = child;
 147   }
 148 }
 149 
 150 // Duplicate a chunk by creating a copy of the buffer not by reusing the
 151 // buffer like cmark_chunk_dup does.
 152 static unsigned char *cmark_strdup(cmark_mem *mem, unsigned char *src) {
 153   if (src == NULL) {
 154     return NULL;
 155   }
 156   size_t len = strlen((char *)src);
 157   unsigned char *data = (unsigned char *)mem->realloc(NULL, len + 1);
 158   memcpy(data, src, len + 1);
 159   return data;
 160 }
 161 
 162 static unsigned char *cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
 163                                            int is_email) {
 164   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 165 
 166   cmark_chunk_trim(url);
 167 
 168   if (is_email)
 169     cmark_strbuf_puts(&buf, "mailto:");
 170 
 171   houdini_unescape_html_f(&buf, url->data, url->len);
 172   return cmark_strbuf_detach(&buf);
 173 }
 174 
 175 static CMARK_INLINE cmark_node *make_autolink(subject *subj,
 176                                               int start_column, int end_column,
 177                                               cmark_chunk url, int is_email) {
 178   cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
 179   link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
 180   link->as.link.title = NULL;
 181   link->start_line = link->end_line = subj->line;
 182   link->start_column = start_column + 1;
 183   link->end_column = end_column + 1;
 184   append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
 185   return link;
 186 }
 187 
 188 static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
 189                              cmark_chunk *chunk, cmark_reference_map *refmap) {
 190   int i;
 191   e->mem = mem;
 192   e->input = *chunk;
 193   e->line = line_number;
 194   e->pos = 0;
 195   e->block_offset = block_offset;
 196   e->column_offset = 0;
 197   e->refmap = refmap;
 198   e->last_delim = NULL;
 199   e->last_bracket = NULL;
 200   for (i = 0; i <= MAXBACKTICKS; i++) {
 201     e->backticks[i] = 0;
 202   }
 203   e->scanned_for_backticks = false;
 204 }
 205 
 206 static CMARK_INLINE int isbacktick(int c) { return (c == '`'); }
 207 
 208 static CMARK_INLINE unsigned char peek_char(subject *subj) {
 209   // NULL bytes should have been stripped out by now.  If they're
 210   // present, it's a programming error:
 211   assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
 212   return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 213 }
 214 
 215 static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
 216   return subj->input.data[pos];
 217 }
 218 
 219 // Return true if there are more characters in the subject.
 220 static CMARK_INLINE int is_eof(subject *subj) {
 221   return (subj->pos >= subj->input.len);
 222 }
 223 
 224 // Advance the subject.  Doesn't check for eof.
 225 #define advance(subj) (subj)->pos += 1
 226 
 227 static CMARK_INLINE bool skip_spaces(subject *subj) {
 228   bool skipped = false;
 229   while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
 230     advance(subj);
 231     skipped = true;
 232   }
 233   return skipped;
 234 }
 235 
 236 static CMARK_INLINE bool skip_line_end(subject *subj) {
 237   bool seen_line_end_char = false;
 238   if (peek_char(subj) == '\r') {
 239     advance(subj);
 240     seen_line_end_char = true;
 241   }
 242   if (peek_char(subj) == '\n') {
 243     advance(subj);
 244     seen_line_end_char = true;
 245   }
 246   return seen_line_end_char || is_eof(subj);
 247 }
 248 
 249 // Take characters while a predicate holds, and return a string.
 250 static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
 251   unsigned char c;
 252   bufsize_t startpos = subj->pos;
 253   bufsize_t len = 0;
 254 
 255   while ((c = peek_char(subj)) && (*f)(c)) {
 256     advance(subj);
 257     len++;
 258   }
 259 
 260   return cmark_chunk_dup(&subj->input, startpos, len);
 261 }
 262 
 263 // Return the number of newlines in a given span of text in a subject.  If
 264 // the number is greater than zero, also return the number of characters
 265 // between the last newline and the end of the span in `since_newline`.
 266 static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
 267   int nls = 0;
 268   int since_nl = 0;
 269 
 270   while (len--) {
 271     if (subj->input.data[from++] == '\n') {
 272       ++nls;
 273       since_nl = 0;
 274     } else {
 275       ++since_nl;
 276     }
 277   }
 278 
 279   if (!nls)
 280     return 0;
 281 
 282   *since_newline = since_nl;
 283   return nls;
 284 }
 285 
 286 // Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
 287 // `column_offset` according to the number of newlines in a just-matched span
 288 // of text in `subj`.
 289 static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
 290   if (!(options & CMARK_OPT_SOURCEPOS)) {
 291     return;
 292   }
 293 
 294   int since_newline;
 295   int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
 296   if (newlines) {
 297     subj->line += newlines;
 298     node->end_line += newlines;
 299     node->end_column = since_newline;
 300     subj->column_offset = -subj->pos + since_newline + extra;
 301   }
 302 }
 303 
 304 // Try to process a backtick code span that began with a
 305 // span of ticks of length openticklength length (already
 306 // parsed).  Return 0 if you don't find matching closing
 307 // backticks, otherwise return the position in the subject
 308 // after the closing backticks.
 309 static bufsize_t scan_to_closing_backticks(subject *subj,
 310                                            bufsize_t openticklength) {
 311 
 312   bool found = false;
 313   if (openticklength > MAXBACKTICKS) {
 314     // we limit backtick string length because of the array subj->backticks:
 315     return 0;
 316   }
 317   if (subj->scanned_for_backticks &&
 318       subj->backticks[openticklength] <= subj->pos) {
 319     // return if we already know there's no closer
 320     return 0;
 321   }
 322   while (!found) {
 323     // read non backticks
 324     unsigned char c;
 325     while ((c = peek_char(subj)) && c != '`') {
 326       advance(subj);
 327     }
 328     if (is_eof(subj)) {
 329       break;
 330     }
 331     bufsize_t numticks = 0;
 332     while (peek_char(subj) == '`') {
 333       advance(subj);
 334       numticks++;
 335     }
 336     // store position of ender
 337     if (numticks <= MAXBACKTICKS) {
 338       subj->backticks[numticks] = subj->pos - numticks;
 339     }
 340     if (numticks == openticklength) {
 341       return (subj->pos);
 342     }
 343   }
 344   // got through whole input without finding closer
 345   subj->scanned_for_backticks = true;
 346   return 0;
 347 }
 348 
 349 // Destructively modify string, converting newlines to
 350 // spaces, then removing a single leading + trailing space,
 351 // unless the code span consists entirely of space characters.
 352 static void S_normalize_code(cmark_strbuf *s) {
 353   bufsize_t r, w;
 354   bool contains_nonspace = false;
 355 
 356   for (r = 0, w = 0; r < s->size; ++r) {
 357     switch (s->ptr[r]) {
 358     case '\r':
 359       if (s->ptr[r + 1] != '\n') {
 360 	s->ptr[w++] = ' ';
 361       }
 362       break;
 363     case '\n':
 364       s->ptr[w++] = ' ';
 365       break;
 366     default:
 367       s->ptr[w++] = s->ptr[r];
 368     }
 369     if (s->ptr[r] != ' ') {
 370       contains_nonspace = true;
 371     }
 372   }
 373 
 374   // begins and ends with space?
 375   if (contains_nonspace &&
 376       s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
 377     cmark_strbuf_drop(s, 1);
 378     cmark_strbuf_truncate(s, w - 2);
 379   } else {
 380     cmark_strbuf_truncate(s, w);
 381   }
 382 
 383 }
 384 
 385 
 386 // Parse backtick code section or raw backticks, return an inline.
 387 // Assumes that the subject has a backtick at the current position.
 388 static cmark_node *handle_backticks(subject *subj, int options) {
 389   cmark_chunk openticks = take_while(subj, isbacktick);
 390   bufsize_t startpos = subj->pos;
 391   bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 392 
 393   if (endpos == 0) {      // not found
 394     subj->pos = startpos; // rewind
 395     return make_str(subj, subj->pos, subj->pos, openticks);
 396   } else {
 397     cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
 398 
 399     cmark_strbuf_set(&buf, subj->input.data + startpos,
 400                      endpos - startpos - openticks.len);
 401     S_normalize_code(&buf);
 402 
 403     cmark_node *node = make_literal(subj, CMARK_NODE_CODE, startpos,
 404                                     endpos - openticks.len - 1);
 405     node->len = buf.size;
 406     node->data = cmark_strbuf_detach(&buf);
 407     adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
 408     return node;
 409   }
 410 }
 411 
 412 
// Scan a run of the delimiter character `c` starting at the current
// position and return the number of delimiters scanned.  Quote
// characters (' and ") are limited to a run of one.
// Advances position.
// `*can_open` / `*can_close` are set from the characters on either side
// of the run (left- and right-flanking tests below).
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
                       bool *can_close) {
  int numdelims = 0;
  bufsize_t before_char_pos;
  int32_t after_char = 0;
  int32_t before_char = 0;
  int len;
  bool left_flanking, right_flanking;

  // Determine the code point before the run; the start of the input is
  // treated as a newline (10).
  if (subj->pos == 0) {
    before_char = 10;
  } else {
    before_char_pos = subj->pos - 1;
    // walk back to the beginning of the UTF_8 sequence:
    // (continuation bytes have the top two bits 10)
    while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
      before_char_pos -= 1;
    }
    len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
                                 subj->pos - before_char_pos, &before_char);
    // a failed decode is treated as a newline as well
    if (len == -1) {
      before_char = 10;
    }
  }

  if (c == '\'' || c == '"') {
    numdelims++;
    advance(subj); // limit to 1 delim for quotes
  } else {
    while (peek_char(subj) == c) {
      numdelims++;
      advance(subj);
    }
  }

  // Determine the code point after the run, again falling back to a
  // newline when decoding fails.
  len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
                               subj->input.len - subj->pos, &after_char);
  if (len == -1) {
    after_char = 10;
  }
  // Left-flanking: not followed by whitespace, and either not followed
  // by punctuation or preceded by whitespace/punctuation.
  left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
                  (!cmark_utf8proc_is_punctuation(after_char) ||
                   cmark_utf8proc_is_space(before_char) ||
                   cmark_utf8proc_is_punctuation(before_char));
  // Right-flanking: the mirror image of the test above.
  right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
                   (!cmark_utf8proc_is_punctuation(before_char) ||
                    cmark_utf8proc_is_space(after_char) ||
                    cmark_utf8proc_is_punctuation(after_char));
  if (c == '_') {
    // Underscores that are both left- and right-flanking may only
    // open/close when adjacent to punctuation on the outer side.
    *can_open = left_flanking &&
                (!right_flanking || cmark_utf8proc_is_punctuation(before_char));
    *can_close = right_flanking &&
                 (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
  } else if (c == '\'' || c == '"') {
    // Quotes: never open directly after ']' or ')'; a both-flanking
    // quote may open only directly after '(' or '['.
    *can_open = left_flanking &&
         (!right_flanking || before_char == '(' || before_char == '[') &&
         before_char != ']' && before_char != ')';
    *can_close = right_flanking;
  } else {
    *can_open = left_flanking;
    *can_close = right_flanking;
  }
  return numdelims;
}
 478 
 479 /*
 480 static void print_delimiters(subject *subj)
 481 {
 482         delimiter *delim;
 483         delim = subj->last_delim;
 484         while (delim != NULL) {
 485                 printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
 486                        (void*)delim, delim->delim_char,
 487                        delim->can_open, delim->can_close,
 488                        (void*)delim->next, (void*)delim->previous);
 489                 delim = delim->previous;
 490         }
 491 }
 492 */
 493 
 494 static void remove_delimiter(subject *subj, delimiter *delim) {
 495   if (delim == NULL)
 496     return;
 497   if (delim->next == NULL) {
 498     // end of list:
 499     assert(delim == subj->last_delim);
 500     subj->last_delim = delim->previous;
 501   } else {
 502     delim->next->previous = delim->previous;
 503   }
 504   if (delim->previous != NULL) {
 505     delim->previous->next = delim->next;
 506   }
 507   subj->mem->free(delim);
 508 }
 509 
 510 static void pop_bracket(subject *subj) {
 511   bracket *b;
 512   if (subj->last_bracket == NULL)
 513     return;
 514   b = subj->last_bracket;
 515   subj->last_bracket = subj->last_bracket->previous;
 516   subj->mem->free(b);
 517 }
 518 
 519 static void push_delimiter(subject *subj, unsigned char c, bool can_open,
 520                            bool can_close, cmark_node *inl_text) {
 521   delimiter *delim = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
 522   delim->delim_char = c;
 523   delim->can_open = can_open;
 524   delim->can_close = can_close;
 525   delim->inl_text = inl_text;
 526   delim->length = inl_text->len;
 527   delim->previous = subj->last_delim;
 528   delim->next = NULL;
 529   if (delim->previous != NULL) {
 530     delim->previous->next = delim;
 531   }
 532   subj->last_delim = delim;
 533 }
 534 
 535 static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
 536   bracket *b = (bracket *)subj->mem->calloc(1, sizeof(bracket));
 537   if (subj->last_bracket != NULL) {
 538     subj->last_bracket->bracket_after = true;
 539   }
 540   b->image = image;
 541   b->active = true;
 542   b->inl_text = inl_text;
 543   b->previous = subj->last_bracket;
 544   b->previous_delimiter = subj->last_delim;
 545   b->position = subj->pos;
 546   b->bracket_after = false;
 547   subj->last_bracket = b;
 548 }
 549 
 550 // Assumes the subject has a c at the current position.
 551 static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
 552   bufsize_t numdelims;
 553   cmark_node *inl_text;
 554   bool can_open, can_close;
 555   cmark_chunk contents;
 556 
 557   numdelims = scan_delims(subj, c, &can_open, &can_close);
 558 
 559   if (c == '\'' && smart) {
 560     contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
 561   } else if (c == '"' && smart) {
 562     contents =
 563         cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
 564   } else {
 565     contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
 566   }
 567 
 568   inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
 569 
 570   if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
 571     push_delimiter(subj, c, can_open, can_close, inl_text);
 572   }
 573 
 574   return inl_text;
 575 }
 576 
 577 // Assumes we have a hyphen at the current position.
 578 static cmark_node *handle_hyphen(subject *subj, bool smart) {
 579   int startpos = subj->pos;
 580 
 581   advance(subj);
 582 
 583   if (!smart || peek_char(subj) != '-') {
 584     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
 585   }
 586 
 587   while (smart && peek_char(subj) == '-') {
 588     advance(subj);
 589   }
 590 
 591   int numhyphens = subj->pos - startpos;
 592   int en_count = 0;
 593   int em_count = 0;
 594   int i;
 595   cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
 596 
 597   if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
 598     em_count = numhyphens / 3;
 599   } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
 600     en_count = numhyphens / 2;
 601   } else if (numhyphens % 3 == 2) { // use one en dash at end
 602     en_count = 1;
 603     em_count = (numhyphens - 2) / 3;
 604   } else { // use two en dashes at the end
 605     en_count = 2;
 606     em_count = (numhyphens - 4) / 3;
 607   }
 608 
 609   for (i = em_count; i > 0; i--) {
 610     cmark_strbuf_puts(&buf, EMDASH);
 611   }
 612 
 613   for (i = en_count; i > 0; i--) {
 614     cmark_strbuf_puts(&buf, ENDASH);
 615   }
 616 
 617   return make_str_from_buf(subj, startpos, subj->pos - 1, &buf);
 618 }
 619 
 620 // Assumes we have a period at the current position.
 621 static cmark_node *handle_period(subject *subj, bool smart) {
 622   advance(subj);
 623   if (smart && peek_char(subj) == '.') {
 624     advance(subj);
 625     if (peek_char(subj) == '.') {
 626       advance(subj);
 627       return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
 628     } else {
 629       return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
 630     }
 631   } else {
 632     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
 633   }
 634 }
 635 
// Resolve the delimiters that sit above `stack_bottom` in the subject's
// delimiter list.  Closers are visited oldest to newest; for each one
// the nearest matching opener is searched for.  '*' and '_' pairs are
// turned into emph/strong nodes by S_insert_emph, while ' and " closers
// (and their openers, if found) have their text replaced by curly
// quotes.  On return every delimiter above `stack_bottom` has been
// freed.
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
  int openers_bottom_index = 0;
  // Lower bounds for the opener search, one slot per delimiter kind:
  // 0 = '"', 1 = '\'', 2 = '_', 3..5 = '*' keyed by closer length mod 3.
  delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom,
                                  stack_bottom, stack_bottom, stack_bottom};

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    if (closer->can_close) {
      switch (closer->delim_char) {
      case '"':
        openers_bottom_index = 0;
        break;
      case '\'':
        openers_bottom_index = 1;
        break;
      case '_':
        openers_bottom_index = 2;
        break;
      case '*':
        openers_bottom_index = 3 + (closer->length % 3);
        break;
      default:
        assert(false);
      }

      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      while (opener != NULL && opener != openers_bottom[openers_bottom_index]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
          if (!(closer->can_open || opener->can_close) ||
	      closer->length % 3 == 0 ||
              (opener->length + closer->length) % 3 != 0) {
            opener_found = true;
            break;
          }
        }
        opener = opener->previous;
      }
      // remember this closer: `closer` itself is advanced below
      old_closer = closer;
      if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          // S_insert_emph returns the delimiter to continue from
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        cmark_node_set_literal(closer->inl_text, RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_node_set_literal(opener->inl_text, LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_node_set_literal(closer->inl_text, RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_node_set_literal(opener->inl_text, LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers
        openers_bottom[openers_bottom_index] = old_closer->previous;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }
  // free all delimiters in list until stack_bottom:
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}
 725 
// Wrap the inlines between `opener` and `closer` in a new emph or
// strong node.  Two delimiter characters are consumed from each side
// (strong) when both text nodes hold at least two, otherwise one
// (emph).  Delimiters lying between the pair are removed, and an opener
// or closer whose text becomes empty is removed together with its text
// node.  Returns the delimiter to continue processing from: `closer`
// itself, or the entry after it when `closer` was used up.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer) {
  delimiter *delim, *tmp_delim;
  bufsize_t use_delims;
  cmark_node *opener_inl = opener->inl_text;
  cmark_node *closer_inl = closer->inl_text;
  bufsize_t opener_num_chars = opener_inl->len;
  bufsize_t closer_num_chars = closer_inl->len;
  cmark_node *tmp, *tmpnext, *emph;

  // calculate the actual number of characters used from this closer
  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;

  // remove used characters from associated inlines.
  // (the text nodes are shortened in place and re-terminated)
  opener_num_chars -= use_delims;
  closer_num_chars -= use_delims;
  opener_inl->len = opener_num_chars;
  opener_inl->data[opener_num_chars] = 0;
  closer_inl->len = closer_num_chars;
  closer_inl->data[closer_num_chars] = 0;

  // free delimiters between opener and closer
  delim = closer->previous;
  while (delim != NULL && delim != opener) {
    // fetch the neighbour first: remove_delimiter frees `delim`
    tmp_delim = delim->previous;
    remove_delimiter(subj, delim);
    delim = tmp_delim;
  }

  // create new emph or strong, and splice it in to our inlines
  // between the opener and closer
  emph = use_delims == 1 ? make_emph(subj->mem) : make_strong(subj->mem);

  // move every node between the two text nodes under the new node
  tmp = opener_inl->next;
  while (tmp && tmp != closer_inl) {
    tmpnext = tmp->next;
    cmark_node_unlink(tmp);
    append_child(emph, tmp);
    tmp = tmpnext;
  }
  cmark_node_insert_after(opener_inl, emph);

  // the new node's source range runs from the opener's start to the
  // closer's end
  emph->start_line = opener_inl->start_line;
  emph->end_line = closer_inl->end_line;
  emph->start_column = opener_inl->start_column;
  emph->end_column = closer_inl->end_column;

  // if opener has 0 characters, remove it and its associated inline
  if (opener_num_chars == 0) {
    cmark_node_free(opener_inl);
    remove_delimiter(subj, opener);
  }

  // if closer has 0 characters, remove it and its associated inline
  if (closer_num_chars == 0) {
    // remove empty closer inline
    cmark_node_free(closer_inl);
    // remove closer from list
    tmp_delim = closer->next;
    remove_delimiter(subj, closer);
    closer = tmp_delim;
  }

  return closer;
}
 791 
 792 // Parse backslash-escape or just a backslash, returning an inline.
 793 static cmark_node *handle_backslash(subject *subj) {
 794   advance(subj);
 795   unsigned char nextchar = peek_char(subj);
 796   if (cmark_ispunct(
 797           nextchar)) { // only ascii symbols and newline can be escaped
 798     advance(subj);
 799     return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
 800   } else if (!is_eof(subj) && skip_line_end(subj)) {
 801     return make_linebreak(subj->mem);
 802   } else {
 803     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
 804   }
 805 }
 806 
 807 // Parse an entity or a regular "&" string.
 808 // Assumes the subject has an '&' character at the current position.
 809 static cmark_node *handle_entity(subject *subj) {
 810   cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
 811   bufsize_t len;
 812 
 813   advance(subj);
 814 
 815   len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
 816                              subj->input.len - subj->pos);
 817 
 818   if (len <= 0)
 819     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
 820 
 821   subj->pos += len;
 822   return make_str_from_buf(subj, subj->pos - 1 - len, subj->pos - 1, &ent);
 823 }
 824 
 825 // Clean a URL: remove surrounding whitespace, and remove \ that escape
 826 // punctuation.
 827 unsigned char *cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
 828   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 829 
 830   cmark_chunk_trim(url);
 831 
 832   houdini_unescape_html_f(&buf, url->data, url->len);
 833 
 834   cmark_strbuf_unescape(&buf);
 835   return cmark_strbuf_detach(&buf);
 836 }
 837 
 838 unsigned char *cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
 839   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 840   unsigned char first, last;
 841 
 842   if (title->len == 0) {
 843     return NULL;
 844   }
 845 
 846   first = title->data[0];
 847   last = title->data[title->len - 1];
 848 
 849   // remove surrounding quotes if any:
 850   if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
 851       (first == '"' && last == '"')) {
 852     houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
 853   } else {
 854     houdini_unescape_html_f(&buf, title->data, title->len);
 855   }
 856 
 857   cmark_strbuf_unescape(&buf);
 858   return cmark_strbuf_detach(&buf);
 859 }
 860 
 861 // Parse an autolink or HTML tag.
 862 // Assumes the subject has a '<' character at the current position.
 863 static cmark_node *handle_pointy_brace(subject *subj, int options) {
 864   bufsize_t matchlen = 0;
 865   cmark_chunk contents;
 866 
 867   advance(subj); // advance past first <
 868 
 869   // first try to match a URL autolink
 870   matchlen = scan_autolink_uri(&subj->input, subj->pos);
 871   if (matchlen > 0) {
 872     contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
 873     subj->pos += matchlen;
 874 
 875     return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
 876   }
 877 
 878   // next try to match an email autolink
 879   matchlen = scan_autolink_email(&subj->input, subj->pos);
 880   if (matchlen > 0) {
 881     contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
 882     subj->pos += matchlen;
 883 
 884     return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
 885   }
 886 
 887   // finally, try to match an html tag
 888   matchlen = scan_html_tag(&subj->input, subj->pos);
 889   if (matchlen > 0) {
 890     const unsigned char *src = subj->input.data + subj->pos - 1;
 891     bufsize_t len = matchlen + 1;
 892     subj->pos += matchlen;
 893     cmark_node *node = make_literal(subj, CMARK_NODE_HTML_INLINE,
 894                                     subj->pos - matchlen - 1, subj->pos - 1);
 895     node->data = (unsigned char *)subj->mem->realloc(NULL, len + 1);
 896     memcpy(node->data, src, len);
 897     node->data[len] = 0;
 898     node->len = len;
 899     adjust_subj_node_newlines(subj, node, matchlen, 1, options);
 900     return node;
 901   }
 902 
 903   // if nothing matches, just return the opening <:
 904   return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
 905 }
 906 
 907 // Parse a link label.  Returns 1 if successful.
 908 // Note:  unescaped brackets are not allowed in labels.
 909 // The label begins with `[` and ends with the first `]` character
 910 // encountered.  Backticks in labels do not start code spans.
 911 static int link_label(subject *subj, cmark_chunk *raw_label) {
 912   bufsize_t startpos = subj->pos;
 913   int length = 0;
 914   unsigned char c;
 915 
 916   // advance past [
 917   if (peek_char(subj) == '[') {
 918     advance(subj);
 919   } else {
 920     return 0;
 921   }
 922 
 923   while ((c = peek_char(subj)) && c != '[' && c != ']') {
 924     if (c == '\\') {
 925       advance(subj);
 926       length++;
 927       if (cmark_ispunct(peek_char(subj))) {
 928         advance(subj);
 929         length++;
 930       }
 931     } else {
 932       advance(subj);
 933       length++;
 934     }
 935     if (length > MAX_LINK_LABEL_LENGTH) {
 936       goto noMatch;
 937     }
 938   }
 939 
 940   if (c == ']') { // match found
 941     *raw_label =
 942         cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
 943     cmark_chunk_trim(raw_label);
 944     advance(subj); // advance past ]
 945     return 1;
 946   }
 947 
 948 noMatch:
 949   subj->pos = startpos; // rewind
 950   return 0;
 951 }
 952 
 953 static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
 954                                         cmark_chunk *output) {
 955   bufsize_t i = offset;
 956   size_t nb_p = 0;
 957 
 958   while (i < input->len) {
 959     if (input->data[i] == '\\' &&
 960         i + 1 < input-> len &&
 961         cmark_ispunct(input->data[i+1]))
 962       i += 2;
 963     else if (input->data[i] == '(') {
 964       ++nb_p;
 965       ++i;
 966       if (nb_p > 32)
 967         return -1;
 968     } else if (input->data[i] == ')') {
 969       if (nb_p == 0)
 970         break;
 971       --nb_p;
 972       ++i;
 973     } else if (cmark_isspace(input->data[i])) {
 974       if (i == offset) {
 975         return -1;
 976       }
 977       break;
 978     } else {
 979       ++i;
 980     }
 981   }
 982 
 983   if (i >= input->len || nb_p != 0)
 984     return -1;
 985 
 986   {
 987     cmark_chunk result = {input->data + offset, i - offset};
 988     *output = result;
 989   }
 990   return i - offset;
 991 }
 992 
 993 static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
 994                                       cmark_chunk *output) {
 995   bufsize_t i = offset;
 996 
 997   if (i < input->len && input->data[i] == '<') {
 998     ++i;
 999     while (i < input->len) {
1000       if (input->data[i] == '>') {
1001         ++i;
1002         break;
1003       } else if (input->data[i] == '\\')
1004         i += 2;
1005       else if (input->data[i] == '\n' || input->data[i] == '<')
1006         return -1;
1007       else
1008         ++i;
1009     }
1010   } else {
1011     return manual_scan_link_url_2(input, offset, output);
1012   }
1013 
1014   if (i >= input->len)
1015     return -1;
1016 
1017   {
1018     cmark_chunk result = {input->data + offset + 1, i - 2 - offset};
1019     *output = result;
1020   }
1021   return i - offset;
1022 }
1023 
1024 // Return a link, an image, or a literal close bracket.
1025 static cmark_node *handle_close_bracket(subject *subj) {
1026   bufsize_t initial_pos, after_link_text_pos;
1027   bufsize_t endurl, starttitle, endtitle, endall;
1028   bufsize_t sps, n;
1029   cmark_reference *ref = NULL;
1030   cmark_chunk url_chunk, title_chunk;
1031   unsigned char *url, *title;
1032   bracket *opener;
1033   cmark_node *inl;
1034   cmark_chunk raw_label;
1035   int found_label;
1036   cmark_node *tmp, *tmpnext;
1037   bool is_image;
1038 
1039   advance(subj); // advance past ]
1040   initial_pos = subj->pos;
1041 
1042   // get last [ or ![
1043   opener = subj->last_bracket;
1044 
1045   if (opener == NULL) {
1046     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1047   }
1048 
1049   if (!opener->active) {
1050     // take delimiter off stack
1051     pop_bracket(subj);
1052     return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1053   }
1054 
1055   // If we got here, we matched a potential link/image text.
1056   // Now we check to see if it's a link/image.
1057   is_image = opener->image;
1058 
1059   after_link_text_pos = subj->pos;
1060 
1061   // First, look for an inline link.
1062   if (peek_char(subj) == '(' &&
1063       ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
1064       ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
1065                                  &url_chunk)) > -1)) {
1066 
1067     // try to parse an explicit link:
1068     endurl = subj->pos + 1 + sps + n;
1069     starttitle = endurl + scan_spacechars(&subj->input, endurl);
1070 
1071     // ensure there are spaces btw url and title
1072     endtitle = (starttitle == endurl)
1073                    ? starttitle
1074                    : starttitle + scan_link_title(&subj->input, starttitle);
1075 
1076     endall = endtitle + scan_spacechars(&subj->input, endtitle);
1077 
1078     if (peek_at(subj, endall) == ')') {
1079       subj->pos = endall + 1;
1080 
1081       title_chunk =
1082           cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
1083       url = cmark_clean_url(subj->mem, &url_chunk);
1084       title = cmark_clean_title(subj->mem, &title_chunk);
1085       cmark_chunk_free(&url_chunk);
1086       cmark_chunk_free(&title_chunk);
1087       goto match;
1088 
1089     } else {
1090       // it could still be a shortcut reference link
1091       subj->pos = after_link_text_pos;
1092     }
1093   }
1094 
1095   // Next, look for a following [link label] that matches in refmap.
1096   // skip spaces
1097   raw_label = cmark_chunk_literal("");
1098   found_label = link_label(subj, &raw_label);
1099   if (!found_label) {
1100     // If we have a shortcut reference link, back up
1101     // to before the spacse we skipped.
1102     subj->pos = initial_pos;
1103   }
1104 
1105   if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
1106     cmark_chunk_free(&raw_label);
1107     raw_label = cmark_chunk_dup(&subj->input, opener->position,
1108                                 initial_pos - opener->position - 1);
1109     found_label = true;
1110   }
1111 
1112   if (found_label) {
1113     ref = cmark_reference_lookup(subj->refmap, &raw_label);
1114     cmark_chunk_free(&raw_label);
1115   }
1116 
1117   if (ref != NULL) { // found
1118     url = cmark_strdup(subj->mem, ref->url);
1119     title = cmark_strdup(subj->mem, ref->title);
1120     goto match;
1121   } else {
1122     goto noMatch;
1123   }
1124 
1125 noMatch:
1126   // If we fall through to here, it means we didn't match a link:
1127   pop_bracket(subj); // remove this opener from delimiter list
1128   subj->pos = initial_pos;
1129   return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
1130 
1131 match:
1132   inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
1133   inl->as.link.url = url;
1134   inl->as.link.title = title;
1135   inl->start_line = inl->end_line = subj->line;
1136   inl->start_column = opener->inl_text->start_column;
1137   inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
1138   cmark_node_insert_before(opener->inl_text, inl);
1139   // Add link text:
1140   tmp = opener->inl_text->next;
1141   while (tmp) {
1142     tmpnext = tmp->next;
1143     cmark_node_unlink(tmp);
1144     append_child(inl, tmp);
1145     tmp = tmpnext;
1146   }
1147 
1148   // Free the bracket [:
1149   cmark_node_free(opener->inl_text);
1150 
1151   process_emphasis(subj, opener->previous_delimiter);
1152   pop_bracket(subj);
1153 
1154   // Now, if we have a link, we also want to deactivate earlier link
1155   // delimiters. (This code can be removed if we decide to allow links
1156   // inside links.)
1157   if (!is_image) {
1158     opener = subj->last_bracket;
1159     while (opener != NULL) {
1160       if (!opener->image) {
1161         if (!opener->active) {
1162           break;
1163         } else {
1164           opener->active = false;
1165         }
1166       }
1167       opener = opener->previous;
1168     }
1169   }
1170 
1171   return NULL;
1172 }
1173 
1174 // Parse a hard or soft linebreak, returning an inline.
1175 // Assumes the subject has a cr or newline at the current position.
1176 static cmark_node *handle_newline(subject *subj) {
1177   bufsize_t nlpos = subj->pos;
1178   // skip over cr, crlf, or lf:
1179   if (peek_at(subj, subj->pos) == '\r') {
1180     advance(subj);
1181   }
1182   if (peek_at(subj, subj->pos) == '\n') {
1183     advance(subj);
1184   }
1185   ++subj->line;
1186   subj->column_offset = -subj->pos;
1187   // skip spaces at beginning of line
1188   skip_spaces(subj);
1189   if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
1190       peek_at(subj, nlpos - 2) == ' ') {
1191     return make_linebreak(subj->mem);
1192   } else {
1193     return make_softbreak(subj->mem);
1194   }
1195 }
1196 
1197 static bufsize_t subject_find_special_char(subject *subj, int options) {
1198   // "\r\n\\`&_*[]<!"
1199   static const int8_t SPECIAL_CHARS[256] = {
1200       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1201       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
1202       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1203       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1204       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1205       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1206       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1207       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1208       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1209       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1210       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1211 
1212   // " ' . -
1213   static const char SMART_PUNCT_CHARS[] = {
1214       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1215       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
1216       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1218       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1219       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1220       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1221       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1222       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1223       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1224       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1225   };
1226 
1227   bufsize_t n = subj->pos + 1;
1228 
1229   while (n < subj->input.len) {
1230     if (SPECIAL_CHARS[subj->input.data[n]])
1231       return n;
1232     if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
1233       return n;
1234     n++;
1235   }
1236 
1237   return subj->input.len;
1238 }
1239 
1240 // Parse an inline, advancing subject, and add it as a child of parent.
1241 // Return 0 if no inline can be parsed, 1 otherwise.
1242 static int parse_inline(subject *subj, cmark_node *parent, int options) {
1243   cmark_node *new_inl = NULL;
1244   cmark_chunk contents;
1245   unsigned char c;
1246   bufsize_t startpos, endpos;
1247   c = peek_char(subj);
1248   if (c == 0) {
1249     return 0;
1250   }
1251   switch (c) {
1252   case '\r':
1253   case '\n':
1254     new_inl = handle_newline(subj);
1255     break;
1256   case '`':
1257     new_inl = handle_backticks(subj, options);
1258     break;
1259   case '\\':
1260     new_inl = handle_backslash(subj);
1261     break;
1262   case '&':
1263     new_inl = handle_entity(subj);
1264     break;
1265   case '<':
1266     new_inl = handle_pointy_brace(subj, options);
1267     break;
1268   case '*':
1269   case '_':
1270   case '\'':
1271   case '"':
1272     new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
1273     break;
1274   case '-':
1275     new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
1276     break;
1277   case '.':
1278     new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
1279     break;
1280   case '[':
1281     advance(subj);
1282     new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
1283     push_bracket(subj, false, new_inl);
1284     break;
1285   case ']':
1286     new_inl = handle_close_bracket(subj);
1287     break;
1288   case '!':
1289     advance(subj);
1290     if (peek_char(subj) == '[') {
1291       advance(subj);
1292       new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
1293       push_bracket(subj, true, new_inl);
1294     } else {
1295       new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
1296     }
1297     break;
1298   default:
1299     endpos = subject_find_special_char(subj, options);
1300     contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
1301     startpos = subj->pos;
1302     subj->pos = endpos;
1303 
1304     // if we're at a newline, strip trailing spaces.
1305     if (S_is_line_end_char(peek_char(subj))) {
1306       cmark_chunk_rtrim(&contents);
1307     }
1308 
1309     new_inl = make_str(subj, startpos, endpos - 1, contents);
1310   }
1311   if (new_inl != NULL) {
1312     append_child(parent, new_inl);
1313   }
1314 
1315   return 1;
1316 }
1317 
1318 // Parse inlines from parent's string_content, adding as children of parent.
1319 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
1320                          cmark_reference_map *refmap, int options) {
1321   subject subj;
1322   cmark_chunk content = {parent->data, parent->len};
1323   subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
1324   cmark_chunk_rtrim(&subj.input);
1325 
1326   while (!is_eof(&subj) && parse_inline(&subj, parent, options))
1327     ;
1328 
1329   process_emphasis(&subj, NULL);
1330   // free bracket and delim stack
1331   while (subj.last_delim) {
1332     remove_delimiter(&subj, subj.last_delim);
1333   }
1334   while (subj.last_bracket) {
1335     pop_bracket(&subj);
1336   }
1337 }
1338 
1339 // Parse zero or more space characters, including at most one newline.
1340 static void spnl(subject *subj) {
1341   skip_spaces(subj);
1342   if (skip_line_end(subj)) {
1343     skip_spaces(subj);
1344   }
1345 }
1346 
1347 // Parse reference.  Assumes string begins with '[' character.
1348 // Modify refmap if a reference is encountered.
1349 // Return 0 if no reference found, otherwise position of subject
1350 // after reference is parsed.
1351 bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
1352                                        cmark_reference_map *refmap) {
1353   subject subj;
1354 
1355   cmark_chunk lab;
1356   cmark_chunk url;
1357   cmark_chunk title;
1358 
1359   bufsize_t matchlen = 0;
1360   bufsize_t beforetitle;
1361 
1362   subject_from_buf(mem, -1, 0, &subj, input, NULL);
1363 
1364   // parse label:
1365   if (!link_label(&subj, &lab) || lab.len == 0)
1366     return 0;
1367 
1368   // colon:
1369   if (peek_char(&subj) == ':') {
1370     advance(&subj);
1371   } else {
1372     return 0;
1373   }
1374 
1375   // parse link url:
1376   spnl(&subj);
1377   if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1) {
1378     subj.pos += matchlen;
1379   } else {
1380     return 0;
1381   }
1382 
1383   // parse optional link_title
1384   beforetitle = subj.pos;
1385   spnl(&subj);
1386   matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
1387   if (matchlen) {
1388     title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
1389     subj.pos += matchlen;
1390   } else {
1391     subj.pos = beforetitle;
1392     title = cmark_chunk_literal("");
1393   }
1394 
1395   // parse final spaces and newline:
1396   skip_spaces(&subj);
1397   if (!skip_line_end(&subj)) {
1398     if (matchlen) { // try rewinding before title
1399       subj.pos = beforetitle;
1400       skip_spaces(&subj);
1401       if (!skip_line_end(&subj)) {
1402         return 0;
1403       }
1404     } else {
1405       return 0;
1406     }
1407   }
1408   // insert reference into refmap
1409   cmark_reference_create(refmap, &lab, &url, &title);
1410   return subj.pos;
1411 }