cmark
My personal build of CMark ✏️
latex.c (10457B)
1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <assert.h> 5 6 #include "config.h" 7 #include "cmark.h" 8 #include "node.h" 9 #include "buffer.h" 10 #include "utf8.h" 11 #include "scanners.h" 12 #include "render.h" 13 14 #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) 15 #define LIT(s) renderer->out(renderer, s, false, LITERAL) 16 #define CR() renderer->cr(renderer) 17 #define BLANKLINE() renderer->blankline(renderer) 18 #define LIST_NUMBER_STRING_SIZE 20 19 20 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, 21 int32_t c, unsigned char nextc) { 22 if (escape == LITERAL) { 23 cmark_render_code_point(renderer, c); 24 return; 25 } 26 27 switch (c) { 28 case 123: // '{' 29 case 125: // '}' 30 case 35: // '#' 31 case 37: // '%' 32 case 38: // '&' 33 cmark_render_ascii(renderer, "\\"); 34 cmark_render_code_point(renderer, c); 35 break; 36 case 36: // '$' 37 case 95: // '_' 38 if (escape == NORMAL) { 39 cmark_render_ascii(renderer, "\\"); 40 } 41 cmark_render_code_point(renderer, c); 42 break; 43 case 45: // '-' 44 if (nextc == 45) { // prevent ligature 45 cmark_render_ascii(renderer, "-{}"); 46 } else { 47 cmark_render_ascii(renderer, "-"); 48 } 49 break; 50 case 126: // '~' 51 if (escape == NORMAL) { 52 cmark_render_ascii(renderer, "\\textasciitilde{}"); 53 } else { 54 cmark_render_code_point(renderer, c); 55 } 56 break; 57 case 94: // '^' 58 cmark_render_ascii(renderer, "\\^{}"); 59 break; 60 case 92: // '\\' 61 if (escape == URL) { 62 // / acts as path sep even on windows: 63 cmark_render_ascii(renderer, "/"); 64 } else { 65 cmark_render_ascii(renderer, "\\textbackslash{}"); 66 } 67 break; 68 case 124: // '|' 69 cmark_render_ascii(renderer, "\\textbar{}"); 70 break; 71 case 60: // '<' 72 cmark_render_ascii(renderer, "\\textless{}"); 73 break; 74 case 62: // '>' 75 cmark_render_ascii(renderer, "\\textgreater{}"); 76 break; 77 case 91: // '[' 78 case 93: // ']' 79 cmark_render_ascii(renderer, "{"); 80 cmark_render_code_point(renderer, c); 81 cmark_render_ascii(renderer, "}"); 82 break; 83 case 34: // '"' 84 cmark_render_ascii(renderer, "\\textquotedbl{}"); 85 // requires \usepackage[T1]{fontenc} 86 break; 87 case 39: // '\'' 88 cmark_render_ascii(renderer, "\\textquotesingle{}"); 89 // requires \usepackage{textcomp} 90 break; 91 case 160: // nbsp 92 cmark_render_ascii(renderer, "~"); 93 break; 94 case 8230: // hellip 95 cmark_render_ascii(renderer, "\\ldots{}"); 96 break; 97 case 8216: // lsquo 98 if (escape == NORMAL) { 99 cmark_render_ascii(renderer, "`"); 100 } else { 101 cmark_render_code_point(renderer, c); 102 } 103 break; 104 case 8217: // rsquo 105 if (escape == NORMAL) { 106 cmark_render_ascii(renderer, "\'"); 107 } else { 108 cmark_render_code_point(renderer, c); 109 } 110 break; 111 case 8220: // ldquo 112 if (escape == NORMAL) { 113 cmark_render_ascii(renderer, "``"); 114 } else { 115 cmark_render_code_point(renderer, c); 116 } 117 break; 118 case 8221: // rdquo 119 if (escape == NORMAL) { 120 cmark_render_ascii(renderer, "''"); 121 } else { 122 cmark_render_code_point(renderer, c); 123 } 124 break; 125 case 8212: // emdash 126 if (escape == NORMAL) { 127 cmark_render_ascii(renderer, "---"); 128 } else { 129 cmark_render_code_point(renderer, c); 130 } 131 break; 132 case 8211: // endash 133 if (escape == NORMAL) { 134 cmark_render_ascii(renderer, "--"); 135 } else { 136 cmark_render_code_point(renderer, c); 137 } 138 break; 139 default: 140 cmark_render_code_point(renderer, c); 141 } 142 } 143 144 typedef enum { 145 NO_LINK, 146 URL_AUTOLINK, 147 EMAIL_AUTOLINK, 148 NORMAL_LINK, 149 INTERNAL_LINK 150 } link_type; 151 152 static link_type get_link_type(cmark_node *node) { 153 size_t title_len, url_len; 154 cmark_node *link_text; 155 char *realurl; 156 int realurllen; 157 bool isemail = false; 158 159 if (node->type != CMARK_NODE_LINK) { 160 return NO_LINK; 161 } 162 163 const char *url = cmark_node_get_url(node); 164 cmark_chunk url_chunk = cmark_chunk_literal(url); 165 166 if (url && *url == '#') { 167 return INTERNAL_LINK; 168 } 169 170 url_len = strlen(url); 171 if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) { 172 return NO_LINK; 173 } 174 175 const char *title = cmark_node_get_title(node); 176 title_len = strlen(title); 177 // if it has a title, we can't treat it as an autolink: 178 if (title_len == 0) { 179 180 link_text = node->first_child; 181 cmark_consolidate_text_nodes(link_text); 182 183 if (!link_text) 184 return NO_LINK; 185 186 realurl = (char *)url; 187 realurllen = (int)url_len; 188 if (strncmp(realurl, "mailto:", 7) == 0) { 189 realurl += 7; 190 realurllen -= 7; 191 isemail = true; 192 } 193 if (realurllen == link_text->len && 194 strncmp(realurl, (char *)link_text->data, 195 link_text->len) == 0) { 196 if (isemail) { 197 return EMAIL_AUTOLINK; 198 } else { 199 return URL_AUTOLINK; 200 } 201 } 202 } 203 204 return NORMAL_LINK; 205 } 206 207 static int S_get_enumlevel(cmark_node *node) { 208 int enumlevel = 0; 209 cmark_node *tmp = node; 210 while (tmp) { 211 if (tmp->type == CMARK_NODE_LIST && 212 cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) { 213 enumlevel++; 214 } 215 tmp = tmp->parent; 216 } 217 return enumlevel; 218 } 219 220 static int S_render_node(cmark_renderer *renderer, cmark_node *node, 221 cmark_event_type ev_type, int options) { 222 int list_number; 223 int enumlevel; 224 char list_number_string[LIST_NUMBER_STRING_SIZE]; 225 bool entering = (ev_type == CMARK_EVENT_ENTER); 226 cmark_list_type list_type; 227 bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); 228 229 // avoid warning about unused parameter: 230 (void)(options); 231 232 switch (node->type) { 233 case CMARK_NODE_DOCUMENT: 234 break; 235 236 case CMARK_NODE_BLOCK_QUOTE: 237 if (entering) { 238 LIT("\\begin{quote}"); 239 CR(); 240 } else { 241 LIT("\\end{quote}"); 242 BLANKLINE(); 243 } 244 break; 245 246 case CMARK_NODE_LIST: 247 list_type = cmark_node_get_list_type(node); 248 if (entering) { 249 LIT("\\begin{"); 250 LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); 251 LIT("}"); 252 CR(); 253 list_number = cmark_node_get_list_start(node); 254 if (list_number > 1) { 255 enumlevel = S_get_enumlevel(node); 256 // latex normally supports only five levels 257 if (enumlevel >= 1 && enumlevel <= 5) { 258 snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d", 259 list_number); 260 LIT("\\setcounter{enum"); 261 switch (enumlevel) { 262 case 1: LIT("i"); break; 263 case 2: LIT("ii"); break; 264 case 3: LIT("iii"); break; 265 case 4: LIT("iv"); break; 266 case 5: LIT("v"); break; 267 default: LIT("i"); break; 268 } 269 LIT("}{"); 270 OUT(list_number_string, false, NORMAL); 271 LIT("}"); 272 } 273 CR(); 274 } 275 } else { 276 LIT("\\end{"); 277 LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize"); 278 LIT("}"); 279 BLANKLINE(); 280 } 281 break; 282 283 case CMARK_NODE_ITEM: 284 if (entering) { 285 LIT("\\item "); 286 } else { 287 CR(); 288 } 289 break; 290 291 case CMARK_NODE_HEADING: 292 if (entering) { 293 switch (cmark_node_get_heading_level(node)) { 294 case 1: 295 LIT("\\section"); 296 break; 297 case 2: 298 LIT("\\subsection"); 299 break; 300 case 3: 301 LIT("\\subsubsection"); 302 break; 303 case 4: 304 LIT("\\paragraph"); 305 break; 306 case 5: 307 LIT("\\subparagraph"); 308 break; 309 } 310 LIT("{"); 311 } else { 312 LIT("}"); 313 BLANKLINE(); 314 } 315 break; 316 317 case CMARK_NODE_CODE_BLOCK: 318 CR(); 319 LIT("\\begin{verbatim}"); 320 CR(); 321 OUT(cmark_node_get_literal(node), false, LITERAL); 322 CR(); 323 LIT("\\end{verbatim}"); 324 BLANKLINE(); 325 break; 326 327 case CMARK_NODE_HTML_BLOCK: 328 break; 329 330 case CMARK_NODE_CUSTOM_BLOCK: 331 CR(); 332 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 333 false, LITERAL); 334 CR(); 335 break; 336 337 case CMARK_NODE_THEMATIC_BREAK: 338 BLANKLINE(); 339 LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}"); 340 BLANKLINE(); 341 break; 342 343 case CMARK_NODE_PARAGRAPH: 344 if (!entering) { 345 BLANKLINE(); 346 } 347 break; 348 349 case CMARK_NODE_TEXT: 350 OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); 351 break; 352 353 case CMARK_NODE_LINEBREAK: 354 LIT("\\\\"); 355 CR(); 356 break; 357 358 case CMARK_NODE_SOFTBREAK: 359 if (options & CMARK_OPT_HARDBREAKS) { 360 LIT("\\\\"); 361 CR(); 362 } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) { 363 CR(); 364 } else { 365 OUT(" ", allow_wrap, NORMAL); 366 } 367 break; 368 369 case CMARK_NODE_CODE: 370 LIT("\\texttt{"); 371 OUT(cmark_node_get_literal(node), false, NORMAL); 372 LIT("}"); 373 break; 374 375 case CMARK_NODE_HTML_INLINE: 376 break; 377 378 case CMARK_NODE_CUSTOM_INLINE: 379 OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), 380 false, LITERAL); 381 break; 382 383 case CMARK_NODE_STRONG: 384 if (entering) { 385 LIT("\\textbf{"); 386 } else { 387 LIT("}"); 388 } 389 break; 390 391 case CMARK_NODE_EMPH: 392 if (entering) { 393 LIT("\\emph{"); 394 } else { 395 LIT("}"); 396 } 397 break; 398 399 case CMARK_NODE_LINK: 400 if (entering) { 401 const char *url = cmark_node_get_url(node); 402 // requires \usepackage{hyperref} 403 switch (get_link_type(node)) { 404 case URL_AUTOLINK: 405 LIT("\\url{"); 406 OUT(url, false, URL); 407 LIT("}"); 408 return 0; // Don't process further nodes to avoid double-rendering artefacts 409 case EMAIL_AUTOLINK: 410 LIT("\\href{"); 411 OUT(url, false, URL); 412 LIT("}\\nolinkurl{"); 413 break; 414 case NORMAL_LINK: 415 LIT("\\href{"); 416 OUT(url, false, URL); 417 LIT("}{"); 418 break; 419 case INTERNAL_LINK: 420 LIT("\\protect\\hyperlink{"); 421 OUT(url + 1, false, URL); 422 LIT("}{"); 423 break; 424 case NO_LINK: 425 LIT("{"); // error? 426 } 427 } else { 428 LIT("}"); 429 } 430 431 break; 432 433 case CMARK_NODE_IMAGE: 434 if (entering) { 435 LIT("\\protect\\includegraphics{"); 436 // requires \include{graphicx} 437 OUT(cmark_node_get_url(node), false, URL); 438 LIT("}"); 439 return 0; 440 } 441 break; 442 443 default: 444 assert(false); 445 break; 446 } 447 448 return 1; 449 } 450 451 char *cmark_render_latex(cmark_node *root, int options, int width) { 452 return cmark_render(root, options, width, outc, S_render_node); 453 }