cmark

My personal build of CMark ✏️

latex.c (10457B)

  1 #include <stdlib.h>
  2 #include <stdio.h>
  3 #include <string.h>
  4 #include <assert.h>
  5 
  6 #include "config.h"
  7 #include "cmark.h"
  8 #include "node.h"
  9 #include "buffer.h"
 10 #include "utf8.h"
 11 #include "scanners.h"
 12 #include "render.h"
 13 
 14 #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
 15 #define LIT(s) renderer->out(renderer, s, false, LITERAL)
 16 #define CR() renderer->cr(renderer)
 17 #define BLANKLINE() renderer->blankline(renderer)
 18 #define LIST_NUMBER_STRING_SIZE 20
 19 
 20 static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
 21                               int32_t c, unsigned char nextc) {
 22   if (escape == LITERAL) {
 23     cmark_render_code_point(renderer, c);
 24     return;
 25   }
 26 
 27   switch (c) {
 28   case 123: // '{'
 29   case 125: // '}'
 30   case 35:  // '#'
 31   case 37:  // '%'
 32   case 38:  // '&'
 33     cmark_render_ascii(renderer, "\\");
 34     cmark_render_code_point(renderer, c);
 35     break;
 36   case 36: // '$'
 37   case 95: // '_'
 38     if (escape == NORMAL) {
 39       cmark_render_ascii(renderer, "\\");
 40     }
 41     cmark_render_code_point(renderer, c);
 42     break;
 43   case 45:             // '-'
 44     if (nextc == 45) { // prevent ligature
 45       cmark_render_ascii(renderer, "-{}");
 46     } else {
 47       cmark_render_ascii(renderer, "-");
 48     }
 49     break;
 50   case 126: // '~'
 51     if (escape == NORMAL) {
 52       cmark_render_ascii(renderer, "\\textasciitilde{}");
 53     } else {
 54       cmark_render_code_point(renderer, c);
 55     }
 56     break;
 57   case 94: // '^'
 58     cmark_render_ascii(renderer, "\\^{}");
 59     break;
 60   case 92: // '\\'
 61     if (escape == URL) {
 62       // / acts as path sep even on windows:
 63       cmark_render_ascii(renderer, "/");
 64     } else {
 65       cmark_render_ascii(renderer, "\\textbackslash{}");
 66     }
 67     break;
 68   case 124: // '|'
 69     cmark_render_ascii(renderer, "\\textbar{}");
 70     break;
 71   case 60: // '<'
 72     cmark_render_ascii(renderer, "\\textless{}");
 73     break;
 74   case 62: // '>'
 75     cmark_render_ascii(renderer, "\\textgreater{}");
 76     break;
 77   case 91: // '['
 78   case 93: // ']'
 79     cmark_render_ascii(renderer, "{");
 80     cmark_render_code_point(renderer, c);
 81     cmark_render_ascii(renderer, "}");
 82     break;
 83   case 34: // '"'
 84     cmark_render_ascii(renderer, "\\textquotedbl{}");
 85     // requires \usepackage[T1]{fontenc}
 86     break;
 87   case 39: // '\''
 88     cmark_render_ascii(renderer, "\\textquotesingle{}");
 89     // requires \usepackage{textcomp}
 90     break;
 91   case 160: // nbsp
 92     cmark_render_ascii(renderer, "~");
 93     break;
 94   case 8230: // hellip
 95     cmark_render_ascii(renderer, "\\ldots{}");
 96     break;
 97   case 8216: // lsquo
 98     if (escape == NORMAL) {
 99       cmark_render_ascii(renderer, "`");
100     } else {
101       cmark_render_code_point(renderer, c);
102     }
103     break;
104   case 8217: // rsquo
105     if (escape == NORMAL) {
106       cmark_render_ascii(renderer, "\'");
107     } else {
108       cmark_render_code_point(renderer, c);
109     }
110     break;
111   case 8220: // ldquo
112     if (escape == NORMAL) {
113       cmark_render_ascii(renderer, "``");
114     } else {
115       cmark_render_code_point(renderer, c);
116     }
117     break;
118   case 8221: // rdquo
119     if (escape == NORMAL) {
120       cmark_render_ascii(renderer, "''");
121     } else {
122       cmark_render_code_point(renderer, c);
123     }
124     break;
125   case 8212: // emdash
126     if (escape == NORMAL) {
127       cmark_render_ascii(renderer, "---");
128     } else {
129       cmark_render_code_point(renderer, c);
130     }
131     break;
132   case 8211: // endash
133     if (escape == NORMAL) {
134       cmark_render_ascii(renderer, "--");
135     } else {
136       cmark_render_code_point(renderer, c);
137     }
138     break;
139   default:
140     cmark_render_code_point(renderer, c);
141   }
142 }
143 
// Classification of a node as computed by get_link_type().
typedef enum {
  NO_LINK = 0,    // not a link node, or URL is empty / has no scheme
  URL_AUTOLINK,   // untitled link whose text equals its URL
  EMAIL_AUTOLINK, // untitled "mailto:" link whose text equals the address
  NORMAL_LINK,    // any other link with a scheme
  INTERNAL_LINK   // URL starts with '#'
} link_type;
151 
152 static link_type get_link_type(cmark_node *node) {
153   size_t title_len, url_len;
154   cmark_node *link_text;
155   char *realurl;
156   int realurllen;
157   bool isemail = false;
158 
159   if (node->type != CMARK_NODE_LINK) {
160     return NO_LINK;
161   }
162 
163   const char *url = cmark_node_get_url(node);
164   cmark_chunk url_chunk = cmark_chunk_literal(url);
165 
166   if (url && *url == '#') {
167     return INTERNAL_LINK;
168   }
169 
170   url_len = strlen(url);
171   if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
172     return NO_LINK;
173   }
174 
175   const char *title = cmark_node_get_title(node);
176   title_len = strlen(title);
177   // if it has a title, we can't treat it as an autolink:
178   if (title_len == 0) {
179 
180     link_text = node->first_child;
181     cmark_consolidate_text_nodes(link_text);
182 
183     if (!link_text)
184       return NO_LINK;
185 
186     realurl = (char *)url;
187     realurllen = (int)url_len;
188     if (strncmp(realurl, "mailto:", 7) == 0) {
189       realurl += 7;
190       realurllen -= 7;
191       isemail = true;
192     }
193     if (realurllen == link_text->len &&
194         strncmp(realurl, (char *)link_text->data,
195                 link_text->len) == 0) {
196       if (isemail) {
197         return EMAIL_AUTOLINK;
198       } else {
199         return URL_AUTOLINK;
200       }
201     }
202   }
203 
204   return NORMAL_LINK;
205 }
206 
207 static int S_get_enumlevel(cmark_node *node) {
208   int enumlevel = 0;
209   cmark_node *tmp = node;
210   while (tmp) {
211     if (tmp->type == CMARK_NODE_LIST &&
212         cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) {
213       enumlevel++;
214     }
215     tmp = tmp->parent;
216   }
217   return enumlevel;
218 }
219 
220 static int S_render_node(cmark_renderer *renderer, cmark_node *node,
221                          cmark_event_type ev_type, int options) {
222   int list_number;
223   int enumlevel;
224   char list_number_string[LIST_NUMBER_STRING_SIZE];
225   bool entering = (ev_type == CMARK_EVENT_ENTER);
226   cmark_list_type list_type;
227   bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
228 
229   // avoid warning about unused parameter:
230   (void)(options);
231 
232   switch (node->type) {
233   case CMARK_NODE_DOCUMENT:
234     break;
235 
236   case CMARK_NODE_BLOCK_QUOTE:
237     if (entering) {
238       LIT("\\begin{quote}");
239       CR();
240     } else {
241       LIT("\\end{quote}");
242       BLANKLINE();
243     }
244     break;
245 
246   case CMARK_NODE_LIST:
247     list_type = cmark_node_get_list_type(node);
248     if (entering) {
249       LIT("\\begin{");
250       LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
251       LIT("}");
252       CR();
253       list_number = cmark_node_get_list_start(node);
254       if (list_number > 1) {
255         enumlevel = S_get_enumlevel(node);
256         // latex normally supports only five levels
257         if (enumlevel >= 1 && enumlevel <= 5) {
258           snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
259                    list_number);
260           LIT("\\setcounter{enum");
261           switch (enumlevel) {
262           case 1: LIT("i"); break;
263           case 2: LIT("ii"); break;
264           case 3: LIT("iii"); break;
265           case 4: LIT("iv"); break;
266           case 5: LIT("v"); break;
267           default: LIT("i"); break;
268 	  }
269           LIT("}{");
270           OUT(list_number_string, false, NORMAL);
271           LIT("}");
272         }
273         CR();
274       }
275     } else {
276       LIT("\\end{");
277       LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
278       LIT("}");
279       BLANKLINE();
280     }
281     break;
282 
283   case CMARK_NODE_ITEM:
284     if (entering) {
285       LIT("\\item ");
286     } else {
287       CR();
288     }
289     break;
290 
291   case CMARK_NODE_HEADING:
292     if (entering) {
293       switch (cmark_node_get_heading_level(node)) {
294       case 1:
295         LIT("\\section");
296         break;
297       case 2:
298         LIT("\\subsection");
299         break;
300       case 3:
301         LIT("\\subsubsection");
302         break;
303       case 4:
304         LIT("\\paragraph");
305         break;
306       case 5:
307         LIT("\\subparagraph");
308         break;
309       }
310       LIT("{");
311     } else {
312       LIT("}");
313       BLANKLINE();
314     }
315     break;
316 
317   case CMARK_NODE_CODE_BLOCK:
318     CR();
319     LIT("\\begin{verbatim}");
320     CR();
321     OUT(cmark_node_get_literal(node), false, LITERAL);
322     CR();
323     LIT("\\end{verbatim}");
324     BLANKLINE();
325     break;
326 
327   case CMARK_NODE_HTML_BLOCK:
328     break;
329 
330   case CMARK_NODE_CUSTOM_BLOCK:
331     CR();
332     OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
333         false, LITERAL);
334     CR();
335     break;
336 
337   case CMARK_NODE_THEMATIC_BREAK:
338     BLANKLINE();
339     LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
340     BLANKLINE();
341     break;
342 
343   case CMARK_NODE_PARAGRAPH:
344     if (!entering) {
345       BLANKLINE();
346     }
347     break;
348 
349   case CMARK_NODE_TEXT:
350     OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
351     break;
352 
353   case CMARK_NODE_LINEBREAK:
354     LIT("\\\\");
355     CR();
356     break;
357 
358   case CMARK_NODE_SOFTBREAK:
359     if (options & CMARK_OPT_HARDBREAKS) {
360       LIT("\\\\");
361       CR();
362     } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
363       CR();
364     } else {
365       OUT(" ", allow_wrap, NORMAL);
366     }
367     break;
368 
369   case CMARK_NODE_CODE:
370     LIT("\\texttt{");
371     OUT(cmark_node_get_literal(node), false, NORMAL);
372     LIT("}");
373     break;
374 
375   case CMARK_NODE_HTML_INLINE:
376     break;
377 
378   case CMARK_NODE_CUSTOM_INLINE:
379     OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
380         false, LITERAL);
381     break;
382 
383   case CMARK_NODE_STRONG:
384     if (entering) {
385       LIT("\\textbf{");
386     } else {
387       LIT("}");
388     }
389     break;
390 
391   case CMARK_NODE_EMPH:
392     if (entering) {
393       LIT("\\emph{");
394     } else {
395       LIT("}");
396     }
397     break;
398 
399   case CMARK_NODE_LINK:
400     if (entering) {
401       const char *url = cmark_node_get_url(node);
402       // requires \usepackage{hyperref}
403       switch (get_link_type(node)) {
404       case URL_AUTOLINK:
405         LIT("\\url{");
406         OUT(url, false, URL);
407         LIT("}");
408         return 0; // Don't process further nodes to avoid double-rendering artefacts
409       case EMAIL_AUTOLINK:
410         LIT("\\href{");
411         OUT(url, false, URL);
412         LIT("}\\nolinkurl{");
413         break;
414       case NORMAL_LINK:
415         LIT("\\href{");
416         OUT(url, false, URL);
417         LIT("}{");
418         break;
419       case INTERNAL_LINK:
420         LIT("\\protect\\hyperlink{");
421         OUT(url + 1, false, URL);
422         LIT("}{");
423         break;
424       case NO_LINK:
425         LIT("{"); // error?
426       }
427     } else {
428       LIT("}");
429     }
430 
431     break;
432 
433   case CMARK_NODE_IMAGE:
434     if (entering) {
435       LIT("\\protect\\includegraphics{");
436       // requires \include{graphicx}
437       OUT(cmark_node_get_url(node), false, URL);
438       LIT("}");
439       return 0;
440     }
441     break;
442 
443   default:
444     assert(false);
445     break;
446   }
447 
448   return 1;
449 }
450 
451 char *cmark_render_latex(cmark_node *root, int options, int width) {
452   return cmark_render(root, options, width, outc, S_render_node);
453 }