cmark

My personal build of CMark ✏️

html.c (9287B)

  1 #include <stdlib.h>
  2 #include <stdio.h>
  3 #include <string.h>
  4 #include <assert.h>
  5 #include "cmark_ctype.h"
  6 #include "config.h"
  7 #include "cmark.h"
  8 #include "node.h"
  9 #include "buffer.h"
 10 #include "houdini.h"
 11 #include "scanners.h"
 12 
 13 #define BUFFER_SIZE 100
 14 
 15 // Functions to convert cmark_nodes to HTML strings.
 16 
 17 static void escape_html(cmark_strbuf *dest, const unsigned char *source,
 18                         bufsize_t length) {
 19   houdini_escape_html0(dest, source, length, 0);
 20 }
 21 
 22 static CMARK_INLINE void cr(cmark_strbuf *html) {
 23   if (html->size && html->ptr[html->size - 1] != '\n')
 24     cmark_strbuf_putc(html, '\n');
 25 }
 26 
// State carried across successive S_render_node calls during one render.
struct render_state {
  // Buffer that receives the generated HTML.
  cmark_strbuf *html;
  // Non-NULL while the renderer is in plain-text mode: S_render_node sets it
  // to an image node on entry so that the image's descendants are emitted as
  // bare text for the alt attribute, and clears it when that same node is
  // visited again.
  cmark_node *plain;
};
 31 
 32 static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html,
 33                                int options) {
 34   char buffer[BUFFER_SIZE];
 35   if (CMARK_OPT_SOURCEPOS & options) {
 36     snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
 37              cmark_node_get_start_line(node), cmark_node_get_start_column(node),
 38              cmark_node_get_end_line(node), cmark_node_get_end_column(node));
 39     cmark_strbuf_puts(html, buffer);
 40   }
 41 }
 42 
 43 static int S_render_node(cmark_node *node, cmark_event_type ev_type,
 44                          struct render_state *state, int options) {
 45   cmark_node *parent;
 46   cmark_node *grandparent;
 47   cmark_strbuf *html = state->html;
 48   char start_heading[] = "<h0";
 49   char end_heading[] = "</h0";
 50   bool tight;
 51   char buffer[BUFFER_SIZE];
 52 
 53   bool entering = (ev_type == CMARK_EVENT_ENTER);
 54 
 55   if (state->plain == node) { // back at original node
 56     state->plain = NULL;
 57   }
 58 
 59   if (state->plain != NULL) {
 60     switch (node->type) {
 61     case CMARK_NODE_TEXT:
 62     case CMARK_NODE_CODE:
 63     case CMARK_NODE_HTML_INLINE:
 64       escape_html(html, node->data, node->len);
 65       break;
 66 
 67     case CMARK_NODE_LINEBREAK:
 68     case CMARK_NODE_SOFTBREAK:
 69       cmark_strbuf_putc(html, ' ');
 70       break;
 71 
 72     default:
 73       break;
 74     }
 75     return 1;
 76   }
 77 
 78   switch (node->type) {
 79   case CMARK_NODE_DOCUMENT:
 80     break;
 81 
 82   case CMARK_NODE_BLOCK_QUOTE:
 83     if (entering) {
 84       cr(html);
 85       cmark_strbuf_puts(html, "<blockquote");
 86       S_render_sourcepos(node, html, options);
 87       cmark_strbuf_puts(html, ">\n");
 88     } else {
 89       cr(html);
 90       cmark_strbuf_puts(html, "</blockquote>\n");
 91     }
 92     break;
 93 
 94   case CMARK_NODE_LIST: {
 95     cmark_list_type list_type = (cmark_list_type)node->as.list.list_type;
 96     int start = node->as.list.start;
 97 
 98     if (entering) {
 99       cr(html);
100       if (list_type == CMARK_BULLET_LIST) {
101         cmark_strbuf_puts(html, "<ul");
102         S_render_sourcepos(node, html, options);
103         cmark_strbuf_puts(html, ">\n");
104       } else if (start == 1) {
105         cmark_strbuf_puts(html, "<ol");
106         S_render_sourcepos(node, html, options);
107         cmark_strbuf_puts(html, ">\n");
108       } else {
109         snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
110         cmark_strbuf_puts(html, buffer);
111         S_render_sourcepos(node, html, options);
112         cmark_strbuf_puts(html, ">\n");
113       }
114     } else {
115       cmark_strbuf_puts(html,
116                         list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
117     }
118     break;
119   }
120 
121   case CMARK_NODE_ITEM:
122     if (entering) {
123       cr(html);
124       cmark_strbuf_puts(html, "<li");
125       S_render_sourcepos(node, html, options);
126       cmark_strbuf_putc(html, '>');
127     } else {
128       cmark_strbuf_puts(html, "</li>\n");
129     }
130     break;
131 
132   case CMARK_NODE_HEADING:
133     if (entering) {
134       cr(html);
135       start_heading[2] = (char)('0' + node->as.heading.level);
136       cmark_strbuf_puts(html, start_heading);
137       S_render_sourcepos(node, html, options);
138       cmark_strbuf_putc(html, '>');
139     } else {
140       end_heading[3] = (char)('0' + node->as.heading.level);
141       cmark_strbuf_puts(html, end_heading);
142       cmark_strbuf_puts(html, ">\n");
143     }
144     break;
145 
146   case CMARK_NODE_CODE_BLOCK:
147     cr(html);
148 
149     if (node->as.code.info == NULL || node->as.code.info[0] == 0) {
150       cmark_strbuf_puts(html, "<div class=\"codeblock\"");
151       S_render_sourcepos(node, html, options);
152       cmark_strbuf_puts(html, "><pre><code>");
153     } else {
154       bufsize_t first_tag = 0;
155       while (node->as.code.info[first_tag] &&
156              !cmark_isspace(node->as.code.info[first_tag])) {
157         first_tag += 1;
158       }
159 
160       cmark_strbuf_puts(html, "<div class=\"codeblock\"");
161       S_render_sourcepos(node, html, options);
162       cmark_strbuf_puts(html, "><pre data-lang=\"");
163       escape_html(html, node->as.code.info, first_tag);
164       cmark_strbuf_puts(html, "\"><code>");
165     }
166 
167     escape_html(html, node->data, node->len);
168     cmark_strbuf_puts(html, "</code></pre></div>\n");
169     break;
170 
171   case CMARK_NODE_HTML_BLOCK:
172     cr(html);
173     if (!(options & CMARK_OPT_UNSAFE)) {
174       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
175     } else {
176       cmark_strbuf_put(html, node->data, node->len);
177     }
178     cr(html);
179     break;
180 
181   case CMARK_NODE_CUSTOM_BLOCK: {
182     unsigned char *block = entering ? node->as.custom.on_enter :
183                                       node->as.custom.on_exit;
184     cr(html);
185     if (block) {
186       cmark_strbuf_puts(html, (char *)block);
187     }
188     cr(html);
189     break;
190   }
191 
192   case CMARK_NODE_THEMATIC_BREAK:
193     cr(html);
194     cmark_strbuf_puts(html, "<hr");
195     S_render_sourcepos(node, html, options);
196     cmark_strbuf_puts(html, " />\n");
197     break;
198 
199   case CMARK_NODE_PARAGRAPH:
200     parent = cmark_node_parent(node);
201     grandparent = cmark_node_parent(parent);
202     if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
203       tight = grandparent->as.list.tight;
204     } else {
205       tight = false;
206     }
207     if (!tight) {
208       if (entering) {
209         cr(html);
210         cmark_strbuf_puts(html, "<p");
211         S_render_sourcepos(node, html, options);
212         cmark_strbuf_putc(html, '>');
213       } else {
214         cmark_strbuf_puts(html, "</p>\n");
215       }
216     }
217     break;
218 
219   case CMARK_NODE_TEXT:
220     escape_html(html, node->data, node->len);
221     break;
222 
223   case CMARK_NODE_LINEBREAK:
224     cmark_strbuf_puts(html, "<br />\n");
225     break;
226 
227   case CMARK_NODE_SOFTBREAK:
228     if (options & CMARK_OPT_HARDBREAKS) {
229       cmark_strbuf_puts(html, "<br />\n");
230     } else if (options & CMARK_OPT_NOBREAKS) {
231       cmark_strbuf_putc(html, ' ');
232     } else {
233       cmark_strbuf_putc(html, '\n');
234     }
235     break;
236 
237   case CMARK_NODE_CODE:
238     cmark_strbuf_puts(html, "<code class=\"inline\">");
239     escape_html(html, node->data, node->len);
240     cmark_strbuf_puts(html, "</code>");
241     break;
242 
243   case CMARK_NODE_HTML_INLINE:
244     if (!(options & CMARK_OPT_UNSAFE)) {
245       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
246     } else {
247       cmark_strbuf_put(html, node->data, node->len);
248     }
249     break;
250 
251   case CMARK_NODE_CUSTOM_INLINE: {
252     unsigned char *block = entering ? node->as.custom.on_enter :
253                                       node->as.custom.on_exit;
254     if (block) {
255       cmark_strbuf_puts(html, (char *)block);
256     }
257     break;
258   }
259 
260   case CMARK_NODE_STRONG:
261     if (entering) {
262       cmark_strbuf_puts(html, "<strong>");
263     } else {
264       cmark_strbuf_puts(html, "</strong>");
265     }
266     break;
267 
268   case CMARK_NODE_EMPH:
269     if (entering) {
270       cmark_strbuf_puts(html, "<em>");
271     } else {
272       cmark_strbuf_puts(html, "</em>");
273     }
274     break;
275 
276   case CMARK_NODE_LINK:
277     if (entering) {
278       cmark_strbuf_puts(html, "<a href=\"");
279       if (node->as.link.url && ((options & CMARK_OPT_UNSAFE) ||
280                                 !(_scan_dangerous_url(node->as.link.url)))) {
281         houdini_escape_href(html, node->as.link.url,
282                             strlen((char *)node->as.link.url));
283       }
284       if (node->as.link.title) {
285         cmark_strbuf_puts(html, "\" title=\"");
286         escape_html(html, node->as.link.title,
287                     strlen((char *)node->as.link.title));
288       }
289       cmark_strbuf_puts(html, "\">");
290     } else {
291       cmark_strbuf_puts(html, "</a>");
292     }
293     break;
294 
295   case CMARK_NODE_IMAGE:
296     if (entering) {
297       cmark_strbuf_puts(html, "<img src=\"");
298       if (node->as.link.url && ((options & CMARK_OPT_UNSAFE) ||
299                                 !(_scan_dangerous_url(node->as.link.url)))) {
300         houdini_escape_href(html, node->as.link.url,
301                             strlen((char *)node->as.link.url));
302       }
303       cmark_strbuf_puts(html, "\" alt=\"");
304       state->plain = node;
305     } else {
306       if (node->as.link.title) {
307         cmark_strbuf_puts(html, "\" title=\"");
308         escape_html(html, node->as.link.title,
309                     strlen((char *)node->as.link.title));
310       }
311 
312       cmark_strbuf_puts(html, "\" />");
313     }
314     break;
315 
316   default:
317     assert(false);
318     break;
319   }
320 
321   // cmark_strbuf_putc(html, 'x');
322   return 1;
323 }
324 
325 char *cmark_render_html(cmark_node *root, int options) {
326   char *result;
327   cmark_strbuf html = CMARK_BUF_INIT(root->mem);
328   cmark_event_type ev_type;
329   cmark_node *cur;
330   struct render_state state = {&html, NULL};
331   cmark_iter *iter = cmark_iter_new(root);
332 
333   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
334     cur = cmark_iter_get_node(iter);
335     S_render_node(cur, ev_type, &state, options);
336   }
337   result = (char *)cmark_strbuf_detach(&html);
338 
339   cmark_iter_free(iter);
340   return result;
341 }