cmark

My personal build of CMark ✏️

xml.c (7238B)

  1 #include <stdlib.h>
  2 #include <stdio.h>
  3 #include <string.h>
  4 #include <assert.h>
  5 
  6 #include "config.h"
  7 #include "cmark.h"
  8 #include "node.h"
  9 #include "buffer.h"
 10 
// Size in bytes of the scratch buffer S_render_node() formats numeric and
// boolean attributes into with snprintf().
#define BUFFER_SIZE 100
// Upper bound on the number of spaces indent() emits for one line, however
// deep the node nesting gets.
#define MAX_INDENT 40
 13 
 14 // Functions to convert cmark_nodes to XML strings.
 15 
 16 // C0 control characters, U+FFFE and U+FFF aren't allowed in XML.
 17 static const char XML_ESCAPE_TABLE[256] = {
 18     /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
 19     /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 20     /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 21     /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
 22     /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 23     /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 24     /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 25     /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 26     /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 27     /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 28     /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 29     /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9,
 30     /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 31     /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 32     /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 33     /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 34 };
 35 
 36 // U+FFFD Replacement Character encoded in UTF-8
 37 #define UTF8_REPL "\xEF\xBF\xBD"
 38 
 39 static const char *XML_ESCAPES[] = {
 40   "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;"
 41 };
 42 
 43 static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
 44                        bufsize_t size) {
 45   bufsize_t i = 0, org, esc = 0;
 46 
 47   while (i < size) {
 48     org = i;
 49     while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
 50       i++;
 51 
 52     if (i > org)
 53       cmark_strbuf_put(ob, src + org, i - org);
 54 
 55     if (i >= size)
 56       break;
 57 
 58     if (esc == 9) {
 59       // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
 60       // be changed.
 61       // We know that src[i] is 0xBE or 0xBF.
 62       if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
 63         cmark_strbuf_putc(ob, 0xBD);
 64       } else {
 65         cmark_strbuf_putc(ob, src[i]);
 66       }
 67     } else {
 68       cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
 69     }
 70 
 71     i++;
 72   }
 73 }
 74 
 75 static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
 76   if (source)
 77     escape_xml(dest, source, strlen((char *)source));
 78 }
 79 
// Mutable state threaded through the renderer while walking the node tree.
struct render_state {
  // Output buffer that the generated XML is appended to.
  cmark_strbuf *xml;
  // Current indentation in spaces; S_render_node() adds 2 when it opens an
  // element that has children and subtracts 2 when it closes it.
  int indent;
};
 84 
 85 static CMARK_INLINE void indent(struct render_state *state) {
 86   int i;
 87   for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
 88     cmark_strbuf_putc(state->xml, ' ');
 89   }
 90 }
 91 
 92 static int S_render_node(cmark_node *node, cmark_event_type ev_type,
 93                          struct render_state *state, int options) {
 94   cmark_strbuf *xml = state->xml;
 95   bool literal = false;
 96   cmark_delim_type delim;
 97   bool entering = (ev_type == CMARK_EVENT_ENTER);
 98   char buffer[BUFFER_SIZE];
 99 
100   if (entering) {
101     indent(state);
102     cmark_strbuf_putc(xml, '<');
103     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
104 
105     if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
106       snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
107                node->start_line, node->start_column, node->end_line,
108                node->end_column);
109       cmark_strbuf_puts(xml, buffer);
110     }
111 
112     literal = false;
113 
114     switch (node->type) {
115     case CMARK_NODE_DOCUMENT:
116       cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
117       break;
118     case CMARK_NODE_TEXT:
119     case CMARK_NODE_CODE:
120     case CMARK_NODE_HTML_BLOCK:
121     case CMARK_NODE_HTML_INLINE:
122       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
123       escape_xml(xml, node->data, node->len);
124       cmark_strbuf_puts(xml, "</");
125       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
126       literal = true;
127       break;
128     case CMARK_NODE_LIST:
129       switch (cmark_node_get_list_type(node)) {
130       case CMARK_ORDERED_LIST:
131         cmark_strbuf_puts(xml, " type=\"ordered\"");
132         snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
133                  cmark_node_get_list_start(node));
134         cmark_strbuf_puts(xml, buffer);
135         delim = cmark_node_get_list_delim(node);
136         if (delim == CMARK_PAREN_DELIM) {
137           cmark_strbuf_puts(xml, " delim=\"paren\"");
138         } else if (delim == CMARK_PERIOD_DELIM) {
139           cmark_strbuf_puts(xml, " delim=\"period\"");
140         }
141         break;
142       case CMARK_BULLET_LIST:
143         cmark_strbuf_puts(xml, " type=\"bullet\"");
144         break;
145       default:
146         break;
147       }
148       snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
149                (cmark_node_get_list_tight(node) ? "true" : "false"));
150       cmark_strbuf_puts(xml, buffer);
151       break;
152     case CMARK_NODE_HEADING:
153       snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
154       cmark_strbuf_puts(xml, buffer);
155       break;
156     case CMARK_NODE_CODE_BLOCK:
157       if (node->as.code.info) {
158         cmark_strbuf_puts(xml, " info=\"");
159         escape_xml_str(xml, node->as.code.info);
160         cmark_strbuf_putc(xml, '"');
161       }
162       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
163       escape_xml(xml, node->data, node->len);
164       cmark_strbuf_puts(xml, "</");
165       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
166       literal = true;
167       break;
168     case CMARK_NODE_CUSTOM_BLOCK:
169     case CMARK_NODE_CUSTOM_INLINE:
170       cmark_strbuf_puts(xml, " on_enter=\"");
171       escape_xml_str(xml, node->as.custom.on_enter);
172       cmark_strbuf_putc(xml, '"');
173       cmark_strbuf_puts(xml, " on_exit=\"");
174       escape_xml_str(xml, node->as.custom.on_exit);
175       cmark_strbuf_putc(xml, '"');
176       break;
177     case CMARK_NODE_LINK:
178     case CMARK_NODE_IMAGE:
179       cmark_strbuf_puts(xml, " destination=\"");
180       escape_xml_str(xml, node->as.link.url);
181       cmark_strbuf_putc(xml, '"');
182       if (node->as.link.title) {
183         cmark_strbuf_puts(xml, " title=\"");
184         escape_xml_str(xml, node->as.link.title);
185         cmark_strbuf_putc(xml, '"');
186       }
187       break;
188     default:
189       break;
190     }
191     if (node->first_child) {
192       state->indent += 2;
193     } else if (!literal) {
194       cmark_strbuf_puts(xml, " /");
195     }
196     cmark_strbuf_puts(xml, ">\n");
197 
198   } else if (node->first_child) {
199     state->indent -= 2;
200     indent(state);
201     cmark_strbuf_puts(xml, "</");
202     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
203     cmark_strbuf_puts(xml, ">\n");
204   }
205 
206   return 1;
207 }
208 
209 char *cmark_render_xml(cmark_node *root, int options) {
210   char *result;
211   cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
212   cmark_event_type ev_type;
213   cmark_node *cur;
214   struct render_state state = {&xml, 0};
215 
216   cmark_iter *iter = cmark_iter_new(root);
217 
218   cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
219   cmark_strbuf_puts(state.xml,
220                     "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
221   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
222     cur = cmark_iter_get_node(iter);
223     S_render_node(cur, ev_type, &state, options);
224   }
225   result = (char *)cmark_strbuf_detach(&xml);
226 
227   cmark_iter_free(iter);
228   return result;
229 }