cmark
My personal build of CMark ✏️
xml.c (7238B)
1 #include <stdlib.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <assert.h> 5 6 #include "config.h" 7 #include "cmark.h" 8 #include "node.h" 9 #include "buffer.h" 10 11 #define BUFFER_SIZE 100 12 #define MAX_INDENT 40 13 14 // Functions to convert cmark_nodes to XML strings. 15 16 // C0 control characters, U+FFFE and U+FFF aren't allowed in XML. 17 static const char XML_ESCAPE_TABLE[256] = { 18 /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 19 /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20 /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21 /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0, 22 /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23 /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26 /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27 /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29 /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 30 /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31 /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33 /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 }; 35 36 // U+FFFD Replacement Character encoded in UTF-8 37 #define UTF8_REPL "\xEF\xBF\xBD" 38 39 static const char *XML_ESCAPES[] = { 40 "", UTF8_REPL, """, "&", "<", ">" 41 }; 42 43 static void escape_xml(cmark_strbuf *ob, const unsigned char *src, 44 bufsize_t size) { 45 bufsize_t i = 0, org, esc = 0; 46 47 while (i < size) { 48 org = i; 49 while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0) 50 i++; 51 52 if (i > org) 53 cmark_strbuf_put(ob, src + org, i - org); 54 55 if (i >= size) 56 break; 57 58 if (esc == 9) { 59 // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to 60 // be changed. 61 // We know that src[i] is 0xBE or 0xBF. 62 if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { 63 cmark_strbuf_putc(ob, 0xBD); 64 } else { 65 cmark_strbuf_putc(ob, src[i]); 66 } 67 } else { 68 cmark_strbuf_puts(ob, XML_ESCAPES[esc]); 69 } 70 71 i++; 72 } 73 } 74 75 static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) { 76 if (source) 77 escape_xml(dest, source, strlen((char *)source)); 78 } 79 80 struct render_state { 81 cmark_strbuf *xml; 82 int indent; 83 }; 84 85 static CMARK_INLINE void indent(struct render_state *state) { 86 int i; 87 for (i = 0; i < state->indent && i < MAX_INDENT; i++) { 88 cmark_strbuf_putc(state->xml, ' '); 89 } 90 } 91 92 static int S_render_node(cmark_node *node, cmark_event_type ev_type, 93 struct render_state *state, int options) { 94 cmark_strbuf *xml = state->xml; 95 bool literal = false; 96 cmark_delim_type delim; 97 bool entering = (ev_type == CMARK_EVENT_ENTER); 98 char buffer[BUFFER_SIZE]; 99 100 if (entering) { 101 indent(state); 102 cmark_strbuf_putc(xml, '<'); 103 cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 104 105 if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { 106 snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"", 107 node->start_line, node->start_column, node->end_line, 108 node->end_column); 109 cmark_strbuf_puts(xml, buffer); 110 } 111 112 literal = false; 113 114 switch (node->type) { 115 case CMARK_NODE_DOCUMENT: 116 cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\""); 117 break; 118 case CMARK_NODE_TEXT: 119 case CMARK_NODE_CODE: 120 case CMARK_NODE_HTML_BLOCK: 121 case CMARK_NODE_HTML_INLINE: 122 cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 123 escape_xml(xml, node->data, node->len); 124 cmark_strbuf_puts(xml, "</"); 125 cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 126 literal = true; 127 break; 128 case CMARK_NODE_LIST: 129 switch (cmark_node_get_list_type(node)) { 130 case CMARK_ORDERED_LIST: 131 cmark_strbuf_puts(xml, " type=\"ordered\""); 132 snprintf(buffer, BUFFER_SIZE, " start=\"%d\"", 133 cmark_node_get_list_start(node)); 134 cmark_strbuf_puts(xml, buffer); 135 delim = cmark_node_get_list_delim(node); 136 if (delim == CMARK_PAREN_DELIM) { 137 cmark_strbuf_puts(xml, " delim=\"paren\""); 138 } else if (delim == CMARK_PERIOD_DELIM) { 139 cmark_strbuf_puts(xml, " delim=\"period\""); 140 } 141 break; 142 case CMARK_BULLET_LIST: 143 cmark_strbuf_puts(xml, " type=\"bullet\""); 144 break; 145 default: 146 break; 147 } 148 snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"", 149 (cmark_node_get_list_tight(node) ? "true" : "false")); 150 cmark_strbuf_puts(xml, buffer); 151 break; 152 case CMARK_NODE_HEADING: 153 snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level); 154 cmark_strbuf_puts(xml, buffer); 155 break; 156 case CMARK_NODE_CODE_BLOCK: 157 if (node->as.code.info) { 158 cmark_strbuf_puts(xml, " info=\""); 159 escape_xml_str(xml, node->as.code.info); 160 cmark_strbuf_putc(xml, '"'); 161 } 162 cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); 163 escape_xml(xml, node->data, node->len); 164 cmark_strbuf_puts(xml, "</"); 165 cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 166 literal = true; 167 break; 168 case CMARK_NODE_CUSTOM_BLOCK: 169 case CMARK_NODE_CUSTOM_INLINE: 170 cmark_strbuf_puts(xml, " on_enter=\""); 171 escape_xml_str(xml, node->as.custom.on_enter); 172 cmark_strbuf_putc(xml, '"'); 173 cmark_strbuf_puts(xml, " on_exit=\""); 174 escape_xml_str(xml, node->as.custom.on_exit); 175 cmark_strbuf_putc(xml, '"'); 176 break; 177 case CMARK_NODE_LINK: 178 case CMARK_NODE_IMAGE: 179 cmark_strbuf_puts(xml, " destination=\""); 180 escape_xml_str(xml, node->as.link.url); 181 cmark_strbuf_putc(xml, '"'); 182 if (node->as.link.title) { 183 cmark_strbuf_puts(xml, " title=\""); 184 escape_xml_str(xml, node->as.link.title); 185 cmark_strbuf_putc(xml, '"'); 186 } 187 break; 188 default: 189 break; 190 } 191 if (node->first_child) { 192 state->indent += 2; 193 } else if (!literal) { 194 cmark_strbuf_puts(xml, " /"); 195 } 196 cmark_strbuf_puts(xml, ">\n"); 197 198 } else if (node->first_child) { 199 state->indent -= 2; 200 indent(state); 201 cmark_strbuf_puts(xml, "</"); 202 cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); 203 cmark_strbuf_puts(xml, ">\n"); 204 } 205 206 return 1; 207 } 208 209 char *cmark_render_xml(cmark_node *root, int options) { 210 char *result; 211 cmark_strbuf xml = CMARK_BUF_INIT(root->mem); 212 cmark_event_type ev_type; 213 cmark_node *cur; 214 struct render_state state = {&xml, 0}; 215 216 cmark_iter *iter = cmark_iter_new(root); 217 218 cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 219 cmark_strbuf_puts(state.xml, 220 "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"); 221 while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { 222 cur = cmark_iter_get_node(iter); 223 S_render_node(cur, ev_type, &state, options); 224 } 225 result = (char *)cmark_strbuf_detach(&xml); 226 227 cmark_iter_free(iter); 228 return result; 229 }