cmark
My personal build of CMark ✏️
houdini_href_e.c (3016B)
1 #include <assert.h> 2 #include <stdio.h> 3 #include <string.h> 4 5 #include "houdini.h" 6 7 /* 8 * The following characters will not be escaped: 9 * 10 * -_.+!*'(),%#@?=;:/,+&$ alphanum 11 * 12 * Note that this character set is the addition of: 13 * 14 * - The characters which are safe to be in an URL 15 * - The characters which are *not* safe to be in 16 * an URL because they are RESERVED characters. 17 * 18 * We assume (lazily) that any RESERVED char that 19 * appears inside an URL is actually meant to 20 * have its native function (i.e. as an URL 21 * component/separator) and hence needs no escaping. 22 * 23 * There are two exceptions: the characters & (amp) 24 * and ' (single quote) do not appear in the table. 25 * They are meant to appear in the URL as components, 26 * yet they require special HTML-entity escaping 27 * to generate valid HTML markup. 28 * 29 * All other characters will be escaped to %XX. 30 * 31 */ 32 static const char HREF_SAFE[] = { 33 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 37 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 }; 45 46 int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { 47 static const uint8_t hex_chars[] = "0123456789ABCDEF"; 48 bufsize_t i = 0, org; 49 uint8_t hex_str[3]; 50 51 hex_str[0] = '%'; 52 53 while (i < size) { 54 org = i; 55 while (i < size && HREF_SAFE[src[i]] != 0) 56 i++; 57 58 if (likely(i > org)) 59 cmark_strbuf_put(ob, src + org, i - org); 60 61 /* escaping */ 62 if (i >= size) 63 break; 64 65 switch (src[i]) { 66 /* amp appears all the time in URLs, but needs 67 * HTML-entity escaping to be inside an href */ 68 case '&': 69 cmark_strbuf_puts(ob, "&"); 70 break; 71 72 /* the single quote is a valid URL character 73 * according to the standard; it needs HTML 74 * entity escaping too */ 75 case '\'': 76 cmark_strbuf_puts(ob, "'"); 77 break; 78 79 /* the space can be escaped to %20 or a plus 80 * sign. we're going with the generic escape 81 * for now. the plus thing is more commonly seen 82 * when building GET strings */ 83 #if 0 84 case ' ': 85 cmark_strbuf_putc(ob, '+'); 86 break; 87 #endif 88 89 /* every other character goes with a %XX escaping */ 90 default: 91 hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; 92 hex_str[2] = hex_chars[src[i] & 0xF]; 93 cmark_strbuf_put(ob, hex_str, 3); 94 } 95 96 i++; 97 } 98 99 return 1; 100 }