cmark
My personal build of CMark ✏️
- Commit
- bb26b18173df983c57459809e8b1691b89907a58
- Parent
- c6417fc0b9cd240eb175501d44f68ea9d4406ec4
- Author
- John MacFarlane <jgm@berkeley.edu>
- Date
Added CMARK_OPT_SMARTPUNCT and --smart option.
So far this is only implemented for the HTML renderer.
Ultimately some of this should be factored out into a form that
can be used in other renderers.
Diffstat
4 files changed, 106 insertions, 3 deletions
diff --git a/src/html.c b/src/html.c
@@ -6,6 +6,7 @@
#include "config.h"
#include "cmark.h"
#include "node.h"
+#include "utf8.h"
#include "buffer.h"
#include "houdini.h"
@@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
char start_header[] = "<h0";
char end_header[] = "</h0";
bool tight;
+ int lastout, i;
+ cmark_chunk lit;
+ char before_char, after_char, c;
+ bool left_flanking, right_flanking;
bool entering = (ev_type == CMARK_EVENT_ENTER);
@@ -217,8 +222,87 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
break;
case CMARK_NODE_TEXT:
- escape_html(html, node->as.literal.data,
- node->as.literal.len);
+ if (options & CMARK_OPT_SMARTPUNCT) {
+ lastout = 0;
+ i = 0;
+ lit = node->as.literal;
+ while (i < lit.len) {
+ c = lit.data[i];
+ // replace with efficient lookup table:
+ if (c != '"' && c != '-' && c != '\'' && c != '.') {
+ i++;
+ continue;
+ }
+ escape_html(html, lit.data + lastout,
+ i - lastout);
+ if (c == '\'' || c == '"') {
+ before_char = i == 0 ? ',' : lit.data[i - 1];
+ after_char = i == lit.len - 1 ? ',' : lit.data[i + 1];
+ left_flanking = !utf8proc_is_space(after_char) &&
+ !(utf8proc_is_punctuation(after_char) &&
+ !utf8proc_is_space(before_char) &&
+ !utf8proc_is_punctuation(before_char));
+ right_flanking = !utf8proc_is_space(before_char) &&
+ !(utf8proc_is_punctuation(before_char) &&
+ !utf8proc_is_space(after_char) &&
+ !utf8proc_is_punctuation(after_char));
+ }
+ switch (lit.data[i]) {
+ case '"':
+ if (right_flanking) {
+ cmark_strbuf_puts(html, "”");
+ } else {
+ cmark_strbuf_puts(html, "“");
+ }
+ i += 1;
+ break;
+ case '\'':
+ if (left_flanking && !right_flanking) {
+ cmark_strbuf_puts(html, "‘");
+ } else {
+ cmark_strbuf_puts(html, "’");
+ }
+ i += 1;
+ break;
+ case '-':
+ if (i < lit.len - 1 && lit.data[i + 1] == '-') {
+ if (lit.data[i + 2] == '-') {
+ cmark_strbuf_puts(html,
+ "—");
+ i += 3;
+ } else {
+ cmark_strbuf_puts(html, "–");
+ i += 2;
+ }
+ } else {
+ cmark_strbuf_putc(html, c);
+ i += 1;
+ }
+ break;
+ case '.':
+ if (i < lit.len - 2 && lit.data[i + 1] == '.' &&
+ lit.data[i + 2] == '.') {
+ cmark_strbuf_puts(html,
+ "…");
+ i += 3;
+ } else {
+ cmark_strbuf_putc(html, c);
+ i += 1;
+ }
+ break;
+ default:
+ cmark_strbuf_putc(html, c);
+ i++;
+ }
+ lastout = i;
+ }
+ escape_html(html, node->as.literal.data + lastout,
+ i - lastout);
+
+ } else {
+ escape_html(html, node->as.literal.data,
+ node->as.literal.len);
+ }
break;
case CMARK_NODE_LINEBREAK:
diff --git a/src/main.c b/src/main.c
@@ -26,6 +26,7 @@ void print_usage()
printf(" --to, -t FORMAT Specify output format (html, xml, man)\n");
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
+ printf(" --smart Use smart punctuation\n");
printf(" --normalize Consolidate adjacent text nodes\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
@@ -80,6 +81,8 @@ int main(int argc, char *argv[])
options |= CMARK_OPT_SOURCEPOS;
} else if (strcmp(argv[i], "--hardbreaks") == 0) {
options |= CMARK_OPT_HARDBREAKS;
+ } else if (strcmp(argv[i], "--smart") == 0) {
+ options |= CMARK_OPT_SMARTPUNCT;
} else if (strcmp(argv[i], "--normalize") == 0) {
options |= CMARK_OPT_NORMALIZE;
} else if ((strcmp(argv[i], "--help") == 0) ||