cmark

My personal build of CMark ✏️

Commit
d51bb0e45be412f1f36b010f7458d2c392e28f2b
Parent
55d7b8808a7bba08345bcc94b7a462176c182fc9
Author
John MacFarlane <jgm@berkeley.edu>
Date

Optimize S_find_first_nonspace.

We were needlessly rescanning leading whitespace that had already been scanned. Now the scan is skipped when the previously computed `first_nonspace` position is already greater than the current `offset`.

This fixes pathological slowdown with deeply nested lists (#255). For N = 3000, the time goes from over 17s to about 0.7s.

Thanks to @mity for diagnosing the problem.

Diffstat

1 file changed, 19 insertions, 14 deletions

| Status   | File Name    | N° Changes | Insertions | Deletions |
|----------|--------------|------------|------------|-----------|
| Modified | src/blocks.c | 33         | 19         | 14        |
diff --git a/src/blocks.c b/src/blocks.c
@@ -615,22 +615,24 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
   char c;
   int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
 
-  parser->first_nonspace = parser->offset;
-  parser->first_nonspace_column = parser->column;
-  while ((c = peek_at(input, parser->first_nonspace))) {
-    if (c == ' ') {
-      parser->first_nonspace += 1;
-      parser->first_nonspace_column += 1;
-      chars_to_tab = chars_to_tab - 1;
-      if (chars_to_tab == 0) {
+  if (parser->first_nonspace <= parser->offset) {
+    parser->first_nonspace = parser->offset;
+    parser->first_nonspace_column = parser->column;
+    while ((c = peek_at(input, parser->first_nonspace))) {
+      if (c == ' ') {
+        parser->first_nonspace += 1;
+        parser->first_nonspace_column += 1;
+        chars_to_tab = chars_to_tab - 1;
+        if (chars_to_tab == 0) {
+          chars_to_tab = TAB_STOP;
+        }
+      } else if (c == '\t') {
+        parser->first_nonspace += 1;
+        parser->first_nonspace_column += chars_to_tab;
         chars_to_tab = TAB_STOP;
+      } else {
+        break;
       }
-    } else if (c == '\t') {
-      parser->first_nonspace += 1;
-      parser->first_nonspace_column += chars_to_tab;
-      chars_to_tab = TAB_STOP;
-    } else {
-      break;
     }
   }
 
@@ -1160,6 +1162,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
 
   parser->offset = 0;
   parser->column = 0;
+  parser->first_nonspace = 0;
+  parser->first_nonspace_column = 0;
+  parser->indent = 0;
   parser->blank = false;
   parser->partially_consumed_tab = false;