cmark

My personal build of CMark ✏️

Commit
a79b2ed9b7051b259f1ab874e5168a853723c314
Parent
52b2f3e8c7f8d039eb1ba509a75f98046afb4c5e
Author
John MacFarlane <jgm@berkeley.edu>
Date

Revert "Merge pull request #58 from nwellnhof/optimize_utf8proc_detab"

This reverts commit 54d1249c2caebf45a24d691dc765fb93c9a5e594, reversing changes made to bc14d869323650e936c7143dcf941b28ccd5b57d.

Diffstat

1 file changed, 37 insertions, 34 deletions

| Status | File Name | N° Changes | Insertions | Deletions |
|---|---|---|---|---|
| Modified | src/utf8.c | 71 | 37 | 34 |
diff --git a/src/utf8.c b/src/utf8.c
@@ -54,11 +54,9 @@ static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
 }
 
 // Validate a single UTF-8 character according to RFC 3629.
-// Assumes a multi-byte UTF-8 sequence.
 static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 {
 	int length = utf8proc_utf8class[str[0]];
-	assert(length != 1);
 
 	if (!length)
 		return -1;
@@ -66,48 +64,53 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 	if ((bufsize_t)length > str_len)
 		return -str_len;
 
-	if ((str[1] & 0xC0) != 0x80)
-		return -1;
-
-	if (length == 2) {
+	switch (length) {
+	case 2:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
 		if (str[0] < 0xC2) {
 			// Overlong
 			return -length;
 		}
-	}
-	else {
+		break;
+
+	case 3:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
 		if ((str[2] & 0xC0) != 0x80)
 			return -2;
-
-		if (length == 3) {
-			if (str[0] == 0xE0) {
-				if (str[1] < 0xA0) {
-					// Overlong
-					return -length;
-				}
-			} else if (str[0] == 0xED) {
-				if (str[1] >= 0xA0) {
-					// Surrogate
-					return -length;
-				}
+		if (str[0] == 0xE0) {
+			if (str[1] < 0xA0) {
+				// Overlong
+				return -length;
+			}
+		} else if (str[0] == 0xED) {
+			if (str[1] >= 0xA0) {
+				// Surrogate
+				return -length;
 			}
 		}
-		else {
-			if ((str[3] & 0xC0) != 0x80)
-				return -3;
-
-			if (str[0] == 0xF0) {
-				if (str[1] < 0x90) {
-					// Overlong
-					return -length;
-				}
-			} else if (str[0] >= 0xF4) {
-				if (str[0] > 0xF4 || str[1] >= 0x90) {
-					// Above 0x10FFFF
-					return -length;
-				}
+		break;
+
+	case 4:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
+		if ((str[2] & 0xC0) != 0x80)
+			return -2;
+		if ((str[3] & 0xC0) != 0x80)
+			return -3;
+		if (str[0] == 0xF0) {
+			if (str[1] < 0x90) {
+				// Overlong
+				return -length;
+			}
+		} else if (str[0] >= 0xF4) {
+			if (str[0] > 0xF4 || str[1] >= 0x90) {
+				// Above 0x10FFFF
+				return -length;
 			}
 		}
+		break;
 	}
 
 	return length;