yagit

Yet another static site generator for Git 🙀️

Commit
72a0e3104448a5ca0fcd2f5d3883cae40d2dea74
Parent
33c3ff8f19757b5f262df820d5e1188f7862e6c2
Author
Pablo <pablo-pie@riseup.net>
Date

Implemented SIMD-optimized escaping

Diffstats

3 files changed, 191 insertions, 21 deletions

Status Name Changes Insertions Deletions
Added src/escape.rs 1 file changed 187 0
Modified src/main.rs 2 files changed 2 21
Modified src/markdown.rs 2 files changed 2 0
diff --git /dev/null b/src/escape.rs
@@ -0,0 +1,187 @@
+//! HTML Escaping
+//!
+//! Stolen from pulldown-cmark-escape
+//! <https://github.com/pulldown-cmark/pulldown-cmark/>
+
+use std::fmt::{self, Display};
+
+static ESCAPE_TABLE: [Option<&str>; 256] = create_html_escape_table();
+
+/// A wrapper for HTML-escaped strings
+pub struct Escaped<'a>(pub &'a str);
+
+// stolen from pulldown-cmark-escape
+impl Display for Escaped<'_> {
+  #[cfg(target_arch = "x86_64")]
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    // The SIMD accelerated code uses the PSHUFB instruction, which is part
+    // of the SSSE3 instruction set. Further, we can only use this code if
+    // the buffer is at least one VECTOR_SIZE in length to prevent reading
+    // out of bounds
+    if is_x86_feature_detected!("ssse3") && self.0.len() >= simd::VECTOR_SIZE {
+      simd::fmt_escaped_html(self.0, f)
+    } else {
+      fmt_escaped_html_scalar(self.0, f)
+    }
+  }
+
+  #[cfg(not(target_arch = "x86_64"))]
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    fmt_escaped_html_scalar(self.0, f)
+  }
+}
+
+fn fmt_escaped_html_scalar(
+  s: &str,
+  f: &mut fmt::Formatter<'_>
+) -> fmt::Result {
+  for c in s.chars() {
+    if let Some(replacement) = ESCAPE_TABLE[c as usize] {
+      f.write_str(replacement)?;
+    } else {
+      c.fmt(f)?;
+    }
+  }
+
+  Ok(())
+}
+
+const fn create_html_escape_table() -> [Option<&'static str>; 256] {
+  let mut table = [None; 256];
+  table[b'<'  as usize] = Some("&lt;");
+  table[b'>'  as usize] = Some("&gt;");
+  table[b'&'  as usize] = Some("&amp;");
+  table[b'"'  as usize] = Some("&quot;");
+  table[b'\'' as usize] = Some("&apos;");
+  table
+}
+
+// stolen from pulldown-cmark-escape
+#[cfg(target_arch = "x86_64")]
+mod simd {
+  use std::{
+    arch::x86_64::{
+      __m128i,
+      _mm_loadu_si128,
+      _mm_shuffle_epi8,
+      _mm_cmpeq_epi8,
+      _mm_movemask_epi8,
+    },
+    mem,
+    fmt,
+  };
+
+  pub const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
+
+  pub fn fmt_escaped_html(s: &str, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    let bytes = s.as_bytes();
+    let mut mark = 0;
+
+    unsafe {
+      foreach_special_simd(bytes, 0, |i| {
+        let c = *bytes.get_unchecked(i) as usize;
+        let entry = super::ESCAPE_TABLE[c];
+        f.write_str(s.get_unchecked(mark..i))?;
+        mark = i + 1; // all escaped characters are ASCII
+        if let Some(replacement) = entry {
+          f.write_str(replacement)
+        } else {
+          f.write_str(s.get_unchecked(i..mark))
+        } 
+      })?;
+      f.write_str(s.get_unchecked(mark..))
+    }
+  }
+
+  unsafe fn foreach_special_simd<F: FnMut(usize) -> fmt::Result>(
+    bytes: &[u8],
+    mut offset: usize,
+    mut callback: F,
+  ) -> fmt::Result {
+    // The strategy here is to walk the byte buffer in chunks of
+    // VECTOR_SIZE (16) bytes at a time starting at the given offset.
+    // For each chunk, we compute a a bitmask indicating whether the
+    // corresponding byte is a HTML special byte. We then iterate over all
+    // the 1 bits in this mask and call the callback function with the
+    // corresponding index in the buffer.
+    //
+    // When the number of HTML special bytes in the buffer is relatively low,
+    // this allows us to quickly go through the buffer without a lookup and
+    // for every single byte.
+
+    debug_assert!(bytes.len() >= VECTOR_SIZE);
+    let upperbound = bytes.len() - VECTOR_SIZE;
+    while offset < upperbound {
+      let mut mask = compute_mask(bytes, offset);
+      while mask != 0 {
+        let ix = mask.trailing_zeros();
+        callback(offset + ix as usize)?;
+        mask ^= mask & -mask;
+      }
+      offset += VECTOR_SIZE;
+    }
+
+    // Final iteration. We align the read with the end of the slice and
+    // shift off the bytes at start we have already scanned.
+    let mut mask = compute_mask(bytes, upperbound);
+    mask >>= offset - upperbound;
+    while mask != 0 {
+      let ix = mask.trailing_zeros();
+      callback(offset + ix as usize)?;
+      mask ^= mask & -mask;
+    }
+    Ok(())
+  }
+
+  #[target_feature(enable = "ssse3")]
+  /// Computes a byte mask at given offset in the byte buffer. Its first 16
+  /// (least significant) bits correspond to whether there is an HTML special
+  /// byte (&, <, ", >) at the 16 bytes `bytes[offset..]`. For example, the
+  /// mask `(1 << 3)` states that there is an HTML byte at `offset + 3`. It is
+  /// only safe to call this function when `bytes.len() >= offset +
+  /// VECTOR_SIZE`.
+  unsafe fn compute_mask(bytes: &[u8], offset: usize) -> i32 {
+    debug_assert!(bytes.len() >= offset + VECTOR_SIZE);
+
+    let table = create_lookup();
+    let lookup = _mm_loadu_si128(table.as_ptr() as *const __m128i);
+    let raw_ptr = bytes.as_ptr().add(offset) as *const __m128i;
+
+    // Load the vector from memory.
+    let vector = _mm_loadu_si128(raw_ptr);
+
+    // We take the least significant 4 bits of every byte and use them as
+    // indices to map into the lookup vector.
+    //
+    // Note that shuffle maps bytes with their most significant bit set to
+    // lookup[0]. Bytes that share their lower nibble with an HTML special
+    // byte get mapped to that corresponding special byte. Note that all HTML
+    // special bytes have distinct lower nibbles. Other bytes either get
+    // mapped to 0 or 127.
+    let expected = _mm_shuffle_epi8(lookup, vector);
+
+    // We compare the original vector to the mapped output. Bytes that shared
+    // a lower nibble with an HTML special byte match *only* if they are that
+    // special byte. Bytes that have either a 0 lower nibble or their most
+    // significant bit set were mapped to 127 and will hence never match. All
+    // other bytes have non-zero lower nibbles but were mapped to 0 and will
+    // therefore also not match.
+    let matches = _mm_cmpeq_epi8(expected, vector);
+
+    // Translate matches to a bitmask, where every 1 corresponds to a HTML
+    // special character and a 0 is a non-HTML byte.
+    _mm_movemask_epi8(matches)
+  }
+
+  /// Creates the lookup table for use in `compute_mask`.
+  const fn create_lookup() -> [u8; 16] {
+    let mut table = [0; 16];
+    table[(b'<' & 0x0f)  as usize] = b'<';
+    table[(b'>' & 0x0f)  as usize] = b'>';
+    table[(b'&' & 0x0f)  as usize] = b'&';
+    table[(b'"' & 0x0f)  as usize] = b'"';
+    table[(b'\'' & 0x0f) as usize] = b'\'';
+    table[0] = 0b0111_1111;
+    table
+  }
+}
diff --git a/src/main.rs b/src/main.rs
@@ -29,6 +29,7 @@ use git2::{
 use time::{DateTime, Date, FullDate};
 use command::{Cmd, SubCmd, Flags};
 use config::{TREE_SUBDIR, BLOB_SUBDIR, COMMIT_SUBDIR};
+use escape::Escaped;
 
 #[cfg(not(debug_assertions))]
 use std::borrow::Cow;
@@ -36,6 +37,7 @@ use std::borrow::Cow;
 #[macro_use]
 mod log;
 
+mod escape;
 mod markdown;
 mod time;
 mod command;
@@ -44,27 +46,6 @@ mod config;
 const README_NAMES: &[&str] = &["README", "README.txt", "README.md"];
 const LICENSE_NAME: &str    = "LICENSE";
 
-/// A wrapper for HTML-escaped strings
-struct Escaped<'a>(pub &'a str);
-
-impl Display for Escaped<'_> {
-  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
-    // TODO: [optimize]: use SIMD for this?
-    for c in self.0.chars() {
-      match c {
-        '<'  => write!(f, "&lt;")?,
-        '>'  => write!(f, "&gt;")?,
-        '&'  => write!(f, "&amp;")?,
-        '"'  => write!(f, "&quot;")?,
-        '\'' => write!(f, "&apos;")?,
-        c    => c.fmt(f)?,
-      }
-    }
-
-    Ok(())
-  }
-}
-
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 enum PageTitle<'a> {
   Index,
diff --git a/src/markdown.rs b/src/markdown.rs
@@ -9,6 +9,7 @@ struct State {
 }
 
 // Addapted from pulldown_cmark/html.rs
+// <https://github.com/pulldown-cmark/pulldown-cmark/>
 pub fn render_html<W: Write>(w: &mut W, src: &String) -> io::Result<()> {
   let mut opt = Options::empty();
   opt.insert(Options::ENABLE_TABLES);
@@ -62,6 +63,7 @@ pub fn render_html(w: &mut W, src: &String) -> io::Result<()> {
 }
 
 // Addapted from pulldown_cmark/html.rs
+// <https://github.com/pulldown-cmark/pulldown-cmark/>
 /// Returns `Ok(t)` if successful,
 /// where `t` indicates whether or not we are in a non-writting block
 fn start_tag<W: Write>(