From 673ea5a0cc44b425610612dfcbaa83cdf8fc25ac Mon Sep 17 00:00:00 2001
From: LongYinan <lynweklm@gmail.com>
Date: Mon, 29 Jul 2024 01:03:54 +0800
Subject: [PATCH] use v_jsonescape for JSON string escaping in oxc_sourcemap

---
 Cargo.lock                         |  16 ++
 Cargo.toml                         |   1 +
 crates/oxc_sourcemap/Cargo.toml    |  11 +-
 crates/oxc_sourcemap/src/encode.rs | 366 +----------------------------
 4 files changed, 29 insertions(+), 365 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 95bff8255815a..9f9aa742e543a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -183,6 +183,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "buf-min"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22d5698cf6842742ed64805705798f8b351fff53fa546fd45c52184bee58dc90"
+
 [[package]]
 name = "bumpalo"
 version = "3.16.0"
@@ -1745,6 +1751,7 @@ dependencies = [
  "rustc-hash",
  "serde",
  "serde_json",
+ "v_jsonescape",
 ]
 
 [[package]]
@@ -3010,6 +3017,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "v_jsonescape"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be8219cc464ba10c48c3231a6871f11d26d831c5c45a47467eea387ea7bb10e8"
+dependencies = [
+ "buf-min",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 62634502de38c..408fc3accd00d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -184,6 +184,7 @@ unicode-id-start    = "1" # Relaxed version so the user can decide which unicode
 unicode-width       = "0.1.13"
 ureq                = { version = "2.9.6", default-features = false }
 url                 = "2.5.2"
+v_jsonescape        = "0.7.3"
 walkdir             = "2.5.0"
 wasm-bindgen        = "0.2.92"
 
diff --git a/crates/oxc_sourcemap/Cargo.toml b/crates/oxc_sourcemap/Cargo.toml
index 0532b8ab55489..cf3469e5847fe 100644
--- a/crates/oxc_sourcemap/Cargo.toml
+++ b/crates/oxc_sourcemap/Cargo.toml
@@ -19,11 +19,12 @@ workspace = true
 doctest = false
 
 [dependencies]
-rustc-hash  = { workspace = true }
-serde       = { workspace = true, features = ["derive"] }
-serde_json  = { workspace = true }
-base64-simd = { workspace = true }
-cfg-if      = { workspace = true }
+rustc-hash   = { workspace = true }
+serde        = { workspace = true, features = ["derive"] }
+serde_json   = { workspace = true }
+base64-simd  = { workspace = true }
+cfg-if       = { workspace = true }
+v_jsonescape = { workspace = true, features = ["bytes-buf"] }
 
 rayon = { workspace = true, optional = true }
 
diff --git a/crates/oxc_sourcemap/src/encode.rs b/crates/oxc_sourcemap/src/encode.rs
index 4f2501648b452..f4bb42e90d7de 100644
--- a/crates/oxc_sourcemap/src/encode.rs
+++ b/crates/oxc_sourcemap/src/encode.rs
@@ -1,12 +1,8 @@
-#![cfg_attr(target_arch = "x86_64", allow(clippy::cast_ptr_alignment))]
-#![cfg_attr(target_arch = "x86_64", allow(clippy::cast_possible_wrap))]
-#![cfg_attr(target_arch = "x86_64", allow(clippy::cast_sign_loss))]
-#![cfg_attr(target_arch = "x86_64", allow(clippy::transmute_ptr_to_ptr))]
-
 use std::borrow::Cow;
 
 #[cfg(feature = "concurrent")]
 use rayon::prelude::*;
+use v_jsonescape::b_escape as simd_escape;
 
 use crate::JSONSourceMap;
 /// Port from https://github.com/getsentry/rust-sourcemap/blob/master/src/encoder.rs
@@ -241,363 +237,13 @@ impl<'a> PreAllocatedString<'a> {
     }
 }
 
-// Copied from https://github.com/serde-rs/json/blob/v1.0.120/src/ser.rs#L2097-L2127
-
-const BB: u8 = b'b'; // \x08
-const TT: u8 = b't'; // \x09
-const NN: u8 = b'n'; // \x0A
-const FF: u8 = b'f'; // \x0C
-const RR: u8 = b'r'; // \x0D
-const QU: u8 = b'"'; // \x22
-const BS: u8 = b'\\'; // \x5C
-const UU: u8 = b'u'; // \x00...\x1F except the ones above
-const __: u8 = 0;
-
-// Lookup table of escape sequences. A value of b'x' at index i means that byte
-// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
-static ESCAPE: [u8; 256] = [
-    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
-    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
-    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
-    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
-    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
-];
-
-const UTF8_CHAR_WIDTH: [u8; 256] = [
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0,
-];
-
-const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
-
-#[cfg(target_arch = "aarch64")]
-#[inline]
-fn escape_json_string<S: AsRef<str>>(input: S) -> String {
-    use core::arch::aarch64::{uint8x16_t, vld1q_u8, vqtbl1q_u8, vst1q_u8};
-
-    let input = input.as_ref();
-    let bytes = input.as_bytes();
-    let len = bytes.len();
-    let mut result = String::with_capacity(len * 2 + 2);
-    let mut chunk_head = 0;
-
-    result.push('"');
-
-    // Safety: simd is naturally unsafe.
-    unsafe {
-        let mut escape_result = [0u8; 16];
-        while chunk_head + 16 <= len {
-            let chunk: uint8x16_t = vld1q_u8(bytes[chunk_head..].as_ptr());
-            // Use ESCAPE table to check for characters that need escaping
-            let escape = vqtbl1q_u8(vld1q_u8(ESCAPE.as_ptr()), chunk);
-
-            // Store the escape results in a temporary array
-            vst1q_u8(escape_result.as_mut_ptr(), escape);
-
-            // Process each byte in the chunk
-            let mut head = 0;
-            while head < 16 {
-                let b = bytes[chunk_head + head];
-                let e = escape_result[head];
-                if e == 0 {
-                    if b & 0x80 == 0 {
-                        // ASCII character
-                        result.push(b as char);
-                        head += 1;
-                    } else {
-                        // Unicode character
-                        let char_len = UTF8_CHAR_WIDTH[b as usize] as usize;
-                        if chunk_head + head + char_len <= len {
-                            let c = input[chunk_head + head..chunk_head + head + char_len]
-                                .chars()
-                                .next()
-                                .unwrap();
-                            if c.is_control() {
-                                result.push_str(&format!("\\u{:04x}", c as u32));
-                            } else {
-                                result.push(c);
-                            }
-                        } else {
-                            // Incomplete UTF-8 sequence, just copy the bytes
-                            result.push_str(&input[chunk_head + head..]);
-                            head = 16; // Exit the loop
-                        }
-                        head += char_len;
-                    }
-                } else if e == UU {
-                    // For control characters, use unicode escape
-                    result.push_str(&format!("\\u{:04x}", u32::from(b)));
-                    head += 1;
-                } else {
-                    // For other escaped characters
-                    result.push('\\');
-                    result.push(e as char);
-                    head += 1;
-                }
-            }
-
-            chunk_head += 16;
-        }
-    }
-
-    // Process remaining bytes
-    escape_json_string_fallback(&input[chunk_head..], &mut result);
-
-    result
-}
-
-#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
-#[inline]
-fn escape_json_string<S: AsRef<str>>(input: S) -> String {
-    use std::arch::x86_64::{
-        __m128i, __m256i, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_shuffle_epi8,
-        _mm_loadu_si128, _mm_movemask_epi8, _mm_shuffle_epi8,
-    };
-
-    let input = input.as_ref();
-    let bytes = input.as_bytes();
-    let len = bytes.len();
-
-    let mut result = String::with_capacity(len * 2 + 2);
-    result.push('"');
-
-    let mut i = 0;
-    let mut escape_buf = [b'\\', b'u', b'0', b'0', b'0', b'0'];
-
-    while i < len {
-        if is_x86_feature_detected!("avx2") && i + 32 <= len {
-            // Safety: SIMD operations are unsafe
-            unsafe {
-                let escape_table = _mm256_loadu_si256(ESCAPE.as_ptr().cast::<__m256i>());
-                let chunk = _mm256_loadu_si256(bytes[i..].as_ptr().cast::<__m256i>());
-                let escape = _mm256_shuffle_epi8(escape_table, chunk);
-                let mask = _mm256_movemask_epi8(escape);
-
-                if mask == 0 {
-                    // No characters need escaping
-                    result.push_str(std::str::from_utf8(&bytes[i..i + 32]).unwrap());
-                    i += 32;
-                } else {
-                    // Some characters need escaping, process byte by byte
-                    break;
-                }
-            }
-        } else if is_x86_feature_detected!("sse2") && i + 16 <= len {
-            // Safety: SIMD operations are unsafe
-            unsafe {
-                let escape_table = _mm_loadu_si128(ESCAPE.as_ptr().cast::<__m128i>());
-                let chunk = _mm_loadu_si128(bytes[i..].as_ptr().cast::<__m128i>());
-                let escape = _mm_shuffle_epi8(escape_table, chunk);
-                let mask = _mm_movemask_epi8(escape);
-
-                if mask == 0 {
-                    // No characters need escaping
-                    result.push_str(std::str::from_utf8_unchecked(&bytes[i..i + 16]));
-                    i += 16;
-                } else {
-                    // Some characters need escaping, process byte by byte
-                    break;
-                }
-            }
-        } else {
-            // Process byte by byte
-            break;
-        }
-    }
-
-    // Process remaining bytes
-    while i < len {
-        let byte = bytes[i];
-        let escape = ESCAPE[byte as usize];
-        if escape == 0 {
-            let char_len = UTF8_CHAR_WIDTH[byte as usize] as usize;
-            if i + char_len <= len {
-                result.push_str(&input[i..i + char_len]);
-                i += char_len;
-            } else {
-                // Incomplete UTF-8 sequence, just copy the byte
-                result.push(byte as char);
-                i += 1;
-            }
-        } else if escape == UU {
-            escape_buf[4] = HEX_DIGITS[(byte >> 4) as usize];
-            escape_buf[5] = HEX_DIGITS[(byte & 0xF) as usize];
-            // Safety: escape_buf is always valid utf-8
-            result.push_str(unsafe { std::str::from_utf8_unchecked(&escape_buf) });
-            i += 1;
-        } else {
-            result.push('\\');
-            result.push(escape as char);
-            i += 1;
-        }
-    }
-
-    result.push('"');
-    result
-}
-
-#[cfg(target_arch = "wasm32")]
-#[inline]
 fn escape_json_string<S: AsRef<str>>(s: S) -> String {
-    use core::arch::wasm32::{u8x16_swizzle, v128, v128_load, v128_store};
-
     let s = s.as_ref();
-    let bytes = s.as_bytes();
-    let len = bytes.len();
-    let mut result = String::with_capacity(len * 2 + 2);
-    let mut i = 0;
-
-    result.push('"');
-
-    let mut escape_buf = [b'\\', b'u', b'0', b'0', b'0', b'0'];
-
-    // Safety: SIMD operations are unsafe
-    unsafe {
-        let mut escape_result = [0u8; 16];
-        let escape_table = v128_load(ESCAPE.as_ptr() as *const v128);
-
-        while i + 16 <= len {
-            let chunk = v128_load(bytes[i..].as_ptr() as *const v128);
-            // Use ESCAPE table to check for characters that need escaping
-            let escape = u8x16_swizzle(escape_table, chunk);
-
-            // Store the escape results in a temporary array
-            v128_store(escape_result.as_mut_ptr() as *mut v128, escape);
-
-            // Process each byte in the chunk
-            let mut j = 0;
-            while j < 16 {
-                let b = bytes[i + j];
-                let e = escape_result[j];
-                if e == 0 {
-                    if b & 0x80 == 0 {
-                        // ASCII character
-                        result.push(b as char);
-                        j += 1;
-                    } else {
-                        // Unicode character
-                        let char_len = UTF8_CHAR_WIDTH[b as usize] as usize;
-                        if i + j + char_len <= len {
-                            let c = s[i + j..i + j + char_len].chars().next().unwrap();
-                            if c.is_control() {
-                                let c_u32 = c as u32;
-                                let buf = [
-                                    b'\\',
-                                    b'u',
-                                    HEX_DIGITS[((c_u32 >> 12) & 0xF) as usize],
-                                    HEX_DIGITS[((c_u32 >> 8) & 0xF) as usize],
-                                    HEX_DIGITS[((c_u32 >> 4) & 0xF) as usize],
-                                    HEX_DIGITS[(c_u32 & 0xF) as usize],
-                                ];
-                                result.push_str(std::str::from_utf8_unchecked(&buf));
-                            } else {
-                                result.push(c);
-                            }
-                        } else {
-                            // Incomplete UTF-8 sequence, just copy the bytes
-                            result.push_str(&s[i + j..]);
-                            j = 16; // Exit the loop
-                        }
-                        j += char_len;
-                    }
-                } else if e == UU {
-                    // For control characters, use unicode escape
-                    escape_buf[0] = b'\\';
-                    escape_buf[1] = b'u';
-                    escape_buf[2] = b'0';
-                    escape_buf[3] = b'0';
-                    escape_buf[4] = HEX_DIGITS[(b >> 4) as usize];
-                    escape_buf[5] = HEX_DIGITS[(b & 0xF) as usize];
-                    result.push_str(std::str::from_utf8(&escape_buf).unwrap());
-                    j += 1;
-                } else {
-                    // For other escaped characters
-                    result.push('\\');
-                    result.push(e as char);
-                    j += 1;
-                }
-            }
-
-            i += 16;
-        }
-    }
-
-    // Process remaining bytes
-    escape_json_string_fallback(&s[i..], &mut result);
-
-    result
-}
-
-#[cfg(not(any(
-    target_arch = "aarch64",
-    target_arch = "x86_64",
-    target_arch = "x86",
-    target_arch = "wasm32"
-)))]
-#[inline]
-fn escape_json_string<S: AsRef<str>>(s: S) -> String {
-    let mut result = String::with_capacity(s.as_ref().len() * 2 + 2);
-    result.push('"');
-    escape_json_string_fallback(s.as_ref(), &mut result);
-    result
-}
-
-#[allow(unused)]
-#[inline]
-fn escape_json_string_fallback(s: &str, result: &mut String) {
-    let mut escape_buf = [b'\\', b'u', b'0', b'0', b'0', b'0'];
-    for c in s.chars() {
-        if c.is_ascii() {
-            let b = c as u8;
-            let e = ESCAPE[b as usize];
-            if e == 0 {
-                result.push(c);
-            } else if e == UU {
-                // For control characters, use unicode escape
-                escape_buf[4] = HEX_DIGITS[(b >> 4) as usize];
-                escape_buf[5] = HEX_DIGITS[(b & 0xF) as usize];
-                // Safety: escape_buf is always valid utf-8
-                result.push_str(unsafe { std::str::from_utf8_unchecked(&escape_buf) });
-            } else {
-                // For other escaped characters
-                result.push('\\');
-                result.push(e as char);
-            }
-        } else if c.is_control() {
-            let c_u32 = c as u32;
-            let buf = [
-                b'\\',
-                b'u',
-                HEX_DIGITS[((c_u32 >> 12) & 0xF) as usize],
-                HEX_DIGITS[((c_u32 >> 8) & 0xF) as usize],
-                HEX_DIGITS[((c_u32 >> 4) & 0xF) as usize],
-                HEX_DIGITS[(c_u32 & 0xF) as usize],
-            ];
-            // Safety: buf is always valid utf-8
-            result.push_str(unsafe { std::str::from_utf8_unchecked(&buf) });
-        } else {
-            result.push(c);
-        }
-    }
-    result.push('"');
+    let mut escaped = String::with_capacity(s.len() * 2 + 2); // reserve 2x input + 2 for the quotes
+    escaped.push('"'); // opening quote of the JSON string literal
+    simd_escape(s.as_bytes(), &mut escaped); // v_jsonescape::b_escape (assumed to append)
+    escaped.push('"'); // closing quote
+    escaped
 }
 
 #[test]