diff --git a/.gitignore b/.gitignore index 70f6595a5c11c..57ad144b21e8b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode build +target .idea .DS_Store .vimrc diff --git a/buildtools/aho-corasick/Cargo.lock b/buildtools/aho-corasick/Cargo.lock new file mode 100644 index 0000000000000..d6cbe48ba1586 --- /dev/null +++ b/buildtools/aho-corasick/Cargo.lock @@ -0,0 +1,25 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + +[[package]] +name = "aho-corasick-c" +version = "0.1.0" +dependencies = [ + "aho-corasick", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" diff --git a/buildtools/aho-corasick/Cargo.toml b/buildtools/aho-corasick/Cargo.toml new file mode 100644 index 0000000000000..1c088800539a7 --- /dev/null +++ b/buildtools/aho-corasick/Cargo.toml @@ -0,0 +1,13 @@ +[workspace] + +[package] +name = "aho-corasick-c" +version = "0.1.0" +description = "C wrapper for aho-corasick for loading from Wasm" + +[lib] +crate-type = ["staticlib"] +name = "aho_corasick" + +[dependencies] +aho-corasick = "0.7.20" diff --git a/buildtools/aho-corasick/Dockerfile b/buildtools/aho-corasick/Dockerfile index 88e3d24c7b1d6..6cd20e403b16b 100644 --- a/buildtools/aho-corasick/Dockerfile +++ b/buildtools/aho-corasick/Dockerfile @@ -3,13 +3,11 @@ FROM rust:1-alpine -RUN apk add --no-cache curl patch && rustup target add wasm32-wasi +RUN rustup target add wasm32-wasi -RUN mkdir -p /aho-corasick && curl -L https://github.com/BurntSushi/aho-corasick/archive/refs/tags/0.7.19.tar.gz | tar -xz --strip-components 1 -C /aho-corasick +ADD . 
/aho-corasick WORKDIR /aho-corasick -ADD aho-corasick.patch aho-corasick.patch -RUN patch -p1 < aho-corasick.patch ENV RUSTFLAGS "-C target-feature=-crt-static" RUN cargo build --release --target wasm32-wasi -CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"] \ No newline at end of file +CMD ["cp", "target/wasm32-wasi/release/libaho_corasick.a", "/out/libaho_corasick.a"] diff --git a/buildtools/aho-corasick/aho-corasick.patch b/buildtools/aho-corasick/aho-corasick.patch deleted file mode 100644 index f36ac3f13edb9..0000000000000 --- a/buildtools/aho-corasick/aho-corasick.patch +++ /dev/null @@ -1,147 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index f1a4d65..d6ff1a3 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -1,3 +1,4 @@ -+.idea - .*.swp - doc - tags -diff --git a/Cargo.toml b/Cargo.toml -index 610bd4d..55e2f37 100644 ---- a/Cargo.toml -+++ b/Cargo.toml -@@ -19,6 +19,7 @@ edition = "2018" - members = ["aho-corasick-debug", "bench"] - - [lib] -+crate-type = ["staticlib"] - name = "aho_corasick" - - [features] -diff --git a/src/exports.rs b/src/exports.rs -new file mode 100644 -index 0000000..29c203d ---- /dev/null -+++ b/src/exports.rs -@@ -0,0 +1,107 @@ -+use std::mem::MaybeUninit; -+use std::slice; -+use std::str; -+use crate::{AhoCorasick, AhoCorasickBuilder, MatchKind}; -+ -+static mut MATCHERS: Vec = Vec::new(); -+ -+#[no_mangle] -+pub extern "C" fn new_matcher(patterns_ptr: *mut u8, patterns_len: usize) -> usize { -+ let all_patterns = unsafe { -+ slice::from_raw_parts(patterns_ptr, patterns_len) -+ }; -+ -+ let mut patterns = Vec::new(); -+ -+ let mut off = 0; -+ while off < patterns_len { -+ let pattern_len = u32::from_le_bytes([all_patterns[off], all_patterns[off+1], all_patterns[off+2], all_patterns[off+3]]) as usize; -+ off += 4; -+ let pattern = unsafe { -+ str::from_utf8_unchecked(&all_patterns[off..off+pattern_len]) -+ }; -+ patterns.push(pattern); -+ off += pattern_len; -+ } -+ -+ let ac = 
AhoCorasickBuilder::new() -+ .ascii_case_insensitive(true) -+ .dfa(true) -+ .match_kind(MatchKind::LeftmostLongest) -+ .build(patterns); -+ -+ unsafe { -+ MATCHERS.push(ac); -+ MATCHERS.len() - 1 -+ } -+ -+} -+ -+#[no_mangle] -+pub extern "C" fn matches(matcher_ptr: usize, value_ptr: usize, value_len: usize, n: usize, matches: *mut usize) -> usize { -+ let ac = unsafe { -+ let matcher = MATCHERS.get_unchecked(matcher_ptr); -+ matcher -+ }; -+ -+ let value = ptr_to_string(value_ptr, value_len); -+ std::mem::forget(&value); -+ -+ let mut num = 0; -+ for value in ac.find_iter(value.as_bytes()) { -+ if num == n { -+ break; -+ } -+ unsafe { -+ *matches.offset(2*num as isize) = value.start(); -+ *matches.offset((2*num+1) as isize) = value.end(); -+ } -+ num += 1; -+ } -+ -+ return num -+} -+ -+/// WebAssembly export that allocates a pointer (linear memory offset) that can -+/// be used for a string. -+/// -+/// This is an ownership transfer, which means the caller must call -+/// [`deallocate`] when finished. -+#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")] -+#[no_mangle] -+pub extern "C" fn _allocate(size: usize) -> *mut u8 { -+ allocate(size as usize) -+} -+ -+/// Allocates size bytes and leaks the pointer where they start. -+fn allocate(size: usize) -> *mut u8 { -+ // Allocate the amount of bytes needed. -+ let vec: Vec> = Vec::with_capacity(size); -+ -+ // into_raw leaks the memory to the caller. -+ Box::into_raw(vec.into_boxed_slice()) as *mut u8 -+} -+ -+ -+/// WebAssembly export that deallocates a pointer of the given size (linear -+/// memory offset, byteCount) allocated by [`allocate`]. -+#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")] -+#[no_mangle] -+pub unsafe extern "C" fn _deallocate(ptr: usize, size: usize) { -+ deallocate(ptr as *mut u8, size); -+} -+ -+/// Retakes the pointer which allows its memory to be freed. 
-+unsafe fn deallocate(ptr: *mut u8, size: usize) {
-+    let _ = Vec::from_raw_parts(ptr, 0, size);
-+}
-+
-+/// Returns a string from WebAssembly compatible numeric types representing
-+/// its pointer and length.
-+fn ptr_to_string(ptr: usize, len: usize) -> String {
-+    unsafe {
-+        let slice = slice::from_raw_parts_mut(ptr as *mut u8, len as usize);
-+        let utf8 = std::str::from_utf8_unchecked_mut(slice);
-+        return String::from(utf8);
-+    }
-+}
-\ No newline at end of file
-diff --git a/src/lib.rs b/src/lib.rs
-index 4465a56..9997a02 100644
---- a/src/lib.rs
-+++ b/src/lib.rs
-@@ -213,6 +213,7 @@ mod prefilter;
- mod state_id;
- #[cfg(test)]
- mod tests;
-+mod exports;
- 
- /// A representation of a match reported by an Aho-Corasick automaton.
- ///
diff --git a/buildtools/aho-corasick/src/lib.rs b/buildtools/aho-corasick/src/lib.rs
new file mode 100644
index 0000000000000..b2de263319273
--- /dev/null
+++ b/buildtools/aho-corasick/src/lib.rs
@@ -0,0 +1,167 @@
+// Copyright The OWASP Coraza contributors
+// SPDX-License-Identifier: Apache-2.0
+
+//! C ABI wrapper around the `aho-corasick` crate, meant to be built as a
+//! static library and driven through raw pointers and lengths.
+//!
+//! Compiled matchers live in a global registry that is never synchronized, so
+//! all exported functions must be called from a single thread.
+
+extern crate aho_corasick;
+
+use std::mem::MaybeUninit;
+use std::ptr;
+use std::slice;
+use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
+
+/// Registry of compiled matchers. The handle returned by `new_matcher` is an
+/// index into this vector; entries are never removed.
+static mut MATCHERS: Vec<AhoCorasick> = Vec::new();
+
+/// Borrows `len` bytes starting at `ptr`.
+///
+/// Returns an empty slice when `len` is zero or `ptr` is null, because
+/// `slice::from_raw_parts` requires a non-null pointer even for empty slices.
+///
+/// # Safety
+///
+/// When `len` is non-zero, `ptr` must address `len` readable bytes that stay
+/// valid and unmodified for as long as the returned slice is used.
+unsafe fn raw_bytes<'a>(ptr: *const u8, len: usize) -> &'a [u8] {
+    if len == 0 || ptr.is_null() {
+        return &[];
+    }
+    slice::from_raw_parts(ptr, len)
+}
+
+/// Compiles a matcher from a packed pattern list and returns its handle.
+///
+/// The buffer at `patterns_ptr` holds `patterns_len` bytes of back-to-back
+/// records, each a little-endian `u32` byte length followed by that many
+/// pattern bytes. The matcher is ASCII case-insensitive, DFA-backed and uses
+/// leftmost-longest match semantics. The returned handle is passed to
+/// `matches` as `matcher_ptr`.
+///
+/// # Panics
+///
+/// Panics if a record is truncated, i.e. a length prefix or pattern body runs
+/// past the end of the buffer.
+#[no_mangle]
+pub extern "C" fn new_matcher(patterns_ptr: *mut u8, patterns_len: usize) -> usize {
+    // SAFETY: the caller guarantees that `patterns_ptr` addresses
+    // `patterns_len` readable bytes for the duration of this call.
+    let mut rest = unsafe { raw_bytes(patterns_ptr, patterns_len) };
+
+    // Patterns are kept as raw bytes: the automaton only needs `AsRef<[u8]>`,
+    // so there is no reason to assert UTF-8 validity on unchecked input.
+    let mut patterns: Vec<&[u8]> = Vec::new();
+    while !rest.is_empty() {
+        assert!(rest.len() >= 4, "truncated pattern length prefix");
+        let (prefix, tail) = rest.split_at(4);
+        let pattern_len = u32::from_le_bytes([prefix[0], prefix[1], prefix[2], prefix[3]]) as usize;
+        assert!(pattern_len <= tail.len(), "pattern length exceeds input buffer");
+        let (pattern, tail) = tail.split_at(pattern_len);
+        patterns.push(pattern);
+        rest = tail;
+    }
+
+    let ac = AhoCorasickBuilder::new()
+        .ascii_case_insensitive(true)
+        .dfa(true)
+        .match_kind(MatchKind::LeftmostLongest)
+        .build(patterns);
+
+    // SAFETY: the registry is only touched from the single thread that drives
+    // these exports, so no other reference to it is live here.
+    let registry = unsafe { &mut *ptr::addr_of_mut!(MATCHERS) };
+    registry.push(ac);
+    registry.len() - 1
+}
+
+/// Searches a byte string with a previously built matcher.
+///
+/// Writes up to `n` non-overlapping matches into `matches` as consecutive
+/// `(start, end)` byte-offset pairs and returns how many pairs were written.
+/// `matches` must therefore have room for `2 * n` `usize` values. Returns 0
+/// when `matcher_ptr` is not a handle returned by `new_matcher`, when `n` is
+/// zero, or when `matches` is null.
+#[no_mangle]
+pub extern "C" fn matches(matcher_ptr: usize, value_ptr: usize, value_len: usize, n: usize, matches: *mut usize) -> usize {
+    if n == 0 || matches.is_null() {
+        return 0;
+    }
+
+    // SAFETY: see `new_matcher`; the registry is only used from one thread.
+    let registry = unsafe { &*ptr::addr_of!(MATCHERS) };
+    let ac = match registry.get(matcher_ptr) {
+        Some(ac) => ac,
+        None => return 0,
+    };
+
+    // SAFETY: the caller guarantees `value_ptr` addresses `value_len` readable
+    // bytes. They are searched in place; nothing is copied or freed here.
+    let haystack = unsafe { raw_bytes(value_ptr as *const u8, value_len) };
+
+    let mut num = 0;
+    for found in ac.find_iter(haystack).take(n) {
+        // SAFETY: `num < n`, and the caller guarantees room for `2 * n` values.
+        unsafe {
+            matches.add(2 * num).write(found.start());
+            matches.add(2 * num + 1).write(found.end());
+        }
+        num += 1;
+    }
+    num
+}
+
+/// WebAssembly export that allocates a pointer (linear memory offset) that can
+/// be used for a string.
+///
+/// This is an ownership transfer, which means the caller must call
+/// [`deallocate`] when finished.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "allocate")]
+#[no_mangle]
+pub extern "C" fn _allocate(size: usize) -> *mut u8 {
+    allocate(size)
+}
+
+/// Allocates `size` uninitialized bytes and leaks them to the caller.
+fn allocate(size: usize) -> *mut u8 {
+    let mut buf: Vec<MaybeUninit<u8>> = Vec::with_capacity(size);
+    // The length must equal `size` before boxing: `into_boxed_slice` drops any
+    // capacity beyond the length, which for an empty vector would free the
+    // whole buffer and hand back a dangling pointer.
+    // SAFETY: `size` does not exceed the capacity, and `MaybeUninit<u8>`
+    // elements need no initialization.
+    unsafe { buf.set_len(size) };
+
+    // into_raw leaks the memory to the caller.
+    Box::into_raw(buf.into_boxed_slice()) as *mut u8
+}
+
+/// WebAssembly export that deallocates a pointer of the given size (linear
+/// memory offset, byteCount) allocated by [`allocate`].
+///
+/// # Safety
+///
+/// `ptr` and `size` must describe a buffer obtained from [`allocate`] that has
+/// not been freed yet.
+#[cfg_attr(all(target_arch = "wasm32"), export_name = "deallocate")]
+#[no_mangle]
+pub unsafe extern "C" fn _deallocate(ptr: usize, size: usize) {
+    deallocate(ptr as *mut u8, size);
+}
+
+/// Retakes the pointer which allows its memory to be freed.
+///
+/// # Safety
+///
+/// `ptr` must come from [`allocate`] called with the same `size`, and must not
+/// be used or freed again afterwards.
+unsafe fn deallocate(ptr: *mut u8, size: usize) {
+    if size == 0 {
+        // Zero-sized allocations own no memory, so there is nothing to free.
+        return;
+    }
+    drop(Vec::from_raw_parts(ptr, 0, size));
+}
diff --git a/lib/libaho_corasick.a b/lib/libaho_corasick.a
index 4c38a734b8b30..345f9dab20134 100644
Binary files a/lib/libaho_corasick.a and b/lib/libaho_corasick.a differ