Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: faster prefix record deser #777

Merged
merged 9 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ smallvec = { version = "1.13.2", features = [
"const_generics",
"union",
] }
strum = { version = "0.26.2", features = ["derive"] }
strum = { version = "0.26.3", features = ["derive"] }
baszalmstra marked this conversation as resolved.
Show resolved Hide resolved
superslice = "1.0.0"
syn = "2.0.59"
sysinfo = "0.30.10"
Expand Down
5 changes: 5 additions & 0 deletions crates/rattler_conda_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ purl = { workspace = true, features = ["serde"] }
rattler_digest = { path = "../rattler_digest", version = "0.19.4", default-features = false, features = ["serde"] }
rattler_macros = { path = "../rattler_macros", version = "0.19.4", default-features = false }
regex = { workspace = true }
simd-json = { version = "*", features = ["serde_impl"]}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a minimal version here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

made this workspace=true and added simd-json in root Cargo.toml

serde = { workspace = true, features = ["derive", "rc"] }
serde_json = { workspace = true }
serde_repr = { workspace = true }
Expand Down Expand Up @@ -50,3 +51,7 @@ tools = { path = "../tools" }
[[bench]]
name = "parse"
harness = false

[[bench]]
name = "prefix_record_from_path"
harness = false
27 changes: 27 additions & 0 deletions crates/rattler_conda_types/benches/prefix_record_from_path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use std::{env::current_dir, fs, path::PathBuf};

Check failure on line 1 in crates/rattler_conda_types/benches/prefix_record_from_path.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

unused import: `env::current_dir`

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rattler_conda_types::PrefixRecord;

/// Loads a `PrefixRecord` through `PrefixRecord::from_path` for every entry found in `dir`.
///
/// The loaded records are discarded; this helper only exists to drive the benchmark.
///
/// # Panics
///
/// Panics if `dir` cannot be read, if a directory entry cannot be read, or if
/// `PrefixRecord::from_path` returns an error for any entry.
fn process_json_files_from_dir(dir: PathBuf) {
    fs::read_dir(dir)
        .expect("Directory not found")
        // Lazily resolve each entry to its path so entries are handled one at a time, in order.
        .map(|dir_entry| dir_entry.expect("Unable to read entry").path())
        .for_each(|record_path| {
            PrefixRecord::from_path(record_path).unwrap();
        });
}

fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("process_json_files", |b| {
let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
b.iter(|| {
process_json_files_from_dir(black_box(manifest_dir.join("../../test-data/conda-meta")));
});
});
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
12 changes: 11 additions & 1 deletion crates/rattler_conda_types/src/prefix_record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use std::io::{BufWriter, Read};
use std::path::{Path, PathBuf};
use std::str::FromStr;

use simd_json;
nichmor marked this conversation as resolved.
Show resolved Hide resolved

/// Information about every file installed with the package.
///
/// This struct is similar to the [`crate::package::PathsJson`] struct. The difference is that this
Expand Down Expand Up @@ -176,7 +178,7 @@ impl PrefixRecord {
pub fn from_reader(mut reader: impl Read) -> Result<Self, std::io::Error> {
// Buffer the entire input as text first; a read failure is returned to the caller via `?`.
let mut str = String::new();
reader.read_to_string(&mut str)?;
Self::from_str(&str)
// `from_str_mut` needs the buffer mutably because it passes the bytes to simd-json through
// `as_bytes_mut`; the buffer is dropped when this function returns.
Self::from_str_mut(&mut str)
}

/// Creates a `PrefixRecord` from a `RepoDataRecord`.
Expand Down Expand Up @@ -263,6 +265,14 @@ impl PrefixRecord {
}
Ok(records)
}

/// Deserializes a `PrefixRecord` from JSON text with `simd_json`, which operates on a mutable
/// byte buffer.
///
/// NOTE(review): simd-json is documented to rewrite the input slice while parsing, so the
/// contents of `s` should be treated as unspecified once this function returns — confirm
/// before adding callers that keep using `s`.
///
/// # Errors
///
/// Returns the `simd_json` error converted into a `std::io::Error` when deserialization fails.
fn from_str_mut(s: &mut str) -> Result<Self, std::io::Error> {
    // SAFETY: `str::as_bytes_mut` requires that the bytes are valid UTF-8 again before the
    // borrow ends and the underlying `str` is used. Callers must not read `s` as text after
    // this call; `from_reader` passes a temporary buffer that is dropped right afterwards.
    let bytes = unsafe { s.as_bytes_mut() };

    // Only obtaining the mutable byte view is unsafe; parsing and error conversion are safe.
    simd_json::serde::from_slice(bytes).map_err(Into::into)
}
}

impl FromStr for PrefixRecord {
Expand Down
213 changes: 213 additions & 0 deletions test-data/conda-meta/bzip2-1.0.8-h93a5062_5.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
{
"build": "h93a5062_5",
"build_number": 5,
"depends": [],
"license": "bzip2-1.0.6",
"license_family": "BSD",
"md5": "1bbc659ca658bfd49a481b5ef7a0f40f",
"name": "bzip2",
"sha256": "bfa84296a638bea78a8bb29abc493ee95f2a0218775642474a840411b950fe5f",
"size": 122325,
"subdir": "osx-arm64",
"timestamp": 1699280294368,
"version": "1.0.8",
"fn": "bzip2-1.0.8-h93a5062_5.conda",
"url": "https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-h93a5062_5.conda",
"channel": "https://conda.anaconda.org/conda-forge/",
"extracted_package_dir": "/Users/graf/Library/Caches/rattler/cache/pkgs/bzip2-1.0.8-h93a5062_5",
"files": [
"bin/bunzip2",
"bin/bzcat",
"bin/bzcmp",
"bin/bzdiff",
"bin/bzegrep",
"bin/bzfgrep",
"bin/bzgrep",
"bin/bzip2",
"bin/bzip2recover",
"bin/bzless",
"bin/bzmore",
"include/bzlib.h",
"lib/libbz2.1.0.8.dylib",
"lib/libbz2.a",
"lib/libbz2.dylib",
"man/man1/bzcmp.1",
"man/man1/bzdiff.1",
"man/man1/bzegrep.1",
"man/man1/bzfgrep.1",
"man/man1/bzgrep.1",
"man/man1/bzip2.1",
"man/man1/bzless.1",
"man/man1/bzmore.1"
],
"paths_data": {
"paths_version": 1,
"paths": [
{
"_path": "bin/bunzip2",
"path_type": "hardlink",
"sha256": "7e726a0a98c1542ed69b57571bb47992330a54983aa40f67e63cb3e62acf435a",
"sha256_in_prefix": "7e726a0a98c1542ed69b57571bb47992330a54983aa40f67e63cb3e62acf435a",
"size_in_bytes": 146976
},
{
"_path": "bin/bzcat",
"path_type": "hardlink",
"sha256": "564ca5d306567d79c76fb4731f9964f5ff744adc309353c87bd3a0b498aeab5a",
"sha256_in_prefix": "564ca5d306567d79c76fb4731f9964f5ff744adc309353c87bd3a0b498aeab5a",
"size_in_bytes": 146976
},
{
"_path": "bin/bzcmp",
"path_type": "softlink",
"sha256": "1c1f96193cdf14b85ea65f140a7557a07ece8783a53ec5ba6b5c30644a9d3012",
"sha256_in_prefix": "d5e2951edcc0388feda0726ee69b5ac079bf91e4bc79ce095b34a56b38db29b7",
"size_in_bytes": 2140
},
{
"_path": "bin/bzdiff",
"path_type": "hardlink",
"sha256": "1c1f96193cdf14b85ea65f140a7557a07ece8783a53ec5ba6b5c30644a9d3012",
"sha256_in_prefix": "1c1f96193cdf14b85ea65f140a7557a07ece8783a53ec5ba6b5c30644a9d3012",
"size_in_bytes": 2140
},
{
"_path": "bin/bzegrep",
"path_type": "softlink",
"sha256": "a8e368a31766c7862b8d0feeffe274c3bb43b969e3ccb4f9e77d13bfa447a5c9",
"sha256_in_prefix": "aa3149e6182875b6fc8b393c9b556fa49427b8732e87c5def2e109904143caa3",
"size_in_bytes": 2054
},
{
"_path": "bin/bzfgrep",
"path_type": "softlink",
"sha256": "a8e368a31766c7862b8d0feeffe274c3bb43b969e3ccb4f9e77d13bfa447a5c9",
"sha256_in_prefix": "aa3149e6182875b6fc8b393c9b556fa49427b8732e87c5def2e109904143caa3",
"size_in_bytes": 2054
},
{
"_path": "bin/bzgrep",
"path_type": "hardlink",
"sha256": "a8e368a31766c7862b8d0feeffe274c3bb43b969e3ccb4f9e77d13bfa447a5c9",
"sha256_in_prefix": "a8e368a31766c7862b8d0feeffe274c3bb43b969e3ccb4f9e77d13bfa447a5c9",
"size_in_bytes": 2054
},
{
"_path": "bin/bzip2",
"path_type": "hardlink",
"sha256": "31f2b2843c9f2e93cb45b38b963463062adf89b033900ac9e7f4c5109629b7d8",
"sha256_in_prefix": "31f2b2843c9f2e93cb45b38b963463062adf89b033900ac9e7f4c5109629b7d8",
"size_in_bytes": 146976
},
{
"_path": "bin/bzip2recover",
"path_type": "hardlink",
"sha256": "f041762c0dab08ba7e85e69ab9ef49fa29d2ffdb424c2c60d4c189b08b99af9d",
"sha256_in_prefix": "f041762c0dab08ba7e85e69ab9ef49fa29d2ffdb424c2c60d4c189b08b99af9d",
"size_in_bytes": 53152
},
{
"_path": "bin/bzless",
"path_type": "softlink",
"sha256": "836536d4c7469788c730355d59f8ae8d16ba07cb0754174878d99ed90f04448d",
"sha256_in_prefix": "97fc73f3676c65ae05a49cad2afb3126a00baf9dbf2613996e3b95eac6364c32",
"size_in_bytes": 1259
},
{
"_path": "bin/bzmore",
"path_type": "hardlink",
"sha256": "836536d4c7469788c730355d59f8ae8d16ba07cb0754174878d99ed90f04448d",
"sha256_in_prefix": "836536d4c7469788c730355d59f8ae8d16ba07cb0754174878d99ed90f04448d",
"size_in_bytes": 1259
},
{
"_path": "include/bzlib.h",
"path_type": "hardlink",
"sha256": "6ac62e811669598ee30c9e1c379b9e627f6ff17a5a3dc1e0b4fa8b8ea75e580d",
"sha256_in_prefix": "6ac62e811669598ee30c9e1c379b9e627f6ff17a5a3dc1e0b4fa8b8ea75e580d",
"size_in_bytes": 6240
},
{
"_path": "lib/libbz2.1.0.8.dylib",
"path_type": "hardlink",
"sha256": "919fc98f23464442b499aaebc07d75efde1cbcc561d0046aecfdb81880989a7f",
"sha256_in_prefix": "919fc98f23464442b499aaebc07d75efde1cbcc561d0046aecfdb81880989a7f",
"size_in_bytes": 108416
},
{
"_path": "lib/libbz2.a",
"path_type": "hardlink",
"sha256": "95ae2b389c3000f0416e8e2c37907bf70ca47fa954a66b27f5f399a9aefbcf2d",
"sha256_in_prefix": "95ae2b389c3000f0416e8e2c37907bf70ca47fa954a66b27f5f399a9aefbcf2d",
"size_in_bytes": 220608
},
{
"_path": "lib/libbz2.dylib",
"path_type": "softlink",
"sha256": "919fc98f23464442b499aaebc07d75efde1cbcc561d0046aecfdb81880989a7f",
"sha256_in_prefix": "7be502d1e0218e1853f952a218d40422321f7cc947454c53eb1a2bf13961a0d2",
"size_in_bytes": 108416
},
{
"_path": "man/man1/bzcmp.1",
"path_type": "hardlink",
"sha256": "172cde42c47a6d50c244e39d993097dcd3882427d57303078643849cf10a81c4",
"sha256_in_prefix": "172cde42c47a6d50c244e39d993097dcd3882427d57303078643849cf10a81c4",
"size_in_bytes": 18
},
{
"_path": "man/man1/bzdiff.1",
"path_type": "hardlink",
"sha256": "32d1a7cd115430398e58537532584ef2ab76343c9f094dcd1253d9c4c0f705bf",
"sha256_in_prefix": "32d1a7cd115430398e58537532584ef2ab76343c9f094dcd1253d9c4c0f705bf",
"size_in_bytes": 897
},
{
"_path": "man/man1/bzegrep.1",
"path_type": "hardlink",
"sha256": "cf1c98d3fa055506c8af2f8bba4da9c17d367c6409c6ad83a2bc67ccb6630182",
"sha256_in_prefix": "cf1c98d3fa055506c8af2f8bba4da9c17d367c6409c6ad83a2bc67ccb6630182",
"size_in_bytes": 18
},
{
"_path": "man/man1/bzfgrep.1",
"path_type": "hardlink",
"sha256": "cf1c98d3fa055506c8af2f8bba4da9c17d367c6409c6ad83a2bc67ccb6630182",
"sha256_in_prefix": "cf1c98d3fa055506c8af2f8bba4da9c17d367c6409c6ad83a2bc67ccb6630182",
"size_in_bytes": 18
},
{
"_path": "man/man1/bzgrep.1",
"path_type": "hardlink",
"sha256": "924aa4a7c7c1467400181e4c0ee1b527db142b6399a717171f2351b72b5899df",
"sha256_in_prefix": "924aa4a7c7c1467400181e4c0ee1b527db142b6399a717171f2351b72b5899df",
"size_in_bytes": 1297
},
{
"_path": "man/man1/bzip2.1",
"path_type": "hardlink",
"sha256": "27b984bb2e8bbee2651d11cda87449cfc4138d2e479b9eaa77b8f60fa5d0bf5d",
"sha256_in_prefix": "27b984bb2e8bbee2651d11cda87449cfc4138d2e479b9eaa77b8f60fa5d0bf5d",
"size_in_bytes": 16266
},
{
"_path": "man/man1/bzless.1",
"path_type": "hardlink",
"sha256": "216898f9b8acf61eeb471ecf23e47c1452dfd648f7f38d7d3bf48627072dc52c",
"sha256_in_prefix": "216898f9b8acf61eeb471ecf23e47c1452dfd648f7f38d7d3bf48627072dc52c",
"size_in_bytes": 18
},
{
"_path": "man/man1/bzmore.1",
"path_type": "hardlink",
"sha256": "ccfcf3f995e11adae3035e287252091bb72d165da21e0c385a4965d17c9051c7",
"sha256_in_prefix": "ccfcf3f995e11adae3035e287252091bb72d165da21e0c385a4965d17c9051c7",
"size_in_bytes": 4310
}
]
},
"link": {
"source": "/Users/graf/Library/Caches/rattler/cache/pkgs/bzip2-1.0.8-h93a5062_5",
"type": 1
}
}
44 changes: 44 additions & 0 deletions test-data/conda-meta/ca-certificates-2024.2.2-hf0a4a13_0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"build": "hf0a4a13_0",
"build_number": 0,
"depends": [],
"license": "ISC",
"md5": "fb416a1795f18dcc5a038bc2dc54edf9",
"name": "ca-certificates",
"sha256": "49bc3439816ac72d0c0e0f144b8cc870fdcc4adec2e861407ec818d8116b2204",
"size": 155725,
"subdir": "osx-arm64",
"timestamp": 1706844034242,
"version": "2024.2.2",
"fn": "ca-certificates-2024.2.2-hf0a4a13_0.conda",
"url": "https://conda.anaconda.org/conda-forge/osx-arm64/ca-certificates-2024.2.2-hf0a4a13_0.conda",
"channel": "https://conda.anaconda.org/conda-forge/",
"extracted_package_dir": "/Users/graf/Library/Caches/rattler/cache/pkgs/ca-certificates-2024.2.2-hf0a4a13_0",
"files": [
"ssl/cacert.pem",
"ssl/cert.pem"
],
"paths_data": {
"paths_version": 1,
"paths": [
{
"_path": "ssl/cacert.pem",
"path_type": "hardlink",
"sha256": "7a347ca8fef6e29f82b6e4785355a6635c17fa755e0940f65f15aa8fc7bd7f92",
"sha256_in_prefix": "7a347ca8fef6e29f82b6e4785355a6635c17fa755e0940f65f15aa8fc7bd7f92",
"size_in_bytes": 292541
},
{
"_path": "ssl/cert.pem",
"path_type": "softlink",
"sha256": "7a347ca8fef6e29f82b6e4785355a6635c17fa755e0940f65f15aa8fc7bd7f92",
"sha256_in_prefix": "0ebfd54889b22df75fcaac3a16c2262216c3036f967e7ecc2ed2bdf0956c16ee",
"size_in_bytes": 292541
}
]
},
"link": {
"source": "/Users/graf/Library/Caches/rattler/cache/pkgs/ca-certificates-2024.2.2-hf0a4a13_0",
"type": 1
}
}
Loading
Loading