From bad27d074f59c83ef84ee7eab973f59aaf048f83 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Tue, 10 Sep 2024 09:50:13 +0200 Subject: [PATCH] refactor: use typed path in lock file --- crates/file_url/src/lib.rs | 36 +++-- crates/rattler_lock/Cargo.toml | 1 + crates/rattler_lock/src/conda.rs | 16 +- crates/rattler_lock/src/parse/serialize.rs | 2 +- ...attler_lock__url_or_path__test__order.snap | 12 ++ crates/rattler_lock/src/url_or_path.rs | 138 +++++++++++++----- .../src/utils/serde/url_or_path.rs | 8 +- py-rattler/Cargo.lock | 69 ++++++--- 8 files changed, 205 insertions(+), 77 deletions(-) create mode 100644 crates/rattler_lock/src/snapshots/rattler_lock__url_or_path__test__order.snap diff --git a/crates/file_url/src/lib.rs b/crates/file_url/src/lib.rs index a9c73c6dd..4321e2532 100644 --- a/crates/file_url/src/lib.rs +++ b/crates/file_url/src/lib.rs @@ -9,7 +9,8 @@ use std::path::PathBuf; use std::str::FromStr; use thiserror::Error; use typed_path::{ - Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix, + Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent, + Utf8WindowsPrefix, }; use url::{Host, Url}; @@ -33,14 +34,7 @@ fn is_windows_drive_letter_segment(segment: &str) -> Option { None } -/// Tries to convert a `file://` based URL to a path. -/// -/// We assume that any passed URL that represents a path is an absolute path. -/// -/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this -/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function -/// tries to parse the URL as a path on all operating systems. -pub fn url_to_path(url: &Url) -> Option { +fn url_to_path_inner>(url: &Url) -> Option { if url.scheme() != "file" { return None; } @@ -77,7 +71,29 @@ pub fn url_to_path(url: &Url) -> Option { } } - Some(PathBuf::from(path)) + Some(T::from(path)) +} + +/// Tries to convert a `file://` based URL to a path. +/// +/// We assume that any passed URL that represents a path is an absolute path. +/// +/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this +/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function +/// tries to parse the URL as a path on all operating systems. +pub fn url_to_path(url: &Url) -> Option { + url_to_path_inner(url) +} + +/// Tries to convert a `file://` based URL to a path. +/// +/// We assume that any passed URL that represents a path is an absolute path. +/// +/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this +/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function +/// tries to parse the URL as a path on all operating systems. +pub fn url_to_typed_path(url: &Url) -> Option { + url_to_path_inner(url) } const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); diff --git a/crates/rattler_lock/Cargo.toml b/crates/rattler_lock/Cargo.toml index 6cb3b05a2..b9a9005c4 100644 --- a/crates/rattler_lock/Cargo.toml +++ b/crates/rattler_lock/Cargo.toml @@ -26,6 +26,7 @@ serde_with = { workspace = true, features = ["indexmap_2"] } serde_repr = { workspace = true } thiserror = { workspace = true } url = { workspace = true, features = ["serde"] } +typed-path = { workspace = true } [dev-dependencies] insta = { workspace = true, features = ["yaml"] } diff --git a/crates/rattler_lock/src/conda.rs b/crates/rattler_lock/src/conda.rs index 353da9aad..9727a2838 100644 --- a/crates/rattler_lock/src/conda.rs +++ b/crates/rattler_lock/src/conda.rs @@ -1,12 +1,13 @@ +use std::cmp::Ordering; + use rattler_conda_types::{PackageRecord, RepoDataRecord}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, skip_serializing_none}; -use std::cmp::Ordering; use url::Url; -/// A locked conda dependency is just a [`PackageRecord`] with some additional information on where -/// it came from. It is very similar to a [`RepoDataRecord`], but it does not explicitly contain the -/// channel name. +/// A locked conda dependency is just a [`PackageRecord`] with some additional +/// information on where it came from. It is very similar to a +/// [`RepoDataRecord`], but it does not explicitly contain the channel name. #[serde_as] #[skip_serializing_none] #[derive(Serialize, Deserialize, Eq, PartialEq, Clone, Debug, Hash)] @@ -18,7 +19,8 @@ pub struct CondaPackageData { /// The location of the package. pub url: Url, - /// The filename of the package if the last segment of the url does not refer to the filename. + /// The filename of the package if the last segment of the url does not + /// refer to the filename. pub(crate) file_name: Option, /// The channel of the package if this cannot be derived from the url. @@ -137,8 +139,8 @@ fn file_name_from_url(url: &Url) -> Option<&str> { fn channel_from_url(url: &Url) -> Option { let mut result = url.clone(); - // Strip the last two path segments. We assume the first one contains the file_name, and the - // other the subdirectory. + // Strip the last two path segments. We assume the first one contains the + // file_name, and the other the subdirectory. result.path_segments_mut().ok()?.pop().pop(); Some(result) diff --git a/crates/rattler_lock/src/parse/serialize.rs b/crates/rattler_lock/src/parse/serialize.rs index 7812370dc..3a0bc641d 100644 --- a/crates/rattler_lock/src/parse/serialize.rs +++ b/crates/rattler_lock/src/parse/serialize.rs @@ -128,7 +128,7 @@ impl<'a> Ord for SerializablePackageSelector<'a> { (UrlOrPath::Url(a), UrlOrPath::Url(b)) => compare_url_by_filename(a, b), (UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Less, (UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Greater, - (UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.cmp(b), + (UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.as_str().cmp(b.as_str()), }, } } diff --git a/crates/rattler_lock/src/snapshots/rattler_lock__url_or_path__test__order.snap b/crates/rattler_lock/src/snapshots/rattler_lock__url_or_path__test__order.snap new file mode 100644 index 000000000..3d02b20be --- /dev/null +++ b/crates/rattler_lock/src/snapshots/rattler_lock__url_or_path__test__order.snap @@ -0,0 +1,12 @@ +--- +source: crates/rattler_lock/src/url_or_path.rs +expression: sorted_entries +--- +../_libgcc_mutex-0.1-conda_forge.tar.bz2 +..\_libgcc_mutex-0.1-conda_forge.tar.bz2 +/packages/_libgcc_mutex-0.1-conda_forge.conda +/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2 +/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2 +C:\packages\_libgcc_mutex-0.1-conda_forge.tar.bz2 +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda +https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 diff --git a/crates/rattler_lock/src/url_or_path.rs b/crates/rattler_lock/src/url_or_path.rs index ad290c817..e1e295a92 100644 --- a/crates/rattler_lock/src/url_or_path.rs +++ b/crates/rattler_lock/src/url_or_path.rs @@ -1,22 +1,23 @@ -use file_url::url_to_path; -use itertools::Itertools; -use serde_with::{DeserializeFromStr, SerializeDisplay}; -use std::borrow::Cow; -use std::cmp::Ordering; -use std::hash::Hash; -use std::path::Path; use std::{ + borrow::Cow, + cmp::Ordering, fmt::{Display, Formatter}, - path::PathBuf, + hash::Hash, str::FromStr, }; + +use file_url::url_to_typed_path; +use itertools::Itertools; +use serde_with::{DeserializeFromStr, SerializeDisplay}; use thiserror::Error; +use typed_path::{Utf8TypedPath, Utf8TypedPathBuf}; use url::Url; /// Represents either a URL or a path. /// -/// URLs have stricter requirements on their format, they must be absolute and they with the -/// [`url`] we can only create urls for absolute file paths for the current os. +/// URLs have stricter requirements on their format, they must be absolute and +/// they with the [`url`] we can only create urls for absolute file paths for +/// the current os. /// /// This also looks better when looking at the lockfile. #[derive(Debug, Clone, Eq, SerializeDisplay, DeserializeFromStr)] @@ -25,7 +26,7 @@ pub enum UrlOrPath { Url(Url), /// A local (or networked) path. - Path(PathBuf), + Path(Utf8TypedPathBuf), } impl PartialOrd for UrlOrPath { @@ -38,7 +39,9 @@ impl Ord for UrlOrPath { fn cmp(&self, other: &Self) -> Ordering { match (self, other) { (UrlOrPath::Url(self_url), UrlOrPath::Url(other_url)) => self_url.cmp(other_url), - (UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => self_path.cmp(other_path), + (UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => { + self_path.as_str().cmp(other_path.as_str()) + } (UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Greater, (UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Less, } @@ -47,7 +50,7 @@ impl Ord for UrlOrPath { impl PartialEq for UrlOrPath { fn eq(&self, other: &Self) -> bool { - match (self.canonicalize().as_ref(), other.canonicalize().as_ref()) { + match (self.normalize().as_ref(), other.normalize().as_ref()) { (UrlOrPath::Path(a), UrlOrPath::Path(b)) => a == b, (UrlOrPath::Url(a), UrlOrPath::Url(b)) => a == b, _ => false, @@ -57,21 +60,21 @@ impl PartialEq for UrlOrPath { impl Hash for UrlOrPath { fn hash(&self, state: &mut H) { - match self.canonicalize().as_ref() { + match self.normalize().as_ref() { UrlOrPath::Url(url) => url.hash(state), - UrlOrPath::Path(path) => path.hash(state), + UrlOrPath::Path(path) => path.as_str().hash(state), } } } -impl From for UrlOrPath { - fn from(value: PathBuf) -> Self { +impl From for UrlOrPath { + fn from(value: Utf8TypedPathBuf) -> Self { UrlOrPath::Path(value) } } -impl From<&Path> for UrlOrPath { - fn from(value: &Path) -> Self { +impl<'a> From> for UrlOrPath { + fn from(value: Utf8TypedPath<'a>) -> Self { UrlOrPath::Path(value.to_path_buf()) } } @@ -79,7 +82,7 @@ impl From<&Path> for UrlOrPath { impl From for UrlOrPath { fn from(value: Url) -> Self { // Try to normalize the URL to a path if possible. - if let Some(path) = url_to_path(&value) { + if let Some(path) = url_to_typed_path(&value) { UrlOrPath::Path(path) } else { UrlOrPath::Url(value) @@ -90,7 +93,7 @@ impl From for UrlOrPath { impl Display for UrlOrPath { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - UrlOrPath::Path(path) => write!(f, "{}", path.display()), + UrlOrPath::Path(path) => write!(f, "{path}"), UrlOrPath::Url(url) => write!(f, "{url}"), } } @@ -106,22 +109,37 @@ impl UrlOrPath { } /// Returns the path if this is a path. - pub fn as_path(&self) -> Option<&Path> { + pub fn as_path(&self) -> Option> { match self { - UrlOrPath::Path(path) => Some(path), + UrlOrPath::Path(path) => Some(path.to_path()), UrlOrPath::Url(_) => None, } } - /// Canonicalizes the instance to be a path if possible. + /// Normalizes the instance to be a path if possible and resolving `..` and + /// `.` segments. /// /// If this instance is a URL with a `file://` scheme, this will try to convert it to a path. - pub fn canonicalize(&self) -> Cow<'_, Self> { - if let Some(path) = self.as_url().and_then(url_to_path) { - return Cow::Owned(UrlOrPath::Path(path)); + pub fn normalize(&self) -> Cow<'_, Self> { + match self { + UrlOrPath::Url(url) => { + if let Some(path) = url_to_typed_path(url) { + return Cow::Owned(UrlOrPath::Path(path.normalize())); + } + Cow::Borrowed(self) + } + UrlOrPath::Path(path) => Cow::Owned(UrlOrPath::Path(path.normalize())), } + } - Cow::Borrowed(self) + /// Returns the file name of the path or url. If the path or url ends in a + /// directory seperator `None` is returned. + pub fn file_name(&self) -> Option<&str> { + match self { + UrlOrPath::Path(path) if !path.as_str().ends_with(['/', '\\']) => path.file_name(), + UrlOrPath::Url(url) if !url.as_str().ends_with('/') => url.path_segments()?.last(), + _ => None, + } } } @@ -143,23 +161,46 @@ impl FromStr for UrlOrPath { } // First try to parse the string as a path. - return match Url::from_str(s) { + match Url::from_str(s) { Ok(url) => Ok(if scheme_is_drive_letter(url.scheme()) { - UrlOrPath::Path(PathBuf::from(s)) + UrlOrPath::Path(s.into()) } else { - UrlOrPath::Url(url).canonicalize().into_owned() + UrlOrPath::Url(url).normalize().into_owned() }), - Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(PathBuf::from(s))), + Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(s.into())), Err(e) => Err(PathOrUrlError::InvalidUrl(e)), - }; + } } } #[cfg(test)] mod test { - use super::*; use std::str::FromStr; + use rstest::*; + + use super::*; + + #[rstest] + #[case( + "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case( + "C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case( + "/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2", + Some("_libgcc_mutex-0.1-conda_forge.tar.bz2") + )] + #[case("https://conda.anaconda.org/conda-forge/linux-64/", None)] + #[case("C:\\packages\\", None)] + #[case("/packages/", None)] + fn test_file_name(#[case] case: UrlOrPath, #[case] expected_filename: Option<&str>) { + assert_eq!(case.file_name(), expected_filename); + } + #[test] fn test_equality() { let tests = [ @@ -169,7 +210,7 @@ mod test { // Absolute paths as file and direct path (UrlOrPath::Url("file:///home/bob/test-file.txt".parse().unwrap()), - UrlOrPath::Path("/home/bob/test-file.txt".parse().unwrap())), + UrlOrPath::Path("/home/bob/test-file.txt".into())), ]; for (a, b) in &tests { @@ -228,11 +269,34 @@ mod test { "\\\\127.0.0.1\\c$\\temp\\test-file.txt", ]; - for path in &paths { + for path in paths { assert_eq!( UrlOrPath::from_str(path).unwrap(), - UrlOrPath::Path(path.parse().unwrap()) + UrlOrPath::Path(path.into()) ); } } + + #[test] + fn test_order() { + let entries = [ + "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2", + "https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda", + "file:///packages/_libgcc_mutex-0.1-conda_forge.tar.bz2", + "file:///packages/_libgcc_mutex-0.1-conda_forge.conda", + "C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2", + "/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2", + "../_libgcc_mutex-0.1-conda_forge.tar.bz2", + "..\\_libgcc_mutex-0.1-conda_forge.tar.bz2", + ]; + + let sorted_entries = entries + .iter() + .map(|p| UrlOrPath::from_str(p).unwrap()) + .sorted() + .map(|p| p.to_string()) + .format("\n") + .to_string(); + insta::assert_snapshot!(sorted_entries); + } } diff --git a/crates/rattler_lock/src/utils/serde/url_or_path.rs b/crates/rattler_lock/src/utils/serde/url_or_path.rs index 001a0bea3..713bbe663 100644 --- a/crates/rattler_lock/src/utils/serde/url_or_path.rs +++ b/crates/rattler_lock/src/utils/serde/url_or_path.rs @@ -9,7 +9,7 @@ use crate::UrlOrPath; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::{borrow::Cow, path::PathBuf}; +use std::borrow::Cow; use url::Url; #[derive(Serialize, Deserialize)] @@ -17,7 +17,7 @@ struct RawUrlOrPath<'a> { #[serde(skip_serializing_if = "Option::is_none")] url: Option>, #[serde(skip_serializing_if = "Option::is_none")] - path: Option>, + path: Option>, } pub fn serialize(value: &UrlOrPath, serializer: S) -> Result @@ -31,7 +31,7 @@ where }, UrlOrPath::Path(path) => RawUrlOrPath { url: None, - path: Some(Cow::Borrowed(path)), + path: Some(Cow::Borrowed(path.as_str())), }, }; @@ -45,7 +45,7 @@ where let raw = RawUrlOrPath::<'de>::deserialize(deserializer)?; match (raw.url, raw.path) { (Some(url), None) => Ok(UrlOrPath::Url(url.into_owned())), - (None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned())), + (None, Some(path)) => Ok(UrlOrPath::Path(path.into_owned().into())), _ => Err(serde::de::Error::custom("expected either a url or a path")), } } diff --git a/py-rattler/Cargo.lock b/py-rattler/Cargo.lock index bfa564535..058ac1194 100644 --- a/py-rattler/Cargo.lock +++ b/py-rattler/Cargo.lock @@ -159,6 +159,21 @@ dependencies = [ "slab", ] +[[package]] +name = "async-fd-lock" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7569377d7062165f6f7834d9cb3051974a2d141433cc201c2f94c149e993cccf" +dependencies = [ + "async-trait", + "cfg-if", + "pin-project", + "rustix", + "thiserror", + "tokio", + "windows-sys 0.52.0", +] + [[package]] name = "async-fs" version = "2.1.2" @@ -832,7 +847,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "file_url" -version = "0.1.4" +version = "0.1.5" dependencies = [ "itertools 0.13.0", "percent-encoding", @@ -924,6 +939,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" dependencies = [ "autocfg", + "tokio", +] + +[[package]] +name = "fs4" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c6b3bd49c37d2aa3f3f2220233b29a7cd23f79d1fe70e5337d25fb390793de" +dependencies = [ + "fs-err", + "rustix", + "windows-sys 0.52.0", ] [[package]] @@ -1799,7 +1826,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -2619,7 +2646,7 @@ dependencies = [ [[package]] name = "rattler" -version = "0.27.6" +version = "0.27.11" dependencies = [ "anyhow", "console", @@ -2657,11 +2684,14 @@ dependencies = [ [[package]] name = "rattler_cache" -version = "0.1.8" +version = "0.2.3" dependencies = [ "anyhow", + "dashmap", "digest", "dirs", + "fs4", + "futures", "fxhash", "itertools 0.13.0", "parking_lot", @@ -2671,6 +2701,7 @@ dependencies = [ "rattler_package_streaming", "reqwest 0.12.4", "reqwest-middleware", + "simple_spawn_blocking", "thiserror", "tokio", "tracing", @@ -2679,7 +2710,7 @@ dependencies = [ [[package]] name = "rattler_conda_types" -version = "0.27.2" +version = "0.27.6" dependencies = [ "chrono", "dirs", @@ -2713,7 +2744,7 @@ dependencies = [ [[package]] name = "rattler_digest" -version = "1.0.1" +version = "1.0.2" dependencies = [ "blake2", "digest", @@ -2728,7 +2759,7 @@ dependencies = [ [[package]] name = "rattler_index" -version = "0.19.24" +version = "0.19.28" dependencies = [ "fs-err", "rattler_conda_types", @@ -2741,7 +2772,7 @@ dependencies = [ [[package]] name = "rattler_lock" -version = "0.22.20" +version = "0.22.24" dependencies = [ "chrono", "file_url", @@ -2757,12 +2788,13 @@ dependencies = [ "serde_with", "serde_yaml", "thiserror", + "typed-path", "url", ] [[package]] name = "rattler_macros" -version = "1.0.1" +version = "1.0.2" dependencies = [ "quote", "syn 2.0.66", @@ -2770,7 +2802,7 @@ dependencies = [ [[package]] name = "rattler_networking" -version = "0.21.2" +version = "0.21.4" dependencies = [ "anyhow", "async-trait", @@ -2796,7 +2828,7 @@ dependencies = [ [[package]] name = "rattler_package_streaming" -version = "0.22.3" +version = "0.22.7" dependencies = [ "bzip2", "chrono", @@ -2822,7 +2854,7 @@ dependencies = [ [[package]] name = "rattler_redaction" -version = "0.1.1" +version = "0.1.2" dependencies = [ "reqwest 0.12.4", "reqwest-middleware", @@ -2831,10 +2863,11 @@ dependencies = [ [[package]] name = "rattler_repodata_gateway" -version = "0.21.8" +version = "0.21.13" dependencies = [ "anyhow", "async-compression", + "async-fd-lock", "async-trait", "blake2", "bytes", @@ -2882,7 +2915,7 @@ dependencies = [ [[package]] name = "rattler_shell" -version = "0.21.6" +version = "0.22.1" dependencies = [ "enum_dispatch", "indexmap 2.2.6", @@ -2897,7 +2930,7 @@ dependencies = [ [[package]] name = "rattler_solve" -version = "1.0.3" +version = "1.0.7" dependencies = [ "chrono", "futures", @@ -2913,7 +2946,7 @@ dependencies = [ [[package]] name = "rattler_virtual_packages" -version = "1.0.4" +version = "1.1.4" dependencies = [ "archspec", "libloading", @@ -3125,9 +3158,9 @@ dependencies = [ [[package]] name = "resolvo" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "014783b06e2d02bee01fe3c3247454fb34d0fc35765334e825034cdadec422fa" +checksum = "1a472ebbac5a18c9e235e874df3aa0c8fb0b55611155dd5e5515a55a16520d76" dependencies = [ "ahash", "bitvec",