Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use typed path in lock file #858

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions crates/file_url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use std::path::PathBuf;
use std::str::FromStr;
use thiserror::Error;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix,
Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent,
Utf8WindowsPrefix,
};
use url::{Host, Url};

Expand All @@ -33,14 +34,7 @@ fn is_windows_drive_letter_segment(segment: &str) -> Option<String> {
None
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
fn url_to_path_inner<T: From<String>>(url: &Url) -> Option<T> {
if url.scheme() != "file" {
return None;
}
Expand Down Expand Up @@ -77,7 +71,29 @@ pub fn url_to_path(url: &Url) -> Option<PathBuf> {
}
}

Some(PathBuf::from(path))
Some(T::from(path))
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
url_to_path_inner(url)
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_typed_path(url: &Url) -> Option<Utf8TypedPathBuf> {
url_to_path_inner(url)
}

const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
Expand Down
1 change: 1 addition & 0 deletions crates/rattler_lock/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ serde_with = { workspace = true, features = ["indexmap_2"] }
serde_repr = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true, features = ["serde"] }
typed-path = { workspace = true }

[dev-dependencies]
insta = { workspace = true, features = ["yaml"] }
Expand Down
16 changes: 9 additions & 7 deletions crates/rattler_lock/src/conda.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::cmp::Ordering;

use rattler_conda_types::{PackageRecord, RepoDataRecord};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, skip_serializing_none};
use std::cmp::Ordering;
use url::Url;

/// A locked conda dependency is just a [`PackageRecord`] with some additional information on where
/// it came from. It is very similar to a [`RepoDataRecord`], but it does not explicitly contain the
/// channel name.
/// A locked conda dependency is just a [`PackageRecord`] with some additional
/// information on where it came from. It is very similar to a
/// [`RepoDataRecord`], but it does not explicitly contain the channel name.
#[serde_as]
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Eq, PartialEq, Clone, Debug, Hash)]
Expand All @@ -18,7 +19,8 @@ pub struct CondaPackageData {
/// The location of the package.
pub url: Url,

/// The filename of the package if the last segment of the url does not refer to the filename.
/// The filename of the package if the last segment of the url does not
/// refer to the filename.
pub(crate) file_name: Option<String>,

/// The channel of the package if this cannot be derived from the url.
Expand Down Expand Up @@ -137,8 +139,8 @@ fn file_name_from_url(url: &Url) -> Option<&str> {
fn channel_from_url(url: &Url) -> Option<Url> {
let mut result = url.clone();

// Strip the last two path segments. We assume the first one contains the file_name, and the
// other the subdirectory.
// Strip the last two path segments. We assume the first one contains the
// file_name, and the other the subdirectory.
result.path_segments_mut().ok()?.pop().pop();

Some(result)
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler_lock/src/parse/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ impl<'a> Ord for SerializablePackageSelector<'a> {
(UrlOrPath::Url(a), UrlOrPath::Url(b)) => compare_url_by_filename(a, b),
(UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Less,
(UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Greater,
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.cmp(b),
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.as_str().cmp(b.as_str()),
},
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: crates/rattler_lock/src/url_or_path.rs
expression: sorted_entries
---
../_libgcc_mutex-0.1-conda_forge.tar.bz2
..\_libgcc_mutex-0.1-conda_forge.tar.bz2
/packages/_libgcc_mutex-0.1-conda_forge.conda
/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2
/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2
C:\packages\_libgcc_mutex-0.1-conda_forge.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
138 changes: 101 additions & 37 deletions crates/rattler_lock/src/url_or_path.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
use file_url::url_to_path;
use itertools::Itertools;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::borrow::Cow;
use std::cmp::Ordering;
use std::hash::Hash;
use std::path::Path;
use std::{
borrow::Cow,
cmp::Ordering,
fmt::{Display, Formatter},
path::PathBuf,
hash::Hash,
str::FromStr,
};

use file_url::url_to_typed_path;
use itertools::Itertools;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use thiserror::Error;
use typed_path::{Utf8TypedPath, Utf8TypedPathBuf};
use url::Url;

/// Represents either a URL or a path.
///
/// URLs have stricter requirements on their format, they must be absolute and they with the
/// [`url`] we can only create urls for absolute file paths for the current os.
/// URLs have stricter requirements on their format, they must be absolute and
/// they with the [`url`] we can only create urls for absolute file paths for
/// the current os.
///
/// This also looks better when looking at the lockfile.
#[derive(Debug, Clone, Eq, SerializeDisplay, DeserializeFromStr)]
Expand All @@ -25,7 +26,7 @@ pub enum UrlOrPath {
Url(Url),

/// A local (or networked) path.
Path(PathBuf),
Path(Utf8TypedPathBuf),
}

impl PartialOrd<Self> for UrlOrPath {
Expand All @@ -38,7 +39,9 @@ impl Ord for UrlOrPath {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(UrlOrPath::Url(self_url), UrlOrPath::Url(other_url)) => self_url.cmp(other_url),
(UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => self_path.cmp(other_path),
(UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => {
self_path.as_str().cmp(other_path.as_str())
}
(UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Greater,
(UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Less,
}
Expand All @@ -47,7 +50,7 @@ impl Ord for UrlOrPath {

impl PartialEq for UrlOrPath {
fn eq(&self, other: &Self) -> bool {
match (self.canonicalize().as_ref(), other.canonicalize().as_ref()) {
match (self.normalize().as_ref(), other.normalize().as_ref()) {
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a == b,
(UrlOrPath::Url(a), UrlOrPath::Url(b)) => a == b,
_ => false,
Expand All @@ -57,29 +60,29 @@ impl PartialEq for UrlOrPath {

impl Hash for UrlOrPath {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self.canonicalize().as_ref() {
match self.normalize().as_ref() {
UrlOrPath::Url(url) => url.hash(state),
UrlOrPath::Path(path) => path.hash(state),
UrlOrPath::Path(path) => path.as_str().hash(state),
}
}
}

impl From<PathBuf> for UrlOrPath {
fn from(value: PathBuf) -> Self {
impl From<Utf8TypedPathBuf> for UrlOrPath {
fn from(value: Utf8TypedPathBuf) -> Self {
UrlOrPath::Path(value)
}
}

impl From<&Path> for UrlOrPath {
fn from(value: &Path) -> Self {
impl<'a> From<Utf8TypedPath<'a>> for UrlOrPath {
fn from(value: Utf8TypedPath<'a>) -> Self {
UrlOrPath::Path(value.to_path_buf())
}
}

impl From<Url> for UrlOrPath {
fn from(value: Url) -> Self {
// Try to normalize the URL to a path if possible.
if let Some(path) = url_to_path(&value) {
if let Some(path) = url_to_typed_path(&value) {
UrlOrPath::Path(path)
} else {
UrlOrPath::Url(value)
Expand All @@ -90,7 +93,7 @@ impl From<Url> for UrlOrPath {
impl Display for UrlOrPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
UrlOrPath::Path(path) => write!(f, "{}", path.display()),
UrlOrPath::Path(path) => write!(f, "{path}"),
UrlOrPath::Url(url) => write!(f, "{url}"),
}
}
Expand All @@ -106,22 +109,37 @@ impl UrlOrPath {
}

/// Returns the path if this is a path.
pub fn as_path(&self) -> Option<&Path> {
pub fn as_path(&self) -> Option<Utf8TypedPath<'_>> {
match self {
UrlOrPath::Path(path) => Some(path),
UrlOrPath::Path(path) => Some(path.to_path()),
UrlOrPath::Url(_) => None,
}
}

/// Canonicalizes the instance to be a path if possible.
/// Normalizes the instance to be a path if possible and resolving `..` and
/// `.` segments.
///
/// If this instance is a URL with a `file://` scheme, this will try to convert it to a path.
pub fn canonicalize(&self) -> Cow<'_, Self> {
if let Some(path) = self.as_url().and_then(url_to_path) {
return Cow::Owned(UrlOrPath::Path(path));
pub fn normalize(&self) -> Cow<'_, Self> {
match self {
UrlOrPath::Url(url) => {
if let Some(path) = url_to_typed_path(url) {
return Cow::Owned(UrlOrPath::Path(path.normalize()));
}
Cow::Borrowed(self)
}
UrlOrPath::Path(path) => Cow::Owned(UrlOrPath::Path(path.normalize())),
}
}

Cow::Borrowed(self)
/// Returns the file name of the path or url. If the path or url ends in a
/// directory seperator `None` is returned.
pub fn file_name(&self) -> Option<&str> {
match self {
UrlOrPath::Path(path) if !path.as_str().ends_with(['/', '\\']) => path.file_name(),
UrlOrPath::Url(url) if !url.as_str().ends_with('/') => url.path_segments()?.last(),
_ => None,
}
}
}

Expand All @@ -143,23 +161,46 @@ impl FromStr for UrlOrPath {
}

// First try to parse the string as a path.
return match Url::from_str(s) {
match Url::from_str(s) {
Ok(url) => Ok(if scheme_is_drive_letter(url.scheme()) {
UrlOrPath::Path(PathBuf::from(s))
UrlOrPath::Path(s.into())
} else {
UrlOrPath::Url(url).canonicalize().into_owned()
UrlOrPath::Url(url).normalize().into_owned()
}),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(PathBuf::from(s))),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(s.into())),
Err(e) => Err(PathOrUrlError::InvalidUrl(e)),
};
}
}
}

#[cfg(test)]
mod test {
use super::*;
use std::str::FromStr;

use rstest::*;

use super::*;

#[rstest]
#[case(
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case(
"C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case(
"/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case("https://conda.anaconda.org/conda-forge/linux-64/", None)]
#[case("C:\\packages\\", None)]
#[case("/packages/", None)]
fn test_file_name(#[case] case: UrlOrPath, #[case] expected_filename: Option<&str>) {
assert_eq!(case.file_name(), expected_filename);
}

#[test]
fn test_equality() {
let tests = [
Expand All @@ -169,7 +210,7 @@ mod test {

// Absolute paths as file and direct path
(UrlOrPath::Url("file:///home/bob/test-file.txt".parse().unwrap()),
UrlOrPath::Path("/home/bob/test-file.txt".parse().unwrap())),
UrlOrPath::Path("/home/bob/test-file.txt".into())),
];

for (a, b) in &tests {
Expand Down Expand Up @@ -228,11 +269,34 @@ mod test {
"\\\\127.0.0.1\\c$\\temp\\test-file.txt",
];

for path in &paths {
for path in paths {
assert_eq!(
UrlOrPath::from_str(path).unwrap(),
UrlOrPath::Path(path.parse().unwrap())
UrlOrPath::Path(path.into())
);
}
}

#[test]
fn test_order() {
let entries = [
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda",
"file:///packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"file:///packages/_libgcc_mutex-0.1-conda_forge.conda",
"C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
"/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"../_libgcc_mutex-0.1-conda_forge.tar.bz2",
"..\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
];

let sorted_entries = entries
.iter()
.map(|p| UrlOrPath::from_str(p).unwrap())
.sorted()
.map(|p| p.to_string())
.format("\n")
.to_string();
insta::assert_snapshot!(sorted_entries);
}
}
Loading
Loading