Skip to content

Commit

Permalink
refactor: use typed path in lock file
Browse files Browse the repository at this point in the history
  • Loading branch information
baszalmstra committed Sep 10, 2024
1 parent 4885895 commit bad27d0
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 77 deletions.
36 changes: 26 additions & 10 deletions crates/file_url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ use std::path::PathBuf;
use std::str::FromStr;
use thiserror::Error;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix,
Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent,
Utf8WindowsPrefix,
};
use url::{Host, Url};

Expand All @@ -33,14 +34,7 @@ fn is_windows_drive_letter_segment(segment: &str) -> Option<String> {
None
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
fn url_to_path_inner<T: From<String>>(url: &Url) -> Option<T> {
if url.scheme() != "file" {
return None;
}
Expand Down Expand Up @@ -77,7 +71,29 @@ pub fn url_to_path(url: &Url) -> Option<PathBuf> {
}
}

Some(PathBuf::from(path))
Some(T::from(path))
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
url_to_path_inner(url)
}

/// Tries to convert a `file://` based URL to a path.
///
/// We assume that any passed URL that represents a path is an absolute path.
///
/// [`Url::to_file_path`] has a different code path for Windows and other operating systems, this
/// can cause URLs to parse perfectly fine on Windows, but fail to parse on Linux. This function
/// tries to parse the URL as a path on all operating systems.
pub fn url_to_typed_path(url: &Url) -> Option<Utf8TypedPathBuf> {
url_to_path_inner(url)
}

const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
Expand Down
1 change: 1 addition & 0 deletions crates/rattler_lock/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ serde_with = { workspace = true, features = ["indexmap_2"] }
serde_repr = { workspace = true }
thiserror = { workspace = true }
url = { workspace = true, features = ["serde"] }
typed-path = { workspace = true }

[dev-dependencies]
insta = { workspace = true, features = ["yaml"] }
Expand Down
16 changes: 9 additions & 7 deletions crates/rattler_lock/src/conda.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::cmp::Ordering;

use rattler_conda_types::{PackageRecord, RepoDataRecord};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, skip_serializing_none};
use std::cmp::Ordering;
use url::Url;

/// A locked conda dependency is just a [`PackageRecord`] with some additional information on where
/// it came from. It is very similar to a [`RepoDataRecord`], but it does not explicitly contain the
/// channel name.
/// A locked conda dependency is just a [`PackageRecord`] with some additional
/// information on where it came from. It is very similar to a
/// [`RepoDataRecord`], but it does not explicitly contain the channel name.
#[serde_as]
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Eq, PartialEq, Clone, Debug, Hash)]
Expand All @@ -18,7 +19,8 @@ pub struct CondaPackageData {
/// The location of the package.
pub url: Url,

/// The filename of the package if the last segment of the url does not refer to the filename.
/// The filename of the package if the last segment of the url does not
/// refer to the filename.
pub(crate) file_name: Option<String>,

/// The channel of the package if this cannot be derived from the url.
Expand Down Expand Up @@ -137,8 +139,8 @@ fn file_name_from_url(url: &Url) -> Option<&str> {
fn channel_from_url(url: &Url) -> Option<Url> {
let mut result = url.clone();

// Strip the last two path segments. We assume the first one contains the file_name, and the
// other the subdirectory.
// Strip the last two path segments. We assume the first one contains the
// file_name, and the other the subdirectory.
result.path_segments_mut().ok()?.pop().pop();

Some(result)
Expand Down
2 changes: 1 addition & 1 deletion crates/rattler_lock/src/parse/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ impl<'a> Ord for SerializablePackageSelector<'a> {
(UrlOrPath::Url(a), UrlOrPath::Url(b)) => compare_url_by_filename(a, b),
(UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Less,
(UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Greater,
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.cmp(b),
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a.as_str().cmp(b.as_str()),
},
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: crates/rattler_lock/src/url_or_path.rs
expression: sorted_entries
---
../_libgcc_mutex-0.1-conda_forge.tar.bz2
..\_libgcc_mutex-0.1-conda_forge.tar.bz2
/packages/_libgcc_mutex-0.1-conda_forge.conda
/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2
/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2
C:\packages\_libgcc_mutex-0.1-conda_forge.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
138 changes: 101 additions & 37 deletions crates/rattler_lock/src/url_or_path.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
use file_url::url_to_path;
use itertools::Itertools;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use std::borrow::Cow;
use std::cmp::Ordering;
use std::hash::Hash;
use std::path::Path;
use std::{
borrow::Cow,
cmp::Ordering,
fmt::{Display, Formatter},
path::PathBuf,
hash::Hash,
str::FromStr,
};

use file_url::url_to_typed_path;
use itertools::Itertools;
use serde_with::{DeserializeFromStr, SerializeDisplay};
use thiserror::Error;
use typed_path::{Utf8TypedPath, Utf8TypedPathBuf};
use url::Url;

/// Represents either a URL or a path.
///
/// URLs have stricter requirements on their format, they must be absolute and they with the
/// [`url`] we can only create urls for absolute file paths for the current os.
/// URLs have stricter requirements on their format, they must be absolute and
/// they with the [`url`] we can only create urls for absolute file paths for
/// the current os.
///
/// This also looks better when looking at the lockfile.
#[derive(Debug, Clone, Eq, SerializeDisplay, DeserializeFromStr)]
Expand All @@ -25,7 +26,7 @@ pub enum UrlOrPath {
Url(Url),

/// A local (or networked) path.
Path(PathBuf),
Path(Utf8TypedPathBuf),
}

impl PartialOrd<Self> for UrlOrPath {
Expand All @@ -38,7 +39,9 @@ impl Ord for UrlOrPath {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(UrlOrPath::Url(self_url), UrlOrPath::Url(other_url)) => self_url.cmp(other_url),
(UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => self_path.cmp(other_path),
(UrlOrPath::Path(self_path), UrlOrPath::Path(other_path)) => {
self_path.as_str().cmp(other_path.as_str())
}
(UrlOrPath::Url(_), UrlOrPath::Path(_)) => Ordering::Greater,
(UrlOrPath::Path(_), UrlOrPath::Url(_)) => Ordering::Less,
}
Expand All @@ -47,7 +50,7 @@ impl Ord for UrlOrPath {

impl PartialEq for UrlOrPath {
fn eq(&self, other: &Self) -> bool {
match (self.canonicalize().as_ref(), other.canonicalize().as_ref()) {
match (self.normalize().as_ref(), other.normalize().as_ref()) {
(UrlOrPath::Path(a), UrlOrPath::Path(b)) => a == b,
(UrlOrPath::Url(a), UrlOrPath::Url(b)) => a == b,
_ => false,
Expand All @@ -57,29 +60,29 @@ impl PartialEq for UrlOrPath {

impl Hash for UrlOrPath {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
match self.canonicalize().as_ref() {
match self.normalize().as_ref() {
UrlOrPath::Url(url) => url.hash(state),
UrlOrPath::Path(path) => path.hash(state),
UrlOrPath::Path(path) => path.as_str().hash(state),
}
}
}

impl From<PathBuf> for UrlOrPath {
fn from(value: PathBuf) -> Self {
impl From<Utf8TypedPathBuf> for UrlOrPath {
fn from(value: Utf8TypedPathBuf) -> Self {
UrlOrPath::Path(value)
}
}

impl From<&Path> for UrlOrPath {
fn from(value: &Path) -> Self {
impl<'a> From<Utf8TypedPath<'a>> for UrlOrPath {
fn from(value: Utf8TypedPath<'a>) -> Self {
UrlOrPath::Path(value.to_path_buf())
}
}

impl From<Url> for UrlOrPath {
fn from(value: Url) -> Self {
// Try to normalize the URL to a path if possible.
if let Some(path) = url_to_path(&value) {
if let Some(path) = url_to_typed_path(&value) {
UrlOrPath::Path(path)
} else {
UrlOrPath::Url(value)
Expand All @@ -90,7 +93,7 @@ impl From<Url> for UrlOrPath {
impl Display for UrlOrPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
UrlOrPath::Path(path) => write!(f, "{}", path.display()),
UrlOrPath::Path(path) => write!(f, "{path}"),
UrlOrPath::Url(url) => write!(f, "{url}"),
}
}
Expand All @@ -106,22 +109,37 @@ impl UrlOrPath {
}

/// Returns the path if this is a path.
pub fn as_path(&self) -> Option<&Path> {
pub fn as_path(&self) -> Option<Utf8TypedPath<'_>> {
match self {
UrlOrPath::Path(path) => Some(path),
UrlOrPath::Path(path) => Some(path.to_path()),
UrlOrPath::Url(_) => None,
}
}

/// Canonicalizes the instance to be a path if possible.
/// Normalizes the instance to be a path if possible and resolving `..` and
/// `.` segments.
///
/// If this instance is a URL with a `file://` scheme, this will try to convert it to a path.
pub fn canonicalize(&self) -> Cow<'_, Self> {
if let Some(path) = self.as_url().and_then(url_to_path) {
return Cow::Owned(UrlOrPath::Path(path));
pub fn normalize(&self) -> Cow<'_, Self> {
match self {
UrlOrPath::Url(url) => {
if let Some(path) = url_to_typed_path(url) {
return Cow::Owned(UrlOrPath::Path(path.normalize()));
}
Cow::Borrowed(self)
}
UrlOrPath::Path(path) => Cow::Owned(UrlOrPath::Path(path.normalize())),
}
}

Cow::Borrowed(self)
/// Returns the file name of the path or url. If the path or url ends in a
/// directory seperator `None` is returned.
pub fn file_name(&self) -> Option<&str> {
match self {
UrlOrPath::Path(path) if !path.as_str().ends_with(['/', '\\']) => path.file_name(),
UrlOrPath::Url(url) if !url.as_str().ends_with('/') => url.path_segments()?.last(),
_ => None,
}
}
}

Expand All @@ -143,23 +161,46 @@ impl FromStr for UrlOrPath {
}

// First try to parse the string as a path.
return match Url::from_str(s) {
match Url::from_str(s) {
Ok(url) => Ok(if scheme_is_drive_letter(url.scheme()) {
UrlOrPath::Path(PathBuf::from(s))
UrlOrPath::Path(s.into())
} else {
UrlOrPath::Url(url).canonicalize().into_owned()
UrlOrPath::Url(url).normalize().into_owned()
}),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(PathBuf::from(s))),
Err(url::ParseError::RelativeUrlWithoutBase) => Ok(UrlOrPath::Path(s.into())),
Err(e) => Err(PathOrUrlError::InvalidUrl(e)),
};
}
}
}

#[cfg(test)]
mod test {
use super::*;
use std::str::FromStr;

use rstest::*;

use super::*;

#[rstest]
#[case(
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case(
"C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case(
"/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
Some("_libgcc_mutex-0.1-conda_forge.tar.bz2")
)]
#[case("https://conda.anaconda.org/conda-forge/linux-64/", None)]
#[case("C:\\packages\\", None)]
#[case("/packages/", None)]
fn test_file_name(#[case] case: UrlOrPath, #[case] expected_filename: Option<&str>) {
assert_eq!(case.file_name(), expected_filename);
}

#[test]
fn test_equality() {
let tests = [
Expand All @@ -169,7 +210,7 @@ mod test {

// Absolute paths as file and direct path
(UrlOrPath::Url("file:///home/bob/test-file.txt".parse().unwrap()),
UrlOrPath::Path("/home/bob/test-file.txt".parse().unwrap())),
UrlOrPath::Path("/home/bob/test-file.txt".into())),
];

for (a, b) in &tests {
Expand Down Expand Up @@ -228,11 +269,34 @@ mod test {
"\\\\127.0.0.1\\c$\\temp\\test-file.txt",
];

for path in &paths {
for path in paths {
assert_eq!(
UrlOrPath::from_str(path).unwrap(),
UrlOrPath::Path(path.parse().unwrap())
UrlOrPath::Path(path.into())
);
}
}

#[test]
fn test_order() {
let entries = [
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.conda",
"file:///packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"file:///packages/_libgcc_mutex-0.1-conda_forge.conda",
"C:\\packages\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
"/packages/_libgcc_mutex-0.1-conda_forge.tar.bz2",
"../_libgcc_mutex-0.1-conda_forge.tar.bz2",
"..\\_libgcc_mutex-0.1-conda_forge.tar.bz2",
];

let sorted_entries = entries
.iter()
.map(|p| UrlOrPath::from_str(p).unwrap())
.sorted()
.map(|p| p.to_string())
.format("\n")
.to_string();
insta::assert_snapshot!(sorted_entries);
}
}
Loading

0 comments on commit bad27d0

Please sign in to comment.