Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Allow for long symlinks by using append_link #6838

Merged
merged 4 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 29 additions & 44 deletions crates/turborepo-cache/src/cache_archive/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{
};

use tar::{EntryType, Header};
use turbopath::{AbsoluteSystemPath, AnchoredSystemPath};
use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, IntoUnix};

use crate::CacheError;

Expand All @@ -26,6 +26,15 @@ impl<'a> CacheWriter<'a> {
Ok(self.builder.append_data(header, path, body)?)
}

fn append_link(
&mut self,
header: &mut Header,
path: impl AsRef<Path>,
target: impl AsRef<Path>,
) -> Result<(), CacheError> {
Ok(self.builder.append_link(header, path, target)?)
}

pub fn finish(mut self) -> Result<(), CacheError> {
Ok(self.builder.finish()?)
}
Expand Down Expand Up @@ -86,22 +95,24 @@ impl<'a> CacheWriter<'a> {
let mut file_path = file_path.to_unix();
file_path.make_canonical_for_tar(file_info.is_dir());

let mut header = Self::create_header(&source_path, &file_info)?;
let mut header = Self::create_header(&file_info)?;

if matches!(header.entry_type(), EntryType::Regular) && file_info.len() > 0 {
let file = source_path.open()?;
self.append_data(&mut header, file_path.as_str(), file)?;
} else if matches!(header.entry_type(), EntryType::Symlink) {
// We convert to a Unix path because all paths in tar should be
// Unix-style. This will get restored to a system path.
let target = source_path.read_link()?.into_unix();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a note about the switch to a unix-style path? Is this going to be converted back on Windows during restore?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I was kinda surprised that this hadn't been causing more issues. I guess we don't validate symlink targets to be Windows safe.

self.append_link(&mut header, file_path.as_str(), &target)?;
} else {
self.append_data(&mut header, file_path.as_str(), &mut std::io::empty())?;
}

Ok(())
}

fn create_header(
source_path: &AbsoluteSystemPath,
file_info: &fs::Metadata,
) -> Result<Header, CacheError> {
fn create_header(file_info: &fs::Metadata) -> Result<Header, CacheError> {
let mut header = Header::new_gnu();

let mode: u32;
Expand All @@ -118,11 +129,11 @@ impl<'a> CacheWriter<'a> {
}
header.set_mode(mode);

// Do we need to populate the additional linkname field in Header?
if file_info.is_symlink() {
let link = source_path.read_link()?;
header.set_link_name(link)?;
// We do *not* set the linkname here because it could be too long
// Instead we set it when we add the file to the archive
header.set_entry_type(EntryType::Symlink);
header.set_size(0);
} else if file_info.is_dir() {
header.set_size(0);
header.set_entry_type(EntryType::Directory);
Expand All @@ -148,7 +159,7 @@ impl<'a> CacheWriter<'a> {

#[cfg(test)]
mod tests {
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};

use anyhow::Result;
use tempfile::tempdir;
Expand Down Expand Up @@ -243,9 +254,6 @@ mod tests {
file_type: FileType::File,
}
],
"bf0b4bf722f8d845dce7627606ab8af30bb6454d7c0379219e4c036a484960fe78e3d98e29ca0bac9b69b858d446b89d2d691c524e2884389032be799b6699f6",
"bf0b4bf722f8d845dce7627606ab8af30bb6454d7c0379219e4c036a484960fe78e3d98e29ca0bac9b69b858d446b89d2d691c524e2884389032be799b6699f6",
"4f1357753cceec5df1c8a36110ce256f3e8c5c1f62cab3283013b6266d6e97b3884711ccdd45462a4607bee7ac7a8e414d0acea4672a9f0306bcf364281edc2f",
None
; "create regular file"
)]
Expand All @@ -272,9 +280,6 @@ mod tests {
file_type: FileType::File,
}
],
"2e6febdd2e8180f91f481ae58510e4afd3f071e66b7b64d82616ebb2d2d560b9a8a814e41f723cdaa5faec90405818421d590fcf8e617df0aabaa6fc61427d4f",
"0ece16efdb0b7e2a087e622ed52f29f21a4c080d77c31c4ed940b57dcdcb1f60b910d15232c0a2747325c22dadbfd069f15de969626dc49746be2d4b9b22e239",
"2e8ad9651964faa76082306dc95bff86fa0db821681e7a8acb982244ce0a9375417e867c3a9cb82f70bc6f03c7fb085e402712d3e9f27b980d5a0c22e086f4e2",
None
; "create symlinks"
)]
Expand All @@ -291,9 +296,6 @@ mod tests {
file_type: FileType::File,
},
],
"973c16d7e8662d3483ff0679b5337d7b9ba2001dbe863604fc8cc60254305750616312b9f988112db918d50fd087d89444d43a64beb4e8102109c5c628510131",
"973c16d7e8662d3483ff0679b5337d7b9ba2001dbe863604fc8cc60254305750616312b9f988112db918d50fd087d89444d43a64beb4e8102109c5c628510131",
"b8d51875d79a3cd56938e2ca8c3cad8eed6c96f7c38e152669ddfa7d7a1c44f62e4c3a13b299a30a895156c62b07ddbc46fdbf07a01870b965050359c19a9e06",
None
; "create directory"
)]
Expand All @@ -305,9 +307,6 @@ mod tests {
file_type: FileType::Symlink { linkname: "two".to_string() },
},
],
"40ce0d42109bb5e5a6b1d4ba9087a317b4c1c6c51822a57c9cb983f878b0ff765637c05fadd4bac32c8dd2b496c2a24825b183d9720b0cdd5b33f9248b692cc1",
"c113763393a9fb498cc676e1fe4843206cda665afe2144829fe7434da9e81f0cf6d11386fa79877d3c514d108f9696740256af952b57d32216fbed2eb2fb049d",
"fe692a000551a60da6cc303a9552a16d7ed5c462e33153a96824e96596da6d642fc671448f06f34e9685a13fe5bbb4220f59db73a856626b8a0962916a8f5ea3",
None
; "create broken symlink"
)]
Expand All @@ -319,17 +318,11 @@ mod tests {
file_type: FileType::Fifo,
}
],
"",
"",
"",
Some("attempted to create unsupported file type")
; "create unsupported"
)]
fn test_create(
files: Vec<CreateFileDefinition>,
#[allow(unused_variables)] expected_darwin: &str,
#[allow(unused_variables)] expected_unix: &str,
#[allow(unused_variables)] expected_windows: &str,
#[allow(unused_variables)] expected_err: Option<&str>,
) -> Result<()> {
'outer: for compressed in [false, true] {
Expand Down Expand Up @@ -361,21 +354,6 @@ mod tests {
}

cache_archive.finish()?;

if compressed {
let opened_cache_archive = CacheReader::open(&archive_path)?;
let sha_one = opened_cache_archive.get_sha()?;
let snapshot = hex::encode(&sha_one);

#[cfg(target_os = "macos")]
assert_eq!(snapshot, expected_darwin);

#[cfg(windows)]
assert_eq!(snapshot, expected_windows);

#[cfg(all(unix, not(target_os = "macos")))]
assert_eq!(snapshot, expected_unix);
}
}

Ok(())
Expand Down Expand Up @@ -418,10 +396,16 @@ mod tests {
let tar_path = tar_dir_path.join_component("test.tar");
let mut archive = CacheWriter::create(&tar_path)?;
let really_long_file = AnchoredSystemPath::new("this-is-a-really-really-really-long-path-like-so-very-long-that-i-can-list-all-of-my-favorite-directors-like-edward-yang-claire-denis-lucrecia-martel-wong-kar-wai-even-kurosawa").unwrap();
let really_long_symlink = AnchoredSystemPath::new("this-is-a-really-really-really-long-symlink-like-so-very-long-that-i-can-list-all-of-my-other-favorite-directors-like-jim-jarmusch-michelangelo-antonioni-and-terrence-malick-symlink").unwrap();

let really_long_path = archive_dir_path.resolve(really_long_file);
really_long_path.create_with_contents("The End!")?;

let really_long_symlink_path = archive_dir_path.resolve(really_long_symlink);
really_long_symlink_path.symlink_to_file(really_long_file.as_str())?;

archive.add_file(archive_dir_path, really_long_file)?;
archive.add_file(archive_dir_path, really_long_symlink)?;

archive.finish()?;

Expand All @@ -430,8 +414,9 @@ mod tests {

let mut restore = CacheReader::open(&tar_path)?;
let files = restore.restore(restore_dir_path)?;
assert_eq!(files.len(), 1);
assert_eq!(files.len(), 2);
assert_eq!(files[0].as_str(), really_long_file.as_str());
assert_eq!(files[1].as_str(), really_long_symlink.as_str());
Ok(())
}

Expand Down
10 changes: 5 additions & 5 deletions crates/turborepo-cache/src/cache_archive/restore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ impl<'a> CacheReader<'a> {
symlinks: &[Entry<'_, T>],
) -> Result<Vec<AnchoredSystemPathBuf>, CacheError> {
let mut graph = DiGraph::new();
let mut header_lookup = HashMap::new();
let mut entry_lookup = HashMap::new();
let mut restored = Vec::new();
let mut nodes = HashMap::new();

Expand All @@ -144,7 +144,7 @@ impl<'a> CacheReader<'a> {

graph.add_edge(source_node, link_node, ());

header_lookup.insert(processed_sourcename, entry.header().clone());
entry_lookup.insert(processed_sourcename, entry);
}

let nodes = petgraph::algo::toposort(&graph, None)
Expand All @@ -153,10 +153,10 @@ impl<'a> CacheReader<'a> {
for node in nodes {
let key = &graph[node];

let Some(header) = header_lookup.get(key) else {
let Some(entry) = entry_lookup.get(key) else {
continue;
};
let file = restore_symlink_allow_missing_target(dir_cache, anchor, header)?;
let file = restore_symlink_allow_missing_target(dir_cache, anchor, entry)?;
restored.push(file);
}

Expand All @@ -174,7 +174,7 @@ fn restore_entry<T: Read>(
match header.entry_type() {
tar::EntryType::Directory => restore_directory(dir_cache, anchor, entry.header()),
tar::EntryType::Regular => restore_regular(dir_cache, anchor, entry),
tar::EntryType::Symlink => restore_symlink(dir_cache, anchor, entry.header()),
tar::EntryType::Symlink => restore_symlink(dir_cache, anchor, entry),
ty => Err(CacheError::RestoreUnsupportedFileType(
ty,
Backtrace::capture(),
Expand Down
22 changes: 11 additions & 11 deletions crates/turborepo-cache/src/cache_archive/restore_symlink.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::backtrace::Backtrace;
use std::{backtrace::Backtrace, io::Read};

use camino::Utf8Path;
use turbopath::{
Expand All @@ -11,11 +11,11 @@ use crate::{cache_archive::restore_directory::CachedDirTree, CacheError};
pub fn restore_symlink(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<AnchoredSystemPathBuf, CacheError> {
let processed_name = AnchoredSystemPathBuf::from_system_path(&header.path()?)?;
let processed_name = AnchoredSystemPathBuf::from_system_path(&entry.path()?)?;

let linkname = header
let linkname = entry
.link_name()?
.ok_or_else(|| CacheError::MalformedTar(Backtrace::capture()))?;

Expand All @@ -28,19 +28,19 @@ pub fn restore_symlink(
));
}

actually_restore_symlink(dir_cache, anchor, &processed_name, header)?;
actually_restore_symlink(dir_cache, anchor, &processed_name, entry)?;

Ok(processed_name)
}

pub fn restore_symlink_allow_missing_target(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<AnchoredSystemPathBuf, CacheError> {
let processed_name = AnchoredSystemPathBuf::from_system_path(&header.path()?)?;
let processed_name = AnchoredSystemPathBuf::from_system_path(&entry.path()?)?;

actually_restore_symlink(dir_cache, anchor, &processed_name, header)?;
actually_restore_symlink(dir_cache, anchor, &processed_name, entry)?;

Ok(processed_name)
}
Expand All @@ -49,15 +49,15 @@ fn actually_restore_symlink<'a>(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
processed_name: &'a AnchoredSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<&'a AnchoredSystemPath, CacheError> {
dir_cache.safe_mkdir_file(anchor, processed_name)?;

let symlink_from = anchor.resolve(processed_name);

_ = symlink_from.remove();

let link_name = header.link_name()?.expect("have linkname");
let link_name = entry.link_name()?.expect("have linkname");
let symlink_to = link_name.to_str().ok_or_else(|| {
CacheError::PathError(
PathError::InvalidUnicode(link_name.to_string_lossy().to_string()),
Expand All @@ -76,7 +76,7 @@ fn actually_restore_symlink<'a>(
use std::os::unix::fs::PermissionsExt;
let metadata = symlink_from.symlink_metadata()?;
let mut permissions = metadata.permissions();
if let Ok(mode) = header.mode() {
if let Ok(mode) = entry.header().mode() {
permissions.set_mode(mode);
}
}
Expand Down
Loading