Skip to content

Commit

Permalink
fix: Allow for long symlinks by using append_link (#6838)
Browse files Browse the repository at this point in the history
### Description

We were using `Header::set_link_name` instead of using
`Builder::append_link`, which handles long link names correctly. In
general we should avoid using methods on `Header` and instead use the
ones on `Entry`.

### Testing Instructions

Modifies the long name test to include a symlink with a long link name.

Closes TURBO-1968
  • Loading branch information
NicholasLYang committed Dec 21, 2023
1 parent 7e9fc5a commit ed9d3ac
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 60 deletions.
73 changes: 29 additions & 44 deletions crates/turborepo-cache/src/cache_archive/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{
};

use tar::{EntryType, Header};
use turbopath::{AbsoluteSystemPath, AnchoredSystemPath};
use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, IntoUnix};

use crate::CacheError;

Expand All @@ -26,6 +26,15 @@ impl<'a> CacheWriter<'a> {
Ok(self.builder.append_data(header, path, body)?)
}

fn append_link(
&mut self,
header: &mut Header,
path: impl AsRef<Path>,
target: impl AsRef<Path>,
) -> Result<(), CacheError> {
Ok(self.builder.append_link(header, path, target)?)
}

pub fn finish(mut self) -> Result<(), CacheError> {
Ok(self.builder.finish()?)
}
Expand Down Expand Up @@ -86,22 +95,24 @@ impl<'a> CacheWriter<'a> {
let mut file_path = file_path.to_unix();
file_path.make_canonical_for_tar(file_info.is_dir());

let mut header = Self::create_header(&source_path, &file_info)?;
let mut header = Self::create_header(&file_info)?;

if matches!(header.entry_type(), EntryType::Regular) && file_info.len() > 0 {
let file = source_path.open()?;
self.append_data(&mut header, file_path.as_str(), file)?;
} else if matches!(header.entry_type(), EntryType::Symlink) {
// We convert to a Unix path because all paths in tar should be
// Unix-style. This will get restored to a system path.
let target = source_path.read_link()?.into_unix();
self.append_link(&mut header, file_path.as_str(), &target)?;
} else {
self.append_data(&mut header, file_path.as_str(), &mut std::io::empty())?;
}

Ok(())
}

fn create_header(
source_path: &AbsoluteSystemPath,
file_info: &fs::Metadata,
) -> Result<Header, CacheError> {
fn create_header(file_info: &fs::Metadata) -> Result<Header, CacheError> {
let mut header = Header::new_gnu();

let mode: u32;
Expand All @@ -118,11 +129,11 @@ impl<'a> CacheWriter<'a> {
}
header.set_mode(mode);

// Do we need to populate the additional linkname field in Header?
if file_info.is_symlink() {
let link = source_path.read_link()?;
header.set_link_name(link)?;
// We do *not* set the linkname here because it could be too long
// Instead we set it when we add the file to the archive
header.set_entry_type(EntryType::Symlink);
header.set_size(0);
} else if file_info.is_dir() {
header.set_size(0);
header.set_entry_type(EntryType::Directory);
Expand All @@ -148,7 +159,7 @@ impl<'a> CacheWriter<'a> {

#[cfg(test)]
mod tests {
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};

use anyhow::Result;
use tempfile::tempdir;
Expand Down Expand Up @@ -243,9 +254,6 @@ mod tests {
file_type: FileType::File,
}
],
"bf0b4bf722f8d845dce7627606ab8af30bb6454d7c0379219e4c036a484960fe78e3d98e29ca0bac9b69b858d446b89d2d691c524e2884389032be799b6699f6",
"bf0b4bf722f8d845dce7627606ab8af30bb6454d7c0379219e4c036a484960fe78e3d98e29ca0bac9b69b858d446b89d2d691c524e2884389032be799b6699f6",
"4f1357753cceec5df1c8a36110ce256f3e8c5c1f62cab3283013b6266d6e97b3884711ccdd45462a4607bee7ac7a8e414d0acea4672a9f0306bcf364281edc2f",
None
; "create regular file"
)]
Expand All @@ -272,9 +280,6 @@ mod tests {
file_type: FileType::File,
}
],
"2e6febdd2e8180f91f481ae58510e4afd3f071e66b7b64d82616ebb2d2d560b9a8a814e41f723cdaa5faec90405818421d590fcf8e617df0aabaa6fc61427d4f",
"0ece16efdb0b7e2a087e622ed52f29f21a4c080d77c31c4ed940b57dcdcb1f60b910d15232c0a2747325c22dadbfd069f15de969626dc49746be2d4b9b22e239",
"2e8ad9651964faa76082306dc95bff86fa0db821681e7a8acb982244ce0a9375417e867c3a9cb82f70bc6f03c7fb085e402712d3e9f27b980d5a0c22e086f4e2",
None
; "create symlinks"
)]
Expand All @@ -291,9 +296,6 @@ mod tests {
file_type: FileType::File,
},
],
"973c16d7e8662d3483ff0679b5337d7b9ba2001dbe863604fc8cc60254305750616312b9f988112db918d50fd087d89444d43a64beb4e8102109c5c628510131",
"973c16d7e8662d3483ff0679b5337d7b9ba2001dbe863604fc8cc60254305750616312b9f988112db918d50fd087d89444d43a64beb4e8102109c5c628510131",
"b8d51875d79a3cd56938e2ca8c3cad8eed6c96f7c38e152669ddfa7d7a1c44f62e4c3a13b299a30a895156c62b07ddbc46fdbf07a01870b965050359c19a9e06",
None
; "create directory"
)]
Expand All @@ -305,9 +307,6 @@ mod tests {
file_type: FileType::Symlink { linkname: "two".to_string() },
},
],
"40ce0d42109bb5e5a6b1d4ba9087a317b4c1c6c51822a57c9cb983f878b0ff765637c05fadd4bac32c8dd2b496c2a24825b183d9720b0cdd5b33f9248b692cc1",
"c113763393a9fb498cc676e1fe4843206cda665afe2144829fe7434da9e81f0cf6d11386fa79877d3c514d108f9696740256af952b57d32216fbed2eb2fb049d",
"fe692a000551a60da6cc303a9552a16d7ed5c462e33153a96824e96596da6d642fc671448f06f34e9685a13fe5bbb4220f59db73a856626b8a0962916a8f5ea3",
None
; "create broken symlink"
)]
Expand All @@ -319,17 +318,11 @@ mod tests {
file_type: FileType::Fifo,
}
],
"",
"",
"",
Some("attempted to create unsupported file type")
; "create unsupported"
)]
fn test_create(
files: Vec<CreateFileDefinition>,
#[allow(unused_variables)] expected_darwin: &str,
#[allow(unused_variables)] expected_unix: &str,
#[allow(unused_variables)] expected_windows: &str,
#[allow(unused_variables)] expected_err: Option<&str>,
) -> Result<()> {
'outer: for compressed in [false, true] {
Expand Down Expand Up @@ -361,21 +354,6 @@ mod tests {
}

cache_archive.finish()?;

if compressed {
let opened_cache_archive = CacheReader::open(&archive_path)?;
let sha_one = opened_cache_archive.get_sha()?;
let snapshot = hex::encode(&sha_one);

#[cfg(target_os = "macos")]
assert_eq!(snapshot, expected_darwin);

#[cfg(windows)]
assert_eq!(snapshot, expected_windows);

#[cfg(all(unix, not(target_os = "macos")))]
assert_eq!(snapshot, expected_unix);
}
}

Ok(())
Expand Down Expand Up @@ -418,10 +396,16 @@ mod tests {
let tar_path = tar_dir_path.join_component("test.tar");
let mut archive = CacheWriter::create(&tar_path)?;
let really_long_file = AnchoredSystemPath::new("this-is-a-really-really-really-long-path-like-so-very-long-that-i-can-list-all-of-my-favorite-directors-like-edward-yang-claire-denis-lucrecia-martel-wong-kar-wai-even-kurosawa").unwrap();
let really_long_symlink = AnchoredSystemPath::new("this-is-a-really-really-really-long-symlink-like-so-very-long-that-i-can-list-all-of-my-other-favorite-directors-like-jim-jarmusch-michelangelo-antonioni-and-terrence-malick-symlink").unwrap();

let really_long_path = archive_dir_path.resolve(really_long_file);
really_long_path.create_with_contents("The End!")?;

let really_long_symlink_path = archive_dir_path.resolve(really_long_symlink);
really_long_symlink_path.symlink_to_file(really_long_file.as_str())?;

archive.add_file(archive_dir_path, really_long_file)?;
archive.add_file(archive_dir_path, really_long_symlink)?;

archive.finish()?;

Expand All @@ -430,8 +414,9 @@ mod tests {

let mut restore = CacheReader::open(&tar_path)?;
let files = restore.restore(restore_dir_path)?;
assert_eq!(files.len(), 1);
assert_eq!(files.len(), 2);
assert_eq!(files[0].as_str(), really_long_file.as_str());
assert_eq!(files[1].as_str(), really_long_symlink.as_str());
Ok(())
}

Expand Down
10 changes: 5 additions & 5 deletions crates/turborepo-cache/src/cache_archive/restore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ impl<'a> CacheReader<'a> {
symlinks: &[Entry<'_, T>],
) -> Result<Vec<AnchoredSystemPathBuf>, CacheError> {
let mut graph = DiGraph::new();
let mut header_lookup = HashMap::new();
let mut entry_lookup = HashMap::new();
let mut restored = Vec::new();
let mut nodes = HashMap::new();

Expand All @@ -144,7 +144,7 @@ impl<'a> CacheReader<'a> {

graph.add_edge(source_node, link_node, ());

header_lookup.insert(processed_sourcename, entry.header().clone());
entry_lookup.insert(processed_sourcename, entry);
}

let nodes = petgraph::algo::toposort(&graph, None)
Expand All @@ -153,10 +153,10 @@ impl<'a> CacheReader<'a> {
for node in nodes {
let key = &graph[node];

let Some(header) = header_lookup.get(key) else {
let Some(entry) = entry_lookup.get(key) else {
continue;
};
let file = restore_symlink_allow_missing_target(dir_cache, anchor, header)?;
let file = restore_symlink_allow_missing_target(dir_cache, anchor, entry)?;
restored.push(file);
}

Expand All @@ -174,7 +174,7 @@ fn restore_entry<T: Read>(
match header.entry_type() {
tar::EntryType::Directory => restore_directory(dir_cache, anchor, entry.header()),
tar::EntryType::Regular => restore_regular(dir_cache, anchor, entry),
tar::EntryType::Symlink => restore_symlink(dir_cache, anchor, entry.header()),
tar::EntryType::Symlink => restore_symlink(dir_cache, anchor, entry),
ty => Err(CacheError::RestoreUnsupportedFileType(
ty,
Backtrace::capture(),
Expand Down
22 changes: 11 additions & 11 deletions crates/turborepo-cache/src/cache_archive/restore_symlink.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::backtrace::Backtrace;
use std::{backtrace::Backtrace, io::Read};

use camino::Utf8Path;
use turbopath::{
Expand All @@ -11,11 +11,11 @@ use crate::{cache_archive::restore_directory::CachedDirTree, CacheError};
pub fn restore_symlink(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<AnchoredSystemPathBuf, CacheError> {
let processed_name = AnchoredSystemPathBuf::from_system_path(&header.path()?)?;
let processed_name = AnchoredSystemPathBuf::from_system_path(&entry.path()?)?;

let linkname = header
let linkname = entry
.link_name()?
.ok_or_else(|| CacheError::MalformedTar(Backtrace::capture()))?;

Expand All @@ -28,19 +28,19 @@ pub fn restore_symlink(
));
}

actually_restore_symlink(dir_cache, anchor, &processed_name, header)?;
actually_restore_symlink(dir_cache, anchor, &processed_name, entry)?;

Ok(processed_name)
}

pub fn restore_symlink_allow_missing_target(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<AnchoredSystemPathBuf, CacheError> {
let processed_name = AnchoredSystemPathBuf::from_system_path(&header.path()?)?;
let processed_name = AnchoredSystemPathBuf::from_system_path(&entry.path()?)?;

actually_restore_symlink(dir_cache, anchor, &processed_name, header)?;
actually_restore_symlink(dir_cache, anchor, &processed_name, entry)?;

Ok(processed_name)
}
Expand All @@ -49,15 +49,15 @@ fn actually_restore_symlink<'a>(
dir_cache: &mut CachedDirTree,
anchor: &AbsoluteSystemPath,
processed_name: &'a AnchoredSystemPath,
header: &tar::Header,
entry: &tar::Entry<impl Read>,
) -> Result<&'a AnchoredSystemPath, CacheError> {
dir_cache.safe_mkdir_file(anchor, processed_name)?;

let symlink_from = anchor.resolve(processed_name);

_ = symlink_from.remove();

let link_name = header.link_name()?.expect("have linkname");
let link_name = entry.link_name()?.expect("have linkname");
let symlink_to = link_name.to_str().ok_or_else(|| {
CacheError::PathError(
PathError::InvalidUnicode(link_name.to_string_lossy().to_string()),
Expand All @@ -76,7 +76,7 @@ fn actually_restore_symlink<'a>(
use std::os::unix::fs::PermissionsExt;
let metadata = symlink_from.symlink_metadata()?;
let mut permissions = metadata.permissions();
if let Ok(mode) = header.mode() {
if let Ok(mode) = entry.header().mode() {
permissions.set_mode(mode);
}
}
Expand Down

0 comments on commit ed9d3ac

Please sign in to comment.