Skip to content

Commit

Permalink
Auto merge of rust-lang#123246 - Kobzol:tarball-reproducible, r=<try>
Browse files Browse the repository at this point in the history
Make source tarball generation more reproducible

This PR makes several changes to source tarball generation (`x dist rustc-src`) in order to make it more reproducible (in light of the recent "xz backdoor"...). I want to follow up on it by adding a separate CI workflow for generating the tarball.

After this PR, running this locally produces identical checksums:
```bash
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz

$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
```

r? `@Mark-Simulacrum`
  • Loading branch information
bors committed Mar 30, 2024
2 parents 8df7e72 + f57139c commit bec0625
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 29 deletions.
20 changes: 17 additions & 3 deletions src/bootstrap/src/core/build_steps/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -995,9 +995,9 @@ impl Step for PlainSourceTarball {
if builder.rust_info().is_managed_git_subrepository()
|| builder.rust_info().is_from_tarball()
{
if builder.rust_info().is_managed_git_subrepository() {
// Ensure we have the submodules checked out.
builder.update_submodule(Path::new("src/tools/cargo"));
// Ensure we have all submodules from src and other directories checked out.
for submodule in builder.get_all_submodules() {
builder.update_submodule(Path::new(submodule));
}

// Vendor all Cargo dependencies
Expand Down Expand Up @@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball {
builder.create(&cargo_config_dir.join("config.toml"), &config);
}

// Delete extraneous directories
// FIXME: if we're managed by git, we should probably instead ask git if the given path
// is managed by it?
for entry in walkdir::WalkDir::new(tarball.image_dir())
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
{
if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
{
t!(fs::remove_dir_all(entry.path()));
}
}

tarball.bare()
}
}
Expand Down
55 changes: 32 additions & 23 deletions src/bootstrap/src/core/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> {
///
/// [`path`]: ShouldRun::path
pub fn paths(mut self, paths: &[&str]) -> Self {
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();

let init_submodules_paths = |src: &PathBuf| {
let file = File::open(src.join(".gitmodules")).unwrap();

let mut submodules_paths = vec![];
for line in BufReader::new(file).lines() {
if let Ok(line) = line {
let line = line.trim();

if line.starts_with("path") {
let actual_path =
line.split(' ').last().expect("Couldn't get value of path");
submodules_paths.push(actual_path.to_owned());
}
}
}

submodules_paths
};

let submodules_paths =
SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
let submodules_paths = self.builder.get_all_submodules();

self.paths.insert(PathSet::Set(
paths
Expand Down Expand Up @@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> {
out
}

/// Return paths of all submodules managed by git.
/// If the current checkout is not managed by git, returns an empty slice.
pub fn get_all_submodules(&self) -> &[String] {
if !self.rust_info().is_managed_git_subrepository() {
return &[];
}

static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();

let init_submodules_paths = |src: &PathBuf| {
let file = File::open(src.join(".gitmodules")).unwrap();

let mut submodules_paths = vec![];
for line in BufReader::new(file).lines() {
if let Ok(line) = line {
let line = line.trim();

if line.starts_with("path") {
let actual_path =
line.split(' ').last().expect("Couldn't get value of path");
submodules_paths.push(actual_path.to_owned());
}
}
}

submodules_paths
};

&SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
}

/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
/// needed to ensure that all dependencies are built.
Expand Down
12 changes: 9 additions & 3 deletions src/tools/rust-installer/src/tarballer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result};
use std::fs::{read_link, symlink_metadata};
use std::io::{BufWriter, Write};
use std::path::Path;
use tar::{Builder, Header};
use tar::{Builder, Header, HeaderMode};
use walkdir::WalkDir;

use crate::{
Expand Down Expand Up @@ -53,14 +53,19 @@ impl Tarballer {
// Sort files by their suffix, to group files with the same name from
// different locations (likely identical) and files with the same
// extension (likely containing similar data).
let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
// Sorting of file and directory paths also helps with the reproducibility
// of the resulting archive.
let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
.context("failed to collect file paths")?;
dirs.sort();
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));

// Write the tar into both encoded files. We write all directories
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
let mut builder = Builder::new(buf);
// Make uid, gid and mtime deterministic to improve reproducibility
builder.mode(HeaderMode::Deterministic);

let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
pool.install(move || {
Expand Down Expand Up @@ -91,7 +96,8 @@ impl Tarballer {
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
let stat = symlink_metadata(src)?;
let mut header = Header::new_gnu();
header.set_metadata(&stat);
header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);

if stat.file_type().is_symlink() {
let link = read_link(src)?;
builder.append_link(&mut header, path, &link)?;
Expand Down

0 comments on commit bec0625

Please sign in to comment.