From 18f9e5ff784f0a287efac73e3a4eaff255c5db35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 30 Mar 2024 19:56:08 +0100 Subject: [PATCH 1/5] Move submodule lookup to `Builder` --- src/bootstrap/src/core/builder.rs | 55 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/src/bootstrap/src/core/builder.rs b/src/bootstrap/src/core/builder.rs index 7f93fdc72ef08..e9906ebd83600 100644 --- a/src/bootstrap/src/core/builder.rs +++ b/src/bootstrap/src/core/builder.rs @@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> { /// /// [`path`]: ShouldRun::path pub fn paths(mut self, paths: &[&str]) -> Self { - static SUBMODULES_PATHS: OnceLock> = OnceLock::new(); - - let init_submodules_paths = |src: &PathBuf| { - let file = File::open(src.join(".gitmodules")).unwrap(); - - let mut submodules_paths = vec![]; - for line in BufReader::new(file).lines() { - if let Ok(line) = line { - let line = line.trim(); - - if line.starts_with("path") { - let actual_path = - line.split(' ').last().expect("Couldn't get value of path"); - submodules_paths.push(actual_path.to_owned()); - } - } - } - - submodules_paths - }; - - let submodules_paths = - SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src)); + let submodules_paths = self.builder.get_all_submodules(); self.paths.insert(PathSet::Set( paths @@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> { out } + /// Return paths of all submodules managed by git. + /// If the current checkout is not managed by git, returns an empty slice. + pub fn get_all_submodules(&self) -> &[String] { + if !self.rust_info().is_managed_git_subrepository() { + return &[]; + } + + static SUBMODULES_PATHS: OnceLock> = OnceLock::new(); + + let init_submodules_paths = |src: &PathBuf| { + let file = File::open(src.join(".gitmodules")).unwrap(); + + let mut submodules_paths = vec![]; + for line in BufReader::new(file).lines() { + if let Ok(line) = line { + let line = line.trim(); + + if line.starts_with("path") { + let actual_path = + line.split(' ').last().expect("Couldn't get value of path"); + submodules_paths.push(actual_path.to_owned()); + } + } + } + + submodules_paths + }; + + &SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src)) + } + /// Ensure that a given step is built *only if it's supposed to be built by default*, returning /// its output. This will cache the step, so it's safe (and good!) to call this as often as /// needed to ensure that all dependencies are build. From 47cb48e6010a0ff1d0f16eecbfb343335d9b5e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 30 Mar 2024 19:58:30 +0100 Subject: [PATCH 2/5] Checkout all submodules when building source tarballs --- src/bootstrap/src/core/build_steps/dist.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bootstrap/src/core/build_steps/dist.rs b/src/bootstrap/src/core/build_steps/dist.rs index d9c7032d0db8e..08d05920dc4b3 100644 --- a/src/bootstrap/src/core/build_steps/dist.rs +++ b/src/bootstrap/src/core/build_steps/dist.rs @@ -995,9 +995,9 @@ impl Step for PlainSourceTarball { if builder.rust_info().is_managed_git_subrepository() || builder.rust_info().is_from_tarball() { - if builder.rust_info().is_managed_git_subrepository() { - // Ensure we have the submodules checked out. - builder.update_submodule(Path::new("src/tools/cargo")); + // Ensure we have all submodules from src and other directories checked out. + for submodule in builder.get_all_submodules() { + builder.update_submodule(Path::new(submodule)); } // Vendor all Cargo dependencies From dfd24b538f406be62f6f146b43365b7ff0b3bf78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 30 Mar 2024 21:21:58 +0100 Subject: [PATCH 3/5] Remove potential `__pycache__` directories from src tarballs --- src/bootstrap/src/core/build_steps/dist.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/bootstrap/src/core/build_steps/dist.rs b/src/bootstrap/src/core/build_steps/dist.rs index 08d05920dc4b3..ecea62140a2e1 100644 --- a/src/bootstrap/src/core/build_steps/dist.rs +++ b/src/bootstrap/src/core/build_steps/dist.rs @@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball { builder.create(&cargo_config_dir.join("config.toml"), &config); } + // Delete extraneous directories + // FIXME: if we're managed by git, we should probably instead ask git if the given path + // is managed by it? + for entry in walkdir::WalkDir::new(tarball.image_dir()) + .follow_links(true) + .into_iter() + .filter_map(|e| e.ok()) + { + if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__")) + { + t!(fs::remove_dir_all(entry.path())); + } + } + tarball.bare() } } From 45c36628b637075c67520f1ecad667e0fe45b3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 30 Mar 2024 21:27:58 +0100 Subject: [PATCH 4/5] Make tarball generation more deterministic --- src/tools/rust-installer/src/tarballer.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tools/rust-installer/src/tarballer.rs b/src/tools/rust-installer/src/tarballer.rs index e5a925b2cbf2d..24d341db78e5e 100644 --- a/src/tools/rust-installer/src/tarballer.rs +++ b/src/tools/rust-installer/src/tarballer.rs @@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result}; use std::fs::{read_link, symlink_metadata}; use std::io::{BufWriter, Write}; use std::path::Path; -use tar::{Builder, Header}; +use tar::{Builder, Header, HeaderMode}; use walkdir::WalkDir; use crate::{ @@ -61,6 +61,8 @@ impl Tarballer { // first, so files may be directly created. (See rust-lang/rustup.rs#1092.) let buf = BufWriter::with_capacity(1024 * 1024, encoder); let mut builder = Builder::new(buf); + // Make uid, gid and mtime deterministic to improve reproducibility + builder.mode(HeaderMode::Deterministic); let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap(); pool.install(move || { @@ -91,7 +93,8 @@ impl Tarballer { fn append_path(builder: &mut Builder, src: &Path, path: &String) -> Result<()> { let stat = symlink_metadata(src)?; let mut header = Header::new_gnu(); - header.set_metadata(&stat); + header.set_metadata_in_mode(&stat, HeaderMode::Deterministic); + if stat.file_type().is_symlink() { let link = read_link(src)?; builder.append_link(&mut header, path, &link)?; From f57139c8a0f14d6e71f046b56a64a4f1b0718ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 30 Mar 2024 22:03:11 +0100 Subject: [PATCH 5/5] Sort directories when generating tarballs --- src/tools/rust-installer/src/tarballer.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tools/rust-installer/src/tarballer.rs b/src/tools/rust-installer/src/tarballer.rs index 24d341db78e5e..2f093e7ad17fd 100644 --- a/src/tools/rust-installer/src/tarballer.rs +++ b/src/tools/rust-installer/src/tarballer.rs @@ -53,8 +53,11 @@ impl Tarballer { // Sort files by their suffix, to group files with the same name from // different locations (likely identical) and files with the same // extension (likely containing similar data). - let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input) + // Sorting of file and directory paths also helps with the reproducibility + // of the resulting archive. + let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input) .context("failed to collect file paths")?; + dirs.sort(); files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev())); // Write the tar into both encoded files. We write all directories