Skip to content

Commit

Permalink
Add a command-line option to configure the number of concurrent downl…
Browse files Browse the repository at this point in the history
…oads

Set the default value to 64
  • Loading branch information
ergrelet committed Jun 24, 2023
1 parent 4276b04 commit ae43859
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 17 deletions.
5 changes: 4 additions & 1 deletion windiff_cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ pub struct WinDiffOpt {
/// Path to the output directory that'll contain the generated files.
#[structopt(default_value = "", parse(from_os_str))]
pub output_directory: PathBuf,
#[structopt(short, long)]
/// Enable "low storage" mode. This might be needed if you run the tool in
/// a constrained environment (e.g., a CI runner)
#[structopt(short, long)]
pub low_storage_mode: bool,
/// Number of concurrent downloads to perform while downloading files. Defaults to 64.
#[structopt(short, long, default_value = "64")]
pub concurrent_downloads: usize,
}
34 changes: 22 additions & 12 deletions windiff_cli/src/download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,22 @@ use crate::{
pub async fn download_all_binaries(
cfg: &WinDiffConfiguration,
output_directory: &Path,
concurrent_downloads: usize,
) -> Result<Vec<DownloadedPEVersion>> {
// Note(ergrelet): arbitrarily defined value
const CONCURRENT_PE_DOWNLOADS: usize = 16;

// Fetch all binaries concurrently and fold results into a single `Vec`
let result: Vec<DownloadedPEVersion> =
futures::stream::iter(cfg.binaries.keys().map(|binary_name| async move {
log::trace!("Fetching '{}' binaries ...", binary_name);

download_single_binary(binary_name, &cfg.oses, output_directory).await
download_single_binary(
binary_name,
&cfg.oses,
output_directory,
concurrent_downloads,
)
.await
}))
.buffer_unordered(CONCURRENT_PE_DOWNLOADS)
.buffer_unordered(concurrent_downloads)
.collect::<Vec<Result<Vec<DownloadedPEVersion>>>>()
.await
.into_iter()
Expand All @@ -42,22 +46,29 @@ pub async fn download_single_binary(
binary_name: &str,
os_descriptions: &[OSDescription],
output_directory: &Path,
concurrent_downloads: usize,
) -> Result<Vec<DownloadedPEVersion>> {
// Retrieve the index file for that PE file
let pe_index = winbindex::get_remote_index_for_pe(binary_name).await?;

// Download all requested versions of this PE file
Ok(download_pe_versions(os_descriptions, &pe_index, binary_name, output_directory).await)
Ok(download_pe_versions(
os_descriptions,
&pe_index,
binary_name,
output_directory,
concurrent_downloads,
)
.await)
}

async fn download_pe_versions(
os_descriptions: &[OSDescription],
pe_index: &serde_json::Value,
binary_name: &str,
output_directory: &Path,
concurrent_downloads: usize,
) -> Vec<DownloadedPEVersion> {
const CONCURRENT_PE_DOWNLOADS: usize = 16;

// Download all requested versions concurrently
futures::stream::iter(os_descriptions.iter().map(|os_desc| async {
let download_result = winbindex::download_pe_version(
Expand All @@ -82,7 +93,7 @@ async fn download_pe_versions(

download_result
}))
.buffer_unordered(CONCURRENT_PE_DOWNLOADS)
.buffer_unordered(concurrent_downloads)
// Ignore errors and simply skip the corresponding files
.filter_map(|result| async { result.ok() })
.collect()
Expand All @@ -92,16 +103,15 @@ async fn download_pe_versions(
pub async fn download_all_pdbs(
downloaded_pes: Vec<DownloadedPEVersion>,
output_directory: &Path,
concurrent_downloads: usize,
) -> Vec<(DownloadedPEVersion, Option<PathBuf>)> {
const CONCURRENT_PDB_DOWNLOADS: usize = 16;

// Download all requested versions concurrently
futures::stream::iter(
downloaded_pes.into_iter().map(|pe_version| async move {
download_single_pdb(pe_version, output_directory).await
}),
)
.buffer_unordered(CONCURRENT_PDB_DOWNLOADS)
.buffer_unordered(concurrent_downloads)
// Ignore errors and simply skip the corresponding files
.filter_map(|result: Result<(DownloadedPEVersion, Option<PathBuf>)>| async { result.ok() })
.collect()
Expand Down
17 changes: 13 additions & 4 deletions windiff_cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,15 @@ async fn low_storage_mode(opt: WinDiffOpt, cfg: WinDiffConfiguration) -> Result<
let tmp_directory_path = tmp_directory.path();

log::info!("Downloading binaries for '{}' ...", pe_name);
let downloaded_pes = download_single_binary(pe_name, &cfg.oses, tmp_directory_path).await?;
let mut downloaded_binaries = download_all_pdbs(downloaded_pes, tmp_directory_path).await;
let downloaded_pes = download_single_binary(
pe_name,
&cfg.oses,
tmp_directory_path,
opt.concurrent_downloads,
)
.await?;
let mut downloaded_binaries =
download_all_pdbs(downloaded_pes, tmp_directory_path, opt.concurrent_downloads).await;
// Extract information from PEs and generate databases for all versions
log::info!("Generating databases for '{}' ...", pe_name);
generate_databases(&cfg, &downloaded_binaries, false, &opt.output_directory).await?;
Expand All @@ -76,12 +83,14 @@ async fn normal_mode(opt: WinDiffOpt, cfg: WinDiffConfiguration) -> Result<()> {

// Download requested PEs
log::info!("Downloading PEs...");
let downloaded_pes = download_all_binaries(&cfg, tmp_directory_path).await?;
let downloaded_pes =
download_all_binaries(&cfg, tmp_directory_path, opt.concurrent_downloads).await?;
log::trace!("PEs downloaded!");

// Download PDBs
log::info!("Downloading PDBs...");
let downloaded_binaries = download_all_pdbs(downloaded_pes, tmp_directory_path).await;
let downloaded_binaries =
download_all_pdbs(downloaded_pes, tmp_directory_path, opt.concurrent_downloads).await;
log::trace!("PDBs downloaded!");

// Extract information from PEs
Expand Down

0 comments on commit ae43859

Please sign in to comment.