Skip to content

Commit

Permalink
Auto merge of rust-lang#6813 - matthiaskrgr:lintcheck_refactor, r=fli…
Browse files Browse the repository at this point in the history
…p1995

lintcheck, do some refactoring and add more sources

refactor: add a Config object
don't run in parallel mode by default (it didn't make sense because cargo would lock the shared target dir anyway)
show full paths (from repo root) to the source files in clippy warnings so we can just copy the path from the logfile
fix more bugs
add more crates by dtolnay and embark to the sources toml

changelog: lintcheck: refactor some code and add more sources
  • Loading branch information
bors committed Mar 1, 2021
2 parents ac2f041 + 25f9098 commit 5ae1e17
Show file tree
Hide file tree
Showing 3 changed files with 3,626 additions and 3,471 deletions.
14 changes: 13 additions & 1 deletion clippy_dev/lintcheck_crates.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,22 @@ bitflags = {name = "bitflags", versions = ['1.2.1']}
libc = {name = "libc", versions = ['0.2.81']}
log = {name = "log", versions = ['0.4.11']}
proc-macro2 = {name = "proc-macro2", versions = ['1.0.24']}
puffin = {name = "puffin", git_url = "https://github.com/EmbarkStudios/puffin", git_hash = "02dd4a3"}
quote = {name = "quote", versions = ['1.0.7']}
rand = {name = "rand", versions = ['0.7.3']}
rand_core = {name = "rand_core", versions = ['0.6.0']}
regex = {name = "regex", versions = ['1.3.2']}
syn = {name = "syn", versions = ['1.0.54']}
unicode-xid = {name = "unicode-xid", versions = ['0.2.1']}
# some more of dtolnays crates
anyhow = {name = "anyhow", versions = ['1.0.38']}
async-trait = {name = "async-trait", versions = ['0.1.42']}
cxx = {name = "cxx", versions = ['1.0.32']}
ryu = {name = "ryu", version = ['1.0.5']}
serde_yaml = {name = "serde_yaml", versions = ['0.8.17']}
thiserror = {name = "thiserror", versions = ['1.0.24']}
# some embark crates, there are other interesting crates but
# unfortunately adding them increases lintcheck runtime drastically
cfg-expr = {name = "cfg-expr", versions = ['0.7.1']}
puffin = {name = "puffin", git_url = "https://github.com/EmbarkStudios/puffin", git_hash = "02dd4a3"}
rpmalloc = {name = "rpmalloc", versions = ['0.2.0']}
tame-oidc = {name = "tame-oidc", versions = ['0.1.0']}
218 changes: 129 additions & 89 deletions clippy_dev/src/lintcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Value;

const CLIPPY_DRIVER_PATH: &str = "target/debug/clippy-driver";
const CARGO_CLIPPY_PATH: &str = "target/debug/cargo-clippy";

const LINTCHECK_DOWNLOADS: &str = "target/lintcheck/downloads";
const LINTCHECK_SOURCES: &str = "target/lintcheck/sources";

/// List of sources to check, loaded from a .toml file
#[derive(Debug, Serialize, Deserialize)]
struct SourceList {
Expand Down Expand Up @@ -86,7 +92,7 @@ impl std::fmt::Display for ClippyWarning {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(
f,
r#"{}-{}/{}:{}:{} {} "{}""#,
r#"target/lintcheck/sources/{}-{}/{}:{}:{} {} "{}""#,
&self.crate_name, &self.crate_version, &self.file, &self.line, &self.column, &self.linttype, &self.message
)
}
Expand All @@ -99,8 +105,8 @@ impl CrateSource {
fn download_and_extract(&self) -> Crate {
match self {
CrateSource::CratesIo { name, version, options } => {
let extract_dir = PathBuf::from("target/lintcheck/crates");
let krate_download_dir = PathBuf::from("target/lintcheck/downloads");
let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);

// url to download the crate from crates.io
let url = format!("https://crates.io/api/v1/crates/{}/{}/download", name, version);
Expand Down Expand Up @@ -140,7 +146,7 @@ impl CrateSource {
options,
} => {
let repo_path = {
let mut repo_path = PathBuf::from("target/lintcheck/crates");
let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
// add a -git suffix in case we have the same crate from crates.io and a git repo
repo_path.push(format!("{}-git", name));
repo_path
Expand Down Expand Up @@ -182,7 +188,7 @@ impl CrateSource {
use fs_extra::dir;

// simply copy the entire directory into our target dir
let copy_dest = PathBuf::from("target/lintcheck/crates/");
let copy_dest = PathBuf::from(format!("{}/", LINTCHECK_SOURCES));

// the source path of the crate we copied, ${copy_dest}/crate_name
let crate_root = copy_dest.join(name); // .../crates/local_crate
Expand Down Expand Up @@ -287,6 +293,64 @@ impl Crate {
}
}

#[derive(Debug)]
struct LintcheckConfig {
// max number of jobs to spawn (default 1)
max_jobs: usize,
// we read the sources to check from here
sources_toml_path: PathBuf,
// we save the clippy lint results here
lintcheck_results_path: PathBuf,
}

impl LintcheckConfig {
fn from_clap(clap_config: &ArgMatches) -> Self {
// first, check if we got anything passed via the LINTCHECK_TOML env var,
// if not, ask clap if we got any value for --crates-toml <foo>
// if not, use the default "clippy_dev/lintcheck_crates.toml"
let sources_toml = env::var("LINTCHECK_TOML").unwrap_or(
clap_config
.value_of("crates-toml")
.clone()
.unwrap_or("clippy_dev/lintcheck_crates.toml")
.to_string(),
);

let sources_toml_path = PathBuf::from(sources_toml);

// for the path where we save the lint results, get the filename without extension (so for
// wasd.toml, use "wasd"...)
let filename: PathBuf = sources_toml_path.file_stem().unwrap().into();
let lintcheck_results_path = PathBuf::from(format!("lintcheck-logs/{}_logs.txt", filename.display()));

// look at the --threads arg, if 0 is passed, ask rayon rayon how many threads it would spawn and
// use half of that for the physical core count
// by default use a single thread
let max_jobs = match clap_config.value_of("threads") {
Some(threads) => {
let threads: usize = threads
.parse()
.expect(&format!("Failed to parse '{}' to a digit", threads));
if threads == 0 {
// automatic choice
// Rayon seems to return thread count so half that for core count
(rayon::current_num_threads() / 2) as usize
} else {
threads
}
},
// no -j passed, use a single thread
None => 1,
};

LintcheckConfig {
max_jobs,
sources_toml_path,
lintcheck_results_path,
}
}
}

/// takes a single json-formatted clippy warnings and returns true (we are interested in that line)
/// or false (we aren't)
fn filter_clippy_warnings(line: &str) -> bool {
Expand All @@ -310,19 +374,6 @@ fn filter_clippy_warnings(line: &str) -> bool {
false
}

/// get the path to lintchecks crate sources .toml file, check LINTCHECK_TOML first but if it's
/// empty use the default path
fn lintcheck_config_toml(toml_path: Option<&str>) -> PathBuf {
PathBuf::from(
env::var("LINTCHECK_TOML").unwrap_or(
toml_path
.clone()
.unwrap_or("clippy_dev/lintcheck_crates.toml")
.to_string(),
),
)
}

/// Builds clippy inside the repo to make sure we have a clippy executable we can use.
fn build_clippy() {
let status = Command::new("cargo")
Expand All @@ -336,9 +387,7 @@ fn build_clippy() {
}

/// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy
fn read_crates(toml_path: &PathBuf) -> (String, Vec<CrateSource>) {
// save it so that we can use the name of the sources.toml as name for the logfile later.
let toml_filename = toml_path.file_stem().unwrap().to_str().unwrap().to_string();
fn read_crates(toml_path: &PathBuf) -> Vec<CrateSource> {
let toml_content: String =
std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
let crate_list: SourceList =
Expand Down Expand Up @@ -398,7 +447,7 @@ fn read_crates(toml_path: &PathBuf) -> (String, Vec<CrateSource>) {
// sort the crates
crate_sources.sort();

(toml_filename, crate_sources)
crate_sources
}

/// Parse the json output of clippy and return a `ClippyWarning`
Expand Down Expand Up @@ -450,42 +499,39 @@ fn gather_stats(clippy_warnings: &[ClippyWarning]) -> (String, HashMap<&String,

/// check if the latest modification of the logfile is older than the modification date of the
/// clippy binary, if this is true, we should clean the lintchec shared target directory and recheck
fn lintcheck_needs_rerun(toml_path: &PathBuf) -> bool {
fn lintcheck_needs_rerun(lintcheck_logs_path: &PathBuf) -> bool {
let clippy_modified: std::time::SystemTime = {
let mut times = ["target/debug/clippy-driver", "target/debug/cargo-clippy"]
.iter()
.map(|p| {
std::fs::metadata(p)
.expect("failed to get metadata of file")
.modified()
.expect("failed to get modification date")
});
let mut times = [CLIPPY_DRIVER_PATH, CARGO_CLIPPY_PATH].iter().map(|p| {
std::fs::metadata(p)
.expect("failed to get metadata of file")
.modified()
.expect("failed to get modification date")
});
// the oldest modification of either of the binaries
std::cmp::min(times.next().unwrap(), times.next().unwrap())
std::cmp::max(times.next().unwrap(), times.next().unwrap())
};

let logs_modified: std::time::SystemTime = std::fs::metadata(toml_path)
let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_logs_path)
.expect("failed to get metadata of file")
.modified()
.expect("failed to get modification date");

// if clippys modification time is smaller (older) than the logs mod time, we need to rerun
// lintcheck
clippy_modified < logs_modified
// time is represented in seconds since X
// logs_modified 2 and clippy_modified 5 means clippy binary is older and we need to recheck
logs_modified < clippy_modified
}

/// lintchecks `main()` function
pub fn run(clap_config: &ArgMatches) {
let config = LintcheckConfig::from_clap(clap_config);

println!("Compiling clippy...");
build_clippy();
println!("Done compiling");

let clap_toml_path: Option<&str> = clap_config.value_of("crates-toml");
let toml_path: PathBuf = lintcheck_config_toml(clap_toml_path);

// if the clippy bin is newer than our logs, throw away target dirs to force clippy to
// refresh the logs
if lintcheck_needs_rerun(&toml_path) {
if lintcheck_needs_rerun(&config.lintcheck_results_path) {
let shared_target_dir = "target/lintcheck/shared_target_dir";
match std::fs::metadata(&shared_target_dir) {
Ok(metadata) => {
Expand All @@ -495,12 +541,11 @@ pub fn run(clap_config: &ArgMatches) {
.expect("failed to remove target/lintcheck/shared_target_dir");
}
},
Err(_) => { // dir probably does not exist, don't remove anything
},
Err(_) => { /* dir probably does not exist, don't remove anything */ },
}
}

let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy")
let cargo_clippy_path: PathBuf = PathBuf::from(CARGO_CLIPPY_PATH)
.canonicalize()
.expect("failed to canonicalize path to clippy binary");

Expand All @@ -511,7 +556,7 @@ pub fn run(clap_config: &ArgMatches) {
cargo_clippy_path.display()
);

let clippy_ver = std::process::Command::new("target/debug/cargo-clippy")
let clippy_ver = std::process::Command::new(CARGO_CLIPPY_PATH)
.arg("--version")
.output()
.map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
Expand All @@ -520,9 +565,10 @@ pub fn run(clap_config: &ArgMatches) {
// download and extract the crates, then run clippy on them and collect clippys warnings
// flatten into one big list of warnings

let (filename, crates) = read_crates(&toml_path);
let file = format!("lintcheck-logs/{}_logs.txt", filename);
let old_stats = read_stats_from_file(&file);
let crates = read_crates(&config.sources_toml_path);
let old_stats = read_stats_from_file(&config.lintcheck_results_path);

let counter = AtomicUsize::new(1);

let clippy_warnings: Vec<ClippyWarning> = if let Some(only_one_crate) = clap_config.value_of("only") {
// if we don't have the specified crate in the .toml, throw an error
Expand Down Expand Up @@ -550,44 +596,39 @@ pub fn run(clap_config: &ArgMatches) {
.flatten()
.collect()
} else {
let counter = std::sync::atomic::AtomicUsize::new(0);

// Ask rayon for thread count. Assume that half of that is the number of physical cores
// Use one target dir for each core so that we can run N clippys in parallel.
// We need to use different target dirs because cargo would lock them for a single build otherwise,
// killing the parallelism. However this also means that deps will only be reused half/a
// quarter of the time which might result in a longer wall clock runtime

// This helps when we check many small crates with dep-trees that don't have a lot of branches in
// order to achive some kind of parallelism

// by default, use a single thread
let num_cpus = match clap_config.value_of("threads") {
Some(threads) => {
let threads: usize = threads
.parse()
.expect(&format!("Failed to parse '{}' to a digit", threads));
if threads == 0 {
// automatic choice
// Rayon seems to return thread count so half that for core count
(rayon::current_num_threads() / 2) as usize
} else {
threads
}
},
// no -j passed, use a single thread
None => 1,
};

let num_crates = crates.len();

// check all crates (default)
crates
.into_par_iter()
.map(|krate| krate.download_and_extract())
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
.flatten()
.collect()
if config.max_jobs > 1 {
// run parallel with rayon

// Ask rayon for thread count. Assume that half of that is the number of physical cores
// Use one target dir for each core so that we can run N clippys in parallel.
// We need to use different target dirs because cargo would lock them for a single build otherwise,
// killing the parallelism. However this also means that deps will only be reused half/a
// quarter of the time which might result in a longer wall clock runtime

// This helps when we check many small crates with dep-trees that don't have a lot of branches in
// order to achive some kind of parallelism

// by default, use a single thread
let num_cpus = config.max_jobs;
let num_crates = crates.len();

// check all crates (default)
crates
.into_par_iter()
.map(|krate| krate.download_and_extract())
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
.flatten()
.collect()
} else {
// run sequential
let num_crates = crates.len();
crates
.into_iter()
.map(|krate| krate.download_and_extract())
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, 1, num_crates))
.flatten()
.collect()
}
};

// generate some stats
Expand All @@ -612,15 +653,14 @@ pub fn run(clap_config: &ArgMatches) {
ices.iter()
.for_each(|(cratename, msg)| text.push_str(&format!("{}: '{}'", cratename, msg)));

println!("Writing logs to {}", file);
write(&file, text).unwrap();
println!("Writing logs to {}", config.lintcheck_results_path.display());
write(&config.lintcheck_results_path, text).unwrap();

print_stats(old_stats, new_stats);
}

/// read the previous stats from the lintcheck-log file
fn read_stats_from_file(file_path: &String) -> HashMap<String, usize> {
let file_path = PathBuf::from(file_path);
fn read_stats_from_file(file_path: &PathBuf) -> HashMap<String, usize> {
let file_content: String = match std::fs::read_to_string(file_path).ok() {
Some(content) => content,
None => {
Expand Down
Loading

0 comments on commit 5ae1e17

Please sign in to comment.