X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Ftools%2Fclippy%2Fclippy_dev%2Fsrc%2Flintcheck.rs;h=b806f5452846505b94b9a00e8666e7752a7a795a;hb=8250a2510dbbd826c0a79fbb8eca014a339ed3b9;hp=749a791b280e20f423759f270d2fae53ff671044;hpb=ed58a2b03b6284b070fae2349898b16df98b7765;p=rust.git diff --git a/src/tools/clippy/clippy_dev/src/lintcheck.rs b/src/tools/clippy/clippy_dev/src/lintcheck.rs index 749a791b280..b806f545284 100644 --- a/src/tools/clippy/clippy_dev/src/lintcheck.rs +++ b/src/tools/clippy/clippy_dev/src/lintcheck.rs @@ -11,20 +11,22 @@ use std::collections::HashMap; use std::process::Command; -use std::{fmt, fs::write, path::PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::{env, fmt, fs::write, path::PathBuf}; use clap::ArgMatches; +use rayon::prelude::*; use serde::{Deserialize, Serialize}; use serde_json::Value; -// use this to store the crates when interacting with the crates.toml file +/// List of sources to check, loaded from a .toml file #[derive(Debug, Serialize, Deserialize)] -struct CrateList { +struct SourceList { crates: HashMap, } -// crate data we stored in the toml, can have multiple versions per crate -// A single TomlCrate is laster mapped to several CrateSources in that case +/// A crate source stored inside the .toml +/// will be translated into on one of the `CrateSource` variants #[derive(Debug, Serialize, Deserialize)] struct TomlCrate { name: String, @@ -32,27 +34,42 @@ struct TomlCrate { git_url: Option, git_hash: Option, path: Option, + options: Option>, } -// represents an archive we download from crates.io, or a git repo, or a local repo -#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq)] +/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder +/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate` +#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)] enum CrateSource { - CratesIo { name: String, version: String }, - Git { name: String, url: String, commit: String }, - Path { name: String, path: PathBuf }, + CratesIo { + name: String, + version: String, + options: Option>, + }, + Git { + name: String, + url: String, + commit: String, + options: Option>, + }, + Path { + name: String, + path: PathBuf, + options: Option>, + }, } -// represents the extracted sourcecode of a crate -// we actually don't need to special-case git repos here because it does not matter for clippy, yay! -// (clippy only needs a simple path) +/// Represents the actual source code of a crate that we ran "cargo clippy" on #[derive(Debug)] struct Crate { version: String, name: String, // path to the extracted sources that clippy can check path: PathBuf, + options: Option>, } +/// A single warning that clippy issued while checking a `Crate` #[derive(Debug)] struct ClippyWarning { crate_name: String, @@ -62,7 +79,7 @@ struct ClippyWarning { column: String, linttype: String, message: String, - ice: bool, + is_ice: bool, } impl std::fmt::Display for ClippyWarning { @@ -76,9 +93,12 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { } impl CrateSource { + /// Makes the sources available on the disk for clippy to check. + /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or + /// copies a local folder fn download_and_extract(&self) -> Crate { match self { - CrateSource::CratesIo { name, version } => { + CrateSource::CratesIo { name, version, options } => { let extract_dir = PathBuf::from("target/lintcheck/crates"); let krate_download_dir = PathBuf::from("target/lintcheck/downloads"); @@ -110,9 +130,15 @@ fn download_and_extract(&self) -> Crate { version: version.clone(), name: name.clone(), path: extract_dir.join(format!("{}-{}/", name, version)), + options: options.clone(), } }, - CrateSource::Git { name, url, commit } => { + CrateSource::Git { + name, + url, + commit, + options, + } => { let repo_path = { let mut repo_path = PathBuf::from("target/lintcheck/crates"); // add a -git suffix in case we have the same crate from crates.io and a git repo @@ -122,27 +148,37 @@ fn download_and_extract(&self) -> Crate { // clone the repo if we have not done so if !repo_path.is_dir() { println!("Cloning {} and checking out {}", url, commit); - Command::new("git") + if !Command::new("git") .arg("clone") .arg(url) .arg(&repo_path) - .output() - .expect("Failed to clone git repo!"); + .status() + .expect("Failed to clone git repo!") + .success() + { + eprintln!("Failed to clone {} into {}", url, repo_path.display()) + } } // check out the commit/branch/whatever - Command::new("git") + if !Command::new("git") .arg("checkout") .arg(commit) - .output() - .expect("Failed to check out commit"); + .current_dir(&repo_path) + .status() + .expect("Failed to check out commit") + .success() + { + eprintln!("Failed to checkout {} of repo at {}", commit, repo_path.display()) + } Crate { version: commit.clone(), name: name.clone(), path: repo_path, + options: options.clone(), } }, - CrateSource::Path { name, path } => { + CrateSource::Path { name, path, options } => { use fs_extra::dir; // simply copy the entire directory into our target dir @@ -171,6 +207,7 @@ fn download_and_extract(&self) -> Crate { version: String::from("local"), name: name.clone(), path: crate_root, + options: options.clone(), } }, } @@ -178,24 +215,56 @@ fn download_and_extract(&self) -> Crate { } impl Crate { - fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec { - println!("Linting {} {}...", &self.name, &self.version); + /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy + /// issued + fn run_clippy_lints( + &self, + cargo_clippy_path: &PathBuf, + target_dir_index: &AtomicUsize, + thread_limit: usize, + total_crates_to_lint: usize, + ) -> Vec { + // advance the atomic index by one + let index = target_dir_index.fetch_add(1, Ordering::SeqCst); + // "loop" the index within 0..thread_limit + let target_dir_index = index % thread_limit; + let perc = ((index * 100) as f32 / total_crates_to_lint as f32) as u8; + + if thread_limit == 1 { + println!( + "{}/{} {}% Linting {} {}", + index, total_crates_to_lint, perc, &self.name, &self.version + ); + } else { + println!( + "{}/{} {}% Linting {} {} in target dir {:?}", + index, total_crates_to_lint, perc, &self.name, &self.version, target_dir_index + ); + } + let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap(); - let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir/"); + let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir"); + + let mut args = vec!["--", "--message-format=json", "--", "--cap-lints=warn"]; + + if let Some(options) = &self.options { + for opt in options { + args.push(opt); + } + } else { + args.extend(&["-Wclippy::pedantic", "-Wclippy::cargo"]) + } let all_output = std::process::Command::new(&cargo_clippy_path) - .env("CARGO_TARGET_DIR", shared_target_dir) + // use the looping index to create individual target dirs + .env( + "CARGO_TARGET_DIR", + shared_target_dir.join(format!("_{:?}", target_dir_index)), + ) // lint warnings will look like this: // src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter` - .args(&[ - "--", - "--message-format=json", - "--", - "--cap-lints=warn", - "-Wclippy::pedantic", - "-Wclippy::cargo", - ]) + .args(&args) .current_dir(&self.path) .output() .unwrap_or_else(|error| { @@ -211,28 +280,69 @@ fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec { let warnings: Vec = output_lines .into_iter() // get all clippy warnings and ICEs - .filter(|line| line.contains("clippy::") || line.contains("internal compiler error: ")) + .filter(|line| filter_clippy_warnings(&line)) .map(|json_msg| parse_json_message(json_msg, &self)) .collect(); warnings } } +/// takes a single json-formatted clippy warnings and returns true (we are interested in that line) +/// or false (we aren't) +fn filter_clippy_warnings(line: &str) -> bool { + // we want to collect ICEs because clippy might have crashed. + // these are summarized later + if line.contains("internal compiler error: ") { + return true; + } + // in general, we want all clippy warnings + // however due to some kind of bug, sometimes there are absolute paths + // to libcore files inside the message + // or we end up with cargo-metadata output (https://github.com/rust-lang/rust-clippy/issues/6508) + + // filter out these message to avoid unnecessary noise in the logs + if line.contains("clippy::") + && !(line.contains("could not read cargo metadata") + || (line.contains(".rustup") && line.contains("toolchains"))) + { + return true; + } + false +} + +/// get the path to lintchecks crate sources .toml file, check LINTCHECK_TOML first but if it's +/// empty use the default path +fn lintcheck_config_toml(toml_path: Option<&str>) -> PathBuf { + PathBuf::from( + env::var("LINTCHECK_TOML").unwrap_or( + toml_path + .clone() + .unwrap_or("clippy_dev/lintcheck_crates.toml") + .to_string(), + ), + ) +} + +/// Builds clippy inside the repo to make sure we have a clippy executable we can use. fn build_clippy() { - Command::new("cargo") + let status = Command::new("cargo") .arg("build") - .output() + .status() .expect("Failed to build clippy!"); + if !status.success() { + eprintln!("Error: Failed to compile Clippy!"); + std::process::exit(1); + } } -// get a list of CrateSources we want to check from a "lintcheck_crates.toml" file. +/// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy fn read_crates(toml_path: Option<&str>) -> (String, Vec) { - let toml_path = PathBuf::from(toml_path.unwrap_or("clippy_dev/lintcheck_crates.toml")); + let toml_path = lintcheck_config_toml(toml_path); // save it so that we can use the name of the sources.toml as name for the logfile later. let toml_filename = toml_path.file_stem().unwrap().to_str().unwrap().to_string(); let toml_content: String = std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display())); - let crate_list: CrateList = + let crate_list: SourceList = toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e)); // parse the hashmap of the toml file into a list of crates let tomlcrates: Vec = crate_list @@ -249,6 +359,7 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec) { crate_sources.push(CrateSource::Path { name: tk.name.clone(), path: PathBuf::from(path), + options: tk.options.clone(), }); } @@ -258,6 +369,7 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec) { crate_sources.push(CrateSource::CratesIo { name: tk.name.clone(), version: ver.to_string(), + options: tk.options.clone(), }); }) } @@ -267,6 +379,7 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec) { name: tk.name.clone(), url: tk.git_url.clone().unwrap(), commit: tk.git_hash.clone().unwrap(), + options: tk.options.clone(), }); } // if we have a version as well as a git data OR only one git data, something is funky @@ -283,10 +396,13 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec) { unreachable!("Failed to translate TomlCrate into CrateSource!"); } }); + // sort the crates + crate_sources.sort(); + (toml_filename, crate_sources) } -// extract interesting data from a json lint message +/// Parse the json output of clippy and return a `ClippyWarning` fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning { let jmsg: Value = serde_json::from_str(&json_message).unwrap_or_else(|e| panic!("Failed to parse json:\n{:?}", e)); @@ -307,18 +423,84 @@ fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning { .into(), linttype: jmsg["message"]["code"]["code"].to_string().trim_matches('"').into(), message: jmsg["message"]["message"].to_string().trim_matches('"').into(), - ice: json_message.contains("internal compiler error: "), + is_ice: json_message.contains("internal compiler error: "), } } -// the main fn -pub fn run(clap_config: &ArgMatches) { - let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy"); +/// Generate a short list of occuring lints-types and their count +fn gather_stats(clippy_warnings: &[ClippyWarning]) -> String { + // count lint type occurrences + let mut counter: HashMap<&String, usize> = HashMap::new(); + clippy_warnings + .iter() + .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1); + // collect into a tupled list for sorting + let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect(); + // sort by "000{count} {clippy::lintname}" + // to not have a lint with 200 and 2 warnings take the same spot + stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint)); + + stats + .iter() + .map(|(lint, count)| format!("{} {}\n", lint, count)) + .collect::() +} + +/// check if the latest modification of the logfile is older than the modification date of the +/// clippy binary, if this is true, we should clean the lintchec shared target directory and recheck +fn lintcheck_needs_rerun(toml_path: Option<&str>) -> bool { + let clippy_modified: std::time::SystemTime = { + let mut times = ["target/debug/clippy-driver", "target/debug/cargo-clippy"] + .iter() + .map(|p| { + std::fs::metadata(p) + .expect("failed to get metadata of file") + .modified() + .expect("failed to get modification date") + }); + // the lates modification of either of the binaries + std::cmp::max(times.next().unwrap(), times.next().unwrap()) + }; + + let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_config_toml(toml_path)) + .expect("failed to get metadata of file") + .modified() + .expect("failed to get modification date"); + + // if clippys modification time is bigger (older) than the logs mod time, we need to rerun lintcheck + clippy_modified > logs_modified +} + +/// lintchecks `main()` function +pub fn run(clap_config: &ArgMatches) { println!("Compiling clippy..."); build_clippy(); println!("Done compiling"); + let clap_toml_path = clap_config.value_of("crates-toml"); + + // if the clippy bin is newer than our logs, throw away target dirs to force clippy to + // refresh the logs + if lintcheck_needs_rerun(clap_toml_path) { + let shared_target_dir = "target/lintcheck/shared_target_dir"; + match std::fs::metadata(&shared_target_dir) { + Ok(metadata) => { + if metadata.is_dir() { + println!("Clippy is newer than lint check logs, clearing lintcheck shared target dir..."); + std::fs::remove_dir_all(&shared_target_dir) + .expect("failed to remove target/lintcheck/shared_target_dir"); + } + }, + Err(_) => { // dir probably does not exist, don't remove anything + }, + } + } + + let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy") + .canonicalize() + .expect("failed to canonicalize path to clippy binary"); + // assert that clippy is found assert!( cargo_clippy_path.is_file(), @@ -335,7 +517,7 @@ pub fn run(clap_config: &ArgMatches) { // download and extract the crates, then run clippy on them and collect clippys warnings // flatten into one big list of warnings - let (filename, crates) = read_crates(clap_config.value_of("crates-toml")); + let (filename, crates) = read_crates(clap_toml_path); let clippy_warnings: Vec = if let Some(only_one_crate) = clap_config.value_of("only") { // if we don't have the specified crate in the .toml, throw an error @@ -359,45 +541,60 @@ pub fn run(clap_config: &ArgMatches) { .into_iter() .map(|krate| krate.download_and_extract()) .filter(|krate| krate.name == only_one_crate) - .map(|krate| krate.run_clippy_lints(&cargo_clippy_path)) + .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1)) .flatten() .collect() } else { + let counter = std::sync::atomic::AtomicUsize::new(0); + + // Ask rayon for thread count. Assume that half of that is the number of physical cores + // Use one target dir for each core so that we can run N clippys in parallel. + // We need to use different target dirs because cargo would lock them for a single build otherwise, + // killing the parallelism. However this also means that deps will only be reused half/a + // quarter of the time which might result in a longer wall clock runtime + + // This helps when we check many small crates with dep-trees that don't have a lot of branches in + // order to achive some kind of parallelism + + // by default, use a single thread + let num_cpus = match clap_config.value_of("threads") { + Some(threads) => { + let threads: usize = threads + .parse() + .expect(&format!("Failed to parse '{}' to a digit", threads)); + if threads == 0 { + // automatic choice + // Rayon seems to return thread count so half that for core count + (rayon::current_num_threads() / 2) as usize + } else { + threads + } + }, + // no -j passed, use a single thread + None => 1, + }; + + let num_crates = crates.len(); + // check all crates (default) crates - .into_iter() + .into_par_iter() .map(|krate| krate.download_and_extract()) - .map(|krate| krate.run_clippy_lints(&cargo_clippy_path)) + .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates)) .flatten() .collect() }; - // generate some stats: + // generate some stats + let stats_formatted = gather_stats(&clippy_warnings); // grab crashes/ICEs, save the crate name and the ice message let ices: Vec<(&String, &String)> = clippy_warnings .iter() - .filter(|warning| warning.ice) + .filter(|warning| warning.is_ice) .map(|w| (&w.crate_name, &w.message)) .collect(); - // count lint type occurrences - let mut counter: HashMap<&String, usize> = HashMap::new(); - clippy_warnings - .iter() - .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1); - - // collect into a tupled list for sorting - let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect(); - // sort by "000{count} {clippy::lintname}" - // to not have a lint with 200 and 2 warnings take the same spot - stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint)); - - let stats_formatted: String = stats - .iter() - .map(|(lint, count)| format!("{} {}\n", lint, count)) - .collect::(); - let mut all_msgs: Vec = clippy_warnings.iter().map(|warning| warning.to_string()).collect(); all_msgs.sort(); all_msgs.push("\n\n\n\nStats\n\n".into()); @@ -411,5 +608,6 @@ pub fn run(clap_config: &ArgMatches) { .for_each(|(cratename, msg)| text.push_str(&format!("{}: '{}'", cratename, msg))); let file = format!("lintcheck-logs/{}_logs.txt", filename); + println!("Writing logs to {}", file); write(file, text).unwrap(); }