use std::collections::HashMap;
use std::process::Command;
-use std::{fmt, fs::write, path::PathBuf};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::{env, fmt, fs::write, path::PathBuf};
use clap::ArgMatches;
+use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Value;
-// use this to store the crates when interacting with the crates.toml file
+/// List of sources to check, loaded from a .toml file
#[derive(Debug, Serialize, Deserialize)]
-struct CrateList {
+struct SourceList {
crates: HashMap<String, TomlCrate>,
}
-// crate data we stored in the toml, can have multiple versions per crate
-// A single TomlCrate is laster mapped to several CrateSources in that case
+/// A crate source stored inside the .toml
+/// will be translated into on one of the `CrateSource` variants
#[derive(Debug, Serialize, Deserialize)]
struct TomlCrate {
name: String,
git_url: Option<String>,
git_hash: Option<String>,
path: Option<String>,
+ options: Option<Vec<String>>,
}
-// represents an archive we download from crates.io, or a git repo, or a local repo
-#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq)]
+/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
+/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
+#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
enum CrateSource {
- CratesIo { name: String, version: String },
- Git { name: String, url: String, commit: String },
- Path { name: String, path: PathBuf },
+ CratesIo {
+ name: String,
+ version: String,
+ options: Option<Vec<String>>,
+ },
+ Git {
+ name: String,
+ url: String,
+ commit: String,
+ options: Option<Vec<String>>,
+ },
+ Path {
+ name: String,
+ path: PathBuf,
+ options: Option<Vec<String>>,
+ },
}
-// represents the extracted sourcecode of a crate
-// we actually don't need to special-case git repos here because it does not matter for clippy, yay!
-// (clippy only needs a simple path)
+/// Represents the actual source code of a crate that we ran "cargo clippy" on
#[derive(Debug)]
struct Crate {
version: String,
name: String,
// path to the extracted sources that clippy can check
path: PathBuf,
+ options: Option<Vec<String>>,
}
+/// A single warning that clippy issued while checking a `Crate`
#[derive(Debug)]
struct ClippyWarning {
crate_name: String,
column: String,
linttype: String,
message: String,
- ice: bool,
+ is_ice: bool,
}
impl std::fmt::Display for ClippyWarning {
}
impl CrateSource {
+ /// Makes the sources available on the disk for clippy to check.
+ /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
+ /// copies a local folder
fn download_and_extract(&self) -> Crate {
match self {
- CrateSource::CratesIo { name, version } => {
+ CrateSource::CratesIo { name, version, options } => {
let extract_dir = PathBuf::from("target/lintcheck/crates");
let krate_download_dir = PathBuf::from("target/lintcheck/downloads");
version: version.clone(),
name: name.clone(),
path: extract_dir.join(format!("{}-{}/", name, version)),
+ options: options.clone(),
}
},
- CrateSource::Git { name, url, commit } => {
+ CrateSource::Git {
+ name,
+ url,
+ commit,
+ options,
+ } => {
let repo_path = {
let mut repo_path = PathBuf::from("target/lintcheck/crates");
// add a -git suffix in case we have the same crate from crates.io and a git repo
// clone the repo if we have not done so
if !repo_path.is_dir() {
println!("Cloning {} and checking out {}", url, commit);
- Command::new("git")
+ if !Command::new("git")
.arg("clone")
.arg(url)
.arg(&repo_path)
- .output()
- .expect("Failed to clone git repo!");
+ .status()
+ .expect("Failed to clone git repo!")
+ .success()
+ {
+ eprintln!("Failed to clone {} into {}", url, repo_path.display())
+ }
}
// check out the commit/branch/whatever
- Command::new("git")
+ if !Command::new("git")
.arg("checkout")
.arg(commit)
- .output()
- .expect("Failed to check out commit");
+ .current_dir(&repo_path)
+ .status()
+ .expect("Failed to check out commit")
+ .success()
+ {
+ eprintln!("Failed to checkout {} of repo at {}", commit, repo_path.display())
+ }
Crate {
version: commit.clone(),
name: name.clone(),
path: repo_path,
+ options: options.clone(),
}
},
- CrateSource::Path { name, path } => {
+ CrateSource::Path { name, path, options } => {
use fs_extra::dir;
// simply copy the entire directory into our target dir
version: String::from("local"),
name: name.clone(),
path: crate_root,
+ options: options.clone(),
}
},
}
}
impl Crate {
- fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec<ClippyWarning> {
- println!("Linting {} {}...", &self.name, &self.version);
+ /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
+ /// issued
+ fn run_clippy_lints(
+ &self,
+ cargo_clippy_path: &PathBuf,
+ target_dir_index: &AtomicUsize,
+ thread_limit: usize,
+ total_crates_to_lint: usize,
+ ) -> Vec<ClippyWarning> {
+ // advance the atomic index by one
+ let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
+ // "loop" the index within 0..thread_limit
+ let target_dir_index = index % thread_limit;
+ let perc = ((index * 100) as f32 / total_crates_to_lint as f32) as u8;
+
+ if thread_limit == 1 {
+ println!(
+ "{}/{} {}% Linting {} {}",
+ index, total_crates_to_lint, perc, &self.name, &self.version
+ );
+ } else {
+ println!(
+ "{}/{} {}% Linting {} {} in target dir {:?}",
+ index, total_crates_to_lint, perc, &self.name, &self.version, target_dir_index
+ );
+ }
+
let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
- let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir/");
+ let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");
+
+ let mut args = vec!["--", "--message-format=json", "--", "--cap-lints=warn"];
+
+ if let Some(options) = &self.options {
+ for opt in options {
+ args.push(opt);
+ }
+ } else {
+ args.extend(&["-Wclippy::pedantic", "-Wclippy::cargo"])
+ }
let all_output = std::process::Command::new(&cargo_clippy_path)
- .env("CARGO_TARGET_DIR", shared_target_dir)
+ // use the looping index to create individual target dirs
+ .env(
+ "CARGO_TARGET_DIR",
+ shared_target_dir.join(format!("_{:?}", target_dir_index)),
+ )
// lint warnings will look like this:
// src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
- .args(&[
- "--",
- "--message-format=json",
- "--",
- "--cap-lints=warn",
- "-Wclippy::pedantic",
- "-Wclippy::cargo",
- ])
+ .args(&args)
.current_dir(&self.path)
.output()
.unwrap_or_else(|error| {
let warnings: Vec<ClippyWarning> = output_lines
.into_iter()
// get all clippy warnings and ICEs
- .filter(|line| line.contains("clippy::") || line.contains("internal compiler error: "))
+ .filter(|line| filter_clippy_warnings(&line))
.map(|json_msg| parse_json_message(json_msg, &self))
.collect();
warnings
}
}
+/// takes a single json-formatted clippy warnings and returns true (we are interested in that line)
+/// or false (we aren't)
+fn filter_clippy_warnings(line: &str) -> bool {
+ // we want to collect ICEs because clippy might have crashed.
+ // these are summarized later
+ if line.contains("internal compiler error: ") {
+ return true;
+ }
+ // in general, we want all clippy warnings
+ // however due to some kind of bug, sometimes there are absolute paths
+ // to libcore files inside the message
+ // or we end up with cargo-metadata output (https://github.com/rust-lang/rust-clippy/issues/6508)
+
+ // filter out these message to avoid unnecessary noise in the logs
+ if line.contains("clippy::")
+ && !(line.contains("could not read cargo metadata")
+ || (line.contains(".rustup") && line.contains("toolchains")))
+ {
+ return true;
+ }
+ false
+}
+
+/// get the path to lintchecks crate sources .toml file, check LINTCHECK_TOML first but if it's
+/// empty use the default path
+fn lintcheck_config_toml(toml_path: Option<&str>) -> PathBuf {
+ PathBuf::from(
+ env::var("LINTCHECK_TOML").unwrap_or(
+ toml_path
+ .clone()
+ .unwrap_or("clippy_dev/lintcheck_crates.toml")
+ .to_string(),
+ ),
+ )
+}
+
+/// Builds clippy inside the repo to make sure we have a clippy executable we can use.
fn build_clippy() {
- Command::new("cargo")
+ let status = Command::new("cargo")
.arg("build")
- .output()
+ .status()
.expect("Failed to build clippy!");
+ if !status.success() {
+ eprintln!("Error: Failed to compile Clippy!");
+ std::process::exit(1);
+ }
}
-// get a list of CrateSources we want to check from a "lintcheck_crates.toml" file.
+/// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy
fn read_crates(toml_path: Option<&str>) -> (String, Vec<CrateSource>) {
- let toml_path = PathBuf::from(toml_path.unwrap_or("clippy_dev/lintcheck_crates.toml"));
+ let toml_path = lintcheck_config_toml(toml_path);
// save it so that we can use the name of the sources.toml as name for the logfile later.
let toml_filename = toml_path.file_stem().unwrap().to_str().unwrap().to_string();
let toml_content: String =
std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
- let crate_list: CrateList =
+ let crate_list: SourceList =
toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e));
// parse the hashmap of the toml file into a list of crates
let tomlcrates: Vec<TomlCrate> = crate_list
crate_sources.push(CrateSource::Path {
name: tk.name.clone(),
path: PathBuf::from(path),
+ options: tk.options.clone(),
});
}
crate_sources.push(CrateSource::CratesIo {
name: tk.name.clone(),
version: ver.to_string(),
+ options: tk.options.clone(),
});
})
}
name: tk.name.clone(),
url: tk.git_url.clone().unwrap(),
commit: tk.git_hash.clone().unwrap(),
+ options: tk.options.clone(),
});
}
// if we have a version as well as a git data OR only one git data, something is funky
unreachable!("Failed to translate TomlCrate into CrateSource!");
}
});
+ // sort the crates
+ crate_sources.sort();
+
(toml_filename, crate_sources)
}
-// extract interesting data from a json lint message
+/// Parse the json output of clippy and return a `ClippyWarning`
fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning {
let jmsg: Value = serde_json::from_str(&json_message).unwrap_or_else(|e| panic!("Failed to parse json:\n{:?}", e));
.into(),
linttype: jmsg["message"]["code"]["code"].to_string().trim_matches('"').into(),
message: jmsg["message"]["message"].to_string().trim_matches('"').into(),
- ice: json_message.contains("internal compiler error: "),
+ is_ice: json_message.contains("internal compiler error: "),
}
}
-// the main fn
-pub fn run(clap_config: &ArgMatches) {
- let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy");
+/// Generate a short list of occuring lints-types and their count
+fn gather_stats(clippy_warnings: &[ClippyWarning]) -> String {
+ // count lint type occurrences
+ let mut counter: HashMap<&String, usize> = HashMap::new();
+ clippy_warnings
+ .iter()
+ .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1);
+ // collect into a tupled list for sorting
+ let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
+ // sort by "000{count} {clippy::lintname}"
+ // to not have a lint with 200 and 2 warnings take the same spot
+ stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
+
+ stats
+ .iter()
+ .map(|(lint, count)| format!("{} {}\n", lint, count))
+ .collect::<String>()
+}
+
+/// check if the latest modification of the logfile is older than the modification date of the
+/// clippy binary, if this is true, we should clean the lintchec shared target directory and recheck
+fn lintcheck_needs_rerun(toml_path: Option<&str>) -> bool {
+ let clippy_modified: std::time::SystemTime = {
+ let mut times = ["target/debug/clippy-driver", "target/debug/cargo-clippy"]
+ .iter()
+ .map(|p| {
+ std::fs::metadata(p)
+ .expect("failed to get metadata of file")
+ .modified()
+ .expect("failed to get modification date")
+ });
+ // the lates modification of either of the binaries
+ std::cmp::max(times.next().unwrap(), times.next().unwrap())
+ };
+
+ let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_config_toml(toml_path))
+ .expect("failed to get metadata of file")
+ .modified()
+ .expect("failed to get modification date");
+
+ // if clippys modification time is bigger (older) than the logs mod time, we need to rerun lintcheck
+ clippy_modified > logs_modified
+}
+
+/// lintchecks `main()` function
+pub fn run(clap_config: &ArgMatches) {
println!("Compiling clippy...");
build_clippy();
println!("Done compiling");
+ let clap_toml_path = clap_config.value_of("crates-toml");
+
+ // if the clippy bin is newer than our logs, throw away target dirs to force clippy to
+ // refresh the logs
+ if lintcheck_needs_rerun(clap_toml_path) {
+ let shared_target_dir = "target/lintcheck/shared_target_dir";
+ match std::fs::metadata(&shared_target_dir) {
+ Ok(metadata) => {
+ if metadata.is_dir() {
+ println!("Clippy is newer than lint check logs, clearing lintcheck shared target dir...");
+ std::fs::remove_dir_all(&shared_target_dir)
+ .expect("failed to remove target/lintcheck/shared_target_dir");
+ }
+ },
+ Err(_) => { // dir probably does not exist, don't remove anything
+ },
+ }
+ }
+
+ let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy")
+ .canonicalize()
+ .expect("failed to canonicalize path to clippy binary");
+
// assert that clippy is found
assert!(
cargo_clippy_path.is_file(),
// download and extract the crates, then run clippy on them and collect clippys warnings
// flatten into one big list of warnings
- let (filename, crates) = read_crates(clap_config.value_of("crates-toml"));
+ let (filename, crates) = read_crates(clap_toml_path);
let clippy_warnings: Vec<ClippyWarning> = if let Some(only_one_crate) = clap_config.value_of("only") {
// if we don't have the specified crate in the .toml, throw an error
.into_iter()
.map(|krate| krate.download_and_extract())
.filter(|krate| krate.name == only_one_crate)
- .map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
+ .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1))
.flatten()
.collect()
} else {
+ let counter = std::sync::atomic::AtomicUsize::new(0);
+
+ // Ask rayon for thread count. Assume that half of that is the number of physical cores
+ // Use one target dir for each core so that we can run N clippys in parallel.
+ // We need to use different target dirs because cargo would lock them for a single build otherwise,
+ // killing the parallelism. However this also means that deps will only be reused half/a
+ // quarter of the time which might result in a longer wall clock runtime
+
+ // This helps when we check many small crates with dep-trees that don't have a lot of branches in
+ // order to achive some kind of parallelism
+
+ // by default, use a single thread
+ let num_cpus = match clap_config.value_of("threads") {
+ Some(threads) => {
+ let threads: usize = threads
+ .parse()
+ .expect(&format!("Failed to parse '{}' to a digit", threads));
+ if threads == 0 {
+ // automatic choice
+ // Rayon seems to return thread count so half that for core count
+ (rayon::current_num_threads() / 2) as usize
+ } else {
+ threads
+ }
+ },
+ // no -j passed, use a single thread
+ None => 1,
+ };
+
+ let num_crates = crates.len();
+
// check all crates (default)
crates
- .into_iter()
+ .into_par_iter()
.map(|krate| krate.download_and_extract())
- .map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
+ .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
.flatten()
.collect()
};
- // generate some stats:
+ // generate some stats
+ let stats_formatted = gather_stats(&clippy_warnings);
// grab crashes/ICEs, save the crate name and the ice message
let ices: Vec<(&String, &String)> = clippy_warnings
.iter()
- .filter(|warning| warning.ice)
+ .filter(|warning| warning.is_ice)
.map(|w| (&w.crate_name, &w.message))
.collect();
- // count lint type occurrences
- let mut counter: HashMap<&String, usize> = HashMap::new();
- clippy_warnings
- .iter()
- .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1);
-
- // collect into a tupled list for sorting
- let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
- // sort by "000{count} {clippy::lintname}"
- // to not have a lint with 200 and 2 warnings take the same spot
- stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
-
- let stats_formatted: String = stats
- .iter()
- .map(|(lint, count)| format!("{} {}\n", lint, count))
- .collect::<String>();
-
let mut all_msgs: Vec<String> = clippy_warnings.iter().map(|warning| warning.to_string()).collect();
all_msgs.sort();
all_msgs.push("\n\n\n\nStats\n\n".into());
.for_each(|(cratename, msg)| text.push_str(&format!("{}: '{}'", cratename, msg)));
let file = format!("lintcheck-logs/{}_logs.txt", filename);
+ println!("Writing logs to {}", file);
write(file, text).unwrap();
}