// Run clippy on a fixed set of crates and collect the warnings.
// This helps to observe the impact clippy changes have on a set of real-world code.
//
// When a new lint is introduced, we can search the results for new warnings and check for false
// positives.
7 #![cfg(feature = "lintcheck")]
8 #![allow(clippy::filter_map)]
10 use crate::clippy_project_root;
12 use std::collections::HashMap;
13 use std::process::Command;
14 use std::sync::atomic::{AtomicUsize, Ordering};
15 use std::{env, fmt, fs::write, path::PathBuf};
18 use rayon::prelude::*;
19 use serde::{Deserialize, Serialize};
20 use serde_json::Value;
22 /// List of sources to check, loaded from a .toml file
23 #[derive(Debug, Serialize, Deserialize)]
25 crates: HashMap<String, TomlCrate>,
28 /// A crate source stored inside the .toml
29 /// will be translated into on one of the `CrateSource` variants
30 #[derive(Debug, Serialize, Deserialize)]
33 versions: Option<Vec<String>>,
34 git_url: Option<String>,
35 git_hash: Option<String>,
37 options: Option<Vec<String>>,
40 /// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
41 /// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
42 #[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
47 options: Option<Vec<String>>,
53 options: Option<Vec<String>>,
58 options: Option<Vec<String>>,
/// Represents the actual source code of a crate that we ran "cargo clippy" on
#[derive(Debug)]
struct Crate {
    /// crates.io version, git commit, or `"local"` for crates copied from a local path
    version: String,
    /// name of the crate
    name: String,
    // path to the extracted sources that clippy can check
    path: PathBuf,
    /// extra clippy arguments; when `None`, the default lint groups are enabled instead
    options: Option<Vec<String>>,
}
/// A single warning that clippy issued while checking a `Crate`
#[derive(Debug)]
struct ClippyWarning {
    /// name of the crate the warning was found in
    crate_name: String,
    /// version of the crate the warning was found in
    crate_version: String,
    /// file the warning points at
    file: String,
    /// line of the warning, kept as text
    line: String,
    /// column of the warning, kept as text
    column: String,
    /// lint name, for example `clippy::needless_return`
    linttype: String,
    /// the warning message
    message: String,
    /// whether the originating output line reported an internal compiler error
    is_ice: bool,
}

impl std::fmt::Display for ClippyWarning {
    /// Writes the warning as one newline-terminated log line:
    /// `<name>-<version>/<file>:<line>:<column> <lint> "<message>"`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            r#"{}-{}/{}:{}:{} {} "{}""#,
            &self.crate_name, &self.crate_version, &self.file, &self.line, &self.column, &self.linttype, &self.message
        )
    }
}
96 /// Makes the sources available on the disk for clippy to check.
97 /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
98 /// copies a local folder
99 fn download_and_extract(&self) -> Crate {
101 CrateSource::CratesIo { name, version, options } => {
102 let extract_dir = PathBuf::from("target/lintcheck/crates");
103 let krate_download_dir = PathBuf::from("target/lintcheck/downloads");
105 // url to download the crate from crates.io
106 let url = format!("https://crates.io/api/v1/crates/{}/{}/download", name, version);
107 println!("Downloading and extracting {} {} from {}", name, version, url);
108 let _ = std::fs::create_dir("target/lintcheck/");
109 let _ = std::fs::create_dir(&krate_download_dir);
110 let _ = std::fs::create_dir(&extract_dir);
112 let krate_file_path = krate_download_dir.join(format!("{}-{}.crate.tar.gz", name, version));
113 // don't download/extract if we already have done so
114 if !krate_file_path.is_file() {
115 // create a file path to download and write the crate data into
116 let mut krate_dest = std::fs::File::create(&krate_file_path).unwrap();
117 let mut krate_req = ureq::get(&url).call().unwrap().into_reader();
118 // copy the crate into the file
119 std::io::copy(&mut krate_req, &mut krate_dest).unwrap();
122 let ungz_tar = flate2::read::GzDecoder::new(std::fs::File::open(&krate_file_path).unwrap());
123 // extract the tar archive
124 let mut archive = tar::Archive::new(ungz_tar);
125 archive.unpack(&extract_dir).expect("Failed to extract!");
127 // crate is extracted, return a new Krate object which contains the path to the extracted
128 // sources that clippy can check
130 version: version.clone(),
132 path: extract_dir.join(format!("{}-{}/", name, version)),
133 options: options.clone(),
143 let mut repo_path = PathBuf::from("target/lintcheck/crates");
144 // add a -git suffix in case we have the same crate from crates.io and a git repo
145 repo_path.push(format!("{}-git", name));
148 // clone the repo if we have not done so
149 if !repo_path.is_dir() {
150 println!("Cloning {} and checking out {}", url, commit);
151 if !Command::new("git")
156 .expect("Failed to clone git repo!")
159 eprintln!("Failed to clone {} into {}", url, repo_path.display())
162 // check out the commit/branch/whatever
163 if !Command::new("git")
166 .current_dir(&repo_path)
168 .expect("Failed to check out commit")
171 eprintln!("Failed to checkout {} of repo at {}", commit, repo_path.display())
175 version: commit.clone(),
178 options: options.clone(),
181 CrateSource::Path { name, path, options } => {
184 // simply copy the entire directory into our target dir
185 let copy_dest = PathBuf::from("target/lintcheck/crates/");
187 // the source path of the crate we copied, ${copy_dest}/crate_name
188 let crate_root = copy_dest.join(name); // .../crates/local_crate
190 if !crate_root.exists() {
191 println!("Copying {} to {}", path.display(), copy_dest.display());
193 dir::copy(path, ©_dest, &dir::CopyOptions::new()).expect(&format!(
194 "Failed to copy from {}, to {}",
200 "Not copying {} to {}, destination already exists",
207 version: String::from("local"),
210 options: options.clone(),
218 /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
222 cargo_clippy_path: &PathBuf,
223 target_dir_index: &AtomicUsize,
225 total_crates_to_lint: usize,
226 ) -> Vec<ClippyWarning> {
227 // advance the atomic index by one
228 let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
229 // "loop" the index within 0..thread_limit
230 let target_dir_index = index % thread_limit;
231 let perc = ((index * 100) as f32 / total_crates_to_lint as f32) as u8;
233 if thread_limit == 1 {
235 "{}/{} {}% Linting {} {}",
236 index, total_crates_to_lint, perc, &self.name, &self.version
240 "{}/{} {}% Linting {} {} in target dir {:?}",
241 index, total_crates_to_lint, perc, &self.name, &self.version, target_dir_index
245 let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
247 let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");
249 let mut args = vec!["--", "--message-format=json", "--", "--cap-lints=warn"];
251 if let Some(options) = &self.options {
256 args.extend(&["-Wclippy::pedantic", "-Wclippy::cargo"])
259 let all_output = std::process::Command::new(&cargo_clippy_path)
260 // use the looping index to create individual target dirs
263 shared_target_dir.join(format!("_{:?}", target_dir_index)),
265 // lint warnings will look like this:
266 // src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
268 .current_dir(&self.path)
270 .unwrap_or_else(|error| {
272 "Encountered error:\n{:?}\ncargo_clippy_path: {}\ncrate path:{}\n",
274 &cargo_clippy_path.display(),
278 let stdout = String::from_utf8_lossy(&all_output.stdout);
279 let output_lines = stdout.lines();
280 let warnings: Vec<ClippyWarning> = output_lines
282 // get all clippy warnings and ICEs
283 .filter(|line| filter_clippy_warnings(&line))
284 .map(|json_msg| parse_json_message(json_msg, &self))
/// takes a single json-formatted clippy warning and returns true (we are interested in that line)
/// or false (we aren't)
///
/// A line is kept when it reports an internal compiler error, or when it mentions `clippy::` and
/// is not one of the known noise messages.
fn filter_clippy_warnings(line: &str) -> bool {
    // we want to collect ICEs because clippy might have crashed.
    // these are summarized later
    if line.contains("internal compiler error: ") {
        return true;
    }
    // in general, we want all clippy warnings
    // however due to some kind of bug, sometimes there are absolute paths
    // to libcore files inside the message
    // or we end up with cargo-metadata output (https://github.com/rust-lang/rust-clippy/issues/6508)

    // filter out these messages to avoid unnecessary noise in the logs
    let is_noise = line.contains("could not read cargo metadata")
        || (line.contains(".rustup") && line.contains("toolchains"));
    line.contains("clippy::") && !is_noise
}
/// get the path to lintchecks crate sources .toml file
///
/// The `LINTCHECK_TOML` environment variable is checked first. If it is not set (or is not valid
/// unicode), `toml_path` is used, and if that is `None` too, the default path
/// `clippy_dev/lintcheck_crates.toml`.
fn lintcheck_config_toml(toml_path: Option<&str>) -> PathBuf {
    env::var("LINTCHECK_TOML")
        .map(PathBuf::from)
        // only build the fallback path when the variable is actually missing
        .unwrap_or_else(|_| PathBuf::from(toml_path.unwrap_or("clippy_dev/lintcheck_crates.toml")))
}
/// Builds clippy inside the repo to make sure we have a clippy executable we can use.
///
/// Runs `cargo build` in the current directory. Panics if `cargo` cannot be started and exits the
/// process with status 1 if the build fails.
fn build_clippy() {
    let status = Command::new("cargo")
        .arg("build")
        .status()
        .expect("Failed to build clippy!");
    if !status.success() {
        eprintln!("Error: Failed to compile Clippy!");
        std::process::exit(1);
    }
}
338 /// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy
339 fn read_crates(toml_path: Option<&str>) -> (String, Vec<CrateSource>) {
340 let toml_path = lintcheck_config_toml(toml_path);
341 // save it so that we can use the name of the sources.toml as name for the logfile later.
342 let toml_filename = toml_path.file_stem().unwrap().to_str().unwrap().to_string();
343 let toml_content: String =
344 std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
345 let crate_list: SourceList =
346 toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e));
347 // parse the hashmap of the toml file into a list of crates
348 let tomlcrates: Vec<TomlCrate> = crate_list
351 .map(|(_cratename, tomlcrate)| tomlcrate)
354 // flatten TomlCrates into CrateSources (one TomlCrates may represent several versions of a crate =>
355 // multiple Cratesources)
356 let mut crate_sources = Vec::new();
357 tomlcrates.into_iter().for_each(|tk| {
358 if let Some(ref path) = tk.path {
359 crate_sources.push(CrateSource::Path {
360 name: tk.name.clone(),
361 path: PathBuf::from(path),
362 options: tk.options.clone(),
366 // if we have multiple versions, save each one
367 if let Some(ref versions) = tk.versions {
368 versions.iter().for_each(|ver| {
369 crate_sources.push(CrateSource::CratesIo {
370 name: tk.name.clone(),
371 version: ver.to_string(),
372 options: tk.options.clone(),
376 // otherwise, we should have a git source
377 if tk.git_url.is_some() && tk.git_hash.is_some() {
378 crate_sources.push(CrateSource::Git {
379 name: tk.name.clone(),
380 url: tk.git_url.clone().unwrap(),
381 commit: tk.git_hash.clone().unwrap(),
382 options: tk.options.clone(),
385 // if we have a version as well as a git data OR only one git data, something is funky
386 if tk.versions.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
387 || tk.git_hash.is_some() != tk.git_url.is_some()
389 eprintln!("tomlkrate: {:?}", tk);
390 if tk.git_hash.is_some() != tk.git_url.is_some() {
391 panic!("Error: Encountered TomlCrate with only one of git_hash and git_url!");
393 if tk.path.is_some() && (tk.git_hash.is_some() || tk.versions.is_some()) {
394 panic!("Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields");
396 unreachable!("Failed to translate TomlCrate into CrateSource!");
400 crate_sources.sort();
402 (toml_filename, crate_sources)
405 /// Parse the json output of clippy and return a `ClippyWarning`
406 fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning {
407 let jmsg: Value = serde_json::from_str(&json_message).unwrap_or_else(|e| panic!("Failed to parse json:\n{:?}", e));
410 crate_name: krate.name.to_string(),
411 crate_version: krate.version.to_string(),
412 file: jmsg["message"]["spans"][0]["file_name"]
416 line: jmsg["message"]["spans"][0]["line_start"]
420 column: jmsg["message"]["spans"][0]["text"][0]["highlight_start"]
424 linttype: jmsg["message"]["code"]["code"].to_string().trim_matches('"').into(),
425 message: jmsg["message"]["message"].to_string().trim_matches('"').into(),
426 is_ice: json_message.contains("internal compiler error: "),
430 /// Generate a short list of occuring lints-types and their count
431 fn gather_stats(clippy_warnings: &[ClippyWarning]) -> String {
432 // count lint type occurrences
433 let mut counter: HashMap<&String, usize> = HashMap::new();
436 .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1);
438 // collect into a tupled list for sorting
439 let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
440 // sort by "000{count} {clippy::lintname}"
441 // to not have a lint with 200 and 2 warnings take the same spot
442 stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
446 .map(|(lint, count)| format!("{} {}\n", lint, count))
450 /// check if the latest modification of the logfile is older than the modification date of the
451 /// clippy binary, if this is true, we should clean the lintchec shared target directory and recheck
452 fn lintcheck_needs_rerun(toml_path: Option<&str>) -> bool {
453 let clippy_modified: std::time::SystemTime = {
454 let mut times = ["target/debug/clippy-driver", "target/debug/cargo-clippy"]
458 .expect("failed to get metadata of file")
460 .expect("failed to get modification date")
462 // the lates modification of either of the binaries
463 std::cmp::max(times.next().unwrap(), times.next().unwrap())
466 let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_config_toml(toml_path))
467 .expect("failed to get metadata of file")
469 .expect("failed to get modification date");
471 // if clippys modification time is bigger (older) than the logs mod time, we need to rerun lintcheck
472 clippy_modified > logs_modified
475 /// lintchecks `main()` function
476 pub fn run(clap_config: &ArgMatches) {
477 println!("Compiling clippy...");
479 println!("Done compiling");
481 let clap_toml_path = clap_config.value_of("crates-toml");
483 // if the clippy bin is newer than our logs, throw away target dirs to force clippy to
485 if lintcheck_needs_rerun(clap_toml_path) {
486 let shared_target_dir = "target/lintcheck/shared_target_dir";
487 match std::fs::metadata(&shared_target_dir) {
489 if metadata.is_dir() {
490 println!("Clippy is newer than lint check logs, clearing lintcheck shared target dir...");
491 std::fs::remove_dir_all(&shared_target_dir)
492 .expect("failed to remove target/lintcheck/shared_target_dir");
495 Err(_) => { // dir probably does not exist, don't remove anything
500 let cargo_clippy_path: PathBuf = PathBuf::from("target/debug/cargo-clippy")
502 .expect("failed to canonicalize path to clippy binary");
504 // assert that clippy is found
506 cargo_clippy_path.is_file(),
507 "target/debug/cargo-clippy binary not found! {}",
508 cargo_clippy_path.display()
511 let clippy_ver = std::process::Command::new("target/debug/cargo-clippy")
514 .map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
515 .expect("could not get clippy version!");
517 // download and extract the crates, then run clippy on them and collect clippys warnings
518 // flatten into one big list of warnings
520 let (filename, crates) = read_crates(clap_toml_path);
522 let clippy_warnings: Vec<ClippyWarning> = if let Some(only_one_crate) = clap_config.value_of("only") {
523 // if we don't have the specified crate in the .toml, throw an error
524 if !crates.iter().any(|krate| {
525 let name = match krate {
526 CrateSource::CratesIo { name, .. } => name,
527 CrateSource::Git { name, .. } => name,
528 CrateSource::Path { name, .. } => name,
530 name == only_one_crate
533 "ERROR: could not find crate '{}' in clippy_dev/lintcheck_crates.toml",
536 std::process::exit(1);
539 // only check a single crate that was passed via cmdline
542 .map(|krate| krate.download_and_extract())
543 .filter(|krate| krate.name == only_one_crate)
544 .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1))
548 let counter = std::sync::atomic::AtomicUsize::new(0);
550 // Ask rayon for thread count. Assume that half of that is the number of physical cores
551 // Use one target dir for each core so that we can run N clippys in parallel.
552 // We need to use different target dirs because cargo would lock them for a single build otherwise,
553 // killing the parallelism. However this also means that deps will only be reused half/a
554 // quarter of the time which might result in a longer wall clock runtime
556 // This helps when we check many small crates with dep-trees that don't have a lot of branches in
557 // order to achive some kind of parallelism
559 // by default, use a single thread
560 let num_cpus = match clap_config.value_of("threads") {
562 let threads: usize = threads
564 .expect(&format!("Failed to parse '{}' to a digit", threads));
567 // Rayon seems to return thread count so half that for core count
568 (rayon::current_num_threads() / 2) as usize
573 // no -j passed, use a single thread
577 let num_crates = crates.len();
579 // check all crates (default)
582 .map(|krate| krate.download_and_extract())
583 .map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
588 // generate some stats
589 let stats_formatted = gather_stats(&clippy_warnings);
591 // grab crashes/ICEs, save the crate name and the ice message
592 let ices: Vec<(&String, &String)> = clippy_warnings
594 .filter(|warning| warning.is_ice)
595 .map(|w| (&w.crate_name, &w.message))
598 let mut all_msgs: Vec<String> = clippy_warnings.iter().map(|warning| warning.to_string()).collect();
600 all_msgs.push("\n\n\n\nStats\n\n".into());
601 all_msgs.push(stats_formatted);
603 // save the text into lintcheck-logs/logs.txt
604 let mut text = clippy_ver; // clippy version number on top
605 text.push_str(&format!("\n{}", all_msgs.join("")));
606 text.push_str("ICEs:\n");
608 .for_each(|(cratename, msg)| text.push_str(&format!("{}: '{}'", cratename, msg)));
610 let file = format!("lintcheck-logs/{}_logs.txt", filename);
611 println!("Writing logs to {}", file);
612 write(file, text).unwrap();