1 // Run clippy on a fixed set of crates and collect the warnings.
2 // This helps observe the impact that clippy changes have on a set of real-world code (and not just our
5 // When a new lint is introduced, we can search the results for new warnings and check for false
8 #![allow(clippy::collapsible_else_if)]
14 use crate::config::LintcheckConfig;
15 use crate::recursive::LintcheckServer;
17 use std::collections::{HashMap, HashSet};
19 use std::env::consts::EXE_SUFFIX;
20 use std::fmt::Write as _;
22 use std::io::ErrorKind;
23 use std::path::{Path, PathBuf};
24 use std::process::Command;
25 use std::sync::atomic::{AtomicUsize, Ordering};
27 use std::time::Duration;
29 use cargo_metadata::diagnostic::{Diagnostic, DiagnosticLevel};
30 use cargo_metadata::Message;
31 use rayon::prelude::*;
32 use serde::{Deserialize, Serialize};
33 use walkdir::{DirEntry, WalkDir};
35 const LINTCHECK_DOWNLOADS: &str = "target/lintcheck/downloads";
36 const LINTCHECK_SOURCES: &str = "target/lintcheck/sources";
38 /// List of sources to check, loaded from a .toml file
39 #[derive(Debug, Serialize, Deserialize)]
41 crates: HashMap<String, TomlCrate>,
43 recursive: RecursiveOptions,
46 #[derive(Debug, Serialize, Deserialize, Default)]
47 struct RecursiveOptions {
48 ignore: HashSet<String>,
51 /// A crate source stored inside the .toml
52 /// will be translated into one of the `CrateSource` variants
53 #[derive(Debug, Serialize, Deserialize)]
56 versions: Option<Vec<String>>,
57 git_url: Option<String>,
58 git_hash: Option<String>,
60 options: Option<Vec<String>>,
63 /// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
64 /// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
65 #[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
70 options: Option<Vec<String>>,
76 options: Option<Vec<String>>,
81 options: Option<Vec<String>>,
85 /// Represents the actual source code of a crate that we ran "cargo clippy" on
90 // path to the extracted sources that clippy can check
92 options: Option<Vec<String>>,
95 /// A single warning that clippy issued while checking a `Crate`
97 struct ClippyWarning {
109 fn new(diag: Diagnostic, crate_name: &str, crate_version: &str) -> Option<Self> {
110 let lint_type = diag.code?.code;
111 if !(lint_type.contains("clippy") || diag.message.contains("clippy"))
112 || diag.message.contains("could not read cargo metadata")
117 let span = diag.spans.into_iter().find(|span| span.is_primary)?;
119 let file = if let Ok(stripped) = Path::new(&span.file_name).strip_prefix(env!("CARGO_HOME")) {
120 format!("$CARGO_HOME/{}", stripped.display())
123 "target/lintcheck/sources/{crate_name}-{crate_version}/{}",
129 crate_name: crate_name.to_owned(),
131 line: span.line_start,
132 column: span.column_start,
134 message: diag.message,
135 is_ice: diag.level == DiagnosticLevel::Ice,
139 fn to_output(&self, markdown: bool) -> String {
140 let file_with_pos = format!("{}:{}:{}", &self.file, &self.line, &self.column);
142 let mut file = self.file.clone();
143 if !file.starts_with('$') {
144 file.insert_str(0, "../");
147 let mut output = String::from("| ");
148 let _ = write!(output, "[`{file_with_pos}`]({file}#L{})", self.line);
149 let _ = write!(output, r#" | `{:<50}` | "{}" |"#, self.lint_type, self.message);
153 format!("{file_with_pos} {} \"{}\"\n", self.lint_type, self.message)
158 #[allow(clippy::result_large_err)]
159 fn get(path: &str) -> Result<ureq::Response, ureq::Error> {
160 const MAX_RETRIES: u8 = 4;
163 match ureq::get(path).call() {
164 Ok(res) => return Ok(res),
165 Err(e) if retries >= MAX_RETRIES => return Err(e),
166 Err(ureq::Error::Transport(e)) => eprintln!("Error: {e}"),
167 Err(e) => return Err(e),
169 eprintln!("retrying in {retries} seconds...");
170 thread::sleep(Duration::from_secs(u64::from(retries)));
176 /// Makes the sources available on the disk for clippy to check.
177 /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
178 /// copies a local folder
179 fn download_and_extract(&self) -> Crate {
181 CrateSource::CratesIo { name, version, options } => {
182 let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
183 let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
185 // url to download the crate from crates.io
186 let url = format!("https://crates.io/api/v1/crates/{name}/{version}/download");
187 println!("Downloading and extracting {name} {version} from {url}");
188 create_dirs(&krate_download_dir, &extract_dir);
190 let krate_file_path = krate_download_dir.join(format!("{name}-{version}.crate.tar.gz"));
191 // don't download/extract if we already have done so
192 if !krate_file_path.is_file() {
193 // create a file path to download and write the crate data into
194 let mut krate_dest = std::fs::File::create(&krate_file_path).unwrap();
195 let mut krate_req = get(&url).unwrap().into_reader();
196 // copy the crate into the file
197 std::io::copy(&mut krate_req, &mut krate_dest).unwrap();
200 let ungz_tar = flate2::read::GzDecoder::new(std::fs::File::open(&krate_file_path).unwrap());
201 // extract the tar archive
202 let mut archive = tar::Archive::new(ungz_tar);
203 archive.unpack(&extract_dir).expect("Failed to extract!");
205 // crate is extracted, return a new `Crate` object which contains the path to the extracted
206 // sources that clippy can check
208 version: version.clone(),
210 path: extract_dir.join(format!("{name}-{version}/")),
211 options: options.clone(),
221 let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
222 // add a -git suffix in case we have the same crate from crates.io and a git repo
223 repo_path.push(format!("{name}-git"));
226 // clone the repo if we have not done so
227 if !repo_path.is_dir() {
228 println!("Cloning {url} and checking out {commit}");
229 if !Command::new("git")
234 .expect("Failed to clone git repo!")
237 eprintln!("Failed to clone {url} into {}", repo_path.display());
240 // check out the commit/branch/whatever
241 if !Command::new("git")
242 .args(["-c", "advice.detachedHead=false"])
245 .current_dir(&repo_path)
247 .expect("Failed to check out commit")
250 eprintln!("Failed to checkout {commit} of repo at {}", repo_path.display());
254 version: commit.clone(),
257 options: options.clone(),
260 CrateSource::Path { name, path, options } => {
261 fn is_cache_dir(entry: &DirEntry) -> bool {
262 std::fs::read(entry.path().join("CACHEDIR.TAG"))
263 .map(|x| x.starts_with(b"Signature: 8a477f597d28d172789f06886806bc55"))
267 // copy path into the dest_crate_root but skip directories that contain a CACHEDIR.TAG file.
268 // The target/ directory contains a CACHEDIR.TAG file so it is the most commonly skipped directory
269 // as a result of this filter.
270 let dest_crate_root = PathBuf::from(LINTCHECK_SOURCES).join(name);
271 if dest_crate_root.exists() {
272 println!("Deleting existing directory at {dest_crate_root:?}");
273 std::fs::remove_dir_all(&dest_crate_root).unwrap();
276 println!("Copying {path:?} to {dest_crate_root:?}");
278 for entry in WalkDir::new(path).into_iter().filter_entry(|e| !is_cache_dir(e)) {
279 let entry = entry.unwrap();
280 let entry_path = entry.path();
281 let relative_entry_path = entry_path.strip_prefix(path).unwrap();
282 let dest_path = dest_crate_root.join(relative_entry_path);
283 let metadata = entry_path.symlink_metadata().unwrap();
285 if metadata.is_dir() {
286 std::fs::create_dir(dest_path).unwrap();
287 } else if metadata.is_file() {
288 std::fs::copy(entry_path, dest_path).unwrap();
293 version: String::from("local"),
295 path: dest_crate_root,
296 options: options.clone(),
304 /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
306 #[allow(clippy::too_many_arguments)]
309 cargo_clippy_path: &Path,
310 clippy_driver_path: &Path,
311 target_dir_index: &AtomicUsize,
312 total_crates_to_lint: usize,
313 config: &LintcheckConfig,
314 lint_filter: &Vec<String>,
315 server: &Option<LintcheckServer>,
316 ) -> Vec<ClippyWarning> {
317 // advance the atomic index by one
318 let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
319 // "loop" the index within 0..max_jobs
320 let thread_index = index % config.max_jobs;
321 let perc = (index * 100) / total_crates_to_lint;
323 if config.max_jobs == 1 {
325 "{index}/{total_crates_to_lint} {perc}% Linting {} {}",
326 &self.name, &self.version
330 "{index}/{total_crates_to_lint} {perc}% Linting {} {} in target dir {thread_index:?}",
331 &self.name, &self.version
335 let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
337 let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");
339 let mut cargo_clippy_args = if config.fix {
342 vec!["--", "--message-format=json", "--"]
345 let mut clippy_args = Vec::<&str>::new();
346 if let Some(options) = &self.options {
348 clippy_args.push(opt);
351 clippy_args.extend(["-Wclippy::pedantic", "-Wclippy::cargo"]);
354 if lint_filter.is_empty() {
355 clippy_args.push("--cap-lints=warn");
357 clippy_args.push("--cap-lints=allow");
358 clippy_args.extend(lint_filter.iter().map(std::string::String::as_str));
361 if let Some(server) = server {
362 let target = shared_target_dir.join("recursive");
364 // `cargo clippy` is a wrapper around `cargo check` that mainly sets `RUSTC_WORKSPACE_WRAPPER` to
365 // `clippy-driver`. We do the same thing here with a couple changes:
367 // `RUSTC_WRAPPER` is used instead of `RUSTC_WORKSPACE_WRAPPER` so that we can lint all crate
368 // dependencies rather than only workspace members
370 // The wrapper is set to the `lintcheck` executable itself so we can force-enable linting and ignore certain crates
371 // (see `crate::driver`)
372 let status = Command::new("cargo")
375 .current_dir(&self.path)
376 .env("CLIPPY_ARGS", clippy_args.join("__CLIPPY_HACKERY__"))
377 .env("CARGO_TARGET_DIR", target)
378 .env("RUSTC_WRAPPER", env::current_exe().unwrap())
379 // Pass the absolute path so `crate::driver` can find `clippy-driver`, as it's executed in various
380 // different working directories
381 .env("CLIPPY_DRIVER", clippy_driver_path)
382 .env("LINTCHECK_SERVER", server.local_addr.to_string())
384 .expect("failed to run cargo");
386 assert_eq!(status.code(), Some(0));
391 cargo_clippy_args.extend(clippy_args);
393 let all_output = Command::new(&cargo_clippy_path)
394 // use the looping index to create individual target dirs
395 .env("CARGO_TARGET_DIR", shared_target_dir.join(format!("_{thread_index:?}")))
396 .args(&cargo_clippy_args)
397 .current_dir(&self.path)
399 .unwrap_or_else(|error| {
401 "Encountered error:\n{error:?}\ncargo_clippy_path: {}\ncrate path:{}\n",
402 &cargo_clippy_path.display(),
406 let stdout = String::from_utf8_lossy(&all_output.stdout);
407 let stderr = String::from_utf8_lossy(&all_output.stderr);
408 let status = &all_output.status;
410 if !status.success() {
412 "\nWARNING: bad exit status after checking {} {} \n",
413 self.name, self.version
418 if let Some(stderr) = stderr
420 .find(|line| line.contains("failed to automatically apply fixes suggested by rustc to crate"))
422 let subcrate = &stderr[63..];
424 "ERROR: failed to apply some suggetion to {} / to (sub)crate {subcrate}",
428 // fast path, we don't need the warnings anyway
432 // get all clippy warnings and ICEs
433 let warnings: Vec<ClippyWarning> = Message::parse_stream(stdout.as_bytes())
434 .filter_map(|msg| match msg {
435 Ok(Message::CompilerMessage(message)) => ClippyWarning::new(message.message, &self.name, &self.version),
444 /// Builds clippy inside the repo to make sure we have a clippy executable we can use.
446 let status = Command::new("cargo")
449 .expect("Failed to build clippy!");
450 if !status.success() {
451 eprintln!("Error: Failed to compile Clippy!");
452 std::process::exit(1);
456 /// Read a `lintcheck_crates.toml` file
457 fn read_crates(toml_path: &Path) -> (Vec<CrateSource>, RecursiveOptions) {
458 let toml_content: String =
459 std::fs::read_to_string(toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
460 let crate_list: SourceList =
461 toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{e}", toml_path.display()));
462 // parse the hashmap of the toml file into a list of crates
463 let tomlcrates: Vec<TomlCrate> = crate_list.crates.into_values().collect();
465 // flatten `TomlCrate`s into `CrateSource`s (one `TomlCrate` may represent several versions of a crate =>
466 // multiple `CrateSource`s)
467 let mut crate_sources = Vec::new();
468 for tk in tomlcrates {
469 if let Some(ref path) = tk.path {
470 crate_sources.push(CrateSource::Path {
471 name: tk.name.clone(),
472 path: PathBuf::from(path),
473 options: tk.options.clone(),
475 } else if let Some(ref versions) = tk.versions {
476 // if we have multiple versions, save each one
477 for ver in versions.iter() {
478 crate_sources.push(CrateSource::CratesIo {
479 name: tk.name.clone(),
480 version: ver.to_string(),
481 options: tk.options.clone(),
484 } else if tk.git_url.is_some() && tk.git_hash.is_some() {
485 // otherwise, we should have a git source
486 crate_sources.push(CrateSource::Git {
487 name: tk.name.clone(),
488 url: tk.git_url.clone().unwrap(),
489 commit: tk.git_hash.clone().unwrap(),
490 options: tk.options.clone(),
493 panic!("Invalid crate source: {tk:?}");
496 // if we have versions as well as git data, OR only one of `git_url`/`git_hash`, something is funky
497 if tk.versions.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
498 || tk.git_hash.is_some() != tk.git_url.is_some()
500 eprintln!("tomlkrate: {tk:?}");
502 tk.git_hash.is_some(),
503 tk.git_url.is_some(),
504 "Error: Encountered TomlCrate with only one of git_hash and git_url!"
507 tk.path.is_none() || (tk.git_hash.is_none() && tk.versions.is_none()),
508 "Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields"
510 unreachable!("Failed to translate TomlCrate into CrateSource!");
514 crate_sources.sort();
516 (crate_sources, crate_list.recursive)
519 /// Generate a short list of the lint types that occurred and their counts
520 fn gather_stats(clippy_warnings: &[ClippyWarning]) -> (String, HashMap<&String, usize>) {
521 // count lint type occurrences
522 let mut counter: HashMap<&String, usize> = HashMap::new();
525 .for_each(|wrn| *counter.entry(&wrn.lint_type).or_insert(0) += 1);
527 // collect into a tupled list for sorting
528 let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
529 // sort by "000{count} {clippy::lintname}"
530 // to not have a lint with 200 and 2 warnings take the same spot
531 stats.sort_by_key(|(lint, count)| format!("{count:0>4}, {lint}"));
533 let mut header = String::from("| lint | count |\n");
534 header.push_str("| -------------------------------------------------- | ----- |\n");
535 let stats_string = stats
537 .map(|(lint, count)| format!("| {lint:<50} | {count:>4} |\n"))
538 .fold(header, |mut table, line| {
539 table.push_str(&line);
543 (stats_string, counter)
546 #[allow(clippy::too_many_lines)]
548 // We're being executed as a `RUSTC_WRAPPER` as part of `--recursive`
549 if let Ok(addr) = env::var("LINTCHECK_SERVER") {
550 driver::drive(&addr);
553 // assert that we launch lintcheck from the repo root (via cargo lintcheck)
554 if std::fs::metadata("lintcheck/Cargo.toml").is_err() {
555 eprintln!("lintcheck needs to be run from clippy's repo root!\nUse `cargo lintcheck` alternatively.");
556 std::process::exit(3);
559 let config = LintcheckConfig::new();
561 println!("Compiling clippy...");
563 println!("Done compiling");
565 let cargo_clippy_path = fs::canonicalize(format!("target/debug/cargo-clippy{EXE_SUFFIX}")).unwrap();
566 let clippy_driver_path = fs::canonicalize(format!("target/debug/clippy-driver{EXE_SUFFIX}")).unwrap();
568 // assert that clippy is found
570 cargo_clippy_path.is_file(),
571 "target/debug/cargo-clippy binary not found! {}",
572 cargo_clippy_path.display()
575 let clippy_ver = std::process::Command::new(&cargo_clippy_path)
578 .map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
579 .expect("could not get clippy version!");
581 // download and extract the crates, then run clippy on them and collect clippy's warnings
582 // flatten into one big list of warnings
584 let (crates, recursive_options) = read_crates(&config.sources_toml_path);
585 let old_stats = read_stats_from_file(&config.lintcheck_results_path);
587 let counter = AtomicUsize::new(1);
588 let lint_filter: Vec<String> = config
592 let mut filter = filter.clone();
593 filter.insert_str(0, "--force-warn=");
598 let crates: Vec<Crate> = crates
601 if let Some(only_one_crate) = &config.only {
602 let name = match krate {
603 CrateSource::CratesIo { name, .. }
604 | CrateSource::Git { name, .. }
605 | CrateSource::Path { name, .. } => name,
608 name == only_one_crate
613 .map(|krate| krate.download_and_extract())
616 if crates.is_empty() {
618 "ERROR: could not find crate '{}' in lintcheck/lintcheck_crates.toml",
619 config.only.unwrap(),
621 std::process::exit(1);
624 // run parallel with rayon
626 // This helps when we check many small crates with dep-trees that don't have a lot of branches in
627 // order to achieve some kind of parallelism
629 rayon::ThreadPoolBuilder::new()
630 .num_threads(config.max_jobs)
634 let server = config.recursive.then(|| {
635 let _ = fs::remove_dir_all("target/lintcheck/shared_target_dir/recursive");
637 LintcheckServer::spawn(recursive_options)
640 let mut clippy_warnings: Vec<ClippyWarning> = crates
643 krate.run_clippy_lints(
655 if let Some(server) = server {
656 clippy_warnings.extend(server.warnings());
659 // if we are in --fix mode, don't change the log files, terminate here
664 // generate some stats
665 let (stats_formatted, new_stats) = gather_stats(&clippy_warnings);
667 // grab crashes/ICEs, save the crate name and the ice message
668 let ices: Vec<(&String, &String)> = clippy_warnings
670 .filter(|warning| warning.is_ice)
671 .map(|w| (&w.crate_name, &w.message))
674 let mut all_msgs: Vec<String> = clippy_warnings
676 .map(|warn| warn.to_output(config.markdown))
679 all_msgs.push("\n\n### Stats:\n\n".into());
680 all_msgs.push(stats_formatted);
682 // save the text into the results file (`config.lintcheck_results_path`)
683 let mut text = clippy_ver; // clippy version number on top
684 text.push_str("\n### Reports\n\n");
686 text.push_str("| file | lint | message |\n");
687 text.push_str("| --- | --- | --- |\n");
689 write!(text, "{}", all_msgs.join("")).unwrap();
690 text.push_str("\n\n### ICEs:\n");
691 for (cratename, msg) in &ices {
692 let _ = write!(text, "{cratename}: '{msg}'");
695 println!("Writing logs to {}", config.lintcheck_results_path.display());
696 fs::create_dir_all(config.lintcheck_results_path.parent().unwrap()).unwrap();
697 fs::write(&config.lintcheck_results_path, text).unwrap();
699 print_stats(old_stats, new_stats, &config.lint_filter);
702 /// read the previous stats from the lintcheck-log file
703 fn read_stats_from_file(file_path: &Path) -> HashMap<String, usize> {
704 let file_content: String = match std::fs::read_to_string(file_path).ok() {
705 Some(content) => content,
707 return HashMap::new();
711 let lines: Vec<String> = file_content.lines().map(ToString::to_string).collect();
715 .skip_while(|line| line.as_str() != "### Stats:")
716 // Skipping the table header and the `Stats:` label
718 .take_while(|line| line.starts_with("| "))
720 let mut spl = line.split('|');
721 // Skip the first `|` symbol
723 if let (Some(lint), Some(count)) = (spl.next(), spl.next()) {
724 Some((lint.trim().to_string(), count.trim().parse::<usize>().unwrap()))
729 .collect::<HashMap<String, usize>>()
732 /// print how lint counts changed between runs
733 fn print_stats(old_stats: HashMap<String, usize>, new_stats: HashMap<&String, usize>, lint_filter: &Vec<String>) {
734 let same_in_both_hashmaps = old_stats
736 .filter(|(old_key, old_val)| new_stats.get::<&String>(old_key) == Some(old_val))
737 .map(|(k, v)| (k.to_string(), *v))
738 .collect::<Vec<(String, usize)>>();
740 let mut old_stats_deduped = old_stats;
741 let mut new_stats_deduped = new_stats;
743 // remove duplicates from both hashmaps
744 for (k, v) in &same_in_both_hashmaps {
745 assert!(old_stats_deduped.remove(k) == Some(*v));
746 assert!(new_stats_deduped.remove(k) == Some(*v));
749 println!("\nStats:");
751 // list all new counts (key is in new stats but not in old stats)
754 .filter(|(new_key, _)| old_stats_deduped.get::<str>(new_key).is_none())
755 .for_each(|(new_key, new_value)| {
756 println!("{new_key} 0 => {new_value}");
759 // list all changed counts (key is in both maps but value differs)
762 .filter(|(new_key, _new_val)| old_stats_deduped.get::<str>(new_key).is_some())
763 .for_each(|(new_key, new_val)| {
764 let old_val = old_stats_deduped.get::<str>(new_key).unwrap();
765 println!("{new_key} {old_val} => {new_val}");
768 // list all gone counts (key is in old stats but not in new stats)
771 .filter(|(old_key, _)| new_stats_deduped.get::<&String>(old_key).is_none())
772 .filter(|(old_key, _)| lint_filter.is_empty() || lint_filter.contains(old_key))
773 .for_each(|(old_key, old_value)| {
774 println!("{old_key} {old_value} => 0");
778 /// Create necessary directories to run the lintcheck tool.
782 /// This function panics if creating one of the dirs fails.
783 fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
784 std::fs::create_dir("target/lintcheck/").unwrap_or_else(|err| {
787 ErrorKind::AlreadyExists,
788 "cannot create lintcheck target dir"
791 std::fs::create_dir(krate_download_dir).unwrap_or_else(|err| {
792 assert_eq!(err.kind(), ErrorKind::AlreadyExists, "cannot create crate download dir");
794 std::fs::create_dir(extract_dir).unwrap_or_else(|err| {
797 ErrorKind::AlreadyExists,
798 "cannot create crate extraction dir"
803 /// Returns the path to the Clippy project directory
805 fn clippy_project_root() -> &'static Path {
806 Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap()
810 fn lintcheck_test() {
816 "./lintcheck/Cargo.toml",
819 "lintcheck/test_sources.toml",
821 let status = std::process::Command::new("cargo")
823 .current_dir("..") // repo root
827 assert!(status.unwrap().success());