// Run clippy on a fixed set of crates and collect the warnings.
// This helps to observe the impact clippy changes have on a set of real-world code (and not just
// our test cases).
//
// When a new lint is introduced, we can search the results for new warnings and check for false
// positives.
8 #![allow(clippy::collapsible_else_if)]
12 use config::LintcheckConfig;
14 use std::collections::HashMap;
16 use std::fmt::Write as _;
18 use std::io::ErrorKind;
19 use std::path::{Path, PathBuf};
20 use std::process::Command;
21 use std::sync::atomic::{AtomicUsize, Ordering};
23 use std::time::Duration;
25 use cargo_metadata::diagnostic::DiagnosticLevel;
26 use cargo_metadata::Message;
27 use rayon::prelude::*;
28 use serde::{Deserialize, Serialize};
29 use walkdir::{DirEntry, WalkDir};
32 const CLIPPY_DRIVER_PATH: &str = "target/debug/clippy-driver";
34 const CARGO_CLIPPY_PATH: &str = "target/debug/cargo-clippy";
37 const CLIPPY_DRIVER_PATH: &str = "target/debug/clippy-driver.exe";
39 const CARGO_CLIPPY_PATH: &str = "target/debug/cargo-clippy.exe";
41 const LINTCHECK_DOWNLOADS: &str = "target/lintcheck/downloads";
42 const LINTCHECK_SOURCES: &str = "target/lintcheck/sources";
44 /// List of sources to check, loaded from a .toml file
45 #[derive(Debug, Serialize, Deserialize)]
47 crates: HashMap<String, TomlCrate>,
50 /// A crate source stored inside the .toml
/// will be translated into one of the `CrateSource` variants
52 #[derive(Debug, Serialize, Deserialize)]
55 versions: Option<Vec<String>>,
56 git_url: Option<String>,
57 git_hash: Option<String>,
59 options: Option<Vec<String>>,
62 /// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
63 /// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
64 #[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
69 options: Option<Vec<String>>,
75 options: Option<Vec<String>>,
80 options: Option<Vec<String>>,
84 /// Represents the actual source code of a crate that we ran "cargo clippy" on
89 // path to the extracted sources that clippy can check
91 options: Option<Vec<String>>,
94 /// A single warning that clippy issued while checking a `Crate`
96 struct ClippyWarning {
108 fn new(cargo_message: Message, krate: &Crate) -> Option<Self> {
109 let diag = match cargo_message {
110 Message::CompilerMessage(message) => message.message,
114 let lint_type = diag.code?.code;
115 if !(lint_type.contains("clippy") || diag.message.contains("clippy"))
116 || diag.message.contains("could not read cargo metadata")
121 let span = diag.spans.into_iter().find(|span| span.is_primary)?;
123 let file = match Path::new(&span.file_name).strip_prefix(env!("CARGO_HOME")) {
124 Ok(stripped) => format!("$CARGO_HOME/{}", stripped.display()),
126 "target/lintcheck/sources/{}-{}/{}",
127 krate.name, krate.version, span.file_name
132 crate_name: krate.name.clone(),
134 line: span.line_start,
135 column: span.column_start,
137 message: diag.message,
138 is_ice: diag.level == DiagnosticLevel::Ice,
142 fn to_output(&self, markdown: bool) -> String {
143 let file_with_pos = format!("{}:{}:{}", &self.file, &self.line, &self.column);
145 let lint = format!("`{}`", self.lint_type);
147 let mut file = self.file.clone();
148 if !file.starts_with('$') {
149 file.insert_str(0, "../");
152 let mut output = String::from("| ");
153 let _ = write!(output, "[`{}`]({}#L{})", file_with_pos, file, self.line);
154 let _ = write!(output, r#" | {:<50} | "{}" |"#, lint, self.message);
158 format!("{} {} \"{}\"\n", file_with_pos, self.lint_type, self.message)
163 fn get(path: &str) -> Result<ureq::Response, ureq::Error> {
164 const MAX_RETRIES: u8 = 4;
167 match ureq::get(path).call() {
168 Ok(res) => return Ok(res),
169 Err(e) if retries >= MAX_RETRIES => return Err(e),
170 Err(ureq::Error::Transport(e)) => eprintln!("Error: {}", e),
171 Err(e) => return Err(e),
173 eprintln!("retrying in {} seconds...", retries);
174 thread::sleep(Duration::from_secs(retries as u64));
180 /// Makes the sources available on the disk for clippy to check.
181 /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
182 /// copies a local folder
183 fn download_and_extract(&self) -> Crate {
185 CrateSource::CratesIo { name, version, options } => {
186 let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
187 let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
189 // url to download the crate from crates.io
190 let url = format!("https://crates.io/api/v1/crates/{}/{}/download", name, version);
191 println!("Downloading and extracting {} {} from {}", name, version, url);
192 create_dirs(&krate_download_dir, &extract_dir);
194 let krate_file_path = krate_download_dir.join(format!("{}-{}.crate.tar.gz", name, version));
195 // don't download/extract if we already have done so
196 if !krate_file_path.is_file() {
197 // create a file path to download and write the crate data into
198 let mut krate_dest = std::fs::File::create(&krate_file_path).unwrap();
199 let mut krate_req = get(&url).unwrap().into_reader();
200 // copy the crate into the file
201 std::io::copy(&mut krate_req, &mut krate_dest).unwrap();
204 let ungz_tar = flate2::read::GzDecoder::new(std::fs::File::open(&krate_file_path).unwrap());
205 // extract the tar archive
206 let mut archive = tar::Archive::new(ungz_tar);
207 archive.unpack(&extract_dir).expect("Failed to extract!");
209 // crate is extracted, return a new Krate object which contains the path to the extracted
210 // sources that clippy can check
212 version: version.clone(),
214 path: extract_dir.join(format!("{}-{}/", name, version)),
215 options: options.clone(),
225 let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
226 // add a -git suffix in case we have the same crate from crates.io and a git repo
227 repo_path.push(format!("{}-git", name));
230 // clone the repo if we have not done so
231 if !repo_path.is_dir() {
232 println!("Cloning {} and checking out {}", url, commit);
233 if !Command::new("git")
238 .expect("Failed to clone git repo!")
241 eprintln!("Failed to clone {} into {}", url, repo_path.display())
244 // check out the commit/branch/whatever
245 if !Command::new("git")
248 .current_dir(&repo_path)
250 .expect("Failed to check out commit")
253 eprintln!("Failed to checkout {} of repo at {}", commit, repo_path.display())
257 version: commit.clone(),
260 options: options.clone(),
263 CrateSource::Path { name, path, options } => {
264 // copy path into the dest_crate_root but skip directories that contain a CACHEDIR.TAG file.
265 // The target/ directory contains a CACHEDIR.TAG file so it is the most commonly skipped directory
266 // as a result of this filter.
267 let dest_crate_root = PathBuf::from(LINTCHECK_SOURCES).join(name);
268 if dest_crate_root.exists() {
269 println!("Deleting existing directory at {:?}", dest_crate_root);
270 std::fs::remove_dir_all(&dest_crate_root).unwrap();
273 println!("Copying {:?} to {:?}", path, dest_crate_root);
275 fn is_cache_dir(entry: &DirEntry) -> bool {
276 std::fs::read(entry.path().join("CACHEDIR.TAG"))
277 .map(|x| x.starts_with(b"Signature: 8a477f597d28d172789f06886806bc55"))
281 for entry in WalkDir::new(path).into_iter().filter_entry(|e| !is_cache_dir(e)) {
282 let entry = entry.unwrap();
283 let entry_path = entry.path();
284 let relative_entry_path = entry_path.strip_prefix(path).unwrap();
285 let dest_path = dest_crate_root.join(relative_entry_path);
286 let metadata = entry_path.symlink_metadata().unwrap();
288 if metadata.is_dir() {
289 std::fs::create_dir(dest_path).unwrap();
290 } else if metadata.is_file() {
291 std::fs::copy(entry_path, dest_path).unwrap();
296 version: String::from("local"),
298 path: dest_crate_root,
299 options: options.clone(),
307 /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
311 cargo_clippy_path: &Path,
312 target_dir_index: &AtomicUsize,
313 total_crates_to_lint: usize,
314 config: &LintcheckConfig,
315 lint_filter: &Vec<String>,
316 ) -> Vec<ClippyWarning> {
317 // advance the atomic index by one
318 let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
319 // "loop" the index within 0..thread_limit
320 let thread_index = index % config.max_jobs;
321 let perc = (index * 100) / total_crates_to_lint;
323 if config.max_jobs == 1 {
325 "{}/{} {}% Linting {} {}",
326 index, total_crates_to_lint, perc, &self.name, &self.version
330 "{}/{} {}% Linting {} {} in target dir {:?}",
331 index, total_crates_to_lint, perc, &self.name, &self.version, thread_index
335 let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
337 let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");
339 let mut args = if config.fix {
342 vec!["--", "--message-format=json", "--"]
345 if let Some(options) = &self.options {
350 args.extend(&["-Wclippy::pedantic", "-Wclippy::cargo"])
353 if lint_filter.is_empty() {
354 args.push("--cap-lints=warn");
356 args.push("--cap-lints=allow");
357 args.extend(lint_filter.iter().map(|filter| filter.as_str()))
360 let all_output = std::process::Command::new(&cargo_clippy_path)
361 // use the looping index to create individual target dirs
364 shared_target_dir.join(format!("_{:?}", thread_index)),
366 // lint warnings will look like this:
367 // src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
369 .current_dir(&self.path)
371 .unwrap_or_else(|error| {
373 "Encountered error:\n{:?}\ncargo_clippy_path: {}\ncrate path:{}\n",
375 &cargo_clippy_path.display(),
379 let stdout = String::from_utf8_lossy(&all_output.stdout);
380 let stderr = String::from_utf8_lossy(&all_output.stderr);
381 let status = &all_output.status;
383 if !status.success() {
385 "\nWARNING: bad exit status after checking {} {} \n",
386 self.name, self.version
391 if let Some(stderr) = stderr
393 .find(|line| line.contains("failed to automatically apply fixes suggested by rustc to crate"))
395 let subcrate = &stderr[63..];
397 "ERROR: failed to apply some suggetion to {} / to (sub)crate {}",
401 // fast path, we don't need the warnings anyway
405 // get all clippy warnings and ICEs
406 let warnings: Vec<ClippyWarning> = Message::parse_stream(stdout.as_bytes())
407 .filter_map(|msg| ClippyWarning::new(msg.unwrap(), &self))
414 /// Builds clippy inside the repo to make sure we have a clippy executable we can use.
416 let status = Command::new("cargo")
419 .expect("Failed to build clippy!");
420 if !status.success() {
421 eprintln!("Error: Failed to compile Clippy!");
422 std::process::exit(1);
426 /// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy
427 fn read_crates(toml_path: &Path) -> Vec<CrateSource> {
428 let toml_content: String =
429 std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
430 let crate_list: SourceList =
431 toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e));
432 // parse the hashmap of the toml file into a list of crates
433 let tomlcrates: Vec<TomlCrate> = crate_list
436 .map(|(_cratename, tomlcrate)| tomlcrate)
    // flatten TomlCrates into CrateSources (one TomlCrate may represent several versions of a crate =>
    // multiple CrateSources)
441 let mut crate_sources = Vec::new();
442 tomlcrates.into_iter().for_each(|tk| {
443 if let Some(ref path) = tk.path {
444 crate_sources.push(CrateSource::Path {
445 name: tk.name.clone(),
446 path: PathBuf::from(path),
447 options: tk.options.clone(),
449 } else if let Some(ref versions) = tk.versions {
450 // if we have multiple versions, save each one
451 versions.iter().for_each(|ver| {
452 crate_sources.push(CrateSource::CratesIo {
453 name: tk.name.clone(),
454 version: ver.to_string(),
455 options: tk.options.clone(),
458 } else if tk.git_url.is_some() && tk.git_hash.is_some() {
459 // otherwise, we should have a git source
460 crate_sources.push(CrateSource::Git {
461 name: tk.name.clone(),
462 url: tk.git_url.clone().unwrap(),
463 commit: tk.git_hash.clone().unwrap(),
464 options: tk.options.clone(),
467 panic!("Invalid crate source: {tk:?}");
470 // if we have a version as well as a git data OR only one git data, something is funky
471 if tk.versions.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
472 || tk.git_hash.is_some() != tk.git_url.is_some()
474 eprintln!("tomlkrate: {:?}", tk);
475 if tk.git_hash.is_some() != tk.git_url.is_some() {
476 panic!("Error: Encountered TomlCrate with only one of git_hash and git_url!");
478 if tk.path.is_some() && (tk.git_hash.is_some() || tk.versions.is_some()) {
479 panic!("Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields");
481 unreachable!("Failed to translate TomlCrate into CrateSource!");
485 crate_sources.sort();
490 /// Generate a short list of occurring lints-types and their count
491 fn gather_stats(clippy_warnings: &[ClippyWarning]) -> (String, HashMap<&String, usize>) {
492 // count lint type occurrences
493 let mut counter: HashMap<&String, usize> = HashMap::new();
496 .for_each(|wrn| *counter.entry(&wrn.lint_type).or_insert(0) += 1);
498 // collect into a tupled list for sorting
499 let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
500 // sort by "000{count} {clippy::lintname}"
501 // to not have a lint with 200 and 2 warnings take the same spot
502 stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
504 let mut header = String::from("| lint | count |\n");
505 header.push_str("| -------------------------------------------------- | ----- |\n");
506 let stats_string = stats
508 .map(|(lint, count)| format!("| {:<50} | {:>4} |\n", lint, count))
509 .fold(header, |mut table, line| {
510 table.push_str(&line);
514 (stats_string, counter)
/// Check if the latest modification of the logfile is older than the modification date of the
/// clippy binary; if so, we should clean the lintcheck shared target directory and recheck.
519 fn lintcheck_needs_rerun(lintcheck_logs_path: &Path) -> bool {
520 if !lintcheck_logs_path.exists() {
524 let clippy_modified: std::time::SystemTime = {
525 let mut times = [CLIPPY_DRIVER_PATH, CARGO_CLIPPY_PATH].iter().map(|p| {
527 .expect("failed to get metadata of file")
529 .expect("failed to get modification date")
        // the most recent modification of either of the binaries
532 std::cmp::max(times.next().unwrap(), times.next().unwrap())
535 let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_logs_path)
536 .expect("failed to get metadata of file")
538 .expect("failed to get modification date");
    // time is represented in seconds since X
    // logs_modified 2 and clippy_modified 5 means clippy binary is newer and we need to recheck
542 logs_modified < clippy_modified
546 // assert that we launch lintcheck from the repo root (via cargo lintcheck)
547 if std::fs::metadata("lintcheck/Cargo.toml").is_err() {
548 eprintln!("lintcheck needs to be run from clippy's repo root!\nUse `cargo lintcheck` alternatively.");
549 std::process::exit(3);
552 let config = LintcheckConfig::new();
554 println!("Compiling clippy...");
556 println!("Done compiling");
558 // if the clippy bin is newer than our logs, throw away target dirs to force clippy to
560 if lintcheck_needs_rerun(&config.lintcheck_results_path) {
561 let shared_target_dir = "target/lintcheck/shared_target_dir";
562 // if we get an Err here, the shared target dir probably does simply not exist
563 if let Ok(metadata) = std::fs::metadata(&shared_target_dir) {
564 if metadata.is_dir() {
565 println!("Clippy is newer than lint check logs, clearing lintcheck shared target dir...");
566 std::fs::remove_dir_all(&shared_target_dir)
567 .expect("failed to remove target/lintcheck/shared_target_dir");
572 let cargo_clippy_path: PathBuf = PathBuf::from(CARGO_CLIPPY_PATH)
574 .expect("failed to canonicalize path to clippy binary");
576 // assert that clippy is found
578 cargo_clippy_path.is_file(),
579 "target/debug/cargo-clippy binary not found! {}",
580 cargo_clippy_path.display()
583 let clippy_ver = std::process::Command::new(CARGO_CLIPPY_PATH)
586 .map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
587 .expect("could not get clippy version!");
589 // download and extract the crates, then run clippy on them and collect clippy's warnings
590 // flatten into one big list of warnings
592 let crates = read_crates(&config.sources_toml_path);
593 let old_stats = read_stats_from_file(&config.lintcheck_results_path);
595 let counter = AtomicUsize::new(1);
596 let lint_filter: Vec<String> = config
600 let mut filter = filter.clone();
601 filter.insert_str(0, "--force-warn=");
606 let crates: Vec<Crate> = crates
609 if let Some(only_one_crate) = &config.only {
610 let name = match krate {
611 CrateSource::CratesIo { name, .. }
612 | CrateSource::Git { name, .. }
613 | CrateSource::Path { name, .. } => name,
616 name == only_one_crate
621 .map(|krate| krate.download_and_extract())
624 if crates.is_empty() {
626 "ERROR: could not find crate '{}' in lintcheck/lintcheck_crates.toml",
627 config.only.unwrap(),
629 std::process::exit(1);
632 // run parallel with rayon
634 // This helps when we check many small crates with dep-trees that don't have a lot of branches in
635 // order to achieve some kind of parallelism
637 rayon::ThreadPoolBuilder::new()
638 .num_threads(config.max_jobs)
642 let clippy_warnings: Vec<ClippyWarning> = crates
644 .flat_map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, crates.len(), &config, &lint_filter))
647 // if we are in --fix mode, don't change the log files, terminate here
652 // generate some stats
653 let (stats_formatted, new_stats) = gather_stats(&clippy_warnings);
655 // grab crashes/ICEs, save the crate name and the ice message
656 let ices: Vec<(&String, &String)> = clippy_warnings
658 .filter(|warning| warning.is_ice)
659 .map(|w| (&w.crate_name, &w.message))
662 let mut all_msgs: Vec<String> = clippy_warnings
664 .map(|warn| warn.to_output(config.markdown))
667 all_msgs.push("\n\n### Stats:\n\n".into());
668 all_msgs.push(stats_formatted);
670 // save the text into lintcheck-logs/logs.txt
671 let mut text = clippy_ver; // clippy version number on top
672 text.push_str("\n### Reports\n\n");
674 text.push_str("| file | lint | message |\n");
675 text.push_str("| --- | --- | --- |\n");
677 write!(text, "{}", all_msgs.join("")).unwrap();
678 text.push_str("\n\n### ICEs:\n");
679 for (cratename, msg) in ices.iter() {
680 let _ = write!(text, "{}: '{}'", cratename, msg);
683 println!("Writing logs to {}", config.lintcheck_results_path.display());
684 std::fs::create_dir_all(config.lintcheck_results_path.parent().unwrap()).unwrap();
685 write(&config.lintcheck_results_path, text).unwrap();
687 print_stats(old_stats, new_stats, &config.lint_filter);
/// Read the per-lint counts of the previous run from the lintcheck log file.
///
/// Returns an empty map when the file cannot be read or contains no `### Stats:` table.
/// Table rows whose count cell is not a number are skipped instead of aborting the run.
fn read_stats_from_file(file_path: &Path) -> HashMap<String, usize> {
    let file_content = match std::fs::read_to_string(file_path) {
        Ok(content) => content,
        Err(_) => return HashMap::new(),
    };

    file_content
        .lines()
        .skip_while(|line| *line != "### Stats:")
        // Skipping the `Stats:` label, the blank line after it and the two table header rows
        .skip(4)
        .take_while(|line| line.starts_with("| "))
        .filter_map(|line| {
            let mut cells = line.split('|');
            // Skip the (empty) text in front of the first `|` symbol
            cells.next();
            let lint = cells.next()?.trim();
            let count = cells.next()?.trim().parse::<usize>().ok()?;
            Some((lint.to_string(), count))
        })
        .collect()
}
/// Print how lint counts changed between runs.
///
/// Three groups are written to stdout, each sorted by lint name: lints that only occur in
/// `new_stats`, lints whose count differs between the two maps, and lints that only occur in
/// `old_stats`. The last group is restricted to lints named in `lint_filter` unless the filter
/// is empty. Lints with identical counts in both maps are not printed.
fn print_stats(old_stats: HashMap<String, usize>, new_stats: HashMap<&String, usize>, lint_filter: &Vec<String>) {
    // `HashMap` iteration order is arbitrary; sort so that the output of two runs is comparable.
    let mut new_sorted: Vec<(&String, usize)> = new_stats.iter().map(|(lint, count)| (*lint, *count)).collect();
    new_sorted.sort_unstable();
    let mut old_sorted: Vec<(&String, usize)> = old_stats.iter().map(|(lint, count)| (lint, *count)).collect();
    old_sorted.sort_unstable();

    println!("\nStats:");

    // list all new counts (key is in new stats but not in old stats)
    for &(lint, new_count) in &new_sorted {
        if !old_stats.contains_key(lint.as_str()) {
            println!("{} 0 => {}", lint, new_count);
        }
    }

    // list all changed counts (key is in both maps but value differs)
    for &(lint, new_count) in &new_sorted {
        if let Some(old_count) = old_stats.get(lint.as_str()) {
            if *old_count != new_count {
                println!("{} {} => {}", lint, old_count, new_count);
            }
        }
    }

    // list all gone counts (key is in old stats but not in new stats)
    for &(lint, old_count) in &old_sorted {
        let is_gone = !new_stats.contains_key(&lint);
        if is_gone && (lint_filter.is_empty() || lint_filter.contains(lint)) {
            println!("{} {} => 0", lint, old_count);
        }
    }
}
/// Create necessary directories to run the lintcheck tool.
///
/// Creates `target/lintcheck/`, `krate_download_dir` and `extract_dir`, including any missing
/// parent directories. Directories that already exist are left as they are.
///
/// # Panics
///
/// This function panics if creating one of the dirs fails.
fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
    // `create_dir_all` also creates missing parents (e.g. `target/` itself), which a plain
    // `create_dir` would report as an error.
    let ensure_dir = |dir: &Path, failure: &str| {
        if let Err(err) = std::fs::create_dir_all(dir) {
            if err.kind() != ErrorKind::AlreadyExists {
                panic!("{}: {}", failure, err);
            }
        }
    };

    ensure_dir(Path::new("target/lintcheck/"), "cannot create lintcheck target dir");
    ensure_dir(krate_download_dir, "cannot create crate download dir");
    ensure_dir(extract_dir, "cannot create crate extraction dir");
}
789 /// Returns the path to the Clippy project directory
791 fn clippy_project_root() -> &'static Path {
792 Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap()
796 fn lintcheck_test() {
802 "./lintcheck/Cargo.toml",
805 "lintcheck/test_sources.toml",
807 let status = std::process::Command::new("cargo")
809 .current_dir("..") // repo root
813 assert!(status.unwrap().success());