1 // Run clippy on a fixed set of crates and collect the warnings.
2 // This helps observe the impact clippy changes have on a set of real-world code.
4 // When a new lint is introduced, we can search the results for new warnings and check for false
7 #![cfg(feature = "lintcheck")]
8 #![allow(clippy::filter_map, clippy::collapsible_else_if)]
10 use crate::clippy_project_root;
12 use std::process::Command;
13 use std::sync::atomic::{AtomicUsize, Ordering};
14 use std::{collections::HashMap, io::ErrorKind};
18 path::{Path, PathBuf},
22 use rayon::prelude::*;
23 use serde::{Deserialize, Serialize};
24 use serde_json::Value;
26 const CLIPPY_DRIVER_PATH: &str = "target/debug/clippy-driver";
27 const CARGO_CLIPPY_PATH: &str = "target/debug/cargo-clippy";
29 const LINTCHECK_DOWNLOADS: &str = "target/lintcheck/downloads";
30 const LINTCHECK_SOURCES: &str = "target/lintcheck/sources";
32 /// List of sources to check, loaded from a .toml file
33 #[derive(Debug, Serialize, Deserialize)]
35 crates: HashMap<String, TomlCrate>,
38 /// A crate source stored inside the .toml
39 /// will be translated into one of the `CrateSource` variants
40 #[derive(Debug, Serialize, Deserialize)]
43 versions: Option<Vec<String>>,
44 git_url: Option<String>,
45 git_hash: Option<String>,
47 options: Option<Vec<String>>,
50 /// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
51 /// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
52 #[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
57 options: Option<Vec<String>>,
63 options: Option<Vec<String>>,
68 options: Option<Vec<String>>,
72 /// Represents the actual source code of a crate that we ran "cargo clippy" on
77 // path to the extracted sources that clippy can check
79 options: Option<Vec<String>>,
82 /// A single warning that clippy issued while checking a `Crate`
84 struct ClippyWarning {
86 crate_version: String,
95 impl std::fmt::Display for ClippyWarning {
96 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99 r#"target/lintcheck/sources/{}-{}/{}:{}:{} {} "{}""#,
100 &self.crate_name, &self.crate_version, &self.file, &self.line, &self.column, &self.linttype, &self.message
106 /// Makes the sources available on the disk for clippy to check.
107 /// Clones a git repo and checks out the specified commit or downloads a crate from crates.io or
108 /// copies a local folder
109 fn download_and_extract(&self) -> Crate {
111 CrateSource::CratesIo { name, version, options } => {
112 let extract_dir = PathBuf::from(LINTCHECK_SOURCES);
113 let krate_download_dir = PathBuf::from(LINTCHECK_DOWNLOADS);
115 // url to download the crate from crates.io
116 let url = format!("https://crates.io/api/v1/crates/{}/{}/download", name, version);
117 println!("Downloading and extracting {} {} from {}", name, version, url);
118 create_dirs(&krate_download_dir, &extract_dir);
120 let krate_file_path = krate_download_dir.join(format!("{}-{}.crate.tar.gz", name, version));
121 // don't download/extract if we already have done so
122 if !krate_file_path.is_file() {
123 // create a file path to download and write the crate data into
124 let mut krate_dest = std::fs::File::create(&krate_file_path).unwrap();
125 let mut krate_req = ureq::get(&url).call().unwrap().into_reader();
126 // copy the crate into the file
127 std::io::copy(&mut krate_req, &mut krate_dest).unwrap();
130 let ungz_tar = flate2::read::GzDecoder::new(std::fs::File::open(&krate_file_path).unwrap());
131 // extract the tar archive
132 let mut archive = tar::Archive::new(ungz_tar);
133 archive.unpack(&extract_dir).expect("Failed to extract!");
135 // crate is extracted, return a new `Crate` object which contains the path to the extracted
136 // sources that clippy can check
138 version: version.clone(),
140 path: extract_dir.join(format!("{}-{}/", name, version)),
141 options: options.clone(),
151 let mut repo_path = PathBuf::from(LINTCHECK_SOURCES);
152 // add a -git suffix in case we have the same crate from crates.io and a git repo
153 repo_path.push(format!("{}-git", name));
156 // clone the repo if we have not done so
157 if !repo_path.is_dir() {
158 println!("Cloning {} and checking out {}", url, commit);
159 if !Command::new("git")
164 .expect("Failed to clone git repo!")
167 eprintln!("Failed to clone {} into {}", url, repo_path.display())
170 // check out the commit/branch/whatever
171 if !Command::new("git")
174 .current_dir(&repo_path)
176 .expect("Failed to check out commit")
179 eprintln!("Failed to checkout {} of repo at {}", commit, repo_path.display())
183 version: commit.clone(),
186 options: options.clone(),
189 CrateSource::Path { name, path, options } => {
192 // simply copy the entire directory into our target dir
193 let copy_dest = PathBuf::from(format!("{}/", LINTCHECK_SOURCES));
195 // the source path of the crate we copied, ${copy_dest}/crate_name
196 let crate_root = copy_dest.join(name); // .../crates/local_crate
198 if crate_root.exists() {
200 "Not copying {} to {}, destination already exists",
205 println!("Copying {} to {}", path.display(), copy_dest.display());
207 dir::copy(path, ©_dest, &dir::CopyOptions::new()).unwrap_or_else(|_| {
208 panic!("Failed to copy from {}, to {}", path.display(), crate_root.display())
213 version: String::from("local"),
216 options: options.clone(),
224 /// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
228 cargo_clippy_path: &Path,
229 target_dir_index: &AtomicUsize,
231 total_crates_to_lint: usize,
233 ) -> Vec<ClippyWarning> {
234 // advance the atomic index by one
235 let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
236 // "loop" the index within 0..thread_limit
237 let thread_index = index % thread_limit;
238 let perc = (index * 100) / total_crates_to_lint;
240 if thread_limit == 1 {
242 "{}/{} {}% Linting {} {}",
243 index, total_crates_to_lint, perc, &self.name, &self.version
247 "{}/{} {}% Linting {} {} in target dir {:?}",
248 index, total_crates_to_lint, perc, &self.name, &self.version, thread_index
252 let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();
254 let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");
256 let mut args = if fix {
258 "-Zunstable-options",
260 "-Zunstable-options",
266 vec!["--", "--message-format=json", "--", "--cap-lints=warn"]
269 if let Some(options) = &self.options {
274 args.extend(&["-Wclippy::pedantic", "-Wclippy::cargo"])
277 let all_output = std::process::Command::new(&cargo_clippy_path)
278 // use the looping index to create individual target dirs
281 shared_target_dir.join(format!("_{:?}", thread_index)),
283 // lint warnings will look like this:
284 // src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
286 .current_dir(&self.path)
288 .unwrap_or_else(|error| {
290 "Encountered error:\n{:?}\ncargo_clippy_path: {}\ncrate path:{}\n",
292 &cargo_clippy_path.display(),
296 let stdout = String::from_utf8_lossy(&all_output.stdout);
297 let stderr = String::from_utf8_lossy(&all_output.stderr);
300 if let Some(stderr) = stderr
302 .find(|line| line.contains("failed to automatically apply fixes suggested by rustc to crate"))
304 let subcrate = &stderr[63..];
306 "ERROR: failed to apply some suggetion to {} / to (sub)crate {}",
310 // fast path, we don't need the warnings anyway
314 let output_lines = stdout.lines();
315 let warnings: Vec<ClippyWarning> = output_lines
317 // get all clippy warnings and ICEs
318 .filter(|line| filter_clippy_warnings(&line))
319 .map(|json_msg| parse_json_message(json_msg, &self))
327 struct LintcheckConfig {
328 // max number of jobs to spawn (default 1)
330 // we read the sources to check from here
331 sources_toml_path: PathBuf,
332 // we save the clippy lint results here
333 lintcheck_results_path: PathBuf,
334 // whether to just run --fix and not collect all the warnings
338 impl LintcheckConfig {
339 fn from_clap(clap_config: &ArgMatches) -> Self {
340 // first, check if we got anything passed via the LINTCHECK_TOML env var,
341 // if not, ask clap if we got any value for --crates-toml <foo>
342 // if not, use the default "clippy_dev/lintcheck_crates.toml"
343 let sources_toml = env::var("LINTCHECK_TOML").unwrap_or_else(|_| {
345 .value_of("crates-toml")
347 .unwrap_or("clippy_dev/lintcheck_crates.toml")
351 let sources_toml_path = PathBuf::from(sources_toml);
353 // for the path where we save the lint results, get the filename without extension (so for
354 // wasd.toml, use "wasd"...)
355 let filename: PathBuf = sources_toml_path.file_stem().unwrap().into();
356 let lintcheck_results_path = PathBuf::from(format!("lintcheck-logs/{}_logs.txt", filename.display()));
358 // look at the --threads arg, if 0 is passed, ask rayon how many threads it would spawn and
359 // use half of that for the physical core count
360 // by default use a single thread
361 let max_jobs = match clap_config.value_of("threads") {
363 let threads: usize = threads
365 .unwrap_or_else(|_| panic!("Failed to parse '{}' to a digit", threads));
368 // Rayon seems to return thread count so halve that for core count
369 (rayon::current_num_threads() / 2) as usize
374 // no -j passed, use a single thread
377 let fix: bool = clap_config.is_present("fix");
382 lintcheck_results_path,
388 /// takes a single json-formatted clippy warning and returns true (we are interested in that line)
389 /// or false (we aren't)
390 fn filter_clippy_warnings(line: &str) -> bool {
391 // we want to collect ICEs because clippy might have crashed.
392 // these are summarized later
393 if line.contains("internal compiler error: ") {
396 // in general, we want all clippy warnings
397 // however due to some kind of bug, sometimes there are absolute paths
398 // to libcore files inside the message
399 // or we end up with cargo-metadata output (https://github.com/rust-lang/rust-clippy/issues/6508)
401 // filter out these messages to avoid unnecessary noise in the logs
402 if line.contains("clippy::")
403 && !(line.contains("could not read cargo metadata")
404 || (line.contains(".rustup") && line.contains("toolchains")))
411 /// Builds clippy inside the repo to make sure we have a clippy executable we can use.
413 let status = Command::new("cargo")
416 .expect("Failed to build clippy!");
417 if !status.success() {
418 eprintln!("Error: Failed to compile Clippy!");
419 std::process::exit(1);
423 /// Read a `toml` file and return a list of `CrateSources` that we want to check with clippy
424 fn read_crates(toml_path: &Path) -> Vec<CrateSource> {
425 let toml_content: String =
426 std::fs::read_to_string(&toml_path).unwrap_or_else(|_| panic!("Failed to read {}", toml_path.display()));
427 let crate_list: SourceList =
428 toml::from_str(&toml_content).unwrap_or_else(|e| panic!("Failed to parse {}: \n{}", toml_path.display(), e));
429 // parse the hashmap of the toml file into a list of crates
430 let tomlcrates: Vec<TomlCrate> = crate_list
433 .map(|(_cratename, tomlcrate)| tomlcrate)
436 // flatten TomlCrates into CrateSources (one TomlCrate may represent several versions of a crate =>
437 // multiple CrateSources)
438 let mut crate_sources = Vec::new();
439 tomlcrates.into_iter().for_each(|tk| {
440 if let Some(ref path) = tk.path {
441 crate_sources.push(CrateSource::Path {
442 name: tk.name.clone(),
443 path: PathBuf::from(path),
444 options: tk.options.clone(),
448 // if we have multiple versions, save each one
449 if let Some(ref versions) = tk.versions {
450 versions.iter().for_each(|ver| {
451 crate_sources.push(CrateSource::CratesIo {
452 name: tk.name.clone(),
453 version: ver.to_string(),
454 options: tk.options.clone(),
458 // otherwise, we should have a git source
459 if tk.git_url.is_some() && tk.git_hash.is_some() {
460 crate_sources.push(CrateSource::Git {
461 name: tk.name.clone(),
462 url: tk.git_url.clone().unwrap(),
463 commit: tk.git_hash.clone().unwrap(),
464 options: tk.options.clone(),
467 // if we have a version as well as git data OR only one of the two git fields, something is funky
468 if tk.versions.is_some() && (tk.git_url.is_some() || tk.git_hash.is_some())
469 || tk.git_hash.is_some() != tk.git_url.is_some()
471 eprintln!("tomlkrate: {:?}", tk);
472 if tk.git_hash.is_some() != tk.git_url.is_some() {
473 panic!("Error: Encountered TomlCrate with only one of git_hash and git_url!");
475 if tk.path.is_some() && (tk.git_hash.is_some() || tk.versions.is_some()) {
476 panic!("Error: TomlCrate can only have one of 'git_.*', 'version' or 'path' fields");
478 unreachable!("Failed to translate TomlCrate into CrateSource!");
482 crate_sources.sort();
487 /// Parse the json output of clippy and return a `ClippyWarning`
488 fn parse_json_message(json_message: &str, krate: &Crate) -> ClippyWarning {
489 let jmsg: Value = serde_json::from_str(&json_message).unwrap_or_else(|e| panic!("Failed to parse json:\n{:?}", e));
492 crate_name: krate.name.to_string(),
493 crate_version: krate.version.to_string(),
494 file: jmsg["message"]["spans"][0]["file_name"]
498 line: jmsg["message"]["spans"][0]["line_start"]
502 column: jmsg["message"]["spans"][0]["text"][0]["highlight_start"]
506 linttype: jmsg["message"]["code"]["code"].to_string().trim_matches('"').into(),
507 message: jmsg["message"]["message"].to_string().trim_matches('"').into(),
508 is_ice: json_message.contains("internal compiler error: "),
512 /// Generate a short list of occurring lint types and their counts
513 fn gather_stats(clippy_warnings: &[ClippyWarning]) -> (String, HashMap<&String, usize>) {
514 // count lint type occurrences
515 let mut counter: HashMap<&String, usize> = HashMap::new();
518 .for_each(|wrn| *counter.entry(&wrn.linttype).or_insert(0) += 1);
520 // collect into a tupled list for sorting
521 let mut stats: Vec<(&&String, &usize)> = counter.iter().map(|(lint, count)| (lint, count)).collect();
522 // sort by "000{count} {clippy::lintname}"
523 // to not have a lint with 200 and 2 warnings take the same spot
524 stats.sort_by_key(|(lint, count)| format!("{:0>4}, {}", count, lint));
526 let stats_string = stats
528 .map(|(lint, count)| format!("{} {}\n", lint, count))
529 .collect::<String>();
531 (stats_string, counter)
534 /// check if the latest modification of the logfile is older than the modification date of the
535 /// clippy binary, if this is true, we should clean the lintcheck shared target directory and recheck
536 fn lintcheck_needs_rerun(lintcheck_logs_path: &Path) -> bool {
537 if !lintcheck_logs_path.exists() {
541 let clippy_modified: std::time::SystemTime = {
542 let mut times = [CLIPPY_DRIVER_PATH, CARGO_CLIPPY_PATH].iter().map(|p| {
544 .expect("failed to get metadata of file")
546 .expect("failed to get modification date")
548 // the most recent modification of either of the binaries
549 std::cmp::max(times.next().unwrap(), times.next().unwrap())
552 let logs_modified: std::time::SystemTime = std::fs::metadata(lintcheck_logs_path)
553 .expect("failed to get metadata of file")
555 .expect("failed to get modification date");
557 // time is represented in seconds since X
558 // logs_modified 2 and clippy_modified 5 means the clippy binary is newer than the logs and we need to recheck
559 logs_modified < clippy_modified
562 /// lintcheck's `main()` function
566 /// This function panics if the clippy binaries don't exist.
567 pub fn run(clap_config: &ArgMatches) {
568 let config = LintcheckConfig::from_clap(clap_config);
570 println!("Compiling clippy...");
572 println!("Done compiling");
574 // if the clippy bin is newer than our logs, throw away target dirs to force clippy to
576 if lintcheck_needs_rerun(&config.lintcheck_results_path) {
577 let shared_target_dir = "target/lintcheck/shared_target_dir";
578 // if we get an Err here, the shared target dir probably simply does not exist
579 if let Ok(metadata) = std::fs::metadata(&shared_target_dir) {
580 if metadata.is_dir() {
581 println!("Clippy is newer than lint check logs, clearing lintcheck shared target dir...");
582 std::fs::remove_dir_all(&shared_target_dir)
583 .expect("failed to remove target/lintcheck/shared_target_dir");
588 let cargo_clippy_path: PathBuf = PathBuf::from(CARGO_CLIPPY_PATH)
590 .expect("failed to canonicalize path to clippy binary");
592 // assert that clippy is found
594 cargo_clippy_path.is_file(),
595 "target/debug/cargo-clippy binary not found! {}",
596 cargo_clippy_path.display()
599 let clippy_ver = std::process::Command::new(CARGO_CLIPPY_PATH)
602 .map(|o| String::from_utf8_lossy(&o.stdout).into_owned())
603 .expect("could not get clippy version!");
605 // download and extract the crates, then run clippy on them and collect clippy's warnings
606 // flatten into one big list of warnings
608 let crates = read_crates(&config.sources_toml_path);
609 let old_stats = read_stats_from_file(&config.lintcheck_results_path);
611 let counter = AtomicUsize::new(1);
613 let clippy_warnings: Vec<ClippyWarning> = if let Some(only_one_crate) = clap_config.value_of("only") {
614 // if we don't have the specified crate in the .toml, throw an error
615 if !crates.iter().any(|krate| {
616 let name = match krate {
617 CrateSource::CratesIo { name, .. } | CrateSource::Git { name, .. } | CrateSource::Path { name, .. } => {
621 name == only_one_crate
624 "ERROR: could not find crate '{}' in clippy_dev/lintcheck_crates.toml",
627 std::process::exit(1);
630 // only check a single crate that was passed via cmdline
633 .map(|krate| krate.download_and_extract())
634 .filter(|krate| krate.name == only_one_crate)
635 .flat_map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1, config.fix))
638 if config.max_jobs > 1 {
639 // run parallel with rayon
641 // Ask rayon for thread count. Assume that half of that is the number of physical cores
642 // Use one target dir for each core so that we can run N clippys in parallel.
643 // We need to use different target dirs because cargo would lock them for a single build otherwise,
644 // killing the parallelism. However this also means that deps will only be reused half/a
645 // quarter of the time which might result in a longer wall clock runtime
647 // This helps when we check many small crates with dep-trees that don't have a lot of branches in
648 // order to achieve some kind of parallelism
650 // by default, use a single thread
651 let num_cpus = config.max_jobs;
652 let num_crates = crates.len();
654 // check all crates (default)
657 .map(|krate| krate.download_and_extract())
659 krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates, config.fix)
664 let num_crates = crates.len();
667 .map(|krate| krate.download_and_extract())
668 .flat_map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, 1, num_crates, config.fix))
673 // if we are in --fix mode, don't change the log files, terminate here
678 // generate some stats
679 let (stats_formatted, new_stats) = gather_stats(&clippy_warnings);
681 // grab crashes/ICEs, save the crate name and the ice message
682 let ices: Vec<(&String, &String)> = clippy_warnings
684 .filter(|warning| warning.is_ice)
685 .map(|w| (&w.crate_name, &w.message))
688 let mut all_msgs: Vec<String> = clippy_warnings.iter().map(ToString::to_string).collect();
690 all_msgs.push("\n\n\n\nStats:\n".into());
691 all_msgs.push(stats_formatted);
693 // save the text into lintcheck-logs/logs.txt
694 let mut text = clippy_ver; // clippy version number on top
695 text.push_str(&format!("\n{}", all_msgs.join("")));
696 text.push_str("ICEs:\n");
698 .for_each(|(cratename, msg)| text.push_str(&format!("{}: '{}'", cratename, msg)));
700 println!("Writing logs to {}", config.lintcheck_results_path.display());
701 write(&config.lintcheck_results_path, text).unwrap();
703 print_stats(old_stats, new_stats);
706 /// read the previous stats from the lintcheck-log file
707 fn read_stats_from_file(file_path: &Path) -> HashMap<String, usize> {
708 let file_content: String = match std::fs::read_to_string(file_path).ok() {
709 Some(content) => content,
711 return HashMap::new();
715 let lines: Vec<String> = file_content.lines().map(ToString::to_string).collect();
717 // search for the beginning "Stats:" and the end "ICEs:" of the section we want
718 let start = lines.iter().position(|line| line == "Stats:").unwrap();
719 let end = lines.iter().position(|line| line == "ICEs:").unwrap();
721 let stats_lines = &lines[start + 1..end];
726 let mut spl = line.split(' ');
728 spl.next().unwrap().to_string(),
729 spl.next().unwrap().parse::<usize>().unwrap(),
732 .collect::<HashMap<String, usize>>()
735 /// print how lint counts changed between runs
736 fn print_stats(old_stats: HashMap<String, usize>, new_stats: HashMap<&String, usize>) {
737 let same_in_both_hashmaps = old_stats
739 .filter(|(old_key, old_val)| new_stats.get::<&String>(&old_key) == Some(old_val))
740 .map(|(k, v)| (k.to_string(), *v))
741 .collect::<Vec<(String, usize)>>();
743 let mut old_stats_deduped = old_stats;
744 let mut new_stats_deduped = new_stats;
746 // remove duplicates from both hashmaps
747 same_in_both_hashmaps.iter().for_each(|(k, v)| {
748 assert!(old_stats_deduped.remove(k) == Some(*v));
749 assert!(new_stats_deduped.remove(k) == Some(*v));
752 println!("\nStats:");
754 // list all new counts (key is in new stats but not in old stats)
757 .filter(|(new_key, _)| old_stats_deduped.get::<str>(&new_key).is_none())
758 .for_each(|(new_key, new_value)| {
759 println!("{} 0 => {}", new_key, new_value);
762 // list all changed counts (key is in both maps but value differs)
765 .filter(|(new_key, _new_val)| old_stats_deduped.get::<str>(&new_key).is_some())
766 .for_each(|(new_key, new_val)| {
767 let old_val = old_stats_deduped.get::<str>(&new_key).unwrap();
768 println!("{} {} => {}", new_key, old_val, new_val);
771 // list all gone counts (key is in old stats but not in new stats)
774 .filter(|(old_key, _)| new_stats_deduped.get::<&String>(&old_key).is_none())
775 .for_each(|(old_key, old_value)| {
776 println!("{} {} => 0", old_key, old_value);
780 /// Create necessary directories to run the lintcheck tool.
784 /// This function panics if creating one of the dirs fails.
785 fn create_dirs(krate_download_dir: &Path, extract_dir: &Path) {
786 std::fs::create_dir("target/lintcheck/").unwrap_or_else(|err| {
787 if err.kind() != ErrorKind::AlreadyExists {
788 panic!("cannot create lintcheck target dir");
791 std::fs::create_dir(&krate_download_dir).unwrap_or_else(|err| {
792 if err.kind() != ErrorKind::AlreadyExists {
793 panic!("cannot create crate download dir");
796 std::fs::create_dir(&extract_dir).unwrap_or_else(|err| {
797 if err.kind() != ErrorKind::AlreadyExists {
798 panic!("cannot create crate extraction dir");
804 fn lintcheck_test() {
814 "clippy_dev/Cargo.toml",
820 "clippy_dev/test_sources.toml",
822 let status = std::process::Command::new("cargo")
824 .current_dir("../" /* repo root */)
827 assert!(status.unwrap().success());