]> git.lizzy.rs Git - rust.git/blob - src/librustc_incremental/persist/fs.rs
Rollup merge of #68093 - GuillaumeGomez:fix-deref-impl-typedef, r=oli-obk
[rust.git] / src / librustc_incremental / persist / fs.rs
1 //! This module manages how the incremental compilation cache is represented in
2 //! the file system.
3 //!
4 //! Incremental compilation caches are managed according to a copy-on-write
5 //! strategy: Once a complete, consistent cache version is finalized, it is
6 //! never modified. Instead, when a subsequent compilation session is started,
7 //! the compiler will allocate a new version of the cache that starts out as
8 //! a copy of the previous version. Then only this new copy is modified and it
9 //! will not be visible to other processes until it is finalized. This ensures
10 //! that multiple compiler processes can be executed concurrently for the same
11 //! crate without interfering with each other or blocking each other.
12 //!
13 //! More concretely this is implemented via the following protocol:
14 //!
15 //! 1. For a newly started compilation session, the compiler allocates a
16 //!    new `session` directory within the incremental compilation directory.
17 //!    This session directory will have a unique name that ends with the suffix
18 //!    "-working" and that contains a creation timestamp.
19 //! 2. Next, the compiler looks for the newest finalized session directory,
20 //!    that is, a session directory from a previous compilation session that
21 //!    has been marked as valid and consistent. A session directory is
22 //!    considered finalized if the "-working" suffix in the directory name has
23 //!    been replaced by the SVH of the crate.
24 //! 3. Once the compiler has found a valid, finalized session directory, it will
25 //!    hard-link/copy its contents into the new "-working" directory. If all
26 //!    goes well, it will have its own, private copy of the source directory and
27 //!    subsequently not have to worry about synchronizing with other compiler
28 //!    processes.
29 //! 4. Now the compiler can do its normal compilation process, which involves
30 //!    reading and updating its private session directory.
31 //! 5. When compilation finishes without errors, the private session directory
32 //!    will be in a state where it can be used as input for other compilation
33 //!    sessions. That is, it will contain a dependency graph and cache artifacts
34 //!    that are consistent with the state of the source code it was compiled
35 //!    from, with no need to change them ever again. At this point, the compiler
36 //!    finalizes and "publishes" its private session directory by renaming it
37 //!    from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
38 //! 6. At this point the "old" session directory that we copied our data from
39 //!    at the beginning of the session has become obsolete because we have just
40 //!    published a more current version. Thus the compiler will delete it.
41 //!
42 //! ## Garbage Collection
43 //!
44 //! Naively following the above protocol might lead to old session directories
45 //! piling up if a compiler instance crashes for some reason before it's able to
46 //! remove its private session directory. In order to avoid wasting disk space,
47 //! the compiler also does some garbage collection each time it is started in
48 //! incremental compilation mode. Specifically, it will scan the incremental
49 //! compilation directory for private session directories that are not in use
50 //! any more and will delete those. It will also delete any finalized session
51 //! directories for a given crate except for the most recent one.
52 //!
53 //! ## Synchronization
54 //!
55 //! There is some synchronization needed in order for the compiler to be able to
56 //! determine whether a given private session directory is not in use any more.
57 //! This is done by creating a lock file for each session directory and
58 //! locking it while the directory is still being used. Since file locks have
59 //! operating system support, we can rely on the lock being released if the
60 //! compiler process dies for some unexpected reason. Thus, when garbage
61 //! collecting private session directories, the collecting process can determine
62 //! whether the directory is still in use by trying to acquire a lock on the
63 //! file. If locking the file fails, the original process must still be alive.
64 //! If locking the file succeeds, we know that the owning process is not alive
65 //! any more and we can safely delete the directory.
66 //! There is still a small time window between the original process creating the
67 //! lock file and actually locking it. In order to minimize the chance that
68 //! another process tries to acquire the lock in just that instant, only
69 //! session directories that are older than a few seconds are considered for
70 //! garbage collection.
71 //!
72 //! Another case that has to be considered is what happens if one process
73 //! deletes a finalized session directory that another process is currently
74 //! trying to copy from. This case is also handled via the lock file. Before
75 //! a process starts copying a finalized session directory, it will acquire a
76 //! shared lock on the directory's lock file. Any garbage collecting process,
77 //! on the other hand, will acquire an exclusive lock on the lock file.
78 //! Thus, if a directory is being collected, any reader process will fail
79 //! acquiring the shared lock and will leave the directory alone. Conversely,
80 //! if a collecting process can't acquire the exclusive lock because the
81 //! directory is currently being read from, it will leave collecting that
82 //! directory to another process at a later point in time.
83 //! The exact same scheme is also used when reading the metadata hashes file
84 //! from an extern crate. When a crate is compiled, the hash values of its
85 //! metadata are stored in a file in its session directory. When the
86 //! compilation session of another crate imports the first crate's metadata,
87 //! it also has to read in the accompanying metadata hashes. It thus will access
88 //! the finalized session directory of all crates it links to and while doing
89 //! so, it will also place a read lock on the respective session directory
90 //! so that it won't be deleted while the metadata hashes are loaded.
91 //!
92 //! ## Preconditions
93 //!
94 //! This system relies on two features being available in the file system in
95 //! order to work really well: file locking and hard linking.
96 //! If hard linking is not available (like on FAT) the data in the cache
97 //! actually has to be copied at the beginning of each session.
98 //! If file locking does not work reliably (like on NFS), some of the
99 //! synchronization will go haywire.
100 //! In both cases we recommend to locate the incremental compilation directory
101 //! on a file system that supports these things.
102 //! It might be a good idea though to try and detect whether we are on an
103 //! unsupported file system and emit a warning in that case. This is not yet
104 //! implemented.
105
106 use rustc::session::{CrateDisambiguator, Session};
107 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
108 use rustc_data_structures::svh::Svh;
109 use rustc_data_structures::{base_n, flock};
110 use rustc_fs_util::{link_or_copy, LinkOrCopy};
111
112 use std::fs as std_fs;
113 use std::io;
114 use std::mem;
115 use std::path::{Path, PathBuf};
116 use std::time::{Duration, SystemTime, UNIX_EPOCH};
117
118 use rand::{thread_rng, RngCore};
119
120 #[cfg(test)]
121 mod tests;
122
// Suffix (including the leading dot) that marks a file in the crate directory
// as the lock file of a session directory.
123 const LOCK_FILE_EXT: &str = ".lock";
// Name of the dependency-graph file inside a session directory.
124 const DEP_GRAPH_FILENAME: &str = "dep-graph.bin";
// Name of the work-products file inside a session directory.
125 const WORK_PRODUCTS_FILENAME: &str = "work-products.bin";
// Name of the query-cache file inside a session directory.
126 const QUERY_CACHE_FILENAME: &str = "query-cache.bin";
127
128 // We encode integers using the following base, so they are shorter than decimal
129 // or hexadecimal numbers (we want short file and directory names). Since these
130 // numbers will be used in file names, we choose an encoding that is not
131 // case-sensitive (as opposed to base64, for example).
132 const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
133
134 pub fn dep_graph_path(sess: &Session) -> PathBuf {
135     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
136 }
137 pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf {
138     in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME)
139 }
140
141 pub fn work_products_path(sess: &Session) -> PathBuf {
142     in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
143 }
144
145 pub fn query_cache_path(sess: &Session) -> PathBuf {
146     in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME)
147 }
148
149 pub fn lock_file_path(session_dir: &Path) -> PathBuf {
150     let crate_dir = session_dir.parent().unwrap();
151
152     let directory_name = session_dir.file_name().unwrap().to_string_lossy();
153     assert_no_characters_lost(&directory_name);
154
155     let dash_indices: Vec<_> = directory_name.match_indices("-").map(|(idx, _)| idx).collect();
156     if dash_indices.len() != 3 {
157         bug!(
158             "Encountered incremental compilation session directory with \
159               malformed name: {}",
160             session_dir.display()
161         )
162     }
163
164     crate_dir.join(&directory_name[0..dash_indices[2]]).with_extension(&LOCK_FILE_EXT[1..])
165 }
166
167 pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
168     in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
169 }
170
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
174
175 /// Allocates the private session directory. The boolean in the Ok() result
176 /// indicates whether we should try loading a dep graph from the successfully
177 /// initialized directory, or not.
178 /// The post-condition of this fn is that we have a valid incremental
179 /// compilation session directory, if the result is `Ok`. A valid session
180 /// directory is one that contains a locked lock file. It may or may not contain
181 /// a dep-graph and work products from a previous session.
182 /// If the call fails, the fn may leave behind an invalid session directory.
183 /// The garbage collection will take care of it.
184 pub fn prepare_session_directory(
185     sess: &Session,
186     crate_name: &str,
187     crate_disambiguator: CrateDisambiguator,
188 ) {
189     if sess.opts.incremental.is_none() {
190         return;
191     }
192
193     let _timer = sess.timer("incr_comp_prepare_session_directory");
194
195     debug!("prepare_session_directory");
196
197     // {incr-comp-dir}/{crate-name-and-disambiguator}
198     let crate_dir = crate_path(sess, crate_name, crate_disambiguator);
199     debug!("crate-dir: {}", crate_dir.display());
200     if create_dir(sess, &crate_dir, "crate").is_err() {
201         return;
202     }
203
204     // Hack: canonicalize the path *after creating the directory*
205     // because, on windows, long paths can cause problems;
206     // canonicalization inserts this weird prefix that makes windows
207     // tolerate long paths.
208     let crate_dir = match crate_dir.canonicalize() {
209         Ok(v) => v,
210         Err(err) => {
211             sess.err(&format!(
212                 "incremental compilation: error canonicalizing path `{}`: {}",
213                 crate_dir.display(),
214                 err
215             ));
216             return;
217         }
218     };
219
220     let mut source_directories_already_tried = FxHashSet::default();
221
222     loop {
223         // Generate a session directory of the form:
224         //
225         // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
226         let session_dir = generate_session_dir_path(&crate_dir);
227         debug!("session-dir: {}", session_dir.display());
228
229         // Lock the new session directory. If this fails, return an
230         // error without retrying
231         let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) {
232             Ok(e) => e,
233             Err(_) => return,
234         };
235
236         // Now that we have the lock, we can actually create the session
237         // directory
238         if create_dir(sess, &session_dir, "session").is_err() {
239             return;
240         }
241
242         // Find a suitable source directory to copy from. Ignore those that we
243         // have already tried before.
244         let source_directory = find_source_directory(&crate_dir, &source_directories_already_tried);
245
246         let source_directory = if let Some(dir) = source_directory {
247             dir
248         } else {
249             // There's nowhere to copy from, we're done
250             debug!(
251                 "no source directory found. Continuing with empty session \
252                     directory."
253             );
254
255             sess.init_incr_comp_session(session_dir, directory_lock, false);
256             return;
257         };
258
259         debug!("attempting to copy data from source: {}", source_directory.display());
260
261         // Try copying over all files from the source directory
262         if let Ok(allows_links) = copy_files(sess, &session_dir, &source_directory) {
263             debug!("successfully copied data from: {}", source_directory.display());
264
265             if !allows_links {
266                 sess.warn(&format!(
267                     "Hard linking files in the incremental \
268                                         compilation cache failed. Copying files \
269                                         instead. Consider moving the cache \
270                                         directory to a file system which supports \
271                                         hard linking in session dir `{}`",
272                     session_dir.display()
273                 ));
274             }
275
276             sess.init_incr_comp_session(session_dir, directory_lock, true);
277             return;
278         } else {
279             debug!("copying failed - trying next directory");
280
281             // Something went wrong while trying to copy/link files from the
282             // source directory. Try again with a different one.
283             source_directories_already_tried.insert(source_directory);
284
285             // Try to remove the session directory we just allocated. We don't
286             // know if there's any garbage in it from the failed copy action.
287             if let Err(err) = safe_remove_dir_all(&session_dir) {
288                 sess.warn(&format!(
289                     "Failed to delete partly initialized \
290                                     session dir `{}`: {}",
291                     session_dir.display(),
292                     err
293                 ));
294             }
295
296             delete_session_dir_lock_file(sess, &lock_file_path);
297             mem::drop(directory_lock);
298         }
299     }
300 }
301
302 /// This function finalizes and thus 'publishes' the session directory by
303 /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
304 /// If there have been compilation errors, however, this function will just
305 /// delete the presumably invalid session directory.
306 pub fn finalize_session_directory(sess: &Session, svh: Svh) {
307     if sess.opts.incremental.is_none() {
308         return;
309     }
310
311     let _timer = sess.timer("incr_comp_finalize_session_directory");
312
313     let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
314
315     if sess.has_errors_or_delayed_span_bugs() {
316         // If there have been any errors during compilation, we don't want to
317         // publish this session directory. Rather, we'll just delete it.
318
319         debug!(
320             "finalize_session_directory() - invalidating session directory: {}",
321             incr_comp_session_dir.display()
322         );
323
324         if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
325             sess.warn(&format!(
326                 "Error deleting incremental compilation \
327                                 session directory `{}`: {}",
328                 incr_comp_session_dir.display(),
329                 err
330             ));
331         }
332
333         let lock_file_path = lock_file_path(&*incr_comp_session_dir);
334         delete_session_dir_lock_file(sess, &lock_file_path);
335         sess.mark_incr_comp_session_as_invalid();
336     }
337
338     debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
339
340     let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
341     assert_no_characters_lost(&old_sub_dir_name);
342
343     // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
344     // '-working' part with the SVH of the crate
345     let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-").map(|(idx, _)| idx).collect();
346     if dash_indices.len() != 3 {
347         bug!(
348             "Encountered incremental compilation session directory with \
349               malformed name: {}",
350             incr_comp_session_dir.display()
351         )
352     }
353
354     // State: "s-{timestamp}-{random-number}-"
355     let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
356
357     // Append the svh
358     base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
359
360     // Create the full path
361     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
362     debug!("finalize_session_directory() - new path: {}", new_path.display());
363
364     match std_fs::rename(&*incr_comp_session_dir, &new_path) {
365         Ok(_) => {
366             debug!("finalize_session_directory() - directory renamed successfully");
367
368             // This unlocks the directory
369             sess.finalize_incr_comp_session(new_path);
370         }
371         Err(e) => {
372             // Warn about the error. However, no need to abort compilation now.
373             sess.warn(&format!(
374                 "Error finalizing incremental compilation \
375                                session directory `{}`: {}",
376                 incr_comp_session_dir.display(),
377                 e
378             ));
379
380             debug!("finalize_session_directory() - error, marking as invalid");
381             // Drop the file lock, so we can garage collect
382             sess.mark_incr_comp_session_as_invalid();
383         }
384     }
385
386     let _ = garbage_collect_session_directories(sess);
387 }
388
389 pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
390     let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
391     for entry in sess_dir_iterator {
392         let entry = entry?;
393         safe_remove_file(&entry.path())?
394     }
395     Ok(())
396 }
397
398 fn copy_files(sess: &Session, target_dir: &Path, source_dir: &Path) -> Result<bool, ()> {
399     // We acquire a shared lock on the lock file of the directory, so that
400     // nobody deletes it out from under us while we are reading from it.
401     let lock_file_path = lock_file_path(source_dir);
402     let _lock = if let Ok(lock) = flock::Lock::new(
403         &lock_file_path,
404         false, // don't wait,
405         false, // don't create
406         false,
407     ) {
408         // not exclusive
409         lock
410     } else {
411         // Could not acquire the lock, don't try to copy from here
412         return Err(());
413     };
414
415     let source_dir_iterator = match source_dir.read_dir() {
416         Ok(it) => it,
417         Err(_) => return Err(()),
418     };
419
420     let mut files_linked = 0;
421     let mut files_copied = 0;
422
423     for entry in source_dir_iterator {
424         match entry {
425             Ok(entry) => {
426                 let file_name = entry.file_name();
427
428                 let target_file_path = target_dir.join(file_name);
429                 let source_path = entry.path();
430
431                 debug!("copying into session dir: {}", source_path.display());
432                 match link_or_copy(source_path, target_file_path) {
433                     Ok(LinkOrCopy::Link) => files_linked += 1,
434                     Ok(LinkOrCopy::Copy) => files_copied += 1,
435                     Err(_) => return Err(()),
436                 }
437             }
438             Err(_) => return Err(()),
439         }
440     }
441
442     if sess.opts.debugging_opts.incremental_info {
443         println!(
444             "[incremental] session directory: \
445                   {} files hard-linked",
446             files_linked
447         );
448         println!(
449             "[incremental] session directory: \
450                  {} files copied",
451             files_copied
452         );
453     }
454
455     Ok(files_linked > 0 || files_copied == 0)
456 }
457
458 /// Generates unique directory path of the form:
459 /// {crate_dir}/s-{timestamp}-{random-number}-working
460 fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
461     let timestamp = timestamp_to_string(SystemTime::now());
462     debug!("generate_session_dir_path: timestamp = {}", timestamp);
463     let random_number = thread_rng().next_u32();
464     debug!("generate_session_dir_path: random_number = {}", random_number);
465
466     let directory_name = format!(
467         "s-{}-{}-working",
468         timestamp,
469         base_n::encode(random_number as u128, INT_ENCODE_BASE)
470     );
471     debug!("generate_session_dir_path: directory_name = {}", directory_name);
472     let directory_path = crate_dir.join(directory_name);
473     debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
474     directory_path
475 }
476
477 fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(), ()> {
478     match std_fs::create_dir_all(path) {
479         Ok(()) => {
480             debug!("{} directory created successfully", dir_tag);
481             Ok(())
482         }
483         Err(err) => {
484             sess.err(&format!(
485                 "Could not create incremental compilation {} \
486                                directory `{}`: {}",
487                 dir_tag,
488                 path.display(),
489                 err
490             ));
491             Err(())
492         }
493     }
494 }
495
496 /// Allocate the lock-file and lock it.
497 fn lock_directory(sess: &Session, session_dir: &Path) -> Result<(flock::Lock, PathBuf), ()> {
498     let lock_file_path = lock_file_path(session_dir);
499     debug!("lock_directory() - lock_file: {}", lock_file_path.display());
500
501     match flock::Lock::new(
502         &lock_file_path,
503         false, // don't wait
504         true,  // create the lock file
505         true,
506     ) {
507         // the lock should be exclusive
508         Ok(lock) => Ok((lock, lock_file_path)),
509         Err(err) => {
510             sess.err(&format!(
511                 "incremental compilation: could not create \
512                                session directory lock file: {}",
513                 err
514             ));
515             Err(())
516         }
517     }
518 }
519
520 fn delete_session_dir_lock_file(sess: &Session, lock_file_path: &Path) {
521     if let Err(err) = safe_remove_file(&lock_file_path) {
522         sess.warn(&format!(
523             "Error deleting lock file for incremental \
524                             compilation session directory `{}`: {}",
525             lock_file_path.display(),
526             err
527         ));
528     }
529 }
530
531 /// Finds the most recent published session directory that is not in the
532 /// ignore-list.
533 fn find_source_directory(
534     crate_dir: &Path,
535     source_directories_already_tried: &FxHashSet<PathBuf>,
536 ) -> Option<PathBuf> {
537     let iter = crate_dir
538         .read_dir()
539         .unwrap() // FIXME
540         .filter_map(|e| e.ok().map(|e| e.path()));
541
542     find_source_directory_in_iter(iter, source_directories_already_tried)
543 }
544
545 fn find_source_directory_in_iter<I>(
546     iter: I,
547     source_directories_already_tried: &FxHashSet<PathBuf>,
548 ) -> Option<PathBuf>
549 where
550     I: Iterator<Item = PathBuf>,
551 {
552     let mut best_candidate = (UNIX_EPOCH, None);
553
554     for session_dir in iter {
555         debug!("find_source_directory_in_iter - inspecting `{}`", session_dir.display());
556
557         let directory_name = session_dir.file_name().unwrap().to_string_lossy();
558         assert_no_characters_lost(&directory_name);
559
560         if source_directories_already_tried.contains(&session_dir)
561             || !is_session_directory(&directory_name)
562             || !is_finalized(&directory_name)
563         {
564             debug!("find_source_directory_in_iter - ignoring");
565             continue;
566         }
567
568         let timestamp = extract_timestamp_from_session_dir(&directory_name).unwrap_or_else(|_| {
569             bug!("unexpected incr-comp session dir: {}", session_dir.display())
570         });
571
572         if timestamp > best_candidate.0 {
573             best_candidate = (timestamp, Some(session_dir.clone()));
574         }
575     }
576
577     best_candidate.1
578 }
579
/// Tells whether a session directory name denotes a finalized directory,
/// i.e. whether it no longer ends in `-working`.
fn is_finalized(directory_name: &str) -> bool {
    const WORKING_SUFFIX: &str = "-working";

    let still_in_progress = directory_name.ends_with(WORKING_SUFFIX);
    !still_in_progress
}
583
584 fn is_session_directory(directory_name: &str) -> bool {
585     directory_name.starts_with("s-") && !directory_name.ends_with(LOCK_FILE_EXT)
586 }
587
588 fn is_session_directory_lock_file(file_name: &str) -> bool {
589     file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
590 }
591
592 fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime, ()> {
593     if !is_session_directory(directory_name) {
594         return Err(());
595     }
596
597     let dash_indices: Vec<_> = directory_name.match_indices("-").map(|(idx, _)| idx).collect();
598     if dash_indices.len() != 3 {
599         return Err(());
600     }
601
602     string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
603 }
604
605 fn timestamp_to_string(timestamp: SystemTime) -> String {
606     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
607     let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
608     base_n::encode(micros as u128, INT_ENCODE_BASE)
609 }
610
611 fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
612     let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32);
613
614     if micros_since_unix_epoch.is_err() {
615         return Err(());
616     }
617
618     let micros_since_unix_epoch = micros_since_unix_epoch.unwrap();
619
620     let duration = Duration::new(
621         micros_since_unix_epoch / 1_000_000,
622         1000 * (micros_since_unix_epoch % 1_000_000) as u32,
623     );
624     Ok(UNIX_EPOCH + duration)
625 }
626
627 fn crate_path(
628     sess: &Session,
629     crate_name: &str,
630     crate_disambiguator: CrateDisambiguator,
631 ) -> PathBuf {
632     let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
633
634     // The full crate disambiguator is really long. 64 bits of it should be
635     // sufficient.
636     let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
637     let crate_disambiguator = base_n::encode(crate_disambiguator as u128, INT_ENCODE_BASE);
638
639     let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
640     incr_dir.join(crate_name)
641 }
642
643 fn assert_no_characters_lost(s: &str) {
644     if s.contains('\u{FFFD}') {
645         bug!("Could not losslessly convert '{}'.", s)
646     }
647 }
648
/// Tells whether `timestamp` lies more than ten seconds in the past, which is
/// the minimum age for something to be considered for garbage collection.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    const MIN_AGE: Duration = Duration::from_secs(10);

    let cutoff = SystemTime::now() - MIN_AGE;
    timestamp < cutoff
}
652
653 pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
654     debug!("garbage_collect_session_directories() - begin");
655
656     let session_directory = sess.incr_comp_session_dir();
657     debug!(
658         "garbage_collect_session_directories() - session directory: {}",
659         session_directory.display()
660     );
661
662     let crate_directory = session_directory.parent().unwrap();
663     debug!(
664         "garbage_collect_session_directories() - crate directory: {}",
665         crate_directory.display()
666     );
667
668     // First do a pass over the crate directory, collecting lock files and
669     // session directories
670     let mut session_directories = FxHashSet::default();
671     let mut lock_files = FxHashSet::default();
672
673     for dir_entry in crate_directory.read_dir()? {
674         let dir_entry = match dir_entry {
675             Ok(dir_entry) => dir_entry,
676             _ => {
677                 // Ignore any errors
678                 continue;
679             }
680         };
681
682         let entry_name = dir_entry.file_name();
683         let entry_name = entry_name.to_string_lossy();
684
685         if is_session_directory_lock_file(&entry_name) {
686             assert_no_characters_lost(&entry_name);
687             lock_files.insert(entry_name.into_owned());
688         } else if is_session_directory(&entry_name) {
689             assert_no_characters_lost(&entry_name);
690             session_directories.insert(entry_name.into_owned());
691         } else {
692             // This is something we don't know, leave it alone
693         }
694     }
695
696     // Now map from lock files to session directories
697     let lock_file_to_session_dir: FxHashMap<String, Option<String>> = lock_files
698         .into_iter()
699         .map(|lock_file_name| {
700             assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
701             let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
702             let session_dir = {
703                 let dir_prefix = &lock_file_name[0..dir_prefix_end];
704                 session_directories.iter().find(|dir_name| dir_name.starts_with(dir_prefix))
705             };
706             (lock_file_name, session_dir.map(String::clone))
707         })
708         .collect();
709
710     // Delete all lock files, that don't have an associated directory. They must
711     // be some kind of leftover
712     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
713         if directory_name.is_none() {
714             let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
715                 Ok(timestamp) => timestamp,
716                 Err(()) => {
717                     debug!(
718                         "found lock-file with malformed timestamp: {}",
719                         crate_directory.join(&lock_file_name).display()
720                     );
721                     // Ignore it
722                     continue;
723                 }
724             };
725
726             let lock_file_path = crate_directory.join(&**lock_file_name);
727
728             if is_old_enough_to_be_collected(timestamp) {
729                 debug!(
730                     "garbage_collect_session_directories() - deleting \
731                         garbage lock file: {}",
732                     lock_file_path.display()
733                 );
734                 delete_session_dir_lock_file(sess, &lock_file_path);
735             } else {
736                 debug!(
737                     "garbage_collect_session_directories() - lock file with \
738                         no session dir not old enough to be collected: {}",
739                     lock_file_path.display()
740                 );
741             }
742         }
743     }
744
745     // Filter out `None` directories
746     let lock_file_to_session_dir: FxHashMap<String, String> = lock_file_to_session_dir
747         .into_iter()
748         .filter_map(|(lock_file_name, directory_name)| directory_name.map(|n| (lock_file_name, n)))
749         .collect();
750
751     // Delete all session directories that don't have a lock file.
752     for directory_name in session_directories {
753         if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) {
754             let path = crate_directory.join(directory_name);
755             if let Err(err) = safe_remove_dir_all(&path) {
756                 sess.warn(&format!(
757                     "Failed to garbage collect invalid incremental \
758                                     compilation session directory `{}`: {}",
759                     path.display(),
760                     err
761                 ));
762             }
763         }
764     }
765
766     // Now garbage collect the valid session directories.
767     let mut deletion_candidates = vec![];
768     let mut definitely_delete = vec![];
769
770     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
771         debug!("garbage_collect_session_directories() - inspecting: {}", directory_name);
772
773         let timestamp = match extract_timestamp_from_session_dir(directory_name) {
774             Ok(timestamp) => timestamp,
775             Err(()) => {
776                 debug!(
777                     "found session-dir with malformed timestamp: {}",
778                     crate_directory.join(directory_name).display()
779                 );
780                 // Ignore it
781                 continue;
782             }
783         };
784
785         if is_finalized(directory_name) {
786             let lock_file_path = crate_directory.join(lock_file_name);
787             match flock::Lock::new(
788                 &lock_file_path,
789                 false, // don't wait
790                 false, // don't create the lock-file
791                 true,
792             ) {
793                 // get an exclusive lock
794                 Ok(lock) => {
795                     debug!(
796                         "garbage_collect_session_directories() - \
797                             successfully acquired lock"
798                     );
799                     debug!(
800                         "garbage_collect_session_directories() - adding \
801                             deletion candidate: {}",
802                         directory_name
803                     );
804
805                     // Note that we are holding on to the lock
806                     deletion_candidates.push((
807                         timestamp,
808                         crate_directory.join(directory_name),
809                         Some(lock),
810                     ));
811                 }
812                 Err(_) => {
813                     debug!(
814                         "garbage_collect_session_directories() - \
815                             not collecting, still in use"
816                     );
817                 }
818             }
819         } else if is_old_enough_to_be_collected(timestamp) {
820             // When cleaning out "-working" session directories, i.e.
821             // session directories that might still be in use by another
822             // compiler instance, we only look at directories that are
823             // at least ten seconds old. This is supposed to reduce the
824             // chance of deleting a directory in the time window where
825             // the process has allocated the directory but has not yet
826             // acquired the file-lock on it.
827
828             // Try to acquire the directory lock. If we can't, it
829             // means that the owning process is still alive and we
830             // leave this directory alone.
831             let lock_file_path = crate_directory.join(lock_file_name);
832             match flock::Lock::new(
833                 &lock_file_path,
834                 false, // don't wait
835                 false, // don't create the lock-file
836                 true,
837             ) {
838                 // get an exclusive lock
839                 Ok(lock) => {
840                     debug!(
841                         "garbage_collect_session_directories() - \
842                             successfully acquired lock"
843                     );
844
845                     // Note that we are holding on to the lock
846                     definitely_delete.push((crate_directory.join(directory_name), Some(lock)));
847                 }
848                 Err(_) => {
849                     debug!(
850                         "garbage_collect_session_directories() - \
851                             not collecting, still in use"
852                     );
853                 }
854             }
855         } else {
856             debug!(
857                 "garbage_collect_session_directories() - not finalized, not \
858                     old enough"
859             );
860         }
861     }
862
863     // Delete all but the most recent of the candidates
864     for (path, lock) in all_except_most_recent(deletion_candidates) {
865         debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
866
867         if let Err(err) = safe_remove_dir_all(&path) {
868             sess.warn(&format!(
869                 "Failed to garbage collect finalized incremental \
870                                 compilation session directory `{}`: {}",
871                 path.display(),
872                 err
873             ));
874         } else {
875             delete_session_dir_lock_file(sess, &lock_file_path(&path));
876         }
877
878         // Let's make it explicit that the file lock is released at this point,
879         // or rather, that we held on to it until here
880         mem::drop(lock);
881     }
882
883     for (path, lock) in definitely_delete {
884         debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
885
886         if let Err(err) = safe_remove_dir_all(&path) {
887             sess.warn(&format!(
888                 "Failed to garbage collect incremental \
889                                 compilation session directory `{}`: {}",
890                 path.display(),
891                 err
892             ));
893         } else {
894             delete_session_dir_lock_file(sess, &lock_file_path(&path));
895         }
896
897         // Let's make it explicit that the file lock is released at this point,
898         // or rather, that we held on to it until here
899         mem::drop(lock);
900     }
901
902     Ok(())
903 }
904
905 fn all_except_most_recent(
906     deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>,
907 ) -> FxHashMap<PathBuf, Option<flock::Lock>> {
908     let most_recent = deletion_candidates.iter().map(|&(timestamp, ..)| timestamp).max();
909
910     if let Some(most_recent) = most_recent {
911         deletion_candidates
912             .into_iter()
913             .filter(|&(timestamp, ..)| timestamp != most_recent)
914             .map(|(_, path, lock)| (path, lock))
915             .collect()
916     } else {
917         FxHashMap::default()
918     }
919 }
920
/// Recursively deletes the directory at `p`, treating a directory that does
/// not exist as success.
///
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to `std::fs::remove_dir_all()`. This will convert the path
/// into the `\\?\` format, which supports much longer paths.
///
/// # Errors
///
/// Returns any error from canonicalizing `p` other than `NotFound`, and any
/// error reported while removing the directory.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    // Canonicalize directly instead of checking `p.exists()` first: another
    // process may delete the directory between the check and the
    // canonicalization, which would surface as a spurious `NotFound` error.
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    std_fs::remove_dir_all(canonicalized)
}
935
/// Deletes the file at `p`, treating a file that does not exist as success.
///
/// The path is canonicalized before it is handed to `std::fs::remove_file()`.
///
/// # Errors
///
/// Returns any error from canonicalizing `p` or from removing the file, except
/// for `NotFound`, which is mapped to `Ok(())` in both steps.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    // Canonicalize directly instead of checking `p.exists()` first: the file
    // may vanish between the check and the canonicalization, which would
    // surface as a spurious `NotFound` error.
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    match std_fs::remove_file(canonicalized) {
        // Somebody else removed the file after we canonicalized it; the
        // outcome is what we wanted anyway.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        result => result,
    }
}