]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_incremental/src/persist/fs.rs
Stabilize File::options()
[rust.git] / compiler / rustc_incremental / src / persist / fs.rs
1 //! This module manages how the incremental compilation cache is represented in
2 //! the file system.
3 //!
4 //! Incremental compilation caches are managed according to a copy-on-write
5 //! strategy: Once a complete, consistent cache version is finalized, it is
6 //! never modified. Instead, when a subsequent compilation session is started,
7 //! the compiler will allocate a new version of the cache that starts out as
8 //! a copy of the previous version. Then only this new copy is modified and it
9 //! will not be visible to other processes until it is finalized. This ensures
10 //! that multiple compiler processes can be executed concurrently for the same
11 //! crate without interfering with each other or blocking each other.
12 //!
13 //! More concretely this is implemented via the following protocol:
14 //!
15 //! 1. For a newly started compilation session, the compiler allocates a
16 //!    new `session` directory within the incremental compilation directory.
17 //!    This session directory will have a unique name that ends with the suffix
18 //!    "-working" and that contains a creation timestamp.
19 //! 2. Next, the compiler looks for the newest finalized session directory,
20 //!    that is, a session directory from a previous compilation session that
21 //!    has been marked as valid and consistent. A session directory is
22 //!    considered finalized if the "-working" suffix in the directory name has
23 //!    been replaced by the SVH of the crate.
24 //! 3. Once the compiler has found a valid, finalized session directory, it will
25 //!    hard-link/copy its contents into the new "-working" directory. If all
26 //!    goes well, it will have its own, private copy of the source directory and
27 //!    subsequently not have to worry about synchronizing with other compiler
28 //!    processes.
29 //! 4. Now the compiler can do its normal compilation process, which involves
30 //!    reading and updating its private session directory.
31 //! 5. When compilation finishes without errors, the private session directory
32 //!    will be in a state where it can be used as input for other compilation
33 //!    sessions. That is, it will contain a dependency graph and cache artifacts
34 //!    that are consistent with the state of the source code it was compiled
35 //!    from, with no need to change them ever again. At this point, the compiler
36 //!    finalizes and "publishes" its private session directory by renaming it
37 //!    from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
38 //! 6. At this point the "old" session directory that we copied our data from
39 //!    at the beginning of the session has become obsolete because we have just
40 //!    published a more current version. Thus the compiler will delete it.
41 //!
42 //! ## Garbage Collection
43 //!
44 //! Naively following the above protocol might lead to old session directories
45 //! piling up if a compiler instance crashes for some reason before it's able to
46 //! remove its private session directory. In order to avoid wasting disk space,
47 //! the compiler also does some garbage collection each time it is started in
48 //! incremental compilation mode. Specifically, it will scan the incremental
49 //! compilation directory for private session directories that are not in use
50 //! any more and will delete those. It will also delete any finalized session
51 //! directories for a given crate except for the most recent one.
52 //!
53 //! ## Synchronization
54 //!
55 //! There is some synchronization needed in order for the compiler to be able to
56 //! determine whether a given private session directory is not in use any more.
57 //! This is done by creating a lock file for each session directory and
58 //! locking it while the directory is still being used. Since file locks have
59 //! operating system support, we can rely on the lock being released if the
60 //! compiler process dies for some unexpected reason. Thus, when garbage
61 //! collecting private session directories, the collecting process can determine
62 //! whether the directory is still in use by trying to acquire a lock on the
63 //! file. If locking the file fails, the original process must still be alive.
64 //! If locking the file succeeds, we know that the owning process is not alive
65 //! any more and we can safely delete the directory.
66 //! There is still a small time window between the original process creating the
67 //! lock file and actually locking it. In order to minimize the chance that
68 //! another process tries to acquire the lock in just that instant, only
69 //! session directories that are older than a few seconds are considered for
70 //! garbage collection.
71 //!
72 //! Another case that has to be considered is what happens if one process
73 //! deletes a finalized session directory that another process is currently
74 //! trying to copy from. This case is also handled via the lock file. Before
75 //! a process starts copying a finalized session directory, it will acquire a
76 //! shared lock on the directory's lock file. Any garbage collecting process,
77 //! on the other hand, will acquire an exclusive lock on the lock file.
78 //! Thus, if a directory is being collected, any reader process will fail
79 //! acquiring the shared lock and will leave the directory alone. Conversely,
80 //! if a collecting process can't acquire the exclusive lock because the
81 //! directory is currently being read from, it will leave collecting that
82 //! directory to another process at a later point in time.
83 //! The exact same scheme is also used when reading the metadata hashes file
84 //! from an extern crate. When a crate is compiled, the hash values of its
85 //! metadata are stored in a file in its session directory. When the
86 //! compilation session of another crate imports the first crate's metadata,
87 //! it also has to read in the accompanying metadata hashes. It thus will access
88 //! the finalized session directory of all crates it links to and while doing
89 //! so, it will also place a read lock on the respective session directory
90 //! so that it won't be deleted while the metadata hashes are loaded.
91 //!
92 //! ## Preconditions
93 //!
94 //! This system relies on two features being available in the file system in
95 //! order to work really well: file locking and hard linking.
96 //! If hard linking is not available (like on FAT) the data in the cache
97 //! actually has to be copied at the beginning of each session.
98 //! If file locking does not work reliably (like on NFS), some of the
99 //! synchronization will go haywire.
100 //! In both cases we recommend to locate the incremental compilation directory
101 //! on a file system that supports these things.
102 //! It might be a good idea though to try and detect whether we are on an
103 //! unsupported file system and emit a warning in that case. This is not yet
104 //! implemented.
105
106 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
107 use rustc_data_structures::svh::Svh;
108 use rustc_data_structures::{base_n, flock};
109 use rustc_errors::ErrorReported;
110 use rustc_fs_util::{link_or_copy, LinkOrCopy};
111 use rustc_session::{Session, StableCrateId};
112
113 use std::fs as std_fs;
114 use std::io;
115 use std::mem;
116 use std::path::{Path, PathBuf};
117 use std::time::{Duration, SystemTime, UNIX_EPOCH};
118
119 use rand::{thread_rng, RngCore};
120
121 #[cfg(test)]
122 mod tests;
123
// Extension (including the leading dot) of the lock file that belongs to a
// session directory; see `lock_file_path`.
124 const LOCK_FILE_EXT: &str = ".lock";
// Names of the files that are looked up inside a session directory.
125 const DEP_GRAPH_FILENAME: &str = "dep-graph.bin";
126 const STAGING_DEP_GRAPH_FILENAME: &str = "dep-graph.part.bin";
127 const WORK_PRODUCTS_FILENAME: &str = "work-products.bin";
128 const QUERY_CACHE_FILENAME: &str = "query-cache.bin";
129
130 // We encode integers using the following base, so they are shorter than decimal
131 // or hexadecimal numbers (we want short file and directory names). Since these
132 // numbers will be used in file names, we choose an encoding that is not
133 // case-sensitive (as opposed to base64, for example).
134 const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
135
136 pub fn dep_graph_path(sess: &Session) -> PathBuf {
137     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
138 }
139 pub fn staging_dep_graph_path(sess: &Session) -> PathBuf {
140     in_incr_comp_dir_sess(sess, STAGING_DEP_GRAPH_FILENAME)
141 }
142 pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf {
143     in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME)
144 }
145
146 pub fn work_products_path(sess: &Session) -> PathBuf {
147     in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
148 }
149
150 pub fn query_cache_path(sess: &Session) -> PathBuf {
151     in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME)
152 }
153
154 pub fn lock_file_path(session_dir: &Path) -> PathBuf {
155     let crate_dir = session_dir.parent().unwrap();
156
157     let directory_name = session_dir.file_name().unwrap().to_string_lossy();
158     assert_no_characters_lost(&directory_name);
159
160     let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect();
161     if dash_indices.len() != 3 {
162         bug!(
163             "Encountered incremental compilation session directory with \
164               malformed name: {}",
165             session_dir.display()
166         )
167     }
168
169     crate_dir.join(&directory_name[0..dash_indices[2]]).with_extension(&LOCK_FILE_EXT[1..])
170 }
171
172 pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
173     in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
174 }
175
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
179
180 /// Allocates the private session directory for this compilation session.
181 /// Nothing is returned in the `Ok` case; instead the session directory and
182 /// its lock are handed to `sess.init_incr_comp_session`, together with a flag
/// that is `true` only when data was copied over from a previous session
/// (presumably telling the session whether to try loading a dep graph from
/// the directory -- confirm against `Session::init_incr_comp_session`).
/// Returns `Ok(())` immediately when incremental compilation is disabled.
183 /// The post-condition of this fn is that we have a valid incremental
184 /// compilation session directory, if the result is `Ok`. A valid session
185 /// directory is one that contains a locked lock file. It may or may not contain
186 /// a dep-graph and work products from a previous session.
187 /// If the call fails, the fn may leave behind an invalid session directory.
188 /// The garbage collection will take care of it.
189 pub fn prepare_session_directory(
190     sess: &Session,
191     crate_name: &str,
192     stable_crate_id: StableCrateId,
193 ) -> Result<(), ErrorReported> {
194     if sess.opts.incremental.is_none() {
195         return Ok(());
196     }
197
198     let _timer = sess.timer("incr_comp_prepare_session_directory");
199
200     debug!("prepare_session_directory");
201
202     // {incr-comp-dir}/{crate-name-and-disambiguator}
203     let crate_dir = crate_path(sess, crate_name, stable_crate_id);
204     debug!("crate-dir: {}", crate_dir.display());
205     create_dir(sess, &crate_dir, "crate")?;
206
207     // Hack: canonicalize the path *after creating the directory*
208     // because, on windows, long paths can cause problems;
209     // canonicalization inserts this weird prefix that makes windows
210     // tolerate long paths.
211     let crate_dir = match crate_dir.canonicalize() {
212         Ok(v) => v,
213         Err(err) => {
214             sess.err(&format!(
215                 "incremental compilation: error canonicalizing path `{}`: {}",
216                 crate_dir.display(),
217                 err
218             ));
219             return Err(ErrorReported);
220         }
221     };
222
    // Source directories whose copy attempt failed; they are skipped when
    // looking for the next candidate.
223     let mut source_directories_already_tried = FxHashSet::default();
224
    // Each iteration allocates a fresh session directory and either returns
    // or, after a failed copy, cleans up and retries with another source.
225     loop {
226         // Generate a session directory of the form:
227         //
228         // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
229         let session_dir = generate_session_dir_path(&crate_dir);
230         debug!("session-dir: {}", session_dir.display());
231
232         // Lock the new session directory. If this fails, return an
233         // error without retrying
234         let (directory_lock, lock_file_path) = lock_directory(sess, &session_dir)?;
235
236         // Now that we have the lock, we can actually create the session
237         // directory
238         create_dir(sess, &session_dir, "session")?;
239
240         // Find a suitable source directory to copy from. Ignore those that we
241         // have already tried before.
242         let source_directory = find_source_directory(&crate_dir, &source_directories_already_tried);
243
244         let Some(source_directory) = source_directory else {
245             // There's nowhere to copy from, we're done
246             debug!(
247                 "no source directory found. Continuing with empty session \
248                     directory."
249             );
250
251             sess.init_incr_comp_session(session_dir, directory_lock, false);
252             return Ok(());
253         };
254
255         debug!("attempting to copy data from source: {}", source_directory.display());
256
257         // Try copying over all files from the source directory
258         if let Ok(allows_links) = copy_files(sess, &session_dir, &source_directory) {
259             debug!("successfully copied data from: {}", source_directory.display());
260
261             if !allows_links {
262                 sess.warn(&format!(
263                     "Hard linking files in the incremental \
264                                         compilation cache failed. Copying files \
265                                         instead. Consider moving the cache \
266                                         directory to a file system which supports \
267                                         hard linking in session dir `{}`",
268                     session_dir.display()
269                 ));
270             }
271
272             sess.init_incr_comp_session(session_dir, directory_lock, true);
273             return Ok(());
274         } else {
275             debug!("copying failed - trying next directory");
276
277             // Something went wrong while trying to copy/link files from the
278             // source directory. Try again with a different one.
279             source_directories_already_tried.insert(source_directory);
280
281             // Try to remove the session directory we just allocated. We don't
282             // know if there's any garbage in it from the failed copy action.
283             if let Err(err) = safe_remove_dir_all(&session_dir) {
284                 sess.warn(&format!(
285                     "Failed to delete partly initialized \
286                                     session dir `{}`: {}",
287                     session_dir.display(),
288                     err
289                 ));
290             }
291
            // The lock file is deleted first and the lock itself is released
            // afterwards, before the next iteration allocates a new directory.
292             delete_session_dir_lock_file(sess, &lock_file_path);
293             mem::drop(directory_lock);
294         }
295     }
296 }
297
298 /// This function finalizes and thus 'publishes' the session directory by
299 /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
300 /// If there have been compilation errors, however, this function will
301 /// delete the presumably invalid session directory and its lock file.
///
/// Does nothing when incremental compilation is disabled. A failed rename is
/// only reported as a warning and marks the session as invalid. Garbage
/// collection of session directories is attempted at the end and its result
/// is ignored.
302 pub fn finalize_session_directory(sess: &Session, svh: Svh) {
303     if sess.opts.incremental.is_none() {
304         return;
305     }
306
307     let _timer = sess.timer("incr_comp_finalize_session_directory");
308
309     let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
310
311     if sess.has_errors_or_delayed_span_bugs() {
312         // If there have been any errors during compilation, we don't want to
313         // publish this session directory. Rather, we'll just delete it.
314
315         debug!(
316             "finalize_session_directory() - invalidating session directory: {}",
317             incr_comp_session_dir.display()
318         );
319
320         if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
321             sess.warn(&format!(
322                 "Error deleting incremental compilation \
323                                 session directory `{}`: {}",
324                 incr_comp_session_dir.display(),
325                 err
326             ));
327         }
328
329         let lock_file_path = lock_file_path(&*incr_comp_session_dir);
330         delete_session_dir_lock_file(sess, &lock_file_path);
331         sess.mark_incr_comp_session_as_invalid();
332     }
    // NOTE(review): there is no early return after the error branch above, so
    // the rename below is still attempted on the directory that was just
    // deleted -- confirm whether this fall-through is intended.
333
334     debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
335
336     let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
337     assert_no_characters_lost(&old_sub_dir_name);
338
339     // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
340     // '-working' part with the SVH of the crate
341     let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
342     if dash_indices.len() != 3 {
343         bug!(
344             "Encountered incremental compilation session directory with \
345               malformed name: {}",
346             incr_comp_session_dir.display()
347         )
348     }
349
350     // State: "s-{timestamp}-{random-number}-"
351     let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
352
353     // Append the svh
354     base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
355
356     // Create the full path
357     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
358     debug!("finalize_session_directory() - new path: {}", new_path.display());
359
360     match std_fs::rename(&*incr_comp_session_dir, &new_path) {
361         Ok(_) => {
362             debug!("finalize_session_directory() - directory renamed successfully");
363
364             // This unlocks the directory
365             sess.finalize_incr_comp_session(new_path);
366         }
367         Err(e) => {
368             // Warn about the error. However, no need to abort compilation now.
369             sess.warn(&format!(
370                 "Error finalizing incremental compilation \
371                                session directory `{}`: {}",
372                 incr_comp_session_dir.display(),
373                 e
374             ));
375
376             debug!("finalize_session_directory() - error, marking as invalid");
377             // Drop the file lock, so we can garbage collect
378             sess.mark_incr_comp_session_as_invalid();
379         }
380     }
381
    // Best effort: a failure to garbage collect is deliberately ignored.
382     let _ = garbage_collect_session_directories(sess);
383 }
384
385 pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
386     let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
387     for entry in sess_dir_iterator {
388         let entry = entry?;
389         safe_remove_file(&entry.path())?
390     }
391     Ok(())
392 }
393
394 fn copy_files(sess: &Session, target_dir: &Path, source_dir: &Path) -> Result<bool, ()> {
395     // We acquire a shared lock on the lock file of the directory, so that
396     // nobody deletes it out from under us while we are reading from it.
397     let lock_file_path = lock_file_path(source_dir);
398
399     // not exclusive
400     let Ok(_lock) = flock::Lock::new(
401         &lock_file_path,
402         false, // don't wait,
403         false, // don't create
404         false,
405     ) else {
406         // Could not acquire the lock, don't try to copy from here
407         return Err(());
408     };
409
410     let source_dir_iterator = match source_dir.read_dir() {
411         Ok(it) => it,
412         Err(_) => return Err(()),
413     };
414
415     let mut files_linked = 0;
416     let mut files_copied = 0;
417
418     for entry in source_dir_iterator {
419         match entry {
420             Ok(entry) => {
421                 let file_name = entry.file_name();
422
423                 let target_file_path = target_dir.join(file_name);
424                 let source_path = entry.path();
425
426                 debug!("copying into session dir: {}", source_path.display());
427                 match link_or_copy(source_path, target_file_path) {
428                     Ok(LinkOrCopy::Link) => files_linked += 1,
429                     Ok(LinkOrCopy::Copy) => files_copied += 1,
430                     Err(_) => return Err(()),
431                 }
432             }
433             Err(_) => return Err(()),
434         }
435     }
436
437     if sess.opts.debugging_opts.incremental_info {
438         eprintln!(
439             "[incremental] session directory: \
440                   {} files hard-linked",
441             files_linked
442         );
443         eprintln!(
444             "[incremental] session directory: \
445                  {} files copied",
446             files_copied
447         );
448     }
449
450     Ok(files_linked > 0 || files_copied == 0)
451 }
452
453 /// Generates unique directory path of the form:
454 /// {crate_dir}/s-{timestamp}-{random-number}-working
455 fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
456     let timestamp = timestamp_to_string(SystemTime::now());
457     debug!("generate_session_dir_path: timestamp = {}", timestamp);
458     let random_number = thread_rng().next_u32();
459     debug!("generate_session_dir_path: random_number = {}", random_number);
460
461     let directory_name = format!(
462         "s-{}-{}-working",
463         timestamp,
464         base_n::encode(random_number as u128, INT_ENCODE_BASE)
465     );
466     debug!("generate_session_dir_path: directory_name = {}", directory_name);
467     let directory_path = crate_dir.join(directory_name);
468     debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
469     directory_path
470 }
471
472 fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(), ErrorReported> {
473     match std_fs::create_dir_all(path) {
474         Ok(()) => {
475             debug!("{} directory created successfully", dir_tag);
476             Ok(())
477         }
478         Err(err) => {
479             sess.err(&format!(
480                 "Could not create incremental compilation {} \
481                                directory `{}`: {}",
482                 dir_tag,
483                 path.display(),
484                 err
485             ));
486             Err(ErrorReported)
487         }
488     }
489 }
490
491 /// Allocate the lock-file and lock it.
492 fn lock_directory(
493     sess: &Session,
494     session_dir: &Path,
495 ) -> Result<(flock::Lock, PathBuf), ErrorReported> {
496     let lock_file_path = lock_file_path(session_dir);
497     debug!("lock_directory() - lock_file: {}", lock_file_path.display());
498
499     match flock::Lock::new(
500         &lock_file_path,
501         false, // don't wait
502         true,  // create the lock file
503         true,
504     ) {
505         // the lock should be exclusive
506         Ok(lock) => Ok((lock, lock_file_path)),
507         Err(lock_err) => {
508             let mut err = sess.struct_err(&format!(
509                 "incremental compilation: could not create \
510                  session directory lock file: {}",
511                 lock_err
512             ));
513             if flock::Lock::error_unsupported(&lock_err) {
514                 err.note(&format!(
515                     "the filesystem for the incremental path at {} \
516                      does not appear to support locking, consider changing the \
517                      incremental path to a filesystem that supports locking \
518                      or disable incremental compilation",
519                     session_dir.display()
520                 ));
521                 if std::env::var_os("CARGO").is_some() {
522                     err.help(
523                         "incremental compilation can be disabled by setting the \
524                          environment variable CARGO_INCREMENTAL=0 (see \
525                          https://doc.rust-lang.org/cargo/reference/profiles.html#incremental)",
526                     );
527                     err.help(
528                         "the entire build directory can be changed to a different \
529                         filesystem by setting the environment variable CARGO_TARGET_DIR \
530                         to a different path (see \
531                         https://doc.rust-lang.org/cargo/reference/config.html#buildtarget-dir)",
532                     );
533                 }
534             }
535             err.emit();
536             Err(ErrorReported)
537         }
538     }
539 }
540
541 fn delete_session_dir_lock_file(sess: &Session, lock_file_path: &Path) {
542     if let Err(err) = safe_remove_file(&lock_file_path) {
543         sess.warn(&format!(
544             "Error deleting lock file for incremental \
545                             compilation session directory `{}`: {}",
546             lock_file_path.display(),
547             err
548         ));
549     }
550 }
551
552 /// Finds the most recent published session directory that is not in the
553 /// ignore-list.
554 fn find_source_directory(
555     crate_dir: &Path,
556     source_directories_already_tried: &FxHashSet<PathBuf>,
557 ) -> Option<PathBuf> {
558     let iter = crate_dir
559         .read_dir()
560         .unwrap() // FIXME
561         .filter_map(|e| e.ok().map(|e| e.path()));
562
563     find_source_directory_in_iter(iter, source_directories_already_tried)
564 }
565
566 fn find_source_directory_in_iter<I>(
567     iter: I,
568     source_directories_already_tried: &FxHashSet<PathBuf>,
569 ) -> Option<PathBuf>
570 where
571     I: Iterator<Item = PathBuf>,
572 {
573     let mut best_candidate = (UNIX_EPOCH, None);
574
575     for session_dir in iter {
576         debug!("find_source_directory_in_iter - inspecting `{}`", session_dir.display());
577
578         let directory_name = session_dir.file_name().unwrap().to_string_lossy();
579         assert_no_characters_lost(&directory_name);
580
581         if source_directories_already_tried.contains(&session_dir)
582             || !is_session_directory(&directory_name)
583             || !is_finalized(&directory_name)
584         {
585             debug!("find_source_directory_in_iter - ignoring");
586             continue;
587         }
588
589         let timestamp = extract_timestamp_from_session_dir(&directory_name).unwrap_or_else(|_| {
590             bug!("unexpected incr-comp session dir: {}", session_dir.display())
591         });
592
593         if timestamp > best_candidate.0 {
594             best_candidate = (timestamp, Some(session_dir.clone()));
595         }
596     }
597
598     best_candidate.1
599 }
600
/// Returns `true` if the directory name no longer carries the `-working`
/// suffix of an in-progress session directory.
fn is_finalized(directory_name: &str) -> bool {
    directory_name.strip_suffix("-working").is_none()
}
604
605 fn is_session_directory(directory_name: &str) -> bool {
606     directory_name.starts_with("s-") && !directory_name.ends_with(LOCK_FILE_EXT)
607 }
608
609 fn is_session_directory_lock_file(file_name: &str) -> bool {
610     file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
611 }
612
613 fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime, ()> {
614     if !is_session_directory(directory_name) {
615         return Err(());
616     }
617
618     let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect();
619     if dash_indices.len() != 3 {
620         return Err(());
621     }
622
623     string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
624 }
625
626 fn timestamp_to_string(timestamp: SystemTime) -> String {
627     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
628     let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
629     base_n::encode(micros as u128, INT_ENCODE_BASE)
630 }
631
632 fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
633     let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32);
634
635     if micros_since_unix_epoch.is_err() {
636         return Err(());
637     }
638
639     let micros_since_unix_epoch = micros_since_unix_epoch.unwrap();
640
641     let duration = Duration::new(
642         micros_since_unix_epoch / 1_000_000,
643         1000 * (micros_since_unix_epoch % 1_000_000) as u32,
644     );
645     Ok(UNIX_EPOCH + duration)
646 }
647
648 fn crate_path(sess: &Session, crate_name: &str, stable_crate_id: StableCrateId) -> PathBuf {
649     let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
650
651     let stable_crate_id = base_n::encode(stable_crate_id.to_u64() as u128, INT_ENCODE_BASE);
652
653     let crate_name = format!("{}-{}", crate_name, stable_crate_id);
654     incr_dir.join(crate_name)
655 }
656
657 fn assert_no_characters_lost(s: &str) {
658     if s.contains('\u{FFFD}') {
659         bug!("Could not losslessly convert '{}'.", s)
660     }
661 }
662
/// Returns `true` if `timestamp` lies more than ten seconds in the past.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let grace_period = Duration::from_secs(10);
    let cutoff = SystemTime::now() - grace_period;
    timestamp < cutoff
}
666
667 pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
668     debug!("garbage_collect_session_directories() - begin");
669
670     let session_directory = sess.incr_comp_session_dir();
671     debug!(
672         "garbage_collect_session_directories() - session directory: {}",
673         session_directory.display()
674     );
675
676     let crate_directory = session_directory.parent().unwrap();
677     debug!(
678         "garbage_collect_session_directories() - crate directory: {}",
679         crate_directory.display()
680     );
681
682     // First do a pass over the crate directory, collecting lock files and
683     // session directories
684     let mut session_directories = FxHashSet::default();
685     let mut lock_files = FxHashSet::default();
686
687     for dir_entry in crate_directory.read_dir()? {
688         let dir_entry = match dir_entry {
689             Ok(dir_entry) => dir_entry,
690             _ => {
691                 // Ignore any errors
692                 continue;
693             }
694         };
695
696         let entry_name = dir_entry.file_name();
697         let entry_name = entry_name.to_string_lossy();
698
699         if is_session_directory_lock_file(&entry_name) {
700             assert_no_characters_lost(&entry_name);
701             lock_files.insert(entry_name.into_owned());
702         } else if is_session_directory(&entry_name) {
703             assert_no_characters_lost(&entry_name);
704             session_directories.insert(entry_name.into_owned());
705         } else {
706             // This is something we don't know, leave it alone
707         }
708     }
709
710     // Now map from lock files to session directories
711     let lock_file_to_session_dir: FxHashMap<String, Option<String>> = lock_files
712         .into_iter()
713         .map(|lock_file_name| {
714             assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
715             let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
716             let session_dir = {
717                 let dir_prefix = &lock_file_name[0..dir_prefix_end];
718                 session_directories.iter().find(|dir_name| dir_name.starts_with(dir_prefix))
719             };
720             (lock_file_name, session_dir.map(String::clone))
721         })
722         .collect();
723
724     // Delete all lock files, that don't have an associated directory. They must
725     // be some kind of leftover
726     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
727         if directory_name.is_none() {
728             let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
729                 Ok(timestamp) => timestamp,
730                 Err(()) => {
731                     debug!(
732                         "found lock-file with malformed timestamp: {}",
733                         crate_directory.join(&lock_file_name).display()
734                     );
735                     // Ignore it
736                     continue;
737                 }
738             };
739
740             let lock_file_path = crate_directory.join(&**lock_file_name);
741
742             if is_old_enough_to_be_collected(timestamp) {
743                 debug!(
744                     "garbage_collect_session_directories() - deleting \
745                         garbage lock file: {}",
746                     lock_file_path.display()
747                 );
748                 delete_session_dir_lock_file(sess, &lock_file_path);
749             } else {
750                 debug!(
751                     "garbage_collect_session_directories() - lock file with \
752                         no session dir not old enough to be collected: {}",
753                     lock_file_path.display()
754                 );
755             }
756         }
757     }
758
759     // Filter out `None` directories
760     let lock_file_to_session_dir: FxHashMap<String, String> = lock_file_to_session_dir
761         .into_iter()
762         .filter_map(|(lock_file_name, directory_name)| directory_name.map(|n| (lock_file_name, n)))
763         .collect();
764
765     // Delete all session directories that don't have a lock file.
766     for directory_name in session_directories {
767         if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) {
768             let path = crate_directory.join(directory_name);
769             if let Err(err) = safe_remove_dir_all(&path) {
770                 sess.warn(&format!(
771                     "Failed to garbage collect invalid incremental \
772                                     compilation session directory `{}`: {}",
773                     path.display(),
774                     err
775                 ));
776             }
777         }
778     }
779
780     // Now garbage collect the valid session directories.
781     let mut deletion_candidates = vec![];
782
783     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
784         debug!("garbage_collect_session_directories() - inspecting: {}", directory_name);
785
786         let timestamp = match extract_timestamp_from_session_dir(directory_name) {
787             Ok(timestamp) => timestamp,
788             Err(()) => {
789                 debug!(
790                     "found session-dir with malformed timestamp: {}",
791                     crate_directory.join(directory_name).display()
792                 );
793                 // Ignore it
794                 continue;
795             }
796         };
797
798         if is_finalized(directory_name) {
799             let lock_file_path = crate_directory.join(lock_file_name);
800             match flock::Lock::new(
801                 &lock_file_path,
802                 false, // don't wait
803                 false, // don't create the lock-file
804                 true,
805             ) {
806                 // get an exclusive lock
807                 Ok(lock) => {
808                     debug!(
809                         "garbage_collect_session_directories() - \
810                             successfully acquired lock"
811                     );
812                     debug!(
813                         "garbage_collect_session_directories() - adding \
814                             deletion candidate: {}",
815                         directory_name
816                     );
817
818                     // Note that we are holding on to the lock
819                     deletion_candidates.push((
820                         timestamp,
821                         crate_directory.join(directory_name),
822                         Some(lock),
823                     ));
824                 }
825                 Err(_) => {
826                     debug!(
827                         "garbage_collect_session_directories() - \
828                             not collecting, still in use"
829                     );
830                 }
831             }
832         } else if is_old_enough_to_be_collected(timestamp) {
833             // When cleaning out "-working" session directories, i.e.
834             // session directories that might still be in use by another
835             // compiler instance, we only look at directories that are
836             // at least ten seconds old. This is supposed to reduce the
837             // chance of deleting a directory in the time window where
838             // the process has allocated the directory but has not yet
839             // acquired the file-lock on it.
840
841             // Try to acquire the directory lock. If we can't, it
842             // means that the owning process is still alive and we
843             // leave this directory alone.
844             let lock_file_path = crate_directory.join(lock_file_name);
845             match flock::Lock::new(
846                 &lock_file_path,
847                 false, // don't wait
848                 false, // don't create the lock-file
849                 true,
850             ) {
851                 // get an exclusive lock
852                 Ok(lock) => {
853                     debug!(
854                         "garbage_collect_session_directories() - \
855                             successfully acquired lock"
856                     );
857
858                     delete_old(sess, &crate_directory.join(directory_name));
859
860                     // Let's make it explicit that the file lock is released at this point,
861                     // or rather, that we held on to it until here
862                     mem::drop(lock);
863                 }
864                 Err(_) => {
865                     debug!(
866                         "garbage_collect_session_directories() - \
867                             not collecting, still in use"
868                     );
869                 }
870             }
871         } else {
872             debug!(
873                 "garbage_collect_session_directories() - not finalized, not \
874                     old enough"
875             );
876         }
877     }
878
879     // Delete all but the most recent of the candidates
880     for (path, lock) in all_except_most_recent(deletion_candidates) {
881         debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
882
883         if let Err(err) = safe_remove_dir_all(&path) {
884             sess.warn(&format!(
885                 "Failed to garbage collect finalized incremental \
886                                 compilation session directory `{}`: {}",
887                 path.display(),
888                 err
889             ));
890         } else {
891             delete_session_dir_lock_file(sess, &lock_file_path(&path));
892         }
893
894         // Let's make it explicit that the file lock is released at this point,
895         // or rather, that we held on to it until here
896         mem::drop(lock);
897     }
898
899     Ok(())
900 }
901
902 fn delete_old(sess: &Session, path: &Path) {
903     debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
904
905     if let Err(err) = safe_remove_dir_all(&path) {
906         sess.warn(&format!(
907             "Failed to garbage collect incremental compilation session directory `{}`: {}",
908             path.display(),
909             err
910         ));
911     } else {
912         delete_session_dir_lock_file(sess, &lock_file_path(&path));
913     }
914 }
915
916 fn all_except_most_recent(
917     deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>,
918 ) -> FxHashMap<PathBuf, Option<flock::Lock>> {
919     let most_recent = deletion_candidates.iter().map(|&(timestamp, ..)| timestamp).max();
920
921     if let Some(most_recent) = most_recent {
922         deletion_candidates
923             .into_iter()
924             .filter(|&(timestamp, ..)| timestamp != most_recent)
925             .map(|(_, path, lock)| (path, lock))
926             .collect()
927     } else {
928         FxHashMap::default()
929     }
930 }
931
/// Recursively removes the directory `p`, treating an already-missing
/// directory as success.
///
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to std::fs::remove_dir_all(). This will convert the path
/// into the '\\?\' format, which supports much longer paths.
///
/// # Errors
///
/// Returns any error from canonicalizing or removing the directory, except
/// `io::ErrorKind::NotFound`, which is mapped to `Ok(())` in both steps.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    let canonicalized = match std_fs::canonicalize(p) {
        Ok(canonicalized) => canonicalized,
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    match std_fs::remove_dir_all(canonicalized) {
        // The directory can disappear between the canonicalization above and
        // this call (e.g. removed by another process). The goal, "directory is
        // gone", is met either way, so handle it like `safe_remove_file` does.
        Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        result => result,
    }
}
947
/// Removes the file `p` after canonicalizing its path, treating an
/// already-missing file as success.
///
/// # Errors
///
/// Returns any error from canonicalizing or removing the file, except
/// `io::ErrorKind::NotFound`, which is mapped to `Ok(())` in both steps.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    // A missing file means there is nothing left to do, so `NotFound` becomes
    // success; every other error is handed back unchanged.
    let ignore_missing = |err: io::Error| match err.kind() {
        io::ErrorKind::NotFound => Ok(()),
        _ => Err(err),
    };

    match std_fs::canonicalize(p) {
        Ok(resolved) => std_fs::remove_file(resolved).or_else(ignore_missing),
        Err(err) => ignore_missing(err),
    }
}