]> git.lizzy.rs Git - rust.git/blob - src/librustc_incremental/persist/fs.rs
Add riscv64gc-unknown-none-elf target
[rust.git] / src / librustc_incremental / persist / fs.rs
1 //! This module manages how the incremental compilation cache is represented in
2 //! the file system.
3 //!
4 //! Incremental compilation caches are managed according to a copy-on-write
5 //! strategy: Once a complete, consistent cache version is finalized, it is
6 //! never modified. Instead, when a subsequent compilation session is started,
7 //! the compiler will allocate a new version of the cache that starts out as
8 //! a copy of the previous version. Then only this new copy is modified and it
9 //! will not be visible to other processes until it is finalized. This ensures
10 //! that multiple compiler processes can be executed concurrently for the same
11 //! crate without interfering with each other or blocking each other.
12 //!
13 //! More concretely this is implemented via the following protocol:
14 //!
15 //! 1. For a newly started compilation session, the compiler allocates a
16 //!    new `session` directory within the incremental compilation directory.
17 //!    This session directory will have a unique name that ends with the suffix
18 //!    "-working" and that contains a creation timestamp.
19 //! 2. Next, the compiler looks for the newest finalized session directory,
20 //!    that is, a session directory from a previous compilation session that
21 //!    has been marked as valid and consistent. A session directory is
22 //!    considered finalized if the "-working" suffix in the directory name has
23 //!    been replaced by the SVH of the crate.
24 //! 3. Once the compiler has found a valid, finalized session directory, it will
25 //!    hard-link/copy its contents into the new "-working" directory. If all
26 //!    goes well, it will have its own, private copy of the source directory and
27 //!    subsequently not have to worry about synchronizing with other compiler
28 //!    processes.
29 //! 4. Now the compiler can do its normal compilation process, which involves
30 //!    reading and updating its private session directory.
31 //! 5. When compilation finishes without errors, the private session directory
32 //!    will be in a state where it can be used as input for other compilation
33 //!    sessions. That is, it will contain a dependency graph and cache artifacts
34 //!    that are consistent with the state of the source code it was compiled
35 //!    from, with no need to change them ever again. At this point, the compiler
36 //!    finalizes and "publishes" its private session directory by renaming it
37 //!    from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
38 //! 6. At this point the "old" session directory that we copied our data from
39 //!    at the beginning of the session has become obsolete because we have just
40 //!    published a more current version. Thus the compiler will delete it.
41 //!
42 //! ## Garbage Collection
43 //!
44 //! Naively following the above protocol might lead to old session directories
45 //! piling up if a compiler instance crashes for some reason before it is able to
46 //! remove its private session directory. In order to avoid wasting disk space,
47 //! the compiler also does some garbage collection each time it is started in
48 //! incremental compilation mode. Specifically, it will scan the incremental
49 //! compilation directory for private session directories that are not in use
50 //! any more and will delete those. It will also delete any finalized session
51 //! directories for a given crate except for the most recent one.
52 //!
53 //! ## Synchronization
54 //!
55 //! There is some synchronization needed in order for the compiler to be able to
56 //! determine whether a given private session directory is not in use any more.
57 //! This is done by creating a lock file for each session directory and
58 //! locking it while the directory is still being used. Since file locks have
59 //! operating system support, we can rely on the lock being released if the
60 //! compiler process dies for some unexpected reason. Thus, when garbage
61 //! collecting private session directories, the collecting process can determine
62 //! whether the directory is still in use by trying to acquire a lock on the
63 //! file. If locking the file fails, the original process must still be alive.
64 //! If locking the file succeeds, we know that the owning process is not alive
65 //! any more and we can safely delete the directory.
66 //! There is still a small time window between the original process creating the
67 //! lock file and actually locking it. In order to minimize the chance that
68 //! another process tries to acquire the lock in just that instant, only
69 //! session directories that are older than a few seconds are considered for
70 //! garbage collection.
71 //!
72 //! Another case that has to be considered is what happens if one process
73 //! deletes a finalized session directory that another process is currently
74 //! trying to copy from. This case is also handled via the lock file. Before
75 //! a process starts copying a finalized session directory, it will acquire a
76 //! shared lock on the directory's lock file. Any garbage collecting process,
77 //! on the other hand, will acquire an exclusive lock on the lock file.
78 //! Thus, if a directory is being collected, any reader process will fail
79 //! acquiring the shared lock and will leave the directory alone. Conversely,
80 //! if a collecting process can't acquire the exclusive lock because the
81 //! directory is currently being read from, it will leave collecting that
82 //! directory to another process at a later point in time.
83 //! The exact same scheme is also used when reading the metadata hashes file
84 //! from an extern crate. When a crate is compiled, the hash values of its
85 //! metadata are stored in a file in its session directory. When the
86 //! compilation session of another crate imports the first crate's metadata,
87 //! it also has to read in the accompanying metadata hashes. It thus will access
88 //! the finalized session directory of all crates it links to and while doing
89 //! so, it will also place a read lock on the respective session directory
90 //! so that it won't be deleted while the metadata hashes are loaded.
91 //!
92 //! ## Preconditions
93 //!
94 //! This system relies on two features being available in the file system in
95 //! order to work really well: file locking and hard linking.
96 //! If hard linking is not available (like on FAT) the data in the cache
97 //! actually has to be copied at the beginning of each session.
98 //! If file locking does not work reliably (like on NFS), some of the
99 //! synchronization will go haywire.
100 //! In both cases we recommend to locate the incremental compilation directory
101 //! on a file system that supports these things.
102 //! It might be a good idea though to try and detect whether we are on an
103 //! unsupported file system and emit a warning in that case. This is not yet
104 //! implemented.
105
106 use rustc::session::{Session, CrateDisambiguator};
107 use rustc_fs_util::{link_or_copy, LinkOrCopy};
108 use rustc_data_structures::{flock, base_n};
109 use rustc_data_structures::fx::{FxHashSet, FxHashMap};
110 use rustc_data_structures::svh::Svh;
111
112 use std::fs as std_fs;
113 use std::io;
114 use std::mem;
115 use std::path::{Path, PathBuf};
116 use std::time::{UNIX_EPOCH, SystemTime, Duration};
117
118 use rand::{RngCore, thread_rng};
119
120 const LOCK_FILE_EXT: &str = ".lock"; // extension (leading dot included) of session lock files
121 const DEP_GRAPH_FILENAME: &str = "dep-graph.bin"; // file name used by the dep-graph path helpers
122 const WORK_PRODUCTS_FILENAME: &str = "work-products.bin"; // file name used by `work_products_path`
123 const QUERY_CACHE_FILENAME: &str = "query-cache.bin"; // file name used by `query_cache_path`
124
125 // We encode integers using the following base, so they are shorter than decimal
126 // or hexadecimal numbers (we want short file and directory names). Since these
127 // numbers will be used in file names, we choose an encoding that is not
128 // case-sensitive (as opposed to base64, for example).
129 const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
130
131 pub fn dep_graph_path(sess: &Session) -> PathBuf {
132     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
133 }
134 pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf {
135     in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME)
136 }
137
138 pub fn work_products_path(sess: &Session) -> PathBuf {
139     in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
140 }
141
142 pub fn query_cache_path(sess: &Session) -> PathBuf {
143     in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME)
144 }
145
146 pub fn lock_file_path(session_dir: &Path) -> PathBuf {
147     let crate_dir = session_dir.parent().unwrap();
148
149     let directory_name = session_dir.file_name().unwrap().to_string_lossy();
150     assert_no_characters_lost(&directory_name);
151
152     let dash_indices: Vec<_> = directory_name.match_indices("-")
153                                              .map(|(idx, _)| idx)
154                                              .collect();
155     if dash_indices.len() != 3 {
156         bug!("Encountered incremental compilation session directory with \
157               malformed name: {}",
158              session_dir.display())
159     }
160
161     crate_dir.join(&directory_name[0 .. dash_indices[2]])
162              .with_extension(&LOCK_FILE_EXT[1..])
163 }
164
165 pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
166     in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
167 }
168
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
172
173 /// Allocates the private session directory. The boolean in the Ok() result
174 /// indicates whether we should try loading a dep graph from the successfully
175 /// initialized directory, or not.
176 /// The post-condition of this fn is that we have a valid incremental
177 /// compilation session directory, if the result is `Ok`. A valid session
178 /// directory is one that contains a locked lock file. It may or may not contain
179 /// a dep-graph and work products from a previous session.
180 /// If the call fails, the fn may leave behind an invalid session directory.
181 /// The garbage collection will take care of it.
182 pub fn prepare_session_directory(sess: &Session,
183                                  crate_name: &str,
184                                  crate_disambiguator: CrateDisambiguator) {
185     if sess.opts.incremental.is_none() {
186         return
187     }
188
189     debug!("prepare_session_directory");
190
191     // {incr-comp-dir}/{crate-name-and-disambiguator}
192     let crate_dir = crate_path(sess, crate_name, crate_disambiguator);
193     debug!("crate-dir: {}", crate_dir.display());
194     if create_dir(sess, &crate_dir, "crate").is_err() {
195         return
196     }
197
198     // Hack: canonicalize the path *after creating the directory*
199     // because, on windows, long paths can cause problems;
200     // canonicalization inserts this weird prefix that makes windows
201     // tolerate long paths.
202     let crate_dir = match crate_dir.canonicalize() {
203         Ok(v) => v,
204         Err(err) => {
205             sess.err(&format!("incremental compilation: error canonicalizing path `{}`: {}",
206                               crate_dir.display(), err));
207             return
208         }
209     };
210
211     let mut source_directories_already_tried = FxHashSet::default();
212
213     loop {
214         // Generate a session directory of the form:
215         //
216         // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
217         let session_dir = generate_session_dir_path(&crate_dir);
218         debug!("session-dir: {}", session_dir.display());
219
220         // Lock the new session directory. If this fails, return an
221         // error without retrying
222         let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) {
223             Ok(e) => e,
224             Err(_) => return,
225         };
226
227         // Now that we have the lock, we can actually create the session
228         // directory
229         if create_dir(sess, &session_dir, "session").is_err() {
230             return
231         }
232
233         // Find a suitable source directory to copy from. Ignore those that we
234         // have already tried before.
235         let source_directory = find_source_directory(&crate_dir,
236                                                      &source_directories_already_tried);
237
238         let source_directory = if let Some(dir) = source_directory {
239             dir
240         } else {
241             // There's nowhere to copy from, we're done
242             debug!("no source directory found. Continuing with empty session \
243                     directory.");
244
245             sess.init_incr_comp_session(session_dir, directory_lock, false);
246             return
247         };
248
249         debug!("attempting to copy data from source: {}",
250                source_directory.display());
251
252
253
254         // Try copying over all files from the source directory
255         if let Ok(allows_links) = copy_files(sess,
256                                              &session_dir,
257                                              &source_directory) {
258             debug!("successfully copied data from: {}",
259                    source_directory.display());
260
261             if !allows_links {
262                 sess.warn(&format!("Hard linking files in the incremental \
263                                         compilation cache failed. Copying files \
264                                         instead. Consider moving the cache \
265                                         directory to a file system which supports \
266                                         hard linking in session dir `{}`",
267                                         session_dir.display())
268                     );
269             }
270
271             sess.init_incr_comp_session(session_dir, directory_lock, true);
272             return
273         } else {
274              debug!("copying failed - trying next directory");
275
276             // Something went wrong while trying to copy/link files from the
277             // source directory. Try again with a different one.
278             source_directories_already_tried.insert(source_directory);
279
280             // Try to remove the session directory we just allocated. We don't
281             // know if there's any garbage in it from the failed copy action.
282             if let Err(err) = safe_remove_dir_all(&session_dir) {
283                 sess.warn(&format!("Failed to delete partly initialized \
284                                     session dir `{}`: {}",
285                                    session_dir.display(),
286                                    err));
287             }
288
289             delete_session_dir_lock_file(sess, &lock_file_path);
290             mem::drop(directory_lock);
291         }
292     }
293 }
294
295
296 /// This function finalizes and thus 'publishes' the session directory by
297 /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
298 /// If there have been compilation errors, however, this function will just
299 /// delete the presumably invalid session directory.
300 pub fn finalize_session_directory(sess: &Session, svh: Svh) {
301     if sess.opts.incremental.is_none() {
302         return;
303     }
304
305     let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
306
307     if sess.has_errors() {
308         // If there have been any errors during compilation, we don't want to
309         // publish this session directory. Rather, we'll just delete it.
310
311         debug!("finalize_session_directory() - invalidating session directory: {}",
312                 incr_comp_session_dir.display());
313
314         if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
315             sess.warn(&format!("Error deleting incremental compilation \
316                                 session directory `{}`: {}",
317                                incr_comp_session_dir.display(),
318                                err));
319         }
320
321         let lock_file_path = lock_file_path(&*incr_comp_session_dir);
322         delete_session_dir_lock_file(sess, &lock_file_path);
323         sess.mark_incr_comp_session_as_invalid();
324     }
325
326     debug!("finalize_session_directory() - session directory: {}",
327             incr_comp_session_dir.display());
328
329     let old_sub_dir_name = incr_comp_session_dir.file_name()
330                                                 .unwrap()
331                                                 .to_string_lossy();
332     assert_no_characters_lost(&old_sub_dir_name);
333
334     // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
335     // '-working' part with the SVH of the crate
336     let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-")
337                                                .map(|(idx, _)| idx)
338                                                .collect();
339     if dash_indices.len() != 3 {
340         bug!("Encountered incremental compilation session directory with \
341               malformed name: {}",
342              incr_comp_session_dir.display())
343     }
344
345     // State: "s-{timestamp}-{random-number}-"
346     let mut new_sub_dir_name = String::from(&old_sub_dir_name[..= dash_indices[2]]);
347
348     // Append the svh
349     base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
350
351     // Create the full path
352     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
353     debug!("finalize_session_directory() - new path: {}", new_path.display());
354
355     match std_fs::rename(&*incr_comp_session_dir, &new_path) {
356         Ok(_) => {
357             debug!("finalize_session_directory() - directory renamed successfully");
358
359             // This unlocks the directory
360             sess.finalize_incr_comp_session(new_path);
361         }
362         Err(e) => {
363             // Warn about the error. However, no need to abort compilation now.
364             sess.warn(&format!("Error finalizing incremental compilation \
365                                session directory `{}`: {}",
366                                incr_comp_session_dir.display(),
367                                e));
368
369             debug!("finalize_session_directory() - error, marking as invalid");
370             // Drop the file lock, so we can garage collect
371             sess.mark_incr_comp_session_as_invalid();
372         }
373     }
374
375     let _ = garbage_collect_session_directories(sess);
376 }
377
378 pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
379     let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
380     for entry in sess_dir_iterator {
381         let entry = entry?;
382         safe_remove_file(&entry.path())?
383     }
384     Ok(())
385 }
386
387 fn copy_files(sess: &Session,
388               target_dir: &Path,
389               source_dir: &Path)
390               -> Result<bool, ()> {
391     // We acquire a shared lock on the lock file of the directory, so that
392     // nobody deletes it out from under us while we are reading from it.
393     let lock_file_path = lock_file_path(source_dir);
394     let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path,
395                                                    false,   // don't wait,
396                                                    false,   // don't create
397                                                    false) { // not exclusive
398         lock
399     } else {
400         // Could not acquire the lock, don't try to copy from here
401         return Err(())
402     };
403
404     let source_dir_iterator = match source_dir.read_dir() {
405         Ok(it) => it,
406         Err(_) => return Err(())
407     };
408
409     let mut files_linked = 0;
410     let mut files_copied = 0;
411
412     for entry in source_dir_iterator {
413         match entry {
414             Ok(entry) => {
415                 let file_name = entry.file_name();
416
417                 let target_file_path = target_dir.join(file_name);
418                 let source_path = entry.path();
419
420                 debug!("copying into session dir: {}", source_path.display());
421                 match link_or_copy(source_path, target_file_path) {
422                     Ok(LinkOrCopy::Link) => {
423                         files_linked += 1
424                     }
425                     Ok(LinkOrCopy::Copy) => {
426                         files_copied += 1
427                     }
428                     Err(_) => return Err(())
429                 }
430             }
431             Err(_) => {
432                 return Err(())
433             }
434         }
435     }
436
437     if sess.opts.debugging_opts.incremental_info {
438         println!("[incremental] session directory: \
439                   {} files hard-linked", files_linked);
440         println!("[incremental] session directory: \
441                  {} files copied", files_copied);
442     }
443
444     Ok(files_linked > 0 || files_copied == 0)
445 }
446
447 /// Generate unique directory path of the form:
448 /// {crate_dir}/s-{timestamp}-{random-number}-working
449 fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
450     let timestamp = timestamp_to_string(SystemTime::now());
451     debug!("generate_session_dir_path: timestamp = {}", timestamp);
452     let random_number = thread_rng().next_u32();
453     debug!("generate_session_dir_path: random_number = {}", random_number);
454
455     let directory_name = format!("s-{}-{}-working",
456                                   timestamp,
457                                   base_n::encode(random_number as u128,
458                                                  INT_ENCODE_BASE));
459     debug!("generate_session_dir_path: directory_name = {}", directory_name);
460     let directory_path = crate_dir.join(directory_name);
461     debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
462     directory_path
463 }
464
465 fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> {
466     match std_fs::create_dir_all(path) {
467         Ok(()) => {
468             debug!("{} directory created successfully", dir_tag);
469             Ok(())
470         }
471         Err(err) => {
472             sess.err(&format!("Could not create incremental compilation {} \
473                                directory `{}`: {}",
474                               dir_tag,
475                               path.display(),
476                               err));
477             Err(())
478         }
479     }
480 }
481
482 /// Allocate the lock-file and lock it.
483 fn lock_directory(sess: &Session,
484                   session_dir: &Path)
485                   -> Result<(flock::Lock, PathBuf), ()> {
486     let lock_file_path = lock_file_path(session_dir);
487     debug!("lock_directory() - lock_file: {}", lock_file_path.display());
488
489     match flock::Lock::new(&lock_file_path,
490                            false, // don't wait
491                            true,  // create the lock file
492                            true) { // the lock should be exclusive
493         Ok(lock) => Ok((lock, lock_file_path)),
494         Err(err) => {
495             sess.err(&format!("incremental compilation: could not create \
496                                session directory lock file: {}", err));
497             Err(())
498         }
499     }
500 }
501
502 fn delete_session_dir_lock_file(sess: &Session,
503                                 lock_file_path: &Path) {
504     if let Err(err) = safe_remove_file(&lock_file_path) {
505         sess.warn(&format!("Error deleting lock file for incremental \
506                             compilation session directory `{}`: {}",
507                            lock_file_path.display(),
508                            err));
509     }
510 }
511
512 /// Find the most recent published session directory that is not in the
513 /// ignore-list.
514 fn find_source_directory(crate_dir: &Path,
515                          source_directories_already_tried: &FxHashSet<PathBuf>)
516                          -> Option<PathBuf> {
517     let iter = crate_dir.read_dir()
518                         .unwrap() // FIXME
519                         .filter_map(|e| e.ok().map(|e| e.path()));
520
521     find_source_directory_in_iter(iter, source_directories_already_tried)
522 }
523
524 fn find_source_directory_in_iter<I>(iter: I,
525                                     source_directories_already_tried: &FxHashSet<PathBuf>)
526                                     -> Option<PathBuf>
527     where I: Iterator<Item=PathBuf>
528 {
529     let mut best_candidate = (UNIX_EPOCH, None);
530
531     for session_dir in iter {
532         debug!("find_source_directory_in_iter - inspecting `{}`",
533                session_dir.display());
534
535         let directory_name = session_dir.file_name().unwrap().to_string_lossy();
536         assert_no_characters_lost(&directory_name);
537
538         if source_directories_already_tried.contains(&session_dir) ||
539            !is_session_directory(&directory_name) ||
540            !is_finalized(&directory_name) {
541             debug!("find_source_directory_in_iter - ignoring.");
542             continue
543         }
544
545         let timestamp = extract_timestamp_from_session_dir(&directory_name)
546             .unwrap_or_else(|_| {
547                 bug!("unexpected incr-comp session dir: {}", session_dir.display())
548             });
549
550         if timestamp > best_candidate.0 {
551             best_candidate = (timestamp, Some(session_dir.clone()));
552         }
553     }
554
555     best_candidate.1
556 }
557
/// Returns `true` if `directory_name` does not end with the `-working`
/// suffix carried by session directories that are still in progress.
fn is_finalized(directory_name: &str) -> bool {
    let still_working = directory_name.ends_with("-working");
    !still_working
}
561
562 fn is_session_directory(directory_name: &str) -> bool {
563     directory_name.starts_with("s-") &&
564     !directory_name.ends_with(LOCK_FILE_EXT)
565 }
566
567 fn is_session_directory_lock_file(file_name: &str) -> bool {
568     file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
569 }
570
571 fn extract_timestamp_from_session_dir(directory_name: &str)
572                                       -> Result<SystemTime, ()> {
573     if !is_session_directory(directory_name) {
574         return Err(())
575     }
576
577     let dash_indices: Vec<_> = directory_name.match_indices("-")
578                                              .map(|(idx, _)| idx)
579                                              .collect();
580     if dash_indices.len() != 3 {
581         return Err(())
582     }
583
584     string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]])
585 }
586
587 fn timestamp_to_string(timestamp: SystemTime) -> String {
588     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
589     let micros = duration.as_secs() * 1_000_000 +
590                 (duration.subsec_nanos() as u64) / 1000;
591     base_n::encode(micros as u128, INT_ENCODE_BASE)
592 }
593
594 fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
595     let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32);
596
597     if micros_since_unix_epoch.is_err() {
598         return Err(())
599     }
600
601     let micros_since_unix_epoch = micros_since_unix_epoch.unwrap();
602
603     let duration = Duration::new(micros_since_unix_epoch / 1_000_000,
604                                  1000 * (micros_since_unix_epoch % 1_000_000) as u32);
605     Ok(UNIX_EPOCH + duration)
606 }
607
608 fn crate_path(sess: &Session,
609               crate_name: &str,
610               crate_disambiguator: CrateDisambiguator)
611               -> PathBuf {
612
613     let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
614
615     // The full crate disambiguator is really long. 64 bits of it should be
616     // sufficient.
617     let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
618     let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
619                                              INT_ENCODE_BASE);
620
621     let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
622     incr_dir.join(crate_name)
623 }
624
625 fn assert_no_characters_lost(s: &str) {
626     if s.contains('\u{FFFD}') {
627         bug!("Could not losslessly convert '{}'.", s)
628     }
629 }
630
/// Returns `true` if `timestamp` lies more than ten seconds before the
/// current system time.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let cutoff = SystemTime::now() - Duration::from_secs(10);
    timestamp < cutoff
}
634
635 pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
636     debug!("garbage_collect_session_directories() - begin");
637
638     let session_directory = sess.incr_comp_session_dir();
639     debug!("garbage_collect_session_directories() - session directory: {}",
640         session_directory.display());
641
642     let crate_directory = session_directory.parent().unwrap();
643     debug!("garbage_collect_session_directories() - crate directory: {}",
644         crate_directory.display());
645
646     // First do a pass over the crate directory, collecting lock files and
647     // session directories
648     let mut session_directories = FxHashSet::default();
649     let mut lock_files = FxHashSet::default();
650
651     for dir_entry in crate_directory.read_dir()? {
652         let dir_entry = match dir_entry {
653             Ok(dir_entry) => dir_entry,
654             _ => {
655                 // Ignore any errors
656                 continue
657             }
658         };
659
660         let entry_name = dir_entry.file_name();
661         let entry_name = entry_name.to_string_lossy();
662
663         if is_session_directory_lock_file(&entry_name) {
664             assert_no_characters_lost(&entry_name);
665             lock_files.insert(entry_name.into_owned());
666         } else if is_session_directory(&entry_name) {
667             assert_no_characters_lost(&entry_name);
668             session_directories.insert(entry_name.into_owned());
669         } else {
670             // This is something we don't know, leave it alone
671         }
672     }
673
674     // Now map from lock files to session directories
675     let lock_file_to_session_dir: FxHashMap<String, Option<String>> =
676         lock_files.into_iter()
677                   .map(|lock_file_name| {
678                         assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
679                         let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
680                         let session_dir = {
681                             let dir_prefix = &lock_file_name[0 .. dir_prefix_end];
682                             session_directories.iter()
683                                                .find(|dir_name| dir_name.starts_with(dir_prefix))
684                         };
685                         (lock_file_name, session_dir.map(String::clone))
686                     })
687                   .collect();
688
689     // Delete all lock files, that don't have an associated directory. They must
690     // be some kind of leftover
691     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
692         if directory_name.is_none() {
693             let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
694                 Ok(timestamp) => timestamp,
695                 Err(()) => {
696                     debug!("Found lock-file with malformed timestamp: {}",
697                         crate_directory.join(&lock_file_name).display());
698                     // Ignore it
699                     continue
700                 }
701             };
702
703             let lock_file_path = crate_directory.join(&**lock_file_name);
704
705             if is_old_enough_to_be_collected(timestamp) {
706                 debug!("garbage_collect_session_directories() - deleting \
707                         garbage lock file: {}", lock_file_path.display());
708                 delete_session_dir_lock_file(sess, &lock_file_path);
709             } else {
710                 debug!("garbage_collect_session_directories() - lock file with \
711                         no session dir not old enough to be collected: {}",
712                        lock_file_path.display());
713             }
714         }
715     }
716
717     // Filter out `None` directories
718     let lock_file_to_session_dir: FxHashMap<String, String> =
719         lock_file_to_session_dir.into_iter()
720                                 .filter_map(|(lock_file_name, directory_name)| {
721                                     directory_name.map(|n| (lock_file_name, n))
722                                 })
723                                 .collect();
724
725     // Delete all session directories that don't have a lock file.
726     for directory_name in session_directories {
727         if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) {
728             let path = crate_directory.join(directory_name);
729             if let Err(err) = safe_remove_dir_all(&path) {
730                 sess.warn(&format!("Failed to garbage collect invalid incremental \
731                                     compilation session directory `{}`: {}",
732                                     path.display(),
733                                     err));
734             }
735         }
736     }
737
738     // Now garbage collect the valid session directories.
739     let mut deletion_candidates = vec![];
740     let mut definitely_delete = vec![];
741
742     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
743         debug!("garbage_collect_session_directories() - inspecting: {}",
744                 directory_name);
745
746         let timestamp = match extract_timestamp_from_session_dir(directory_name) {
747             Ok(timestamp) => timestamp,
748             Err(()) => {
749                 debug!("Found session-dir with malformed timestamp: {}",
750                         crate_directory.join(directory_name).display());
751                 // Ignore it
752                 continue
753             }
754         };
755
756         if is_finalized(directory_name) {
757             let lock_file_path = crate_directory.join(lock_file_name);
758             match flock::Lock::new(&lock_file_path,
759                                    false,  // don't wait
760                                    false,  // don't create the lock-file
761                                    true) { // get an exclusive lock
762                 Ok(lock) => {
763                     debug!("garbage_collect_session_directories() - \
764                             successfully acquired lock");
765                     debug!("garbage_collect_session_directories() - adding \
766                             deletion candidate: {}", directory_name);
767
768                     // Note that we are holding on to the lock
769                     deletion_candidates.push((timestamp,
770                                               crate_directory.join(directory_name),
771                                               Some(lock)));
772                 }
773                 Err(_) => {
774                     debug!("garbage_collect_session_directories() - \
775                             not collecting, still in use");
776                 }
777             }
778         } else if is_old_enough_to_be_collected(timestamp) {
779             // When cleaning out "-working" session directories, i.e.
780             // session directories that might still be in use by another
781             // compiler instance, we only look at directories that are
782             // at least ten seconds old. This is supposed to reduce the
783             // chance of deleting a directory in the time window where
784             // the process has allocated the directory but has not yet
785             // acquired the file-lock on it.
786
787             // Try to acquire the directory lock. If we can't, it
788             // means that the owning process is still alive and we
789             // leave this directory alone.
790             let lock_file_path = crate_directory.join(lock_file_name);
791             match flock::Lock::new(&lock_file_path,
792                                    false,  // don't wait
793                                    false,  // don't create the lock-file
794                                    true) { // get an exclusive lock
795                 Ok(lock) => {
796                     debug!("garbage_collect_session_directories() - \
797                             successfully acquired lock");
798
799                     // Note that we are holding on to the lock
800                     definitely_delete.push((crate_directory.join(directory_name),
801                                             Some(lock)));
802                 }
803                 Err(_) => {
804                     debug!("garbage_collect_session_directories() - \
805                             not collecting, still in use");
806                 }
807             }
808         } else {
809             debug!("garbage_collect_session_directories() - not finalized, not \
810                     old enough");
811         }
812     }
813
814     // Delete all but the most recent of the candidates
815     for (path, lock) in all_except_most_recent(deletion_candidates) {
816         debug!("garbage_collect_session_directories() - deleting `{}`",
817                 path.display());
818
819         if let Err(err) = safe_remove_dir_all(&path) {
820             sess.warn(&format!("Failed to garbage collect finalized incremental \
821                                 compilation session directory `{}`: {}",
822                                path.display(),
823                                err));
824         } else {
825             delete_session_dir_lock_file(sess, &lock_file_path(&path));
826         }
827
828
829         // Let's make it explicit that the file lock is released at this point,
830         // or rather, that we held on to it until here
831         mem::drop(lock);
832     }
833
834     for (path, lock) in definitely_delete {
835         debug!("garbage_collect_session_directories() - deleting `{}`",
836                 path.display());
837
838         if let Err(err) = safe_remove_dir_all(&path) {
839             sess.warn(&format!("Failed to garbage collect incremental \
840                                 compilation session directory `{}`: {}",
841                                path.display(),
842                                err));
843         } else {
844             delete_session_dir_lock_file(sess, &lock_file_path(&path));
845         }
846
847         // Let's make it explicit that the file lock is released at this point,
848         // or rather, that we held on to it until here
849         mem::drop(lock);
850     }
851
852     Ok(())
853 }
854
855 fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>)
856                           -> FxHashMap<PathBuf, Option<flock::Lock>> {
857     let most_recent = deletion_candidates.iter()
858                                          .map(|&(timestamp, ..)| timestamp)
859                                          .max();
860
861     if let Some(most_recent) = most_recent {
862         deletion_candidates.into_iter()
863                            .filter(|&(timestamp, ..)| timestamp != most_recent)
864                            .map(|(_, path, lock)| (path, lock))
865                            .collect()
866     } else {
867         FxHashMap::default()
868     }
869 }
870
/// Recursively removes the directory at `p`, treating a directory that does
/// not exist as success.
///
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to `std::fs::remove_dir_all()`. This will convert the path
/// into the '\\?\' format, which supports much longer paths.
///
/// Whether the directory exists is determined by the canonicalization itself
/// instead of a separate `Path::exists()` probe. `exists()` answers `false`
/// for *any* metadata error, which would make real failures look like
/// success, and probing first leaves a window in which another process can
/// remove the directory before it is canonicalized.
///
/// # Errors
///
/// Returns any error from canonicalizing `p` other than
/// `io::ErrorKind::NotFound`, and any error from the removal itself.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    match p.canonicalize() {
        Ok(canonicalized) => std_fs::remove_dir_all(canonicalized),
        // Nothing to delete: the directory is already gone.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
885
/// Removes the file at `p`, treating a file that does not exist as success.
///
/// The path is canonicalized before it is handed to `std::fs::remove_file()`,
/// mirroring `safe_remove_dir_all()`.
///
/// Whether the file exists is determined by the canonicalization itself
/// instead of a separate `Path::exists()` probe. `exists()` answers `false`
/// for *any* metadata error, which would make real failures look like
/// success, and probing first leaves a window in which another process can
/// remove the file before it is canonicalized.
///
/// # Errors
///
/// Returns any error from canonicalizing or removing the file other than
/// `io::ErrorKind::NotFound`.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        // Nothing to delete: the file is already gone.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    match std_fs::remove_file(canonicalized) {
        // The file vanished between canonicalization and removal; the goal
        // state (no such file) has been reached either way.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        result => result,
    }
}
894
#[test]
fn test_all_except_most_recent() {
    // Candidates are deliberately unsorted; each path is named after the
    // number of seconds its timestamp lies past the epoch.
    let candidates = [4u64, 1, 5, 3, 2]
        .iter()
        .map(|&secs| {
            (UNIX_EPOCH + Duration::new(secs, 0), PathBuf::from(secs.to_string()), None)
        })
        .collect::<Vec<_>>();

    let remaining: FxHashSet<PathBuf> = all_except_most_recent(candidates)
        .keys()
        .cloned()
        .collect();

    // Only the newest entry ("5") is expected to be missing.
    let expected: FxHashSet<PathBuf> = ["1", "2", "3", "4"]
        .iter()
        .map(|name| PathBuf::from(*name))
        .collect();

    assert_eq!(remaining, expected);

    // No candidates in, no candidates out.
    let remaining: FxHashSet<PathBuf> = all_except_most_recent(Vec::new())
        .keys()
        .cloned()
        .collect();

    assert_eq!(remaining, FxHashSet::default());
}
919
#[test]
fn test_timestamp_serialization() {
    // Round-trip a spread of timestamps (varying both the seconds and the
    // sub-second part) through the string encoding and back.
    (0 .. 1_000u64).for_each(|step| {
        let offset = Duration::new(step * 1_434_578, (step as u32) * 239_000);
        let original = UNIX_EPOCH + offset;

        let encoded = timestamp_to_string(original);
        let decoded = string_to_timestamp(&encoded);

        assert_eq!(Ok(original), decoded);
    });
}
928
#[test]
fn test_find_source_directory_in_iter() {
    let already_visited = FxHashSet::default();

    // Runs the lookup over the given directory names, in the given order.
    let lookup = |names: &[&str]| {
        let candidates: Vec<PathBuf> = names.iter().map(|name| PathBuf::from(*name)).collect();
        find_source_directory_in_iter(candidates.into_iter(), &already_visited)
    };

    // Find newest
    assert_eq!(
        lookup(&["crate-dir/s-3234-0000-svh",
                 "crate-dir/s-2234-0000-svh",
                 "crate-dir/s-1234-0000-svh"]),
        Some(PathBuf::from("crate-dir/s-3234-0000-svh")));

    // Filter out "-working"
    assert_eq!(
        lookup(&["crate-dir/s-3234-0000-working",
                 "crate-dir/s-2234-0000-svh",
                 "crate-dir/s-1234-0000-svh"]),
        Some(PathBuf::from("crate-dir/s-2234-0000-svh")));

    // Handle empty
    assert_eq!(lookup(&[]), None);

    // Handle only working
    assert_eq!(
        lookup(&["crate-dir/s-3234-0000-working",
                 "crate-dir/s-2234-0000-working",
                 "crate-dir/s-1234-0000-working"]),
        None);
}