]> git.lizzy.rs Git - rust.git/blob - src/librustc_incremental/persist/fs.rs
Add an unstable FileTypeExt extension trait for Windows
[rust.git] / src / librustc_incremental / persist / fs.rs
1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11
12 //! This module manages how the incremental compilation cache is represented in
13 //! the file system.
14 //!
15 //! Incremental compilation caches are managed according to a copy-on-write
16 //! strategy: Once a complete, consistent cache version is finalized, it is
17 //! never modified. Instead, when a subsequent compilation session is started,
18 //! the compiler will allocate a new version of the cache that starts out as
19 //! a copy of the previous version. Then only this new copy is modified and it
20 //! will not be visible to other processes until it is finalized. This ensures
21 //! that multiple compiler processes can be executed concurrently for the same
22 //! crate without interfering with each other or blocking each other.
23 //!
24 //! More concretely this is implemented via the following protocol:
25 //!
26 //! 1. For a newly started compilation session, the compiler allocates a
27 //!    new `session` directory within the incremental compilation directory.
28 //!    This session directory will have a unique name that ends with the suffix
29 //!    "-working" and that contains a creation timestamp.
30 //! 2. Next, the compiler looks for the newest finalized session directory,
31 //!    that is, a session directory from a previous compilation session that
32 //!    has been marked as valid and consistent. A session directory is
33 //!    considered finalized if the "-working" suffix in the directory name has
34 //!    been replaced by the SVH of the crate.
35 //! 3. Once the compiler has found a valid, finalized session directory, it will
36 //!    hard-link/copy its contents into the new "-working" directory. If all
37 //!    goes well, it will have its own, private copy of the source directory and
38 //!    subsequently not have to worry about synchronizing with other compiler
39 //!    processes.
40 //! 4. Now the compiler can do its normal compilation process, which involves
41 //!    reading and updating its private session directory.
42 //! 5. When compilation finishes without errors, the private session directory
43 //!    will be in a state where it can be used as input for other compilation
44 //!    sessions. That is, it will contain a dependency graph and cache artifacts
45 //!    that are consistent with the state of the source code it was compiled
46 //!    from, with no need to change them ever again. At this point, the compiler
47 //!    finalizes and "publishes" its private session directory by renaming it
48 //!    from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
49 //! 6. At this point the "old" session directory that we copied our data from
50 //!    at the beginning of the session has become obsolete because we have just
51 //!    published a more current version. Thus the compiler will delete it.
52 //!
53 //! ## Garbage Collection
54 //!
55 //! Naively following the above protocol might lead to old session directories
56 //! piling up if a compiler instance crashes for some reason before it's able to
57 //! remove its private session directory. In order to avoid wasting disk space,
58 //! the compiler also does some garbage collection each time it is started in
59 //! incremental compilation mode. Specifically, it will scan the incremental
60 //! compilation directory for private session directories that are not in use
61 //! any more and will delete those. It will also delete any finalized session
62 //! directories for a given crate except for the most recent one.
63 //!
64 //! ## Synchronization
65 //!
66 //! There is some synchronization needed in order for the compiler to be able to
67 //! determine whether a given private session directory is not in use any more.
68 //! This is done by creating a lock file for each session directory and
69 //! locking it while the directory is still being used. Since file locks have
70 //! operating system support, we can rely on the lock being released if the
71 //! compiler process dies for some unexpected reason. Thus, when garbage
72 //! collecting private session directories, the collecting process can determine
73 //! whether the directory is still in use by trying to acquire a lock on the
74 //! file. If locking the file fails, the original process must still be alive.
75 //! If locking the file succeeds, we know that the owning process is not alive
76 //! any more and we can safely delete the directory.
77 //! There is still a small time window between the original process creating the
78 //! lock file and actually locking it. In order to minimize the chance that
79 //! another process tries to acquire the lock in just that instant, only
80 //! session directories that are older than a few seconds are considered for
81 //! garbage collection.
82 //!
83 //! Another case that has to be considered is what happens if one process
84 //! deletes a finalized session directory that another process is currently
85 //! trying to copy from. This case is also handled via the lock file. Before
86 //! a process starts copying a finalized session directory, it will acquire a
87 //! shared lock on the directory's lock file. Any garbage collecting process,
88 //! on the other hand, will acquire an exclusive lock on the lock file.
89 //! Thus, if a directory is being collected, any reader process will fail
90 //! acquiring the shared lock and will leave the directory alone. Conversely,
91 //! if a collecting process can't acquire the exclusive lock because the
92 //! directory is currently being read from, it will leave collecting that
93 //! directory to another process at a later point in time.
94 //! The exact same scheme is also used when reading the metadata hashes file
95 //! from an extern crate. When a crate is compiled, the hash values of its
96 //! metadata are stored in a file in its session directory. When the
97 //! compilation session of another crate imports the first crate's metadata,
98 //! it also has to read in the accompanying metadata hashes. It thus will access
99 //! the finalized session directory of all crates it links to and while doing
100 //! so, it will also place a read lock on the respective session directory
101 //! so that it won't be deleted while the metadata hashes are loaded.
102 //!
103 //! ## Preconditions
104 //!
105 //! This system relies on two features being available in the file system in
106 //! order to work really well: file locking and hard linking.
107 //! If hard linking is not available (like on FAT) the data in the cache
108 //! actually has to be copied at the beginning of each session.
109 //! If file locking does not work reliably (like on NFS), some of the
110 //! synchronization will go haywire.
111 //! In both cases we recommend to locate the incremental compilation directory
112 //! on a file system that supports these things.
113 //! It might be a good idea though to try and detect whether we are on an
114 //! unsupported file system and emit a warning in that case. This is not yet
115 //! implemented.
116
117 use rustc::hir::svh::Svh;
118 use rustc::session::{Session, CrateDisambiguator};
119 use rustc::util::fs as fs_util;
120 use rustc_data_structures::{flock, base_n};
121 use rustc_data_structures::fx::{FxHashSet, FxHashMap};
122
123 use std::fs as std_fs;
124 use std::io;
125 use std::mem;
126 use std::path::{Path, PathBuf};
127 use std::time::{UNIX_EPOCH, SystemTime, Duration};
128
129 use rand::{thread_rng, Rng};
130
/// Extension (including the leading dot) of the lock file that belongs to a
/// session directory.
const LOCK_FILE_EXT: &str = ".lock";
/// File name, relative to a session directory, returned by `dep_graph_path`.
const DEP_GRAPH_FILENAME: &str = "dep-graph.bin";
/// File name, relative to a session directory, returned by
/// `work_products_path`.
const WORK_PRODUCTS_FILENAME: &str = "work-products.bin";
/// File name, relative to a session directory, returned by
/// `query_cache_path`.
const QUERY_CACHE_FILENAME: &str = "query-cache.bin";
135
// We encode integers using the following base, so they are shorter than decimal
// or hexadecimal numbers (we want short file and directory names). Since these
// numbers will be used in file names, we choose an encoding that is not
// case-sensitive (as opposed to base64, for example).
//
// In this file the base is used for the timestamp, random-number and SVH
// components of session directory names and for the disambiguator suffix of
// the crate directory name.
const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
141
142 pub fn dep_graph_path(sess: &Session) -> PathBuf {
143     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
144 }
145 pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf {
146     in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME)
147 }
148
149 pub fn work_products_path(sess: &Session) -> PathBuf {
150     in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
151 }
152
153 pub fn query_cache_path(sess: &Session) -> PathBuf {
154     in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME)
155 }
156
157 pub fn lock_file_path(session_dir: &Path) -> PathBuf {
158     let crate_dir = session_dir.parent().unwrap();
159
160     let directory_name = session_dir.file_name().unwrap().to_string_lossy();
161     assert_no_characters_lost(&directory_name);
162
163     let dash_indices: Vec<_> = directory_name.match_indices("-")
164                                              .map(|(idx, _)| idx)
165                                              .collect();
166     if dash_indices.len() != 3 {
167         bug!("Encountered incremental compilation session directory with \
168               malformed name: {}",
169              session_dir.display())
170     }
171
172     crate_dir.join(&directory_name[0 .. dash_indices[2]])
173              .with_extension(&LOCK_FILE_EXT[1..])
174 }
175
176 pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
177     in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
178 }
179
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
183
184 /// Allocates the private session directory. The boolean in the Ok() result
185 /// indicates whether we should try loading a dep graph from the successfully
186 /// initialized directory, or not.
187 /// The post-condition of this fn is that we have a valid incremental
188 /// compilation session directory, if the result is `Ok`. A valid session
189 /// directory is one that contains a locked lock file. It may or may not contain
190 /// a dep-graph and work products from a previous session.
191 /// If the call fails, the fn may leave behind an invalid session directory.
192 /// The garbage collection will take care of it.
193 pub fn prepare_session_directory(sess: &Session,
194                                  crate_name: &str,
195                                  crate_disambiguator: CrateDisambiguator) {
196     if sess.opts.incremental.is_none() {
197         return
198     }
199
200     debug!("prepare_session_directory");
201
202     // {incr-comp-dir}/{crate-name-and-disambiguator}
203     let crate_dir = crate_path(sess, crate_name, crate_disambiguator);
204     debug!("crate-dir: {}", crate_dir.display());
205     if create_dir(sess, &crate_dir, "crate").is_err() {
206         return
207     }
208
209     // Hack: canonicalize the path *after creating the directory*
210     // because, on windows, long paths can cause problems;
211     // canonicalization inserts this weird prefix that makes windows
212     // tolerate long paths.
213     let crate_dir = match crate_dir.canonicalize() {
214         Ok(v) => v,
215         Err(err) => {
216             sess.err(&format!("incremental compilation: error canonicalizing path `{}`: {}",
217                               crate_dir.display(), err));
218             return
219         }
220     };
221
222     let mut source_directories_already_tried = FxHashSet();
223
224     loop {
225         // Generate a session directory of the form:
226         //
227         // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
228         let session_dir = generate_session_dir_path(&crate_dir);
229         debug!("session-dir: {}", session_dir.display());
230
231         // Lock the new session directory. If this fails, return an
232         // error without retrying
233         let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) {
234             Ok(e) => e,
235             Err(_) => return,
236         };
237
238         // Now that we have the lock, we can actually create the session
239         // directory
240         if create_dir(sess, &session_dir, "session").is_err() {
241             return
242         }
243
244         // Find a suitable source directory to copy from. Ignore those that we
245         // have already tried before.
246         let source_directory = find_source_directory(&crate_dir,
247                                                      &source_directories_already_tried);
248
249         let source_directory = if let Some(dir) = source_directory {
250             dir
251         } else {
252             // There's nowhere to copy from, we're done
253             debug!("no source directory found. Continuing with empty session \
254                     directory.");
255
256             sess.init_incr_comp_session(session_dir, directory_lock, false);
257             return
258         };
259
260         debug!("attempting to copy data from source: {}",
261                source_directory.display());
262
263
264
265         // Try copying over all files from the source directory
266         if let Ok(allows_links) = copy_files(sess,
267                                              &session_dir,
268                                              &source_directory) {
269             debug!("successfully copied data from: {}",
270                    source_directory.display());
271
272             if !allows_links {
273                 sess.warn(&format!("Hard linking files in the incremental \
274                                         compilation cache failed. Copying files \
275                                         instead. Consider moving the cache \
276                                         directory to a file system which supports \
277                                         hard linking in session dir `{}`",
278                                         session_dir.display())
279                     );
280             }
281
282             sess.init_incr_comp_session(session_dir, directory_lock, true);
283             return
284         } else {
285              debug!("copying failed - trying next directory");
286
287             // Something went wrong while trying to copy/link files from the
288             // source directory. Try again with a different one.
289             source_directories_already_tried.insert(source_directory);
290
291             // Try to remove the session directory we just allocated. We don't
292             // know if there's any garbage in it from the failed copy action.
293             if let Err(err) = safe_remove_dir_all(&session_dir) {
294                 sess.warn(&format!("Failed to delete partly initialized \
295                                     session dir `{}`: {}",
296                                    session_dir.display(),
297                                    err));
298             }
299
300             delete_session_dir_lock_file(sess, &lock_file_path);
301             mem::drop(directory_lock);
302         }
303     }
304 }
305
306
307 /// This function finalizes and thus 'publishes' the session directory by
308 /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
309 /// If there have been compilation errors, however, this function will just
310 /// delete the presumably invalid session directory.
311 pub fn finalize_session_directory(sess: &Session, svh: Svh) {
312     if sess.opts.incremental.is_none() {
313         return;
314     }
315
316     let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
317
318     if sess.has_errors() {
319         // If there have been any errors during compilation, we don't want to
320         // publish this session directory. Rather, we'll just delete it.
321
322         debug!("finalize_session_directory() - invalidating session directory: {}",
323                 incr_comp_session_dir.display());
324
325         if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
326             sess.warn(&format!("Error deleting incremental compilation \
327                                 session directory `{}`: {}",
328                                incr_comp_session_dir.display(),
329                                err));
330         }
331
332         let lock_file_path = lock_file_path(&*incr_comp_session_dir);
333         delete_session_dir_lock_file(sess, &lock_file_path);
334         sess.mark_incr_comp_session_as_invalid();
335     }
336
337     debug!("finalize_session_directory() - session directory: {}",
338             incr_comp_session_dir.display());
339
340     let old_sub_dir_name = incr_comp_session_dir.file_name()
341                                                 .unwrap()
342                                                 .to_string_lossy();
343     assert_no_characters_lost(&old_sub_dir_name);
344
345     // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
346     // '-working' part with the SVH of the crate
347     let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-")
348                                                .map(|(idx, _)| idx)
349                                                .collect();
350     if dash_indices.len() != 3 {
351         bug!("Encountered incremental compilation session directory with \
352               malformed name: {}",
353              incr_comp_session_dir.display())
354     }
355
356     // State: "s-{timestamp}-{random-number}-"
357     let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
358
359     // Append the svh
360     base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
361
362     // Create the full path
363     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
364     debug!("finalize_session_directory() - new path: {}", new_path.display());
365
366     match std_fs::rename(&*incr_comp_session_dir, &new_path) {
367         Ok(_) => {
368             debug!("finalize_session_directory() - directory renamed successfully");
369
370             // This unlocks the directory
371             sess.finalize_incr_comp_session(new_path);
372         }
373         Err(e) => {
374             // Warn about the error. However, no need to abort compilation now.
375             sess.warn(&format!("Error finalizing incremental compilation \
376                                session directory `{}`: {}",
377                                incr_comp_session_dir.display(),
378                                e));
379
380             debug!("finalize_session_directory() - error, marking as invalid");
381             // Drop the file lock, so we can garage collect
382             sess.mark_incr_comp_session_as_invalid();
383         }
384     }
385
386     let _ = garbage_collect_session_directories(sess);
387 }
388
389 pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
390     let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
391     for entry in sess_dir_iterator {
392         let entry = entry?;
393         safe_remove_file(&entry.path())?
394     }
395     Ok(())
396 }
397
398 fn copy_files(sess: &Session,
399               target_dir: &Path,
400               source_dir: &Path)
401               -> Result<bool, ()> {
402     // We acquire a shared lock on the lock file of the directory, so that
403     // nobody deletes it out from under us while we are reading from it.
404     let lock_file_path = lock_file_path(source_dir);
405     let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path,
406                                                    false,   // don't wait,
407                                                    false,   // don't create
408                                                    false) { // not exclusive
409         lock
410     } else {
411         // Could not acquire the lock, don't try to copy from here
412         return Err(())
413     };
414
415     let source_dir_iterator = match source_dir.read_dir() {
416         Ok(it) => it,
417         Err(_) => return Err(())
418     };
419
420     let mut files_linked = 0;
421     let mut files_copied = 0;
422
423     for entry in source_dir_iterator {
424         match entry {
425             Ok(entry) => {
426                 let file_name = entry.file_name();
427
428                 let target_file_path = target_dir.join(file_name);
429                 let source_path = entry.path();
430
431                 debug!("copying into session dir: {}", source_path.display());
432                 match fs_util::link_or_copy(source_path, target_file_path) {
433                     Ok(fs_util::LinkOrCopy::Link) => {
434                         files_linked += 1
435                     }
436                     Ok(fs_util::LinkOrCopy::Copy) => {
437                         files_copied += 1
438                     }
439                     Err(_) => return Err(())
440                 }
441             }
442             Err(_) => {
443                 return Err(())
444             }
445         }
446     }
447
448     if sess.opts.debugging_opts.incremental_info {
449         println!("[incremental] session directory: \
450                   {} files hard-linked", files_linked);
451         println!("[incremental] session directory: \
452                  {} files copied", files_copied);
453     }
454
455     Ok(files_linked > 0 || files_copied == 0)
456 }
457
458 /// Generate unique directory path of the form:
459 /// {crate_dir}/s-{timestamp}-{random-number}-working
460 fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
461     let timestamp = timestamp_to_string(SystemTime::now());
462     debug!("generate_session_dir_path: timestamp = {}", timestamp);
463     let random_number = thread_rng().next_u32();
464     debug!("generate_session_dir_path: random_number = {}", random_number);
465
466     let directory_name = format!("s-{}-{}-working",
467                                   timestamp,
468                                   base_n::encode(random_number as u128,
469                                                  INT_ENCODE_BASE));
470     debug!("generate_session_dir_path: directory_name = {}", directory_name);
471     let directory_path = crate_dir.join(directory_name);
472     debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
473     directory_path
474 }
475
476 fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> {
477     match std_fs::create_dir_all(path) {
478         Ok(()) => {
479             debug!("{} directory created successfully", dir_tag);
480             Ok(())
481         }
482         Err(err) => {
483             sess.err(&format!("Could not create incremental compilation {} \
484                                directory `{}`: {}",
485                               dir_tag,
486                               path.display(),
487                               err));
488             Err(())
489         }
490     }
491 }
492
493 /// Allocate a the lock-file and lock it.
494 fn lock_directory(sess: &Session,
495                   session_dir: &Path)
496                   -> Result<(flock::Lock, PathBuf), ()> {
497     let lock_file_path = lock_file_path(session_dir);
498     debug!("lock_directory() - lock_file: {}", lock_file_path.display());
499
500     match flock::Lock::new(&lock_file_path,
501                            false, // don't wait
502                            true,  // create the lock file
503                            true) { // the lock should be exclusive
504         Ok(lock) => Ok((lock, lock_file_path)),
505         Err(err) => {
506             sess.err(&format!("incremental compilation: could not create \
507                                session directory lock file: {}", err));
508             Err(())
509         }
510     }
511 }
512
513 fn delete_session_dir_lock_file(sess: &Session,
514                                 lock_file_path: &Path) {
515     if let Err(err) = safe_remove_file(&lock_file_path) {
516         sess.warn(&format!("Error deleting lock file for incremental \
517                             compilation session directory `{}`: {}",
518                            lock_file_path.display(),
519                            err));
520     }
521 }
522
523 /// Find the most recent published session directory that is not in the
524 /// ignore-list.
525 fn find_source_directory(crate_dir: &Path,
526                          source_directories_already_tried: &FxHashSet<PathBuf>)
527                          -> Option<PathBuf> {
528     let iter = crate_dir.read_dir()
529                         .unwrap() // FIXME
530                         .filter_map(|e| e.ok().map(|e| e.path()));
531
532     find_source_directory_in_iter(iter, source_directories_already_tried)
533 }
534
535 fn find_source_directory_in_iter<I>(iter: I,
536                                     source_directories_already_tried: &FxHashSet<PathBuf>)
537                                     -> Option<PathBuf>
538     where I: Iterator<Item=PathBuf>
539 {
540     let mut best_candidate = (UNIX_EPOCH, None);
541
542     for session_dir in iter {
543         debug!("find_source_directory_in_iter - inspecting `{}`",
544                session_dir.display());
545
546         let directory_name = session_dir.file_name().unwrap().to_string_lossy();
547         assert_no_characters_lost(&directory_name);
548
549         if source_directories_already_tried.contains(&session_dir) ||
550            !is_session_directory(&directory_name) ||
551            !is_finalized(&directory_name) {
552             debug!("find_source_directory_in_iter - ignoring.");
553             continue
554         }
555
556         let timestamp = extract_timestamp_from_session_dir(&directory_name)
557             .unwrap_or_else(|_| {
558                 bug!("unexpected incr-comp session dir: {}", session_dir.display())
559             });
560
561         if timestamp > best_candidate.0 {
562             best_candidate = (timestamp, Some(session_dir.clone()));
563         }
564     }
565
566     best_candidate.1
567 }
568
/// Returns `true` unless `directory_name` still carries the "-working"
/// suffix of a private session directory.
fn is_finalized(directory_name: &str) -> bool {
    let still_private = directory_name.ends_with("-working");
    !still_private
}
572
573 fn is_session_directory(directory_name: &str) -> bool {
574     directory_name.starts_with("s-") &&
575     !directory_name.ends_with(LOCK_FILE_EXT)
576 }
577
578 fn is_session_directory_lock_file(file_name: &str) -> bool {
579     file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
580 }
581
582 fn extract_timestamp_from_session_dir(directory_name: &str)
583                                       -> Result<SystemTime, ()> {
584     if !is_session_directory(directory_name) {
585         return Err(())
586     }
587
588     let dash_indices: Vec<_> = directory_name.match_indices("-")
589                                              .map(|(idx, _)| idx)
590                                              .collect();
591     if dash_indices.len() != 3 {
592         return Err(())
593     }
594
595     string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]])
596 }
597
598 fn timestamp_to_string(timestamp: SystemTime) -> String {
599     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
600     let micros = duration.as_secs() * 1_000_000 +
601                 (duration.subsec_nanos() as u64) / 1000;
602     base_n::encode(micros as u128, INT_ENCODE_BASE)
603 }
604
605 fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
606     let micros_since_unix_epoch = u64::from_str_radix(s, 36);
607
608     if micros_since_unix_epoch.is_err() {
609         return Err(())
610     }
611
612     let micros_since_unix_epoch = micros_since_unix_epoch.unwrap();
613
614     let duration = Duration::new(micros_since_unix_epoch / 1_000_000,
615                                  1000 * (micros_since_unix_epoch % 1_000_000) as u32);
616     Ok(UNIX_EPOCH + duration)
617 }
618
619 fn crate_path(sess: &Session,
620               crate_name: &str,
621               crate_disambiguator: CrateDisambiguator)
622               -> PathBuf {
623
624     let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
625
626     // The full crate disambiguator is really long. 64 bits of it should be
627     // sufficient.
628     let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
629     let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
630                                              INT_ENCODE_BASE);
631
632     let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
633     incr_dir.join(crate_name)
634 }
635
636 fn assert_no_characters_lost(s: &str) {
637     if s.contains('\u{FFFD}') {
638         bug!("Could not losslessly convert '{}'.", s)
639     }
640 }
641
/// Returns `true` if `timestamp` lies more than ten seconds in the past,
/// measured against the current system time.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let minimum_age = Duration::from_secs(10);
    let newest_collectible = SystemTime::now() - minimum_age;
    timestamp < newest_collectible
}
645
646 pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
647     debug!("garbage_collect_session_directories() - begin");
648
649     let session_directory = sess.incr_comp_session_dir();
650     debug!("garbage_collect_session_directories() - session directory: {}",
651         session_directory.display());
652
653     let crate_directory = session_directory.parent().unwrap();
654     debug!("garbage_collect_session_directories() - crate directory: {}",
655         crate_directory.display());
656
657     // First do a pass over the crate directory, collecting lock files and
658     // session directories
659     let mut session_directories = FxHashSet();
660     let mut lock_files = FxHashSet();
661
662     for dir_entry in try!(crate_directory.read_dir()) {
663         let dir_entry = match dir_entry {
664             Ok(dir_entry) => dir_entry,
665             _ => {
666                 // Ignore any errors
667                 continue
668             }
669         };
670
671         let entry_name = dir_entry.file_name();
672         let entry_name = entry_name.to_string_lossy();
673
674         if is_session_directory_lock_file(&entry_name) {
675             assert_no_characters_lost(&entry_name);
676             lock_files.insert(entry_name.into_owned());
677         } else if is_session_directory(&entry_name) {
678             assert_no_characters_lost(&entry_name);
679             session_directories.insert(entry_name.into_owned());
680         } else {
681             // This is something we don't know, leave it alone
682         }
683     }
684
685     // Now map from lock files to session directories
686     let lock_file_to_session_dir: FxHashMap<String, Option<String>> =
687         lock_files.into_iter()
688                   .map(|lock_file_name| {
689                         assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
690                         let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
691                         let session_dir = {
692                             let dir_prefix = &lock_file_name[0 .. dir_prefix_end];
693                             session_directories.iter()
694                                                .find(|dir_name| dir_name.starts_with(dir_prefix))
695                         };
696                         (lock_file_name, session_dir.map(String::clone))
697                     })
698                   .collect();
699
700     // Delete all lock files, that don't have an associated directory. They must
701     // be some kind of leftover
702     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
703         if directory_name.is_none() {
704             let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
705                 Ok(timestamp) => timestamp,
706                 Err(()) => {
707                     debug!("Found lock-file with malformed timestamp: {}",
708                         crate_directory.join(&lock_file_name).display());
709                     // Ignore it
710                     continue
711                 }
712             };
713
714             let lock_file_path = crate_directory.join(&**lock_file_name);
715
716             if is_old_enough_to_be_collected(timestamp) {
717                 debug!("garbage_collect_session_directories() - deleting \
718                         garbage lock file: {}", lock_file_path.display());
719                 delete_session_dir_lock_file(sess, &lock_file_path);
720             } else {
721                 debug!("garbage_collect_session_directories() - lock file with \
722                         no session dir not old enough to be collected: {}",
723                        lock_file_path.display());
724             }
725         }
726     }
727
728     // Filter out `None` directories
729     let lock_file_to_session_dir: FxHashMap<String, String> =
730         lock_file_to_session_dir.into_iter()
731                                 .filter_map(|(lock_file_name, directory_name)| {
732                                     directory_name.map(|n| (lock_file_name, n))
733                                 })
734                                 .collect();
735
736     let mut deletion_candidates = vec![];
737     let mut definitely_delete = vec![];
738
739     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
740         debug!("garbage_collect_session_directories() - inspecting: {}",
741                 directory_name);
742
743         let timestamp = match extract_timestamp_from_session_dir(directory_name) {
744             Ok(timestamp) => timestamp,
745             Err(()) => {
746                 debug!("Found session-dir with malformed timestamp: {}",
747                         crate_directory.join(directory_name).display());
748                 // Ignore it
749                 continue
750             }
751         };
752
753         if is_finalized(directory_name) {
754             let lock_file_path = crate_directory.join(lock_file_name);
755             match flock::Lock::new(&lock_file_path,
756                                    false,  // don't wait
757                                    false,  // don't create the lock-file
758                                    true) { // get an exclusive lock
759                 Ok(lock) => {
760                     debug!("garbage_collect_session_directories() - \
761                             successfully acquired lock");
762                     debug!("garbage_collect_session_directories() - adding \
763                             deletion candidate: {}", directory_name);
764
765                     // Note that we are holding on to the lock
766                     deletion_candidates.push((timestamp,
767                                               crate_directory.join(directory_name),
768                                               Some(lock)));
769                 }
770                 Err(_) => {
771                     debug!("garbage_collect_session_directories() - \
772                             not collecting, still in use");
773                 }
774             }
775         } else if is_old_enough_to_be_collected(timestamp) {
776             // When cleaning out "-working" session directories, i.e.
777             // session directories that might still be in use by another
778             // compiler instance, we only look at directories that are
779             // at least ten seconds old. This is supposed to reduce the
780             // chance of deleting a directory in the time window where
781             // the process has allocated the directory but has not yet
782             // acquired the file-lock on it.
783
784             // Try to acquire the directory lock. If we can't, it
785             // means that the owning process is still alive and we
786             // leave this directory alone.
787             let lock_file_path = crate_directory.join(lock_file_name);
788             match flock::Lock::new(&lock_file_path,
789                                    false,  // don't wait
790                                    false,  // don't create the lock-file
791                                    true) { // get an exclusive lock
792                 Ok(lock) => {
793                     debug!("garbage_collect_session_directories() - \
794                             successfully acquired lock");
795
796                     // Note that we are holding on to the lock
797                     definitely_delete.push((crate_directory.join(directory_name),
798                                             Some(lock)));
799                 }
800                 Err(_) => {
801                     debug!("garbage_collect_session_directories() - \
802                             not collecting, still in use");
803                 }
804             }
805         } else {
806             debug!("garbage_collect_session_directories() - not finalized, not \
807                     old enough");
808         }
809     }
810
811     // Delete all but the most recent of the candidates
812     for (path, lock) in all_except_most_recent(deletion_candidates) {
813         debug!("garbage_collect_session_directories() - deleting `{}`",
814                 path.display());
815
816         if let Err(err) = safe_remove_dir_all(&path) {
817             sess.warn(&format!("Failed to garbage collect finalized incremental \
818                                 compilation session directory `{}`: {}",
819                                path.display(),
820                                err));
821         } else {
822             delete_session_dir_lock_file(sess, &lock_file_path(&path));
823         }
824
825
826         // Let's make it explicit that the file lock is released at this point,
827         // or rather, that we held on to it until here
828         mem::drop(lock);
829     }
830
831     for (path, lock) in definitely_delete {
832         debug!("garbage_collect_session_directories() - deleting `{}`",
833                 path.display());
834
835         if let Err(err) = safe_remove_dir_all(&path) {
836             sess.warn(&format!("Failed to garbage collect incremental \
837                                 compilation session directory `{}`: {}",
838                                path.display(),
839                                err));
840         } else {
841             delete_session_dir_lock_file(sess, &lock_file_path(&path));
842         }
843
844         // Let's make it explicit that the file lock is released at this point,
845         // or rather, that we held on to it until here
846         mem::drop(lock);
847     }
848
849     Ok(())
850 }
851
852 fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>)
853                           -> FxHashMap<PathBuf, Option<flock::Lock>> {
854     let most_recent = deletion_candidates.iter()
855                                          .map(|&(timestamp, ..)| timestamp)
856                                          .max();
857
858     if let Some(most_recent) = most_recent {
859         deletion_candidates.into_iter()
860                            .filter(|&(timestamp, ..)| timestamp != most_recent)
861                            .map(|(_, path, lock)| (path, lock))
862                            .collect()
863     } else {
864         FxHashMap()
865     }
866 }
867
/// Removes the directory tree at `p`, doing nothing (and reporting success)
/// if `p` does not exist.
///
/// Paths of artifacts within session directories can get quite long, and the
/// regular WinApi functions only support paths up to 260 characters. To work
/// around that limit the directory path is canonicalized before it is handed
/// to std::fs::remove_dir_all(), which converts it into the '\\?\' format
/// that supports much longer paths.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    if !p.exists() {
        return Ok(());
    }

    // A failure to canonicalize is reported to the caller as-is.
    p.canonicalize().and_then(std_fs::remove_dir_all)
}
882
/// Removes the file at `p`, doing nothing (and reporting success) if `p` does
/// not exist.
///
/// The path is canonicalized before it is handed to std::fs::remove_file(),
/// presumably for the same long-path reason as in `safe_remove_dir_all`.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    if !p.exists() {
        return Ok(());
    }

    // A failure to canonicalize is reported to the caller as-is.
    p.canonicalize().and_then(std_fs::remove_file)
}
891
/// Checks that `all_except_most_recent` leaves out only the entry with the
/// newest timestamp, and that an empty candidate list gives an empty result.
#[test]
fn test_all_except_most_recent() {
    // Each candidate's path is simply its timestamp in seconds.
    let candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)> =
        [4u64, 1, 5, 3, 2].iter()
                          .map(|&secs| {
                              (UNIX_EPOCH + Duration::new(secs, 0),
                               PathBuf::from(secs.to_string()),
                               None)
                          })
                          .collect();

    let survivors: FxHashSet<PathBuf> =
        all_except_most_recent(candidates).keys().cloned().collect();
    let expected: FxHashSet<PathBuf> =
        ["1", "2", "3", "4"].iter().map(|name| PathBuf::from(*name)).collect();
    assert_eq!(survivors, expected);

    let from_empty: FxHashSet<PathBuf> =
        all_except_most_recent(Vec::new()).keys().cloned().collect();
    assert_eq!(from_empty, FxHashSet());
}
916
/// Round-trips a spread of timestamps through `timestamp_to_string` and
/// `string_to_timestamp` and checks that each one comes back unchanged.
#[test]
fn test_timestamp_serialization() {
    for step in 0 .. 1_000u64 {
        // Vary both the whole-second and the sub-second component.
        let secs = step * 1_434_578;
        let nanos = (step as u32) * 239_000;
        let original = UNIX_EPOCH + Duration::new(secs, nanos);

        let encoded = timestamp_to_string(original);
        let decoded = string_to_timestamp(&encoded);
        assert_eq!(Ok(original), decoded);
    }
}
925
/// Exercises `find_source_directory_in_iter` with an empty "already visited"
/// set: picking the newest directory, skipping "-working" directories, and
/// returning `None` when nothing qualifies.
#[test]
fn test_find_source_directory_in_iter() {
    let already_visited = FxHashSet();

    // Turns the given names into paths and runs the lookup over them.
    let search = |names: &[&str]| {
        let candidates: Vec<PathBuf> = names.iter()
                                            .map(|name| PathBuf::from(*name))
                                            .collect();
        find_source_directory_in_iter(candidates.into_iter(), &already_visited)
    };

    // Find newest
    assert_eq!(search(&["crate-dir/s-3234-0000-svh",
                        "crate-dir/s-2234-0000-svh",
                        "crate-dir/s-1234-0000-svh"]),
               Some(PathBuf::from("crate-dir/s-3234-0000-svh")));

    // Filter out "-working"
    assert_eq!(search(&["crate-dir/s-3234-0000-working",
                        "crate-dir/s-2234-0000-svh",
                        "crate-dir/s-1234-0000-svh"]),
               Some(PathBuf::from("crate-dir/s-2234-0000-svh")));

    // Handle empty
    assert_eq!(search(&[]), None);

    // Handle only working
    assert_eq!(search(&["crate-dir/s-3234-0000-working",
                        "crate-dir/s-2234-0000-working",
                        "crate-dir/s-1234-0000-working"]),
               None);
}