]> git.lizzy.rs Git - rust.git/blob - src/librustc_incremental/persist/fs.rs
Auto merge of #35856 - phimuemue:master, r=brson
[rust.git] / src / librustc_incremental / persist / fs.rs
1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11
12 //! This module manages how the incremental compilation cache is represented in
13 //! the file system.
14 //!
15 //! Incremental compilation caches are managed according to a copy-on-write
16 //! strategy: Once a complete, consistent cache version is finalized, it is
17 //! never modified. Instead, when a subsequent compilation session is started,
18 //! the compiler will allocate a new version of the cache that starts out as
19 //! a copy of the previous version. Then only this new copy is modified and it
20 //! will not be visible to other processes until it is finalized. This ensures
21 //! that multiple compiler processes can be executed concurrently for the same
22 //! crate without interfering with each other or blocking each other.
23 //!
24 //! More concretely this is implemented via the following protocol:
25 //!
26 //! 1. For a newly started compilation session, the compiler allocates a
27 //!    new `session` directory within the incremental compilation directory.
28 //!    This session directory will have a unique name that ends with the suffix
29 //!    "-working" and that contains a creation timestamp.
30 //! 2. Next, the compiler looks for the newest finalized session directory,
31 //!    that is, a session directory from a previous compilation session that
32 //!    has been marked as valid and consistent. A session directory is
33 //!    considered finalized if the "-working" suffix in the directory name has
34 //!    been replaced by the SVH of the crate.
35 //! 3. Once the compiler has found a valid, finalized session directory, it will
36 //!    hard-link/copy its contents into the new "-working" directory. If all
37 //!    goes well, it will have its own, private copy of the source directory and
38 //!    subsequently not have to worry about synchronizing with other compiler
39 //!    processes.
40 //! 4. Now the compiler can do its normal compilation process, which involves
41 //!    reading and updating its private session directory.
42 //! 5. When compilation finishes without errors, the private session directory
43 //!    will be in a state where it can be used as input for other compilation
44 //!    sessions. That is, it will contain a dependency graph and cache artifacts
45 //!    that are consistent with the state of the source code it was compiled
46 //!    from, with no need to change them ever again. At this point, the compiler
47 //!    finalizes and "publishes" its private session directory by renaming it
48 //!    from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}".
49 //! 6. At this point the "old" session directory that we copied our data from
50 //!    at the beginning of the session has become obsolete because we have just
51 //!    published a more current version. Thus the compiler will delete it.
52 //!
53 //! ## Garbage Collection
54 //!
55 //! Naively following the above protocol might lead to old session directories
//! piling up if a compiler instance crashes for some reason before it's able to
57 //! remove its private session directory. In order to avoid wasting disk space,
58 //! the compiler also does some garbage collection each time it is started in
59 //! incremental compilation mode. Specifically, it will scan the incremental
60 //! compilation directory for private session directories that are not in use
61 //! any more and will delete those. It will also delete any finalized session
62 //! directories for a given crate except for the most recent one.
63 //!
64 //! ## Synchronization
65 //!
66 //! There is some synchronization needed in order for the compiler to be able to
//! determine whether a given private session directory is not in use any more.
68 //! This is done by creating a lock file for each session directory and
69 //! locking it while the directory is still being used. Since file locks have
70 //! operating system support, we can rely on the lock being released if the
71 //! compiler process dies for some unexpected reason. Thus, when garbage
72 //! collecting private session directories, the collecting process can determine
73 //! whether the directory is still in use by trying to acquire a lock on the
74 //! file. If locking the file fails, the original process must still be alive.
75 //! If locking the file succeeds, we know that the owning process is not alive
76 //! any more and we can safely delete the directory.
77 //! There is still a small time window between the original process creating the
78 //! lock file and actually locking it. In order to minimize the chance that
//! another process tries to acquire the lock at just that instant, only
80 //! session directories that are older than a few seconds are considered for
81 //! garbage collection.
82 //!
83 //! Another case that has to be considered is what happens if one process
84 //! deletes a finalized session directory that another process is currently
85 //! trying to copy from. This case is also handled via the lock file. Before
86 //! a process starts copying a finalized session directory, it will acquire a
87 //! shared lock on the directory's lock file. Any garbage collecting process,
88 //! on the other hand, will acquire an exclusive lock on the lock file.
89 //! Thus, if a directory is being collected, any reader process will fail
90 //! acquiring the shared lock and will leave the directory alone. Conversely,
91 //! if a collecting process can't acquire the exclusive lock because the
92 //! directory is currently being read from, it will leave collecting that
93 //! directory to another process at a later point in time.
94 //! The exact same scheme is also used when reading the metadata hashes file
95 //! from an extern crate. When a crate is compiled, the hash values of its
96 //! metadata are stored in a file in its session directory. When the
97 //! compilation session of another crate imports the first crate's metadata,
98 //! it also has to read in the accompanying metadata hashes. It thus will access
99 //! the finalized session directory of all crates it links to and while doing
//! so, it will also place a read lock on the respective session directory
101 //! so that it won't be deleted while the metadata hashes are loaded.
102 //!
103 //! ## Preconditions
104 //!
105 //! This system relies on two features being available in the file system in
106 //! order to work really well: file locking and hard linking.
107 //! If hard linking is not available (like on FAT) the data in the cache
108 //! actually has to be copied at the beginning of each session.
109 //! If file locking does not work reliably (like on NFS), some of the
110 //! synchronization will go haywire.
111 //! In both cases we recommend to locate the incremental compilation directory
112 //! on a file system that supports these things.
113 //! It might be a good idea though to try and detect whether we are on an
114 //! unsupported file system and emit a warning in that case. This is not yet
115 //! implemented.
116
117 use rustc::hir::svh::Svh;
118 use rustc::middle::cstore::LOCAL_CRATE;
119 use rustc::session::Session;
120 use rustc::ty::TyCtxt;
121 use rustc::util::fs as fs_util;
122 use rustc_data_structures::flock;
123 use rustc_data_structures::fnv::{FnvHashSet, FnvHashMap};
124
125 use std::ffi::OsString;
126 use std::fs as std_fs;
127 use std::io;
128 use std::mem;
129 use std::path::{Path, PathBuf};
130 use std::time::{UNIX_EPOCH, SystemTime, Duration};
131 use std::__rand::{thread_rng, Rng};
132 use syntax::ast;
133
// Suffix (including the leading dot) that identifies the lock file belonging
// to a session directory.
const LOCK_FILE_EXT: &'static str = ".lock";
// Names of the files that are stored inside a session directory.
const DEP_GRAPH_FILENAME: &'static str = "dep-graph.bin";
const WORK_PRODUCTS_FILENAME: &'static str = "work-products.bin";
const METADATA_HASHES_FILENAME: &'static str = "metadata.bin";
138
139 pub fn dep_graph_path(sess: &Session) -> PathBuf {
140     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
141 }
142
143 pub fn work_products_path(sess: &Session) -> PathBuf {
144     in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME)
145 }
146
147 pub fn metadata_hash_export_path(sess: &Session) -> PathBuf {
148     in_incr_comp_dir_sess(sess, METADATA_HASHES_FILENAME)
149 }
150
151 pub fn metadata_hash_import_path(import_session_dir: &Path) -> PathBuf {
152     import_session_dir.join(METADATA_HASHES_FILENAME)
153 }
154
155 pub fn lock_file_path(session_dir: &Path) -> PathBuf {
156     let crate_dir = session_dir.parent().unwrap();
157
158     let directory_name = session_dir.file_name().unwrap().to_string_lossy();
159     assert_no_characters_lost(&directory_name);
160
161     let dash_indices: Vec<_> = directory_name.match_indices("-")
162                                              .map(|(idx, _)| idx)
163                                              .collect();
164     if dash_indices.len() != 3 {
165         bug!("Encountered incremental compilation session directory with \
166               malformed name: {}",
167              session_dir.display())
168     }
169
170     crate_dir.join(&directory_name[0 .. dash_indices[2]])
171              .with_extension(&LOCK_FILE_EXT[1..])
172 }
173
174 pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf {
175     in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name)
176 }
177
/// Path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
181
182 /// Allocates the private session directory. The boolean in the Ok() result
183 /// indicates whether we should try loading a dep graph from the successfully
184 /// initialized directory, or not.
185 /// The post-condition of this fn is that we have a valid incremental
186 /// compilation session directory, if the result is `Ok`. A valid session
187 /// directory is one that contains a locked lock file. It may or may not contain
188 /// a dep-graph and work products from a previous session.
189 /// If the call fails, the fn may leave behind an invalid session directory.
190 /// The garbage collection will take care of it.
191 pub fn prepare_session_directory(tcx: TyCtxt) -> Result<bool, ()> {
192     debug!("prepare_session_directory");
193
194     // {incr-comp-dir}/{crate-name-and-disambiguator}
195     let crate_dir = crate_path_tcx(tcx, LOCAL_CRATE);
196     debug!("crate-dir: {}", crate_dir.display());
197     try!(create_dir(tcx.sess, &crate_dir, "crate"));
198
199     let mut source_directories_already_tried = FnvHashSet();
200
201     loop {
202         // Generate a session directory of the form:
203         //
204         // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
205         let session_dir = generate_session_dir_path(&crate_dir);
206         debug!("session-dir: {}", session_dir.display());
207
208         // Lock the new session directory. If this fails, return an
209         // error without retrying
210         let (directory_lock, lock_file_path) = try!(lock_directory(tcx.sess, &session_dir));
211
212         // Now that we have the lock, we can actually create the session
213         // directory
214         try!(create_dir(tcx.sess, &session_dir, "session"));
215
216         // Find a suitable source directory to copy from. Ignore those that we
217         // have already tried before.
218         let source_directory = find_source_directory(&crate_dir,
219                                                      &source_directories_already_tried);
220
221         let source_directory = if let Some(dir) = source_directory {
222             dir
223         } else {
224             // There's nowhere to copy from, we're done
225             debug!("no source directory found. Continuing with empty session \
226                     directory.");
227
228             tcx.sess.init_incr_comp_session(session_dir, directory_lock);
229             return Ok(false)
230         };
231
232         debug!("attempting to copy data from source: {}",
233                source_directory.display());
234
235         let print_file_copy_stats = tcx.sess.opts.debugging_opts.incremental_info;
236
237         // Try copying over all files from the source directory
238         if copy_files(&session_dir, &source_directory, print_file_copy_stats).is_ok() {
239             debug!("successfully copied data from: {}",
240                    source_directory.display());
241
242             tcx.sess.init_incr_comp_session(session_dir, directory_lock);
243             return Ok(true)
244         } else {
245              debug!("copying failed - trying next directory");
246
247             // Something went wrong while trying to copy/link files from the
248             // source directory. Try again with a different one.
249             source_directories_already_tried.insert(source_directory);
250
251             // Try to remove the session directory we just allocated. We don't
252             // know if there's any garbage in it from the failed copy action.
253             if let Err(err) = safe_remove_dir_all(&session_dir) {
254                 tcx.sess.warn(&format!("Failed to delete partly initialized \
255                                         session dir `{}`: {}",
256                                        session_dir.display(),
257                                        err));
258             }
259
260             delete_session_dir_lock_file(tcx.sess, &lock_file_path);
261             mem::drop(directory_lock);
262         }
263     }
264 }
265
266
267 /// This function finalizes and thus 'publishes' the session directory by
268 /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock.
269 /// If there have been compilation errors, however, this function will just
270 /// delete the presumably invalid session directory.
271 pub fn finalize_session_directory(sess: &Session, svh: Svh) {
272     if sess.opts.incremental.is_none() {
273         return;
274     }
275
276     let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
277
278     if sess.has_errors() {
279         // If there have been any errors during compilation, we don't want to
280         // publish this session directory. Rather, we'll just delete it.
281
282         debug!("finalize_session_directory() - invalidating session directory: {}",
283                 incr_comp_session_dir.display());
284
285         if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
286             sess.warn(&format!("Error deleting incremental compilation \
287                                 session directory `{}`: {}",
288                                incr_comp_session_dir.display(),
289                                err));
290         }
291
292         let lock_file_path = lock_file_path(&*incr_comp_session_dir);
293         delete_session_dir_lock_file(sess, &lock_file_path);
294         sess.mark_incr_comp_session_as_invalid();
295     }
296
297     debug!("finalize_session_directory() - session directory: {}",
298             incr_comp_session_dir.display());
299
300     let old_sub_dir_name = incr_comp_session_dir.file_name()
301                                                 .unwrap()
302                                                 .to_string_lossy();
303     assert_no_characters_lost(&old_sub_dir_name);
304
305     // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
306     // '-working' part with the SVH of the crate
307     let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-")
308                                                .map(|(idx, _)| idx)
309                                                .collect();
310     if dash_indices.len() != 3 {
311         bug!("Encountered incremental compilation session directory with \
312               malformed name: {}",
313              incr_comp_session_dir.display())
314     }
315
316     // State: "s-{timestamp}-{random-number}-"
317     let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
318
319     // Append the svh
320     new_sub_dir_name.push_str(&encode_base_36(svh.as_u64()));
321
322     // Create the full path
323     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
324     debug!("finalize_session_directory() - new path: {}", new_path.display());
325
326     match std_fs::rename(&*incr_comp_session_dir, &new_path) {
327         Ok(_) => {
328             debug!("finalize_session_directory() - directory renamed successfully");
329
330             // This unlocks the directory
331             sess.finalize_incr_comp_session(new_path);
332         }
333         Err(e) => {
334             // Warn about the error. However, no need to abort compilation now.
335             sess.warn(&format!("Error finalizing incremental compilation \
336                                session directory `{}`: {}",
337                                incr_comp_session_dir.display(),
338                                e));
339
340             debug!("finalize_session_directory() - error, marking as invalid");
341             // Drop the file lock, so we can garage collect
342             sess.mark_incr_comp_session_as_invalid();
343         }
344     }
345
346     let _ = garbage_collect_session_directories(sess);
347 }
348
349 fn copy_files(target_dir: &Path,
350               source_dir: &Path,
351               print_stats_on_success: bool)
352               -> Result<(), ()> {
353     // We acquire a shared lock on the lock file of the directory, so that
354     // nobody deletes it out from under us while we are reading from it.
355     let lock_file_path = lock_file_path(source_dir);
356     let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path,
357                                                    false,   // don't wait,
358                                                    false,   // don't create
359                                                    false) { // not exclusive
360         lock
361     } else {
362         // Could not acquire the lock, don't try to copy from here
363         return Err(())
364     };
365
366     let source_dir_iterator = match source_dir.read_dir() {
367         Ok(it) => it,
368         Err(_) => return Err(())
369     };
370
371     let mut files_linked = 0;
372     let mut files_copied = 0;
373
374     for entry in source_dir_iterator {
375         match entry {
376             Ok(entry) => {
377                 let file_name = entry.file_name();
378
379                 let target_file_path = target_dir.join(file_name);
380                 let source_path = entry.path();
381
382                 debug!("copying into session dir: {}", source_path.display());
383                 match fs_util::link_or_copy(source_path, target_file_path) {
384                     Ok(fs_util::LinkOrCopy::Link) => {
385                         files_linked += 1
386                     }
387                     Ok(fs_util::LinkOrCopy::Copy) => {
388                         files_copied += 1
389                     }
390                     Err(_) => return Err(())
391                 }
392             }
393             Err(_) => {
394                 return Err(())
395             }
396         }
397     }
398
399     if print_stats_on_success {
400         println!("incr. comp. session directory: {} files hard-linked", files_linked);
401         println!("incr. comp. session directory: {} files copied", files_copied);
402     }
403
404     Ok(())
405 }
406
407 /// Generate unique directory path of the form:
408 /// {crate_dir}/s-{timestamp}-{random-number}-working
409 fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
410     let timestamp = timestamp_to_string(SystemTime::now());
411     debug!("generate_session_dir_path: timestamp = {}", timestamp);
412     let random_number = thread_rng().next_u32();
413     debug!("generate_session_dir_path: random_number = {}", random_number);
414
415     let directory_name = format!("s-{}-{}-working",
416                                   timestamp,
417                                   encode_base_36(random_number as u64));
418     debug!("generate_session_dir_path: directory_name = {}", directory_name);
419     let directory_path = crate_dir.join(directory_name);
420     debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
421     directory_path
422 }
423
424 fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> {
425     match fs_util::create_dir_racy(path) {
426         Ok(()) => {
427             debug!("{} directory created successfully", dir_tag);
428             Ok(())
429         }
430         Err(err) => {
431             sess.err(&format!("Could not create incremental compilation {} \
432                                directory `{}`: {}",
433                               dir_tag,
434                               path.display(),
435                               err));
436             Err(())
437         }
438     }
439 }
440
441 /// Allocate a the lock-file and lock it.
442 fn lock_directory(sess: &Session,
443                   session_dir: &Path)
444                   -> Result<(flock::Lock, PathBuf), ()> {
445     let lock_file_path = lock_file_path(session_dir);
446     debug!("lock_directory() - lock_file: {}", lock_file_path.display());
447
448     match flock::Lock::new(&lock_file_path,
449                            false, // don't wait
450                            true,  // create the lock file
451                            true) { // the lock should be exclusive
452         Ok(lock) => Ok((lock, lock_file_path)),
453         Err(err) => {
454             sess.err(&format!("incremental compilation: could not create \
455                                session directory lock file: {}", err));
456             Err(())
457         }
458     }
459 }
460
461 fn delete_session_dir_lock_file(sess: &Session,
462                                 lock_file_path: &Path) {
463     if let Err(err) = safe_remove_file(&lock_file_path) {
464         sess.warn(&format!("Error deleting lock file for incremental \
465                             compilation session directory `{}`: {}",
466                            lock_file_path.display(),
467                            err));
468     }
469 }
470
471 /// Find the most recent published session directory that is not in the
472 /// ignore-list.
473 fn find_source_directory(crate_dir: &Path,
474                          source_directories_already_tried: &FnvHashSet<PathBuf>)
475                          -> Option<PathBuf> {
476     let iter = crate_dir.read_dir()
477                         .unwrap() // FIXME
478                         .filter_map(|e| e.ok().map(|e| e.path()));
479
480     find_source_directory_in_iter(iter, source_directories_already_tried)
481 }
482
483 fn find_source_directory_in_iter<I>(iter: I,
484                                     source_directories_already_tried: &FnvHashSet<PathBuf>)
485                                     -> Option<PathBuf>
486     where I: Iterator<Item=PathBuf>
487 {
488     let mut best_candidate = (UNIX_EPOCH, None);
489
490     for session_dir in iter {
491         debug!("find_source_directory_in_iter - inspecting `{}`",
492                session_dir.display());
493
494         let directory_name = session_dir.file_name().unwrap().to_string_lossy();
495         assert_no_characters_lost(&directory_name);
496
497         if source_directories_already_tried.contains(&session_dir) ||
498            !is_session_directory(&directory_name) ||
499            !is_finalized(&directory_name) {
500             debug!("find_source_directory_in_iter - ignoring.");
501             continue
502         }
503
504         let timestamp = extract_timestamp_from_session_dir(&directory_name)
505             .unwrap_or_else(|_| {
506                 bug!("unexpected incr-comp session dir: {}", session_dir.display())
507             });
508
509         if timestamp > best_candidate.0 {
510             best_candidate = (timestamp, Some(session_dir.clone()));
511         }
512     }
513
514     best_candidate.1
515 }
516
/// A session directory counts as finalized once its name no longer ends in
/// "-working".
fn is_finalized(directory_name: &str) -> bool {
    let still_in_progress = directory_name.ends_with("-working");
    !still_in_progress
}
520
521 fn is_session_directory(directory_name: &str) -> bool {
522     directory_name.starts_with("s-") &&
523     !directory_name.ends_with(LOCK_FILE_EXT)
524 }
525
526 fn is_session_directory_lock_file(file_name: &str) -> bool {
527     file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT)
528 }
529
530 fn extract_timestamp_from_session_dir(directory_name: &str)
531                                       -> Result<SystemTime, ()> {
532     if !is_session_directory(directory_name) {
533         return Err(())
534     }
535
536     let dash_indices: Vec<_> = directory_name.match_indices("-")
537                                              .map(|(idx, _)| idx)
538                                              .collect();
539     if dash_indices.len() != 3 {
540         return Err(())
541     }
542
543     string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]])
544 }
545
// Digits used for base-36 encoding, indexed by digit value.
const BASE_36: &'static [u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";

/// Encodes `n` as a lowercase base-36 string without leading zeros (zero
/// itself is encoded as "0").
fn encode_base_36(mut n: u64) -> String {
    // 13 base-36 digits are enough for any u64.
    let mut digits: Vec<u8> = Vec::with_capacity(13);

    // Emit the least significant digit unconditionally so that zero still
    // produces one digit, then continue while there is something left.
    digits.push(BASE_36[(n % 36) as usize]);
    n /= 36;
    while n != 0 {
        digits.push(BASE_36[(n % 36) as usize]);
        n /= 36;
    }

    // Digits were produced least significant first.
    digits.reverse();
    String::from_utf8(digits).unwrap()
}
561
562 fn timestamp_to_string(timestamp: SystemTime) -> String {
563     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
564     let micros = duration.as_secs() * 1_000_000 +
565                 (duration.subsec_nanos() as u64) / 1000;
566     encode_base_36(micros)
567 }
568
/// Decodes a base-36 string holding the number of microseconds since the
/// UNIX epoch into a `SystemTime`.
///
/// Returns `Err(())` if `s` is not a valid base-36 `u64`.
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
    match u64::from_str_radix(s, 36) {
        Ok(micros_since_unix_epoch) => {
            let seconds = micros_since_unix_epoch / 1_000_000;
            let nanoseconds = 1000 * (micros_since_unix_epoch % 1_000_000) as u32;
            Ok(UNIX_EPOCH + Duration::new(seconds, nanoseconds))
        }
        Err(_) => Err(()),
    }
}
582
583 fn crate_path_tcx(tcx: TyCtxt, cnum: ast::CrateNum) -> PathBuf {
584     crate_path(tcx.sess, &tcx.crate_name(cnum), &tcx.crate_disambiguator(cnum))
585 }
586
587 /// Finds the session directory containing the correct metadata hashes file for
588 /// the given crate. In order to do that it has to compute the crate directory
589 /// of the given crate, and in there, look for the session directory with the
590 /// correct SVH in it.
591 /// Note that we have to match on the exact SVH here, not just the
592 /// crate's (name, disambiguator) pair. The metadata hashes are only valid for
593 /// the exact version of the binary we are reading from now (i.e. the hashes
594 /// are part of the dependency graph of a specific compilation session).
595 pub fn find_metadata_hashes_for(tcx: TyCtxt, cnum: ast::CrateNum) -> Option<PathBuf> {
596     let crate_directory = crate_path_tcx(tcx, cnum);
597
598     if !crate_directory.exists() {
599         return None
600     }
601
602     let dir_entries = match crate_directory.read_dir() {
603         Ok(dir_entries) => dir_entries,
604         Err(e) => {
605             tcx.sess
606                .err(&format!("incremental compilation: Could not read crate directory `{}`: {}",
607                              crate_directory.display(), e));
608             return None
609         }
610     };
611
612     let target_svh = tcx.sess.cstore.crate_hash(cnum);
613     let target_svh = encode_base_36(target_svh.as_u64());
614
615     let sub_dir = find_metadata_hashes_iter(&target_svh, dir_entries.filter_map(|e| {
616         e.ok().map(|e| e.file_name().to_string_lossy().into_owned())
617     }));
618
619     sub_dir.map(|sub_dir_name| crate_directory.join(&sub_dir_name))
620 }
621
622 fn find_metadata_hashes_iter<'a, I>(target_svh: &str, iter: I) -> Option<OsString>
623     where I: Iterator<Item=String>
624 {
625     for sub_dir_name in iter {
626         if !is_session_directory(&sub_dir_name) || !is_finalized(&sub_dir_name) {
627             // This is not a usable session directory
628             continue
629         }
630
631         let is_match = if let Some(last_dash_pos) = sub_dir_name.rfind("-") {
632             let candidate_svh = &sub_dir_name[last_dash_pos + 1 .. ];
633             target_svh == candidate_svh
634         } else {
635             // some kind of invalid directory name
636             continue
637         };
638
639         if is_match {
640             return Some(OsString::from(sub_dir_name))
641         }
642     }
643
644     None
645 }
646
647 fn crate_path(sess: &Session,
648               crate_name: &str,
649               crate_disambiguator: &str)
650               -> PathBuf {
651     use std::hash::{SipHasher, Hasher, Hash};
652
653     let incr_dir = sess.opts.incremental.as_ref().unwrap().clone();
654
655     // The full crate disambiguator is really long. A hash of it should be
656     // sufficient.
657     let mut hasher = SipHasher::new();
658     crate_disambiguator.hash(&mut hasher);
659
660     let crate_name = format!("{}-{}", crate_name, encode_base_36(hasher.finish()));
661     incr_dir.join(crate_name)
662 }
663
664 fn assert_no_characters_lost(s: &str) {
665     if s.contains('\u{FFFD}') {
666         bug!("Could not losslessly convert '{}'.", s)
667     }
668 }
669
/// Tells whether `timestamp` lies more than ten seconds in the past.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let grace_period = Duration::from_secs(10);
    let threshold = SystemTime::now() - grace_period;

    timestamp < threshold
}
673
674 pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
675     debug!("garbage_collect_session_directories() - begin");
676
677     let session_directory = sess.incr_comp_session_dir();
678     debug!("garbage_collect_session_directories() - session directory: {}",
679         session_directory.display());
680
681     let crate_directory = session_directory.parent().unwrap();
682     debug!("garbage_collect_session_directories() - crate directory: {}",
683         crate_directory.display());
684
685     // First do a pass over the crate directory, collecting lock files and
686     // session directories
687     let mut session_directories = FnvHashSet();
688     let mut lock_files = FnvHashSet();
689
690     for dir_entry in try!(crate_directory.read_dir()) {
691         let dir_entry = match dir_entry {
692             Ok(dir_entry) => dir_entry,
693             _ => {
694                 // Ignore any errors
695                 continue
696             }
697         };
698
699         let entry_name = dir_entry.file_name();
700         let entry_name = entry_name.to_string_lossy();
701
702         if is_session_directory_lock_file(&entry_name) {
703             assert_no_characters_lost(&entry_name);
704             lock_files.insert(entry_name.into_owned());
705         } else if is_session_directory(&entry_name) {
706             assert_no_characters_lost(&entry_name);
707             session_directories.insert(entry_name.into_owned());
708         } else {
709             // This is something we don't know, leave it alone
710         }
711     }
712
713     // Now map from lock files to session directories
714     let lock_file_to_session_dir: FnvHashMap<String, Option<String>> =
715         lock_files.into_iter()
716                   .map(|lock_file_name| {
717                         assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
718                         let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
719                         let session_dir = {
720                             let dir_prefix = &lock_file_name[0 .. dir_prefix_end];
721                             session_directories.iter()
722                                                .find(|dir_name| dir_name.starts_with(dir_prefix))
723                         };
724                         (lock_file_name, session_dir.map(String::clone))
725                     })
726                   .collect();
727
728     // Delete all lock files, that don't have an associated directory. They must
729     // be some kind of leftover
730     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
731         if directory_name.is_none() {
732             let timestamp = match extract_timestamp_from_session_dir(lock_file_name) {
733                 Ok(timestamp) => timestamp,
734                 Err(()) => {
735                     debug!("Found lock-file with malformed timestamp: {}",
736                         crate_directory.join(&lock_file_name).display());
737                     // Ignore it
738                     continue
739                 }
740             };
741
742             let lock_file_path = crate_directory.join(&**lock_file_name);
743
744             if is_old_enough_to_be_collected(timestamp) {
745                 debug!("garbage_collect_session_directories() - deleting \
746                         garbage lock file: {}", lock_file_path.display());
747                 delete_session_dir_lock_file(sess, &lock_file_path);
748             } else {
749                 debug!("garbage_collect_session_directories() - lock file with \
750                         no session dir not old enough to be collected: {}",
751                        lock_file_path.display());
752             }
753         }
754     }
755
756     // Filter out `None` directories
757     let lock_file_to_session_dir: FnvHashMap<String, String> =
758         lock_file_to_session_dir.into_iter()
759                                 .filter_map(|(lock_file_name, directory_name)| {
760                                     directory_name.map(|n| (lock_file_name, n))
761                                 })
762                                 .collect();
763
764     let mut deletion_candidates = vec![];
765     let mut definitely_delete = vec![];
766
767     for (lock_file_name, directory_name) in &lock_file_to_session_dir {
768         debug!("garbage_collect_session_directories() - inspecting: {}",
769                 directory_name);
770
771         let timestamp = match extract_timestamp_from_session_dir(directory_name) {
772             Ok(timestamp) => timestamp,
773             Err(()) => {
774                 debug!("Found session-dir with malformed timestamp: {}",
775                         crate_directory.join(directory_name).display());
776                 // Ignore it
777                 continue
778             }
779         };
780
781         if is_finalized(directory_name) {
782             let lock_file_path = crate_directory.join(lock_file_name);
783             match flock::Lock::new(&lock_file_path,
784                                    false,  // don't wait
785                                    false,  // don't create the lock-file
786                                    true) { // get an exclusive lock
787                 Ok(lock) => {
788                     debug!("garbage_collect_session_directories() - \
789                             successfully acquired lock");
790                     debug!("garbage_collect_session_directories() - adding \
791                             deletion candidate: {}", directory_name);
792
793                     // Note that we are holding on to the lock
794                     deletion_candidates.push((timestamp,
795                                               crate_directory.join(directory_name),
796                                               Some(lock)));
797                 }
798                 Err(_) => {
799                     debug!("garbage_collect_session_directories() - \
800                             not collecting, still in use");
801                 }
802             }
803         } else if is_old_enough_to_be_collected(timestamp) {
804             // When cleaning out "-working" session directories, i.e.
805             // session directories that might still be in use by another
806             // compiler instance, we only look a directories that are
807             // at least ten seconds old. This is supposed to reduce the
808             // chance of deleting a directory in the time window where
809             // the process has allocated the directory but has not yet
810             // acquired the file-lock on it.
811
812             // Try to acquire the directory lock. If we can't, it
813             // means that the owning process is still alive and we
814             // leave this directory alone.
815             let lock_file_path = crate_directory.join(lock_file_name);
816             match flock::Lock::new(&lock_file_path,
817                                    false,  // don't wait
818                                    false,  // don't create the lock-file
819                                    true) { // get an exclusive lock
820                 Ok(lock) => {
821                     debug!("garbage_collect_session_directories() - \
822                             successfully acquired lock");
823
824                     // Note that we are holding on to the lock
825                     definitely_delete.push((crate_directory.join(directory_name),
826                                             Some(lock)));
827                 }
828                 Err(_) => {
829                     debug!("garbage_collect_session_directories() - \
830                             not collecting, still in use");
831                 }
832             }
833         } else {
834             debug!("garbage_collect_session_directories() - not finalized, not \
835                     old enough");
836         }
837     }
838
839     // Delete all but the most recent of the candidates
840     for (path, lock) in all_except_most_recent(deletion_candidates) {
841         debug!("garbage_collect_session_directories() - deleting `{}`",
842                 path.display());
843
844         if let Err(err) = safe_remove_dir_all(&path) {
845             sess.warn(&format!("Failed to garbage collect finalized incremental \
846                                 compilation session directory `{}`: {}",
847                                path.display(),
848                                err));
849         } else {
850             delete_session_dir_lock_file(sess, &lock_file_path(&path));
851         }
852
853
854         // Let's make it explicit that the file lock is released at this point,
855         // or rather, that we held on to it until here
856         mem::drop(lock);
857     }
858
859     for (path, lock) in definitely_delete {
860         debug!("garbage_collect_session_directories() - deleting `{}`",
861                 path.display());
862
863         if let Err(err) = safe_remove_dir_all(&path) {
864             sess.warn(&format!("Failed to garbage collect incremental \
865                                 compilation session directory `{}`: {}",
866                                path.display(),
867                                err));
868         } else {
869             delete_session_dir_lock_file(sess, &lock_file_path(&path));
870         }
871
872         // Let's make it explicit that the file lock is released at this point,
873         // or rather, that we held on to it until here
874         mem::drop(lock);
875     }
876
877     Ok(())
878 }
879
880 fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>)
881                           -> FnvHashMap<PathBuf, Option<flock::Lock>> {
882     let most_recent = deletion_candidates.iter()
883                                          .map(|&(timestamp, _, _)| timestamp)
884                                          .max();
885
886     if let Some(most_recent) = most_recent {
887         deletion_candidates.into_iter()
888                            .filter(|&(timestamp, _, _)| timestamp != most_recent)
889                            .map(|(_, path, lock)| (path, lock))
890                            .collect()
891     } else {
892         FnvHashMap()
893     }
894 }
895
/// Recursively removes the directory `p`, treating a missing directory as
/// success.
///
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to std::fs::remove_dir_all(). This will convert the path
/// into the '\\?\' format, which supports much longer paths.
///
/// The path is canonicalized directly rather than checked with `exists()`
/// first: a separate existence check leaves a window in which another
/// process can remove the directory, and it also hides errors other than
/// "not found". Only a `NotFound` error from canonicalization is mapped to
/// `Ok(())`; every other error is returned to the caller.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    match p.canonicalize() {
        Ok(canonicalized) => std_fs::remove_dir_all(canonicalized),
        // Nothing there (any more), so there is nothing to remove.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
910
/// Removes the file `p`, treating a missing file as success.
///
/// The path is canonicalized before it is handed to `std::fs::remove_file()`.
/// It is canonicalized directly rather than checked with `exists()` first:
/// a separate existence check leaves a window in which another process can
/// remove the file, and it also hides errors other than "not found". Only a
/// `NotFound` error from canonicalization is mapped to `Ok(())`; every other
/// error is returned to the caller.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    match p.canonicalize() {
        Ok(canonicalized) => std_fs::remove_file(canonicalized),
        // Nothing there (any more), so there is nothing to remove.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
919
#[test]
fn test_all_except_most_recent() {
    // The candidates are deliberately listed out of chronological order.
    let candidates = vec![
        (UNIX_EPOCH + Duration::new(4, 0), PathBuf::from("4"), None),
        (UNIX_EPOCH + Duration::new(1, 0), PathBuf::from("1"), None),
        (UNIX_EPOCH + Duration::new(5, 0), PathBuf::from("5"), None),
        (UNIX_EPOCH + Duration::new(3, 0), PathBuf::from("3"), None),
        (UNIX_EPOCH + Duration::new(2, 0), PathBuf::from("2"), None),
    ];
    let survivors: FnvHashSet<PathBuf> =
        all_except_most_recent(candidates).keys().cloned().collect();

    // Everything but the entry with the largest timestamp ("5") is returned.
    let expected: FnvHashSet<PathBuf> = ["1", "2", "3", "4"].iter()
                                                            .map(|name| PathBuf::from(*name))
                                                            .collect();
    assert_eq!(survivors, expected);

    // No candidates in, no paths out.
    let from_empty: FnvHashSet<PathBuf> =
        all_except_most_recent(vec![]).keys().cloned().collect();
    assert_eq!(from_empty, FnvHashSet());
}
944
#[test]
fn test_timestamp_serialization() {
    // Converting a timestamp to its string form and back must give the
    // original value, for a spread of second and nanosecond components.
    for step in 0 .. 1_000u64 {
        let secs = step * 1_434_578;
        let nanos = (step as u32) * 239_000;
        let original = UNIX_EPOCH + Duration::new(secs, nanos);

        let round_tripped = string_to_timestamp(&timestamp_to_string(original));
        assert_eq!(Ok(original), round_tripped);
    }
}
953
#[test]
fn test_find_source_directory_in_iter() {
    // Builds the owning iterator of paths that the function under test takes.
    fn dirs(names: &[&str]) -> ::std::vec::IntoIter<PathBuf> {
        names.iter()
             .map(|name| PathBuf::from(*name))
             .collect::<Vec<_>>()
             .into_iter()
    }

    let already_visited = FnvHashSet();

    // Find newest
    let newest = find_source_directory_in_iter(
        dirs(&["crate-dir/s-3234-0000-svh",
               "crate-dir/s-2234-0000-svh",
               "crate-dir/s-1234-0000-svh"]),
        &already_visited);
    assert_eq!(newest, Some(PathBuf::from("crate-dir/s-3234-0000-svh")));

    // Filter out "-working"
    let newest_finalized = find_source_directory_in_iter(
        dirs(&["crate-dir/s-3234-0000-working",
               "crate-dir/s-2234-0000-svh",
               "crate-dir/s-1234-0000-svh"]),
        &already_visited);
    assert_eq!(newest_finalized, Some(PathBuf::from("crate-dir/s-2234-0000-svh")));

    // Handle empty
    let from_empty = find_source_directory_in_iter(dirs(&[]), &already_visited);
    assert_eq!(from_empty, None);

    // Handle only working
    let only_working = find_source_directory_in_iter(
        dirs(&["crate-dir/s-3234-0000-working",
               "crate-dir/s-2234-0000-working",
               "crate-dir/s-1234-0000-working"]),
        &already_visited);
    assert_eq!(only_working, None);
}
983
#[test]
fn test_find_metadata_hashes_iter() {
    // Builds the owning iterator of directory names that the function under
    // test takes.
    fn dir_names(raw: &[&str]) -> ::std::vec::IntoIter<String> {
        raw.iter()
           .map(|name| String::from(*name))
           .collect::<Vec<_>>()
           .into_iter()
    }

    // The directory carrying the requested SVH is picked out of several.
    let among_valid = find_metadata_hashes_iter(
        "testsvh2",
        dir_names(&["s-timestamp1-testsvh1",
                    "s-timestamp2-testsvh2",
                    "s-timestamp3-testsvh3"]));
    assert_eq!(among_valid, Some(OsString::from("s-timestamp2-testsvh2")));

    // An unrelated, malformed name next to it does not get in the way.
    let with_invalid_name = find_metadata_hashes_iter(
        "testsvh2",
        dir_names(&["s-timestamp1-testsvh1",
                    "s-timestamp2-testsvh2",
                    "invalid-name"]));
    assert_eq!(with_invalid_name, Some(OsString::from("s-timestamp2-testsvh2")));

    // A "-working" directory is not a match even if it mentions the SVH.
    let matching_but_working = find_metadata_hashes_iter(
        "testsvh2",
        dir_names(&["s-timestamp1-testsvh1",
                    "s-timestamp2-testsvh2-working",
                    "s-timestamp3-testsvh3"]));
    assert_eq!(matching_but_working, None);

    // Nothing but "-working" directories yields nothing.
    let only_working = find_metadata_hashes_iter(
        "testsvh1",
        dir_names(&["s-timestamp1-random1-working",
                    "s-timestamp2-random2-working",
                    "s-timestamp3-random3-working"]));
    assert_eq!(only_working, None);

    // Names lacking the "s-" prefix are not accepted.
    let missing_prefix = find_metadata_hashes_iter(
        "testsvh2",
        dir_names(&["timestamp1-testsvh2",
                    "timestamp2-testsvh2",
                    "timestamp3-testsvh2"]));
    assert_eq!(missing_prefix, None);
}
1032
#[test]
fn test_encode_base_36() {
    // Encoding `n` and parsing the result back as a base-36 number must give
    // `n` again.
    fn round_trips(n: u64) {
        let encoded = encode_base_36(n);
        assert_eq!(u64::from_str_radix(&encoded[..], 36), Ok(n));
    }

    // Values around the single-digit / two-digit boundary and the extremes.
    for &n in &[0, 1, 35, 36, 37, u64::max_value()] {
        round_trips(n);
    }

    for i in 0 .. 1_000 {
        round_trips(i * 983);
    }
}