Fix cache hit stats

[rust.git] / src / librustc / ty / query / plumbing.rs
diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs

index 49028107df758f1700638185252b8cbd178012ec..781ed03f224181f49ffae911d4ece855a94b6720 100644 (file)
--- a/src/librustc/ty/query/plumbing.rs
+++ b/src/librustc/ty/query/plumbing.rs
@@ -3,8 +3,9 @@
  //! manage the caches, and so forth.
  
  use crate::dep_graph::{DepKind, DepNode, DepNodeIndex, SerializedDepNodeIndex};
-use crate::ty::query::config::{QueryConfig, QueryDescription};
-use crate::ty::query::job::{QueryInfo, QueryJob};
+use crate::ty::query::caches::QueryCache;
+use crate::ty::query::config::{QueryAccessors, QueryDescription};
+use crate::ty::query::job::{QueryInfo, QueryJob, QueryJobId, QueryShardJobId};
  use crate::ty::query::Query;
  use crate::ty::tls;
  use crate::ty::{self, TyCtxt};
@@ -13,7 +14,7 @@
  use rustc_data_structures::cold_path;
  use rustc_data_structures::fx::{FxHashMap, FxHasher};
  use rustc_data_structures::sharded::Sharded;
-use rustc_data_structures::sync::{Lock, Lrc};
+use rustc_data_structures::sync::{Lock, LockGuard};
  use rustc_data_structures::thin_vec::ThinVec;
  use rustc_errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler, Level};
  use rustc_span::source_map::DUMMY_SP;
@@ -21,147 +22,193 @@
  use std::collections::hash_map::Entry;
  use std::hash::{Hash, Hasher};
  use std::mem;
+use std::num::NonZeroU32;
  use std::ptr;
+#[cfg(debug_assertions)]
+use std::sync::atomic::{AtomicUsize, Ordering};
  
-use rustc_error_codes::*;
-
-pub struct QueryCache<'tcx, D: QueryConfig<'tcx> + ?Sized> {
-    pub(super) results: FxHashMap<D::Key, QueryValue<D::Value>>,
+pub(crate) struct QueryStateShard<'tcx, D: QueryAccessors<'tcx> + ?Sized> {
+    pub(super) cache: <<D as QueryAccessors<'tcx>>::Cache as QueryCache<D::Key, D::Value>>::Sharded,
      pub(super) active: FxHashMap<D::Key, QueryResult<'tcx>>,
-    #[cfg(debug_assertions)]
-    pub(super) cache_hits: usize,
+
+    /// Used to generate unique ids for active jobs.
+    pub(super) jobs: u32,
+}
+
+impl<'tcx, Q: QueryAccessors<'tcx>> QueryStateShard<'tcx, Q> {
+    fn get_cache(
+        &mut self,
+    ) -> &mut <<Q as QueryAccessors<'tcx>>::Cache as QueryCache<Q::Key, Q::Value>>::Sharded {
+        &mut self.cache
+    }
  }
  
-pub(super) struct QueryValue<T> {
-    pub(super) value: T,
-    pub(super) index: DepNodeIndex,
+impl<'tcx, Q: QueryAccessors<'tcx>> Default for QueryStateShard<'tcx, Q> {
+    fn default() -> QueryStateShard<'tcx, Q> {
+        QueryStateShard { cache: Default::default(), active: Default::default(), jobs: 0 }
+    }
+}
+
+pub(crate) struct QueryState<'tcx, D: QueryAccessors<'tcx> + ?Sized> {
+    pub(super) cache: D::Cache,
+    pub(super) shards: Sharded<QueryStateShard<'tcx, D>>,
+    #[cfg(debug_assertions)]
+    pub(super) cache_hits: AtomicUsize,
  }
  
-impl<T> QueryValue<T> {
-    pub(super) fn new(value: T, dep_node_index: DepNodeIndex) -> QueryValue<T> {
-        QueryValue { value, index: dep_node_index }
+impl<'tcx, Q: QueryAccessors<'tcx>> QueryState<'tcx, Q> {
+    pub(super) fn get_lookup<K: Hash>(&'tcx self, key: &K) -> QueryLookup<'tcx, Q> {
+        // We compute the key's hash once and then use it for both the
+        // shard lookup and the hashmap lookup. This relies on the fact
+        // that both of them use `FxHasher`.
+        let mut hasher = FxHasher::default();
+        key.hash(&mut hasher);
+        let key_hash = hasher.finish();
+
+        let shard = self.shards.get_shard_index_by_hash(key_hash);
+        let lock = self.shards.get_shard_by_index(shard).lock();
+        QueryLookup { key_hash, shard, lock }
      }
  }
  
  /// Indicates the state of a query for a given key in a query map.
  pub(super) enum QueryResult<'tcx> {
      /// An already executing query. The query job can be used to await for its completion.
-    Started(Lrc<QueryJob<'tcx>>),
+    Started(QueryJob<'tcx>),
  
-    /// The query panicked. Queries trying to wait on this will raise a fatal error or
+    /// The query panicked. Queries trying to wait on this will raise a fatal error which will
      /// silently panic.
      Poisoned,
  }
  
-impl<'tcx, M: QueryConfig<'tcx>> Default for QueryCache<'tcx, M> {
-    fn default() -> QueryCache<'tcx, M> {
-        QueryCache {
-            results: FxHashMap::default(),
-            active: FxHashMap::default(),
+impl<'tcx, M: QueryAccessors<'tcx>> QueryState<'tcx, M> {
+    pub fn iter_results<R>(
+        &self,
+        f: impl for<'a> FnOnce(
+            Box<dyn Iterator<Item = (&'a M::Key, &'a M::Value, DepNodeIndex)> + 'a>,
+        ) -> R,
+    ) -> R {
+        self.cache.iter(&self.shards, |shard| &mut shard.cache, f)
+    }
+    pub fn all_inactive(&self) -> bool {
+        let shards = self.shards.lock_shards();
+        shards.iter().all(|shard| shard.active.is_empty())
+    }
+}
+
+impl<'tcx, M: QueryAccessors<'tcx>> Default for QueryState<'tcx, M> {
+    fn default() -> QueryState<'tcx, M> {
+        QueryState {
+            cache: M::Cache::default(),
+            shards: Default::default(),
              #[cfg(debug_assertions)]
-            cache_hits: 0,
+            cache_hits: AtomicUsize::new(0),
          }
      }
  }
  
+/// Values used when checking a query cache which can be reused on a cache-miss to execute the query.
+pub(crate) struct QueryLookup<'tcx, Q: QueryAccessors<'tcx>> {
+    pub(super) key_hash: u64,
+    pub(super) shard: usize,
+    pub(super) lock: LockGuard<'tcx, QueryStateShard<'tcx, Q>>,
+}
+
  /// A type representing the responsibility to execute the job in the `job` field.
  /// This will poison the relevant query if dropped.
-pub(super) struct JobOwner<'a, 'tcx, Q: QueryDescription<'tcx>> {
-    cache: &'a Sharded<QueryCache<'tcx, Q>>,
+pub(super) struct JobOwner<'tcx, Q: QueryDescription<'tcx>> {
+    tcx: TyCtxt<'tcx>,
      key: Q::Key,
-    job: Lrc<QueryJob<'tcx>>,
+    id: QueryJobId,
  }
  
-impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
+impl<'tcx, Q: QueryDescription<'tcx>> JobOwner<'tcx, Q> {
      /// Either gets a `JobOwner` corresponding the query, allowing us to
      /// start executing the query, or returns with the result of the query.
-    /// If the query is executing elsewhere, this will wait for it.
+    /// This function assumes that `try_get_cached` has already been called and returned `lookup`.
+    /// If the query is executing elsewhere, this will wait for it and return the result.
      /// If the query panicked, this will silently panic.
      ///
      /// This function is inlined because that results in a noticeable speed-up
      /// for some compile-time benchmarks.
      #[inline(always)]
-    pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<'a, 'tcx, Q> {
-        let cache = Q::query_cache(tcx);
-        loop {
-            // We compute the key's hash once and then use it for both the
-            // shard lookup and the hashmap lookup. This relies on the fact
-            // that both of them use `FxHasher`.
-            let mut state = FxHasher::default();
-            key.hash(&mut state);
-            let key_hash = state.finish();
-
-            let mut lock = cache.get_shard_by_hash(key_hash).lock();
-            if let Some((_, value)) =
-                lock.results.raw_entry().from_key_hashed_nocheck(key_hash, key)
-            {
-                tcx.prof.query_cache_hit(Q::NAME);
-                let result = (value.value.clone(), value.index);
-                #[cfg(debug_assertions)]
-                {
-                    lock.cache_hits += 1;
-                }
-                return TryGetJob::JobCompleted(result);
-            }
+    pub(super) fn try_start(
+        tcx: TyCtxt<'tcx>,
+        span: Span,
+        key: &Q::Key,
+        mut lookup: QueryLookup<'tcx, Q>,
+    ) -> TryGetJob<'tcx, Q> {
+        let lock = &mut *lookup.lock;
+
+        let (latch, mut _query_blocked_prof_timer) = match lock.active.entry((*key).clone()) {
+            Entry::Occupied(mut entry) => {
+                match entry.get_mut() {
+                    QueryResult::Started(job) => {
+                        // For parallel queries, we'll block and wait until the query running
+                        // in another thread has completed. Record how long we wait in the
+                        // self-profiler.
+                        let _query_blocked_prof_timer = if cfg!(parallel_compiler) {
+                            Some(tcx.prof.query_blocked())
+                        } else {
+                            None
+                        };
  
-            #[cfg(parallel_compiler)]
-            let query_blocked_prof_timer;
-
-            let job = match lock.active.entry((*key).clone()) {
-                Entry::Occupied(entry) => {
-                    match *entry.get() {
-                        QueryResult::Started(ref job) => {
-                            // For parallel queries, we'll block and wait until the query running
-                            // in another thread has completed. Record how long we wait in the
-                            // self-profiler.
-                            #[cfg(parallel_compiler)]
-                            {
-                                query_blocked_prof_timer = tcx.prof.query_blocked(Q::NAME);
-                            }
-
-                            job.clone()
-                        }
-                        QueryResult::Poisoned => FatalError.raise(),
+                        // Create the id of the job we're waiting for
+                        let id = QueryJobId::new(job.id, lookup.shard, Q::dep_kind());
+
+                        (job.latch(id), _query_blocked_prof_timer)
                      }
+                    QueryResult::Poisoned => FatalError.raise(),
                  }
-                Entry::Vacant(entry) => {
-                    // No job entry for this query. Return a new one to be started later.
-                    return tls::with_related_context(tcx, |icx| {
-                        // Create the `parent` variable before `info`. This allows LLVM
-                        // to elide the move of `info`
-                        let parent = icx.query.clone();
-                        let info = QueryInfo { span, query: Q::query(key.clone()) };
-                        let job = Lrc::new(QueryJob::new(info, parent));
-                        let owner = JobOwner { cache, job: job.clone(), key: (*key).clone() };
-                        entry.insert(QueryResult::Started(job));
-                        TryGetJob::NotYetStarted(owner)
-                    });
-                }
-            };
-            mem::drop(lock);
+            }
+            Entry::Vacant(entry) => {
+                // No job entry for this query. Return a new one to be started later.
  
-            // If we are single-threaded we know that we have cycle error,
-            // so we just return the error.
-            #[cfg(not(parallel_compiler))]
-            return TryGetJob::Cycle(cold_path(|| {
-                Q::handle_cycle_error(tcx, job.find_cycle_in_stack(tcx, span))
-            }));
+                // Generate an id unique within this shard.
+                let id = lock.jobs.checked_add(1).unwrap();
+                lock.jobs = id;
+                let id = QueryShardJobId(NonZeroU32::new(id).unwrap());
  
-            // With parallel queries we might just have to wait on some other
-            // thread.
-            #[cfg(parallel_compiler)]
-            {
-                let result = job.r#await(tcx, span);
+                let global_id = QueryJobId::new(id, lookup.shard, Q::dep_kind());
  
-                // This `drop()` is not strictly necessary as the binding
-                // would go out of scope anyway. But it's good to have an
-                // explicit marker of how far the measurement goes.
-                drop(query_blocked_prof_timer);
+                let job = tls::with_related_context(tcx, |icx| QueryJob::new(id, span, icx.query));
  
-                if let Err(cycle) = result {
-                    return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
-                }
+                entry.insert(QueryResult::Started(job));
+
+                let owner = JobOwner { tcx, id: global_id, key: (*key).clone() };
+                return TryGetJob::NotYetStarted(owner);
              }
+        };
+        mem::drop(lookup.lock);
+
+        // If we are single-threaded we know that we have cycle error,
+        // so we just return the error.
+        #[cfg(not(parallel_compiler))]
+        return TryGetJob::Cycle(cold_path(|| {
+            Q::handle_cycle_error(tcx, latch.find_cycle_in_stack(tcx, span))
+        }));
+
+        // With parallel queries we might just have to wait on some other
+        // thread.
+        #[cfg(parallel_compiler)]
+        {
+            let result = latch.wait_on(tcx, span);
+
+            if let Err(cycle) = result {
+                return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
+            }
+
+            let cached = tcx.try_get_cached::<Q, _, _, _>(
+                (*key).clone(),
+                |value, index| (value.clone(), index),
+                |_, _| panic!("value must be in cache after waiting"),
+            );
+
+            if let Some(prof_timer) = _query_blocked_prof_timer.take() {
+                prof_timer.finish_with_query_invocation_id(cached.1.into());
+            }
+
+            return TryGetJob::JobCompleted(cached);
          }
      }
  
@@ -171,18 +218,22 @@ pub(super) fn try_get(tcx: TyCtxt<'tcx>, span: Span, key: &Q::Key) -> TryGetJob<
      pub(super) fn complete(self, result: &Q::Value, dep_node_index: DepNodeIndex) {
          // We can move out of `self` here because we `mem::forget` it below
          let key = unsafe { ptr::read(&self.key) };
-        let job = unsafe { ptr::read(&self.job) };
-        let cache = self.cache;
+        let tcx = self.tcx;
  
          // Forget ourself so our destructor won't poison the query
          mem::forget(self);
  
-        let value = QueryValue::new(result.clone(), dep_node_index);
-        {
-            let mut lock = cache.get_shard_by_value(&key).lock();
-            lock.active.remove(&key);
-            lock.results.insert(key, value);
-        }
+        let job = {
+            let state = Q::query_state(tcx);
+            let result = result.clone();
+            let mut lock = state.shards.get_shard_by_value(&key).lock();
+            let job = match lock.active.remove(&key).unwrap() {
+                QueryResult::Started(job) => job,
+                QueryResult::Poisoned => panic!(),
+            };
+            state.cache.complete(tcx, &mut lock.cache, key, result, dep_node_index);
+            job
+        };
  
          job.signal_complete();
      }
@@ -198,16 +249,25 @@ fn with_diagnostics<F, R>(f: F) -> (R, ThinVec<Diagnostic>)
      (result, diagnostics.into_inner())
  }
  
-impl<'a, 'tcx, Q: QueryDescription<'tcx>> Drop for JobOwner<'a, 'tcx, Q> {
+impl<'tcx, Q: QueryDescription<'tcx>> Drop for JobOwner<'tcx, Q> {
      #[inline(never)]
      #[cold]
      fn drop(&mut self) {
          // Poison the query so jobs waiting on it panic.
-        let shard = self.cache.get_shard_by_value(&self.key);
-        shard.lock().active.insert(self.key.clone(), QueryResult::Poisoned);
+        let state = Q::query_state(self.tcx);
+        let shard = state.shards.get_shard_by_value(&self.key);
+        let job = {
+            let mut shard = shard.lock();
+            let job = match shard.active.remove(&self.key).unwrap() {
+                QueryResult::Started(job) => job,
+                QueryResult::Poisoned => panic!(),
+            };
+            shard.active.insert(self.key.clone(), QueryResult::Poisoned);
+            job
+        };
          // Also signal the completion of the job, so waiters
          // will continue execution.
-        self.job.signal_complete();
+        job.signal_complete();
      }
  }
  
@@ -218,14 +278,15 @@ pub struct CycleError<'tcx> {
      pub(super) cycle: Vec<QueryInfo<'tcx>>,
  }
  
-/// The result of `try_get_lock`.
-pub(super) enum TryGetJob<'a, 'tcx, D: QueryDescription<'tcx>> {
+/// The result of `try_start`.
+pub(super) enum TryGetJob<'tcx, D: QueryDescription<'tcx>> {
      /// The query is not yet started. Contains a guard to the cache eventually used to start it.
-    NotYetStarted(JobOwner<'a, 'tcx, D>),
+    NotYetStarted(JobOwner<'tcx, D>),
  
      /// The query was already completed.
      /// Returns the result of the query and its dep-node index
      /// if it succeeded or a cycle error if it failed.
+    #[cfg(parallel_compiler)]
      JobCompleted((D::Value, DepNodeIndex)),
  
      /// Trying to execute the query resulted in a cycle.
@@ -239,7 +300,7 @@ impl<'tcx> TyCtxt<'tcx> {
      #[inline(always)]
      pub(super) fn start_query<F, R>(
          self,
-        job: Lrc<QueryJob<'tcx>>,
+        token: QueryJobId,
          diagnostics: Option<&Lock<ThinVec<Diagnostic>>>,
          compute: F,
      ) -> R
@@ -253,7 +314,7 @@ pub(super) fn start_query<F, R>(
              // Update the `ImplicitCtxt` to point to our new query job.
              let new_icx = tls::ImplicitCtxt {
                  tcx: self,
-                query: Some(job),
+                query: Some(token),
                  diagnostics,
                  layout_depth: current_icx.layout_depth,
                  task_deps: current_icx.task_deps,
@@ -320,23 +381,31 @@ pub fn try_print_query_stack(handler: &Handler) {
          // state if it was responsible for triggering the panic.
          tls::with_context_opt(|icx| {
              if let Some(icx) = icx {
-                let mut current_query = icx.query.clone();
+                let query_map = icx.tcx.queries.try_collect_active_jobs();
+
+                let mut current_query = icx.query;
                  let mut i = 0;
  
                  while let Some(query) = current_query {
+                    let query_info =
+                        if let Some(info) = query_map.as_ref().and_then(|map| map.get(&query)) {
+                            info
+                        } else {
+                            break;
+                        };
                      let mut diag = Diagnostic::new(
                          Level::FailureNote,
                          &format!(
                              "#{} [{}] {}",
                              i,
-                            query.info.query.name(),
-                            query.info.query.describe(icx.tcx)
+                            query_info.info.query.name(),
+                            query_info.info.query.describe(icx.tcx)
                          ),
                      );
-                    diag.span = icx.tcx.sess.source_map().def_span(query.info.span).into();
+                    diag.span = icx.tcx.sess.source_map().def_span(query_info.info.span).into();
                      handler.force_print_diagnostic(diag);
  
-                    current_query = query.parent.clone();
+                    current_query = query_info.job.parent;
                      i += 1;
                  }
              }
@@ -345,13 +414,72 @@ pub fn try_print_query_stack(handler: &Handler) {
          eprintln!("end of query stack");
      }
  
+    /// Checks if the query is already computed and in the cache.
+    /// On a hit, `on_hit` is given the cached value and its dep-node index.
+    /// On a miss, `on_miss` is given the key and a `QueryLookup` (key hash,
+    /// shard index and shard lock guard) to reuse when computing the query.
+    #[inline(always)]
+    fn try_get_cached<Q, R, OnHit, OnMiss>(
+        self,
+        key: Q::Key,
+        // `on_hit` can be called while holding a lock to the query cache
+        on_hit: OnHit,
+        on_miss: OnMiss,
+    ) -> R
+    where
+        Q: QueryDescription<'tcx> + 'tcx,
+        OnHit: FnOnce(&Q::Value, DepNodeIndex) -> R,
+        OnMiss: FnOnce(Q::Key, QueryLookup<'tcx, Q>) -> R,
+    {
+        let state = Q::query_state(self);
+
+        state.cache.lookup(
+            state,
+            QueryStateShard::<Q>::get_cache,
+            key,
+            |value, index| {
+                if unlikely!(self.prof.enabled()) {
+                    self.prof.query_cache_hit(index.into());
+                }
+                #[cfg(debug_assertions)]
+                {
+                    state.cache_hits.fetch_add(1, Ordering::Relaxed);
+                }
+                on_hit(value, index)
+            },
+            on_miss,
+        )
+    }
+
      #[inline(never)]
-    pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key) -> Q::Value {
-        debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME.as_str(), key, span);
+    pub(super) fn get_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        span: Span,
+        key: Q::Key,
+    ) -> Q::Value {
+        debug!("ty::query::get_query<{}>(key={:?}, span={:?})", Q::NAME, key, span);
  
-        let job = match JobOwner::try_get(self, span, &key) {
+        self.try_get_cached::<Q, _, _, _>(
+            key,
+            |value, index| {
+                self.dep_graph.read_index(index);
+                value.clone()
+            },
+            |key, lookup| self.try_execute_query::<Q>(span, key, lookup),
+        )
+    }
+
+    #[inline(always)]
+    pub(super) fn try_execute_query<Q: QueryDescription<'tcx>>(
+        self,
+        span: Span,
+        key: Q::Key,
+        lookup: QueryLookup<'tcx, Q>,
+    ) -> Q::Value {
+        let job = match JobOwner::try_start(self, span, &key, lookup) {
              TryGetJob::NotYetStarted(job) => job,
              TryGetJob::Cycle(result) => return result,
+            #[cfg(parallel_compiler)]
              TryGetJob::JobCompleted((v, index)) => {
                  self.dep_graph.read_index(index);
                  return v;
@@ -366,15 +494,15 @@ pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key
          }
  
          if Q::ANON {
-            let prof_timer = self.prof.query_provider(Q::NAME);
+            let prof_timer = self.prof.query_provider();
  
              let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
-                self.start_query(job.job.clone(), diagnostics, |tcx| {
+                self.start_query(job.id, diagnostics, |tcx| {
                      tcx.dep_graph.with_anon_task(Q::dep_kind(), || Q::compute(tcx, key))
                  })
              });
  
-            drop(prof_timer);
+            prof_timer.finish_with_query_invocation_id(dep_node_index.into());
  
              self.dep_graph.read_index(dep_node_index);
  
@@ -395,7 +523,7 @@ pub(super) fn get_query<Q: QueryDescription<'tcx>>(self, span: Span, key: Q::Key
              // The diagnostics for this query will be
              // promoted to the current session during
              // `try_mark_green()`, so we can ignore them here.
-            let loaded = self.start_query(job.job.clone(), None, |tcx| {
+            let loaded = self.start_query(job.id, None, |tcx| {
                  let marked = tcx.dep_graph.try_mark_green_and_read(tcx, &dep_node);
                  marked.map(|(prev_dep_node_index, dep_node_index)| {
                      (
@@ -436,8 +564,9 @@ fn load_from_disk_and_cache_in_memory<Q: QueryDescription<'tcx>>(
          let result = if Q::cache_on_disk(self, key.clone(), None)
              && self.sess.opts.debugging_opts.incremental_queries
          {
-            let _prof_timer = self.prof.incr_cache_loading(Q::NAME);
+            let prof_timer = self.prof.incr_cache_loading();
              let result = Q::try_load_from_disk(self, prev_dep_node_index);
+            prof_timer.finish_with_query_invocation_id(dep_node_index.into());
  
              // We always expect to find a cached result for things that
              // can be forced from `DepNode`.
@@ -457,11 +586,13 @@ fn load_from_disk_and_cache_in_memory<Q: QueryDescription<'tcx>>(
          } else {
              // We could not load a result from the on-disk cache, so
              // recompute.
-            let _prof_timer = self.prof.query_provider(Q::NAME);
+            let prof_timer = self.prof.query_provider();
  
              // The dep-graph for this computation is already in-place.
              let result = self.dep_graph.with_ignore(|| Q::compute(self, key));
  
+            prof_timer.finish_with_query_invocation_id(dep_node_index.into());
+
              result
          };
  
@@ -506,7 +637,7 @@ fn incremental_verify_ich<Q: QueryDescription<'tcx>>(
      fn force_query_with_job<Q: QueryDescription<'tcx>>(
          self,
          key: Q::Key,
-        job: JobOwner<'_, 'tcx, Q>,
+        job: JobOwner<'tcx, Q>,
          dep_node: DepNode,
      ) -> (Q::Value, DepNodeIndex) {
          // If the following assertion triggers, it can have two reasons:
@@ -523,10 +654,10 @@ fn force_query_with_job<Q: QueryDescription<'tcx>>(
              dep_node
          );
  
-        let prof_timer = self.prof.query_provider(Q::NAME);
+        let prof_timer = self.prof.query_provider();
  
          let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
-            self.start_query(job.job.clone(), diagnostics, |tcx| {
+            self.start_query(job.id, diagnostics, |tcx| {
                  if Q::EVAL_ALWAYS {
                      tcx.dep_graph.with_eval_always_task(
                          dep_node,
@@ -541,7 +672,7 @@ fn force_query_with_job<Q: QueryDescription<'tcx>>(
              })
          });
  
-        drop(prof_timer);
+        prof_timer.finish_with_query_invocation_id(dep_node_index.into());
  
          if unlikely!(!diagnostics.is_empty()) {
              if dep_node.kind != crate::dep_graph::DepKind::Null {
@@ -561,7 +692,7 @@ fn force_query_with_job<Q: QueryDescription<'tcx>>(
      /// side-effects -- e.g., in order to report errors for erroneous programs.
      ///
      /// Note: The optimization is only available during incr. comp.
-    pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
+    pub(super) fn ensure_query<Q: QueryDescription<'tcx> + 'tcx>(self, key: Q::Key) -> () {
          if Q::EVAL_ALWAYS {
              let _ = self.get_query::<Q>(DUMMY_SP, key);
              return;
@@ -572,29 +703,47 @@ pub(super) fn ensure_query<Q: QueryDescription<'tcx>>(self, key: Q::Key) -> () {
  
          let dep_node = Q::to_dep_node(self, &key);
  
-        if self.dep_graph.try_mark_green_and_read(self, &dep_node).is_none() {
-            // A None return from `try_mark_green_and_read` means that this is either
-            // a new dep node or that the dep node has already been marked red.
-            // Either way, we can't call `dep_graph.read()` as we don't have the
-            // DepNodeIndex. We must invoke the query itself. The performance cost
-            // this introduces should be negligible as we'll immediately hit the
-            // in-memory cache, or another query down the line will.
-
-            let _ = self.get_query::<Q>(DUMMY_SP, key);
-        } else {
-            self.prof.query_cache_hit(Q::NAME);
+        match self.dep_graph.try_mark_green_and_read(self, &dep_node) {
+            None => {
+                // A None return from `try_mark_green_and_read` means that this is either
+                // a new dep node or that the dep node has already been marked red.
+                // Either way, we can't call `dep_graph.read()` as we don't have the
+                // DepNodeIndex. We must invoke the query itself. The performance cost
+                // this introduces should be negligible as we'll immediately hit the
+                // in-memory cache, or another query down the line will.
+                let _ = self.get_query::<Q>(DUMMY_SP, key);
+            }
+            Some((_, dep_node_index)) => {
+                self.prof.query_cache_hit(dep_node_index.into());
+            }
          }
      }
  
      #[allow(dead_code)]
-    fn force_query<Q: QueryDescription<'tcx>>(self, key: Q::Key, span: Span, dep_node: DepNode) {
+    fn force_query<Q: QueryDescription<'tcx> + 'tcx>(
+        self,
+        key: Q::Key,
+        span: Span,
+        dep_node: DepNode,
+    ) {
          // We may be concurrently trying both execute and force a query.
          // Ensure that only one of them runs the query.
-        let job = match JobOwner::try_get(self, span, &key) {
-            TryGetJob::NotYetStarted(job) => job,
-            TryGetJob::Cycle(_) | TryGetJob::JobCompleted(_) => return,
-        };
-        self.force_query_with_job::<Q>(key, job, dep_node);
+
+        self.try_get_cached::<Q, _, _, _>(
+            key,
+            |_, _| {
+                // Cache hit, do nothing
+            },
+            |key, lookup| {
+                let job = match JobOwner::try_start(self, span, &key, lookup) {
+                    TryGetJob::NotYetStarted(job) => job,
+                    TryGetJob::Cycle(_) => return,
+                    #[cfg(parallel_compiler)]
+                    TryGetJob::JobCompleted(_) => return,
+                };
+                self.force_query_with_job::<Q>(key, job, dep_node);
+            },
+        );
      }
  }
  
@@ -603,17 +752,17 @@ macro_rules! handle_cycle_error {
          $tcx.report_cycle($error).emit();
          Value::from_cycle_error($tcx)
      }};
-    ([fatal_cycle$(, $modifiers:ident)*][$tcx:expr, $error:expr]) => {{
+    ([fatal_cycle $($rest:tt)*][$tcx:expr, $error:expr]) => {{
          $tcx.report_cycle($error).emit();
          $tcx.sess.abort_if_errors();
          unreachable!()
      }};
-    ([cycle_delay_bug$(, $modifiers:ident)*][$tcx:expr, $error:expr]) => {{
+    ([cycle_delay_bug $($rest:tt)*][$tcx:expr, $error:expr]) => {{
          $tcx.report_cycle($error).delay_as_bug();
          Value::from_cycle_error($tcx)
      }};
-    ([$other:ident$(, $modifiers:ident)*][$($args:tt)*]) => {
-        handle_cycle_error!([$($modifiers),*][$($args)*])
+    ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => {
+        handle_cycle_error!([$($($modifiers)*)*][$($args)*])
      };
  }
  
@@ -621,11 +770,11 @@ macro_rules! is_anon {
      ([]) => {{
          false
      }};
-    ([anon$(, $modifiers:ident)*]) => {{
+    ([anon $($rest:tt)*]) => {{
          true
      }};
-    ([$other:ident$(, $modifiers:ident)*]) => {
-        is_anon!([$($modifiers),*])
+    ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*]) => {
+        is_anon!([$($($modifiers)*)*])
      };
  }
  
@@ -633,11 +782,23 @@ macro_rules! is_eval_always {
      ([]) => {{
          false
      }};
-    ([eval_always$(, $modifiers:ident)*]) => {{
+    ([eval_always $($rest:tt)*]) => {{
          true
      }};
-    ([$other:ident$(, $modifiers:ident)*]) => {
-        is_eval_always!([$($modifiers),*])
+    ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*]) => {
+        is_eval_always!([$($($modifiers)*)*])
+    };
+}
+
+macro_rules! query_storage {
+    ([][$K:ty, $V:ty]) => {
+        <<$K as Key>::CacheSelector as CacheSelector<$K, $V>>::Cache
+    };
+    ([storage($ty:ty) $($rest:tt)*][$K:ty, $V:ty]) => {
+        $ty
+    };
+    ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => {
+        query_storage!([$($($modifiers)*)*][$($args)*])
      };
  }
  
@@ -645,11 +806,11 @@ macro_rules! hash_result {
      ([][$hcx:expr, $result:expr]) => {{
          dep_graph::hash_result($hcx, &$result)
      }};
-    ([no_hash$(, $modifiers:ident)*][$hcx:expr, $result:expr]) => {{
+    ([no_hash $($rest:tt)*][$hcx:expr, $result:expr]) => {{
          None
      }};
-    ([$other:ident$(, $modifiers:ident)*][$($args:tt)*]) => {
-        hash_result!([$($modifiers),*][$($args)*])
+    ([$other:ident $(($($other_args:tt)*))* $(, $($modifiers:tt)*)*][$($args:tt)*]) => {
+        hash_result!([$($($modifiers)*)*][$($args)*])
      };
  }
  
@@ -669,7 +830,6 @@ macro_rules! define_queries_inner {
          [$($modifiers:tt)*] fn $name:ident: $node:ident($K:ty) -> $V:ty,)*) => {
  
          use std::mem;
-        use rustc_data_structures::sharded::Sharded;
          use crate::{
              rustc_data_structures::stable_hasher::HashStable,
              rustc_data_structures::stable_hasher::StableHasher,
@@ -696,150 +856,38 @@ pub fn new(
                  }
              }
  
-            #[cfg(parallel_compiler)]
-            pub fn collect_active_jobs(&self) -> Vec<Lrc<QueryJob<$tcx>>> {
-                let mut jobs = Vec::new();
+            pub fn try_collect_active_jobs(
+                &self
+            ) -> Option<FxHashMap<QueryJobId, QueryJobInfo<'tcx>>> {
+                let mut jobs = FxHashMap::default();
  
-                // We use try_lock_shards here since we are only called from the
-                // deadlock handler, and this shouldn't be locked.
                  $(
-                    let shards = self.$name.try_lock_shards().unwrap();
-                    jobs.extend(shards.iter().flat_map(|shard| shard.active.values().filter_map(|v|
+                    // `try_lock_shards` is used because we may be called from the deadlock
+                    // handler; if any shard's lock cannot be taken, `?` makes us return `None`.
+                    let shards = self.$name.shards.try_lock_shards()?;
+                    let shards = shards.iter().enumerate();
+                    jobs.extend(shards.flat_map(|(shard_id, shard)| {
+                        shard.active.iter().filter_map(move |(k, v)| {
                          if let QueryResult::Started(ref job) = *v {
-                            Some(job.clone())
+                                let id = QueryJobId {
+                                    job: job.id,
+                                    shard:  u16::try_from(shard_id).unwrap(),
+                                    kind:
+                                        <queries::$name<'tcx> as QueryAccessors<'tcx>>::dep_kind(),
+                                };
+                                let info = QueryInfo {
+                                    span: job.span,
+                                    query: queries::$name::query(k.clone())
+                                };
+                                Some((id, QueryJobInfo { info,  job: job.clone() }))
                          } else {
                              None
                          }
-                    )));
-                )*
-
-                jobs
-            }
-
-            pub fn print_stats(&self) {
-                let mut queries = Vec::new();
-
-                #[derive(Clone)]
-                struct QueryStats {
-                    name: &'static str,
-                    cache_hits: usize,
-                    key_size: usize,
-                    key_type: &'static str,
-                    value_size: usize,
-                    value_type: &'static str,
-                    entry_count: usize,
-                }
-
-                fn stats<'tcx, Q: QueryConfig<'tcx>>(
-                    name: &'static str,
-                    map: &Sharded<QueryCache<'tcx, Q>>,
-                ) -> QueryStats {
-                    let map = map.lock_shards();
-                    QueryStats {
-                        name,
-                        #[cfg(debug_assertions)]
-                        cache_hits: map.iter().map(|shard| shard.cache_hits).sum(),
-                        #[cfg(not(debug_assertions))]
-                        cache_hits: 0,
-                        key_size: mem::size_of::<Q::Key>(),
-                        key_type: type_name::<Q::Key>(),
-                        value_size: mem::size_of::<Q::Value>(),
-                        value_type: type_name::<Q::Value>(),
-                        entry_count: map.iter().map(|shard| shard.results.len()).sum(),
-                    }
-                }
-
-                $(
-                    queries.push(stats::<queries::$name<'_>>(
-                        stringify!($name),
-                        &self.$name,
-                    ));
+                        })
+                    }));
                  )*
  
-                if cfg!(debug_assertions) {
-                    let hits: usize = queries.iter().map(|s| s.cache_hits).sum();
-                    let results: usize = queries.iter().map(|s| s.entry_count).sum();
-                    println!("\nQuery cache hit rate: {}", hits as f64 / (hits + results) as f64);
-                }
-
-                let mut query_key_sizes = queries.clone();
-                query_key_sizes.sort_by_key(|q| q.key_size);
-                println!("\nLarge query keys:");
-                for q in query_key_sizes.iter().rev()
-                                        .filter(|q| q.key_size > 8) {
-                    println!(
-                        "   {} - {} x {} - {}",
-                        q.name,
-                        q.key_size,
-                        q.entry_count,
-                        q.key_type
-                    );
-                }
-
-                let mut query_value_sizes = queries.clone();
-                query_value_sizes.sort_by_key(|q| q.value_size);
-                println!("\nLarge query values:");
-                for q in query_value_sizes.iter().rev()
-                                          .filter(|q| q.value_size > 8) {
-                    println!(
-                        "   {} - {} x {} - {}",
-                        q.name,
-                        q.value_size,
-                        q.entry_count,
-                        q.value_type
-                    );
-                }
-
-                if cfg!(debug_assertions) {
-                    let mut query_cache_hits = queries.clone();
-                    query_cache_hits.sort_by_key(|q| q.cache_hits);
-                    println!("\nQuery cache hits:");
-                    for q in query_cache_hits.iter().rev() {
-                        println!(
-                            "   {} - {} ({}%)",
-                            q.name,
-                            q.cache_hits,
-                            q.cache_hits as f64 / (q.cache_hits + q.entry_count) as f64
-                        );
-                    }
-                }
-
-                let mut query_value_count = queries.clone();
-                query_value_count.sort_by_key(|q| q.entry_count);
-                println!("\nQuery value count:");
-                for q in query_value_count.iter().rev() {
-                    println!("   {} - {}", q.name, q.entry_count);
-                }
-            }
-        }
-
-        #[allow(nonstandard_style)]
-        #[derive(Clone, Copy)]
-        pub enum QueryName {
-            $($name),*
-        }
-
-        impl rustc_data_structures::profiling::QueryName for QueryName {
-            fn discriminant(self) -> std::mem::Discriminant<QueryName> {
-                std::mem::discriminant(&self)
-            }
-
-            fn as_str(self) -> &'static str {
-                QueryName::as_str(&self)
-            }
-        }
-
-        impl QueryName {
-            pub fn register_with_profiler(
-                profiler: &rustc_data_structures::profiling::SelfProfiler,
-            ) {
-                $(profiler.register_query_name(QueryName::$name);)*
-            }
-
-            pub fn as_str(&self) -> &'static str {
-                match self {
-                    $(QueryName::$name => stringify!($name),)*
-                }
+                Some(jobs)
              }
          }
  
@@ -883,12 +931,6 @@ pub fn default_span(&self, tcx: TyCtxt<$tcx>, span: Span) -> Span {
                      $(Query::$name(key) => key.default_span(tcx),)*
                  }
              }
-
-            pub fn query_name(&self) -> QueryName {
-                match self {
-                    $(Query::$name(_) => QueryName::$name,)*
-                }
-            }
          }
  
          impl<'a, $tcx> HashStable<StableHashingContext<'a>> for Query<$tcx> {
@@ -922,8 +964,7 @@ pub fn $name<F: FnOnce() -> R, R>(f: F) -> R {
          $(impl<$tcx> QueryConfig<$tcx> for queries::$name<$tcx> {
              type Key = $K;
              type Value = $V;
-
-            const NAME: QueryName = QueryName::$name;
+            const NAME: &'static str = stringify!($name);
              const CATEGORY: ProfileCategory = $category;
          }
  
@@ -931,13 +972,15 @@ impl<$tcx> QueryAccessors<$tcx> for queries::$name<$tcx> {
              const ANON: bool = is_anon!([$($modifiers)*]);
              const EVAL_ALWAYS: bool = is_eval_always!([$($modifiers)*]);
  
+            type Cache = query_storage!([$($modifiers)*][$K, $V]);
+
              #[inline(always)]
              fn query(key: Self::Key) -> Query<'tcx> {
                  Query::$name(key)
              }
  
              #[inline(always)]
-            fn query_cache<'a>(tcx: TyCtxt<$tcx>) -> &'a Sharded<QueryCache<$tcx, Self>> {
+            fn query_state<'a>(tcx: TyCtxt<$tcx>) -> &'a QueryState<$tcx, Self> {
                  &tcx.queries.$name
              }
  
@@ -1035,6 +1078,35 @@ pub fn at(self, span: Span) -> TyCtxtAt<$tcx> {
              pub fn $name(self, key: $K) -> $V {
                  self.at(DUMMY_SP).$name(key)
              })*
+
+            /// All self-profiling events generated by the query engine use
+            /// virtual `StringId`s for their `event_id`. This method makes all
+            /// those virtual `StringId`s point to actual strings.
+            ///
+            /// If we are recording only summary data, the ids will point to
+            /// just the query names. If we are recording query keys too, we
+            /// allocate the corresponding strings here.
+            pub fn alloc_self_profile_query_strings(self) {
+                use crate::ty::query::profiling_support::{
+                    alloc_self_profile_query_strings_for_query_cache,
+                    QueryKeyStringCache,
+                };
+
+                if !self.prof.enabled() {
+                    return;
+                }
+
+                let mut string_cache = QueryKeyStringCache::new();
+
+                $({
+                    alloc_self_profile_query_strings_for_query_cache(
+                        self,
+                        stringify!($name),
+                        &self.queries.$name,
+                        &mut string_cache,
+                    );
+                })*
+            }
          }
  
          impl TyCtxtAt<$tcx> {
@@ -1069,7 +1141,7 @@ pub struct Queries<$tcx> {
              providers: IndexVec<CrateNum, Providers<$tcx>>,
              fallback_extern_providers: Box<Providers<$tcx>>,
  
-            $($(#[$attr])*  $name: Sharded<QueryCache<$tcx, queries::$name<$tcx>>>,)*
+            $($(#[$attr])*  $name: QueryState<$tcx, queries::$name<$tcx>>,)*
          }
      };
  }
@@ -1164,7 +1236,6 @@ pub fn force_from_dep_node(tcx: TyCtxt<'_>, dep_node: &DepNode) -> bool {
          // These are inputs that are expected to be pre-allocated and that
          // should therefore always be red or green already.
          DepKind::AllLocalTraitImpls |
-        DepKind::Krate |
          DepKind::CrateMetadata |
          DepKind::HirBody |
          DepKind::Hir |