git.lizzy.rs Git - rust.git/commitdiff
perf(dep_graph): Avoid allocating a set when the number of reads is small
author Markus Westerlind <markus.westerlind@distilnetworks.com>
Fri, 6 Mar 2020 15:44:22 +0000 (16:44 +0100)
committer Markus Westerlind <markus.westerlind@distilnetworks.com>
Fri, 6 Mar 2020 15:48:34 +0000 (16:48 +0100)
`reserve_and_rehash` takes up 1.4% of the runtime on the `packed-simd`
benchmark, which I believe is because the number of reads is very low in
many cases (see https://github.com/rust-lang/rust/pull/50565 for
instance).

This avoids allocating the set until the `reads` `SmallVec` is about to
spill to the heap, but it is possible that a lower limit would be better
(not tested, since the improvement will be hard to spot either way).
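
For illustration, here is a minimal standalone sketch of the same pattern,
using `Vec` and the standard `HashSet` in place of rustc's `SmallVec` and
`FxHashSet` (the `DedupReads` type, the `record` method, and `READS_CAP`
are made up for this sketch and are not part of the actual patch):

    use std::collections::HashSet;

    /// Threshold below which a linear scan is used instead of the hash set.
    const READS_CAP: usize = 8;

    /// Stand-in for `TaskDeps`: records each read once, cheaply while the
    /// number of reads is small, falling back to a hash set afterwards.
    struct DedupReads {
        reads: Vec<u32>,        // stand-in for SmallVec<[DepNodeIndex; READS_CAP]>
        read_set: HashSet<u32>, // stand-in for FxHashSet<DepNodeIndex>
    }

    impl DedupReads {
        fn new() -> Self {
            DedupReads { reads: Vec::new(), read_set: HashSet::new() }
        }

        /// Returns `true` if `source` was newly recorded.
        fn record(&mut self, source: u32) -> bool {
            // For few reads a linear scan over the small vector is cheap and
            // never touches (or allocates) the hash set.
            let new_read = if self.reads.len() < READS_CAP {
                self.reads.iter().all(|other| *other != source)
            } else {
                self.read_set.insert(source)
            };
            if new_read {
                self.reads.push(source);
                if self.reads.len() == READS_CAP {
                    // Seed the set with everything seen so far so later
                    // membership checks go through the hash set.
                    self.read_set.extend(self.reads.iter().copied());
                }
            }
            new_read
        }
    }

    fn main() {
        let mut deps = DedupReads::new();
        for &idx in &[1, 2, 3, 2, 1, 4] {
            println!("{} new: {}", idx, deps.record(idx));
        }
    }

Below the cap, a membership check is a scan over at most `READS_CAP`
elements; once the cap is reached the set is seeded in one `extend` call
and takes over deduplication, matching the behaviour of the patch below.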

src/librustc/dep_graph/graph.rs

index 33902fe913a9c9129010786c025ec84da6b8687b..18bf77a8c4fb0a92a41f949047691e7996d30f6d 100644 (file)
@@ -1128,11 +1128,25 @@ fn read_index(&self, source: DepNodeIndex) {
             let icx = if let Some(icx) = icx { icx } else { return };
             if let Some(task_deps) = icx.task_deps {
                 let mut task_deps = task_deps.lock();
+                let task_deps = &mut *task_deps;
                 if cfg!(debug_assertions) {
                     self.current.total_read_count.fetch_add(1, Relaxed);
                 }
-                if task_deps.read_set.insert(source) {
+
+                // As long as we only have a low number of reads we can avoid doing a hash
+                // insert and potentially allocating/reallocating the hashmap
+                let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
+                    task_deps.reads.iter().all(|other| *other != source)
+                } else {
+                    task_deps.read_set.insert(source)
+                };
+                if new_read {
                     task_deps.reads.push(source);
+                    if task_deps.reads.len() == TASK_DEPS_READS_CAP {
+                        // Fill `read_set` with what we have so far so we can use the hashset next
+                        // time
+                        task_deps.read_set.extend(task_deps.reads.iter().copied());
+                    }
 
                     #[cfg(debug_assertions)]
                     {
@@ -1154,10 +1168,11 @@ fn read_index(&self, source: DepNodeIndex) {
     }
 }
 
+const TASK_DEPS_READS_CAP: usize = 8;
 pub struct TaskDeps {
     #[cfg(debug_assertions)]
     node: Option<DepNode>,
-    reads: SmallVec<[DepNodeIndex; 8]>,
+    reads: SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>,
     read_set: FxHashSet<DepNodeIndex>,
 }