Refactor CodegenUnit size estimates

author varkor <github@varkor.com>

Mon, 15 Jan 2018 18:28:34 +0000 (18:28 +0000)

committer varkor <github@varkor.com>

Fri, 19 Jan 2018 00:41:50 +0000 (00:41 +0000)
author varkor <github@varkor.com>
Mon, 15 Jan 2018 18:28:34 +0000 (18:28 +0000)
committer varkor <github@varkor.com>
Fri, 19 Jan 2018 00:41:50 +0000 (00:41 +0000)
diff --git a/src/librustc/mir/mono.rs b/src/librustc/mir/mono.rs

index efdf4066815f46ac4ba5df358cb440fdf0688bd3..bbef045a305a554840cfe441e77213272090518a 100644 (file)
--- a/src/librustc/mir/mono.rs
+++ b/src/librustc/mir/mono.rs
@@ -10,7 +10,7 @@
  
  use syntax::ast::NodeId;
  use syntax::symbol::InternedString;
-use ty::Instance;
+use ty::{Instance, TyCtxt};
  use util::nodemap::FxHashMap;
  use rustc_data_structures::base_n;
  use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
@@ -25,6 +25,22 @@ pub enum MonoItem<'tcx> {
      GlobalAsm(NodeId),
  }
  
+impl<'tcx> MonoItem<'tcx> {
+    pub fn size_estimate<'a>(&self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) -> usize {
+        match *self {
+            MonoItem::Fn(instance) => {
+                // Estimate the size of a function based on how many statements
+                // it contains.
+                let mir = tcx.instance_mir(instance.def);
+                mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum()
+            },
+            // Conservatively estimate the size of a static declaration
+            // or assembly to be 1.
+            MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
+        }
+    }
+}
+
  impl<'tcx> HashStable<StableHashingContext<'tcx>> for MonoItem<'tcx> {
      fn hash_stable<W: StableHasherResult>(&self,
                                             hcx: &mut StableHashingContext<'tcx>,
@@ -52,6 +68,7 @@ pub struct CodegenUnit<'tcx> {
      /// as well as the crate name and disambiguator.
      name: InternedString,
      items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>,
+    size_estimate: Option<usize>,
  }
  
  #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
@@ -101,6 +118,7 @@ pub fn new(name: InternedString) -> CodegenUnit<'tcx> {
          CodegenUnit {
              name: name,
              items: FxHashMap(),
+            size_estimate: None,
          }
      }
  
@@ -131,6 +149,25 @@ pub fn mangle_name(human_readable_name: &str) -> String {
          let hash = hash & ((1u128 << 80) - 1);
          base_n::encode(hash, base_n::CASE_INSENSITIVE)
      }
+
+    pub fn estimate_size<'a>(&mut self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) {
+        // Estimate the size of a codegen unit as (approximately) the number of MIR
+        // statements it corresponds to.
+        self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum());
+    }
+
+    pub fn size_estimate(&self) -> usize {
+        // Should only be called if `estimate_size` has previously been called.
+        assert!(self.size_estimate.is_some());
+        self.size_estimate.unwrap()
+    }
+
+    pub fn modify_size_estimate(&mut self, delta: usize) {
+        assert!(self.size_estimate.is_some());
+        if let Some(size_estimate) = self.size_estimate {
+            self.size_estimate = Some(size_estimate + delta);
+        }
+    }
  }
  
  impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
@@ -140,6 +177,7 @@ fn hash_stable<W: StableHasherResult>(&self,
          let CodegenUnit {
              ref items,
              name,
+            ..
          } = *self;
  
          name.hash_stable(hcx, hasher);
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs

index d8ec074b8a46457633a74ffafd27862243244396..4150f9f95485bde1a82ab2c6f57bca0aae7f4d9e 100644 (file)
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -110,7 +110,7 @@
  use rustc::ty::{self, TyCtxt, InstanceDef};
  use rustc::ty::item_path::characteristic_def_id_of_type;
  use rustc::util::nodemap::{FxHashMap, FxHashSet};
-use std::collections::hash_map::{HashMap, Entry};
+use std::collections::hash_map::Entry;
  use syntax::ast::NodeId;
  use syntax::symbol::{Symbol, InternedString};
  use rustc::mir::mono::MonoItem;
@@ -225,12 +225,14 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
      let mut initial_partitioning = place_root_translation_items(tcx,
                                                                  trans_items);
  
+    initial_partitioning.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
+
      debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter());
  
      // If the partitioning should produce a fixed count of codegen units, merge
      // until that count is reached.
      if let PartitioningStrategy::FixedUnitCount(count) = strategy {
-        merge_codegen_units(tcx, &mut initial_partitioning, count, &tcx.crate_name.as_str());
+        merge_codegen_units(&mut initial_partitioning, count, &tcx.crate_name.as_str());
  
          debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter());
      }
@@ -242,6 +244,8 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
      let mut post_inlining = place_inlined_translation_items(initial_partitioning,
                                                              inlining_map);
  
+    post_inlining.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx));
+
      debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter());
  
      // Next we try to make as many symbols "internal" as possible, so LLVM has
@@ -405,8 +409,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
      }
  }
  
-fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
-                             initial_partitioning: &mut PreInliningPartitioning<'tcx>,
+fn merge_codegen_units<'tcx>(initial_partitioning: &mut PreInliningPartitioning<'tcx>,
                               target_cgu_count: usize,
                               crate_name: &str) {
      assert!(target_cgu_count >= 1);
@@ -423,51 +426,16 @@ fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
      // the stable sort below will keep everything nice and deterministic.
      codegen_units.sort_by_key(|cgu| cgu.name().clone());
  
-    // Estimate the size of a codegen unit as (approximately) the number of MIR
-    // statements it corresponds to.
-    fn codegen_unit_size_estimate<'a, 'tcx>(cgu: &CodegenUnit<'tcx>,
-                                            mono_item_sizes: &HashMap<MonoItem, usize>)
-                                            -> usize {
-        cgu.items().keys().map(|mi| mono_item_sizes.get(mi).unwrap()).sum()
-    }
-
-    fn mono_item_size_estimate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
-                                         item: &MonoItem<'tcx>)
-                                         -> usize {
-        match item {
-            MonoItem::Fn(instance) => {
-                // Estimate the size of a function based on how many statements
-                // it contains.
-                let mir = tcx.instance_mir(instance.def);
-                mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum()
-            },
-            // Conservatively estimate the size of a static declaration
-            // or assembly to be 1.
-            MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1,
-        }
-    }
-
-    // Since `sort_by_key` currently recomputes the keys for each comparison,
-    // we can save unnecessary recomputations by storing size estimates for
-    // each `MonoItem`. Storing estimates for `CodegenUnit` might be preferable,
-    // but its structure makes it awkward to use as a key and additionally their
-    // sizes change as the merging occurs, requiring the map to be updated.
-    let mut sizes: HashMap<MonoItem, usize> = HashMap::new();
-    for mis in codegen_units.iter().map(|cgu| cgu.items().keys()) {
-        mis.for_each(|mi| {
-            sizes.entry(*mi).or_insert_with(|| mono_item_size_estimate(tcx, mi));
-        });
-    }
-
      // Merge the two smallest codegen units until the target size is reached.
      // Note that "size" is estimated here rather inaccurately as the number of
      // translation items in a given unit. This could be improved on.
      while codegen_units.len() > target_cgu_count {
          // Sort small cgus to the back
-        codegen_units.sort_by_key(|cgu| usize::MAX - codegen_unit_size_estimate(cgu, &sizes));
+        codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
          let mut smallest = codegen_units.pop().unwrap();
          let second_smallest = codegen_units.last_mut().unwrap();
  
+        second_smallest.modify_size_estimate(smallest.size_estimate());
          for (k, v) in smallest.items_mut().drain() {
              second_smallest.items_mut().insert(k, v);
          }
diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs

index 633ed9b32cd1e59880f0708182e7027495810ec2..e03b6ee794d67d64ee578a6ba39a953436554786 100644 (file)
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@@ -79,7 +79,7 @@
  use std::str;
  use std::sync::Arc;
  use std::time::{Instant, Duration};
-use std::i32;
+use std::{i32, usize};
  use std::iter;
  use std::sync::mpsc;
  use syntax_pos::Span;
@@ -829,7 +829,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
      // account the size of each TransItem.
      let codegen_units = {
          let mut codegen_units = codegen_units;
-        codegen_units.sort_by_key(|cgu| -(cgu.items().len() as isize));
+        codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate());
          codegen_units
      };
author	varkor <github@varkor.com>
	Mon, 15 Jan 2018 18:28:34 +0000 (18:28 +0000)
committer	varkor <github@varkor.com>
	Fri, 19 Jan 2018 00:41:50 +0000 (00:41 +0000)
src/librustc/mir/mono.rs		patch | blob | history
src/librustc_mir/monomorphize/partitioning.rs		patch | blob | history
src/librustc_trans/base.rs		patch | blob | history