3 use std::mem::{self, Discriminant};
7 use std::thread::ThreadId;
8 use std::time::{Duration, Instant};
13 use measureme::StringId;
/// `MmapSerializationSink` is faster on macOS and Linux
/// but `FileSerializationSink` is faster on Windows.
// NOTE(review): the two aliases below are presumably selected by
// `#[cfg(...)]` attributes on lines not visible in this excerpt;
// otherwise the duplicate definition would not compile — confirm.
type SerializationSink = measureme::MmapSerializationSink;
type SerializationSink = measureme::FileSerializationSink;

// Convenience alias: the measureme profiler specialized to the sink
// chosen for this platform.
type Profiler = measureme::Profiler<SerializationSink>;
/// Implemented by query-name enums so the profiler can map a query both
/// to a stable discriminant (used to derive a pre-reserved `StringId`,
/// see `SelfProfiler::get_query_name_string_id`) and to a readable name.
pub trait QueryName: Sized + Copy {
    /// The enum discriminant identifying this query kind.
    fn discriminant(self) -> Discriminant<Self>;
    /// The query's name as a static string (recorded in the profile).
    fn as_str(self) -> &'static str;
/// Broad categories that profiling events can be grouped under.
// NOTE(review): the enum's variants are on lines elided from this excerpt.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum ProfileCategory {
// Bitmask of which event kinds the profiler should record.
// NOTE(review): the enclosing `bitflags!` macro invocation and the `NONE`
// constant appear on elided lines — `EventFilter::NONE` is referenced
// below and in `SelfProfilerRef::new`.
struct EventFilter: u32 {
    const GENERIC_ACTIVITIES = 1 << 0;
    const QUERY_PROVIDERS = 1 << 1;
    const QUERY_CACHE_HITS = 1 << 2;
    const QUERY_BLOCKED = 1 << 3;
    const INCR_CACHE_LOADS = 1 << 4;
    const SPARSE_PASS = 1 << 5;
    const GENERIC_PASS = 1 << 6;

    // Default set: everything above except QUERY_CACHE_HITS.
    const DEFAULT = Self::GENERIC_ACTIVITIES.bits |
        Self::QUERY_PROVIDERS.bits |
        Self::QUERY_BLOCKED.bits |
        Self::INCR_CACHE_LOADS.bits |
        Self::SPARSE_PASS.bits |
        Self::GENERIC_PASS.bits;

    // empty() and none() aren't const-fns unfortunately
    const ALL = !Self::NONE.bits;
// Lookup table from event-name string to filter mask. Consulted when
// parsing the `event_filters` passed to `SelfProfiler::new`, and to list
// the available options in its unknown-event warning.
const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
    ("none", EventFilter::NONE),
    ("all", EventFilter::ALL),
    ("sparse-pass", EventFilter::SPARSE_PASS),
    ("generic-pass", EventFilter::GENERIC_PASS),
    ("generic-activity", EventFilter::GENERIC_ACTIVITIES),
    ("query-provider", EventFilter::QUERY_PROVIDERS),
    ("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
    ("query-blocked", EventFilter::QUERY_BLOCKED),
    ("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
    // (closing `];` is on an elided line)
// Converts the current thread's `ThreadId` into the `u32` thread id that
// the measureme event-recording APIs expect.
fn thread_id_to_u32(tid: ThreadId) -> u32 {
    // NOTE(review): this transmute assumes `ThreadId` is represented as a
    // `u64`, which is a private implementation detail of the standard
    // library and not a stable guarantee — confirm this is acceptable.
    // The truncation to `u32` also silently drops the high bits.
    unsafe { mem::transmute::<ThreadId, u64>(tid) as u32 }
/// A reference to the SelfProfiler. It can be cloned and sent across thread
/// boundaries at will.
pub struct SelfProfilerRef {
    // This field is `None` if self-profiling is disabled for the current
    // compilation session.
    profiler: Option<Arc<SelfProfiler>>,

    // We store the filter mask directly in the reference because that doesn't
    // cost anything and allows for filtering with checking if the profiler is
    // (comment continues on an elided line — presumably "actually enabled")
    event_filter_mask: EventFilter,

    // Print sparse passes to stdout
    // NOTE(review): the `verbose_sparse: bool` field line is elided here;
    // it is referenced by the constructor below.

    // Print generic passes to stdout
    verbose_generic: bool,
impl SelfProfilerRef {
    // NOTE(review): the constructor's `pub fn new(` signature line and
    // several interior lines (including the `if verbose_*` guards that
    // presumably wrap the mask updates below) are elided in this excerpt.
    profiler: Option<Arc<SelfProfiler>>,
    verbose_sparse: bool,
    verbose_generic: bool,
    ) -> SelfProfilerRef {
        // If there is no SelfProfiler then the filter mask is set to NONE,
        // ensuring that nothing ever tries to actually access it.
        let mut event_filter_mask =
            profiler.as_ref().map(|p| p.event_filter_mask).unwrap_or(EventFilter::NONE);
        // Presumably conditional on `verbose_sparse` — confirm against full file.
        event_filter_mask |= EventFilter::SPARSE_PASS;
        // Presumably conditional on `verbose_generic` — confirm against full file.
        event_filter_mask |= EventFilter::GENERIC_PASS;
        SelfProfilerRef { profiler, event_filter_mask, verbose_sparse, verbose_generic }
    // Runs `f` against the wrapped `SelfProfiler` when events matching
    // `event_filter` are enabled; otherwise produces an inert guard.
    fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
        // (the `where` keyword line is elided in this excerpt)
        F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>,
        // Dispatch presumably goes through the `handle_event` shim below;
        // its call-site line is elided. Cold path runs `f`:
        || f(self.profiler.as_ref().unwrap()),
        // Hot (non-profiling) path returns a no-op guard:
        || TimingGuard::none(),
    // This shim makes sure that cold calls only get executed if the filter mask
    // lets them pass. It also contains some trickery to make sure that
    // code is optimized for non-profiling compilation sessions, i.e. anything
    // past the filter check is never inlined so it doesn't clutter the fast
    // path. (comment continuation and the fn signature line are elided)
        event_filter: EventFilter,
        cold: impl FnOnce() -> R,
        hot: impl FnOnce() -> R,
        // The branch hint marks the profiling (cold) path as unlikely so
        // the disabled-profiler path stays fast.
        if unlikely!(self.event_filter_mask.contains(event_filter)) {
    /// Start profiling a sparse pass. Profiling continues until the
    /// VerboseTimingGuard returned from this call is dropped.
    // NOTE(review): several interior lines of this method are elided
    // (the `exec`/`handle_event` call opening and closing tokens).
    pub fn sparse_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
        EventFilter::SPARSE_PASS,
        // Cold path: build a verbose guard bound to the sparse-pass event kind.
        VerboseTimingGuard::start(
            .map(|profiler| (&**profiler, profiler.sparse_pass_event_kind)),
        // Hot path: profiling disabled, return an inert guard.
        || VerboseTimingGuard::none(),
    /// Start profiling a generic pass. Profiling continues until the
    /// VerboseTimingGuard returned from this call is dropped.
    // NOTE(review): several interior lines of this method are elided
    // (mirrors `sparse_pass` above but for the generic-pass event kind).
    pub fn generic_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
        EventFilter::GENERIC_PASS,
        // Cold path: verbose guard bound to the generic-pass event kind.
        VerboseTimingGuard::start(
            .map(|profiler| (&**profiler, profiler.generic_pass_event_kind)),
            self.verbose_generic,
        // Hot path: profiling disabled, return an inert guard.
        || VerboseTimingGuard::none(),
    /// Start profiling a generic activity. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    pub fn generic_activity(&self, event_id: &str) -> TimingGuard<'_> {
        self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
            // Intern the free-form event name in the profile's string
            // table on every call (unlike queries, these have no
            // pre-reserved ids).
            let event_id = profiler.profiler.alloc_string(event_id);
            TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id)
            // (closing lines elided)
    /// Start profiling a query provider. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    pub fn query_provider(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
            // Query names use pre-reserved string ids derived from the
            // enum discriminant — no per-call interning needed.
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.query_event_kind, event_id)
            // (closing lines elided)
    /// Record a query in-memory cache hit.
    pub fn query_cache_hit(&self, query_name: impl QueryName) {
        // Instant (zero-duration) event, not an interval.
        self.instant_query_event(
            |profiler| profiler.query_cache_hit_event_kind,
            // (the `query_name,` argument line is elided here)
            EventFilter::QUERY_CACHE_HITS,
    /// Start profiling a query being blocked on a concurrent execution.
    /// Profiling continues until the TimingGuard returned from this call is
    /// dropped. (doc continuation line elided)
    pub fn query_blocked(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::QUERY_BLOCKED, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.query_blocked_event_kind, event_id)
            // (closing lines elided)
    /// Start profiling how long it takes to load a query result from the
    /// incremental compilation on-disk cache. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    pub fn incr_cache_loading(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.incremental_load_result_event_kind, event_id)
            // (closing lines elided)
    // Records a zero-duration ("instant") event for a query, e.g. a cache
    // hit. `event_kind` selects which `SelfProfiler` StringId to record
    // under; the whole call is gated on `event_filter`.
    fn instant_query_event(
        // (the `&self,` line is elided)
        event_kind: fn(&SelfProfiler) -> StringId,
        query_name: impl QueryName,
        event_filter: EventFilter,
        // (the `) {` line is elided)
        // `exec` returns a TimingGuard we don't need for an instant
        // event, so it is dropped immediately.
        drop(self.exec(event_filter, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            let thread_id = thread_id_to_u32(std::thread::current().id());
            profiler.profiler.record_instant_event(event_kind(profiler), event_id, thread_id);
    /// Calls `f` with the underlying `SelfProfiler`, if profiling is
    /// enabled, so callers can register query names up front (see
    /// `SelfProfiler::register_query_name`).
    pub fn register_queries(&self, f: impl FnOnce(&SelfProfiler)) {
        if let Some(profiler) = &self.profiler {
            // (the invocation of `f` is on an elided line)
// The actual profiler state, shared behind an `Arc` by `SelfProfilerRef`.
pub struct SelfProfiler {
    // NOTE(review): the underlying `profiler` field (accessed elsewhere
    // as `self.profiler` / `profiler.profiler`) is on an elided line.

    // Which event kinds this session records.
    event_filter_mask: EventFilter,

    // Pre-interned string ids for each event-kind label, allocated once
    // in `new` so individual events don't re-intern them.
    query_event_kind: StringId,
    sparse_pass_event_kind: StringId,
    generic_pass_event_kind: StringId,
    generic_activity_event_kind: StringId,
    incremental_load_result_event_kind: StringId,
    query_blocked_event_kind: StringId,
    query_cache_hit_event_kind: StringId,
// NOTE(review): the `impl SelfProfiler` header and the `pub fn new(`
// signature line are elided; the lines below are the constructor's
// parameters and body.
    output_directory: &Path,
    crate_name: Option<&str>,
    event_filters: &Option<Vec<String>>,
    ) -> Result<SelfProfiler, Box<dyn Error>> {
        fs::create_dir_all(output_directory)?;

        // One profile file per process, e.g. `mycrate-1234.rustc_profile`.
        let crate_name = crate_name.unwrap_or("unknown-crate");
        let filename = format!("{}-{}.rustc_profile", crate_name, process::id());
        let path = output_directory.join(&filename);
        let profiler = Profiler::new(&path)?;

        // Intern the event-kind labels once up front.
        let query_event_kind = profiler.alloc_string("Query");
        let sparse_pass_event_kind = profiler.alloc_string("SparsePass");
        let generic_pass_event_kind = profiler.alloc_string("GenericPass");
        let generic_activity_event_kind = profiler.alloc_string("GenericActivity");
        let incremental_load_result_event_kind = profiler.alloc_string("IncrementalLoadResult");
        let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
        let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");

        let mut event_filter_mask = EventFilter::empty();

        // Parse the user-provided filter names, OR-ing their masks together.
        if let Some(ref event_filters) = *event_filters {
            let mut unknown_events = vec![];
            for item in event_filters {
                if let Some(&(_, mask)) =
                    EVENT_FILTERS_BY_NAME.iter().find(|&(name, _)| name == item)
                    // (the `{` and `} else {` lines are elided)
                    event_filter_mask |= mask;
                    unknown_events.push(item.clone());

            // Warn about any unknown event names
            if unknown_events.len() > 0 {
                unknown_events.sort();
                unknown_events.dedup();
                // (the warning-macro call opening is on an elided line)
                "Unknown self-profiler events specified: {}. Available options are: {}.",
                unknown_events.join(", "),
                EVENT_FILTERS_BY_NAME
                    // (`.iter()` line elided)
                    .map(|&(name, _)| name.to_string())
                    // (join/collect continuation elided)
            // Else-branch: no explicit filter list was given, so fall
            // back to the default set.
            event_filter_mask = EventFilter::DEFAULT;

        // Return value: the `Ok(SelfProfiler {` opening and the first few
        // field lines are elided in this excerpt.
            sparse_pass_event_kind,
            generic_pass_event_kind,
            generic_activity_event_kind,
            incremental_load_result_event_kind,
            query_blocked_event_kind,
            query_cache_hit_event_kind,
    // Maps a query to its pre-reserved `StringId`, derived from the
    // query-name enum's discriminant.
    fn get_query_name_string_id(query_name: impl QueryName) -> StringId {
        // (the `let discriminant =` line is elided in this excerpt)
        // NOTE(review): transmuting `Discriminant<_>` to `u64` assumes its
        // layout, which the language does not guarantee — confirm; the
        // cast to `u32` below also truncates.
        unsafe { mem::transmute::<Discriminant<_>, u64>(query_name.discriminant()) };

        StringId::reserved(discriminant as u32)
    /// Records the human-readable name of a query under its reserved
    /// `StringId`, so profile consumers can resolve the id back to a name.
    pub fn register_query_name(&self, query_name: impl QueryName) {
        let id = SelfProfiler::get_query_name_string_id(query_name);
        self.profiler.alloc_string_with_reserved_id(id, query_name.as_str());
359 pub struct TimingGuard<'a>(Option<measureme::TimingGuard<'a, SerializationSink>>);
impl<'a> TimingGuard<'a> {
    // NOTE(review): the `pub fn start(` signature line and the `event_id`
    // parameter line are elided in this excerpt.
    profiler: &'a SelfProfiler,
    event_kind: StringId,
    ) -> TimingGuard<'a> {
        // Interval events are recorded per-thread.
        let thread_id = thread_id_to_u32(std::thread::current().id());
        let raw_profiler = &profiler.profiler;
        // (the `let timing_guard =` binding line is elided; it captures
        // the result of the call below)
        raw_profiler.start_recording_interval_event(event_kind, event_id, thread_id);
        TimingGuard(Some(timing_guard))

    /// Returns an inert guard that records nothing.
    pub fn none() -> TimingGuard<'a> {
// Pairs an optional wall-clock measurement (printed on drop when verbose
// output was requested) with a regular profiler `TimingGuard`.
pub struct VerboseTimingGuard<'a> {
    // (the `event_id` field line is elided; it is read by the Drop impl)
    start: Option<Instant>, // `Some` only when verbose printing is on
    _guard: TimingGuard<'a>,
impl<'a> VerboseTimingGuard<'a> {
    // NOTE(review): the `pub fn start(` signature line and its
    // `event_id`/`verbose` parameter lines are elided in this excerpt.
    profiler: Option<(&'a SelfProfiler, StringId)>,
        // With a profiler present, begin an interval event under the given
        // event kind; otherwise keep an inert guard.
        let _guard = profiler.map_or(TimingGuard::none(), |(profiler, event_kind)| {
            let event = profiler.profiler.alloc_string(event_id);
            TimingGuard::start(profiler, event_kind, event)
        // Only capture a start time when verbose printing was requested.
        start: if verbose { Some(Instant::now()) } else { None },

    /// Runs `f`, keeping this guard (and its timing) alive for the call.
    pub fn run<R>(self, f: impl FnOnce() -> R) -> R {

    // NOTE(review): the line below appears to be the body of an elided
    // `pub fn none()` constructor — an inert guard that prints nothing.
    VerboseTimingGuard { event_id: "", start: None, _guard: TimingGuard::none() }
impl Drop for VerboseTimingGuard<'_> {
    // (the `fn drop(&mut self)` line is elided in this excerpt)
        // If verbose timing was requested (`start` is `Some`), print the
        // elapsed time for this event on the way out.
        self.start.map(|start| print_time_passes_entry(true, self.event_id, start.elapsed()));
/// Prints a `time: …` line for pass `what`, appending the resident-set
/// size when it can be determined. `do_it` presumably gates the whole
/// print via an early return on an elided line — confirm.
pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
    let mem_string = match get_resident() {
        // (`Some(n) => {` arm opening is elided)
        // Convert bytes to (decimal) megabytes, rounded.
        let mb = n as f64 / 1_000_000.0;
        format!("; rss: {}MB", mb.round() as usize)
        None => String::new(),
    println!("time: {}{}\t{}", duration_to_secs_str(dur), mem_string, what);
// Hack up our own formatting for the duration to make it easier for scripts
// to parse (always use the same number of decimal places and the same unit).
pub fn duration_to_secs_str(dur: std::time::Duration) -> String {
    const NANOS_PER_SEC: f64 = 1_000_000_000.0;
    // Whole seconds plus the sub-second remainder, as floating-point seconds.
    let fractional = f64::from(dur.subsec_nanos()) / NANOS_PER_SEC;
    let total_secs = dur.as_secs() as f64 + fractional;
    // Fixed three decimal places so every line has the same shape.
    format!("{:.3}", total_secs)
}
// Resident-set size via procfs (the platform `#[cfg]` attribute is on an
// elided line). Returns `None` if the file is missing or malformed.
fn get_resident() -> Option<usize> {
    // (the binding of `field` — the statm column index to read — is on an
    // elided line; per proc(5) the "resident" column is field index 1)
    let contents = fs::read("/proc/self/statm").ok()?;
    let contents = String::from_utf8(contents).ok()?;
    let s = contents.split_whitespace().nth(field)?;
    let npages = s.parse::<usize>().ok()?;
    // (conversion from pages to bytes and the return are on elided lines)
// Windows variant (its `#[cfg(windows)]` attribute is on an elided line):
// reports the current working-set size via the `psapi` library.
fn get_resident() -> Option<usize> {
    // FFI scaffolding. NOTE(review): several lines are elided here,
    // including the `DWORD`/`size_t` type aliases, a presumable
    // `#[repr(C)]` attribute, and the struct's leading `cb: DWORD` field
    // (which is assigned below).
    type HANDLE = *mut u8;
    #[allow(non_snake_case)]
    struct PROCESS_MEMORY_COUNTERS {
        PageFaultCount: DWORD,
        PeakWorkingSetSize: size_t,
        WorkingSetSize: size_t,
        QuotaPeakPagedPoolUsage: size_t,
        QuotaPagedPoolUsage: size_t,
        QuotaPeakNonPagedPoolUsage: size_t,
        QuotaNonPagedPoolUsage: size_t,
        PagefileUsage: size_t,
        PeakPagefileUsage: size_t,
    #[allow(non_camel_case_types)]
    type PPROCESS_MEMORY_COUNTERS = *mut PROCESS_MEMORY_COUNTERS;
    #[link(name = "psapi")]
    // (the `extern "system" {` line is elided)
        fn GetCurrentProcess() -> HANDLE;
        fn GetProcessMemoryInfo(
            // (the process-handle parameter line is elided)
            ppsmemCounters: PPROCESS_MEMORY_COUNTERS,
            // (`cb` parameter, return type, and closing lines are elided)
    // Zero-initialize the out-struct and tell the API its size via `cb`.
    let mut pmc: PROCESS_MEMORY_COUNTERS = unsafe { mem::zeroed() };
    pmc.cb = mem::size_of_val(&pmc) as DWORD;
    match unsafe { GetProcessMemoryInfo(GetCurrentProcess(), &mut pmc, pmc.cb) } {
        // (a failure arm, presumably `0 => None,`, is on an elided line)
        // On success, report the working-set size in bytes.
        _ => Some(pmc.WorkingSetSize as usize),