]> git.lizzy.rs Git - rust.git/blob - src/librustc_data_structures/profiling.rs
Auto merge of #67732 - pietroalbini:fewer-apples, r=alexcrichton
[rust.git] / src / librustc_data_structures / profiling.rs
1 use std::error::Error;
2 use std::fs;
3 use std::mem::{self, Discriminant};
4 use std::path::Path;
5 use std::process;
6 use std::sync::Arc;
7 use std::thread::ThreadId;
8 use std::time::{Duration, Instant};
9 use std::u32;
10
11 use crate::cold_path;
12
13 use measureme::StringId;
14
/// MmapSerializationSink is faster on macOS and Linux
/// but FileSerializationSink is faster on Windows
#[cfg(not(windows))]
type SerializationSink = measureme::MmapSerializationSink;
#[cfg(windows)]
type SerializationSink = measureme::FileSerializationSink;

// The concrete profiler type, specialized to the platform's fastest sink.
type Profiler = measureme::Profiler<SerializationSink>;
23
/// Implemented by the compiler's query-name enum so each query can be
/// identified in profiling events by a stable discriminant and a
/// human-readable name.
pub trait QueryName: Sized + Copy {
    /// The discriminant identifying this query kind (used to derive a
    /// reserved `StringId`, see `SelfProfiler::get_query_name_string_id`).
    fn discriminant(self) -> Discriminant<Self>;
    /// The query's name as a static string, for display in profiles.
    fn as_str(self) -> &'static str;
}
28
/// Coarse compilation-phase categories that profiled events can be
/// attributed to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum ProfileCategory {
    Parsing,
    Expansion,
    TypeChecking,
    BorrowChecking,
    Codegen,
    Linking,
    Other,
}
39
bitflags::bitflags! {
    /// Bitmask selecting which kinds of events are recorded by the
    /// self-profiler. One bit per event kind; see `EVENT_FILTERS_BY_NAME`
    /// for the command-line names of each bit.
    struct EventFilter: u32 {
        const GENERIC_ACTIVITIES = 1 << 0;
        const QUERY_PROVIDERS    = 1 << 1;
        const QUERY_CACHE_HITS   = 1 << 2;
        const QUERY_BLOCKED      = 1 << 3;
        const INCR_CACHE_LOADS   = 1 << 4;
        const SPARSE_PASS   = 1 << 5;
        const GENERIC_PASS   = 1 << 6;

        // What gets recorded when the user enables self-profiling without
        // naming specific events. Note: cache hits are deliberately excluded
        // because they are very high-volume.
        const DEFAULT = Self::GENERIC_ACTIVITIES.bits |
                        Self::QUERY_PROVIDERS.bits |
                        Self::QUERY_BLOCKED.bits |
                        Self::INCR_CACHE_LOADS.bits |
                        Self::SPARSE_PASS.bits |
                        Self::GENERIC_PASS.bits;

        // empty() and none() aren't const-fns unfortunately
        const NONE = 0;
        const ALL  = !Self::NONE.bits;
    }
}
62
/// Maps the event-filter names accepted on the command line to their
/// corresponding `EventFilter` bits. Unknown names are reported by
/// `SelfProfiler::new`.
const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
    ("none", EventFilter::NONE),
    ("all", EventFilter::ALL),
    ("sparse-pass", EventFilter::SPARSE_PASS),
    ("generic-pass", EventFilter::GENERIC_PASS),
    ("generic-activity", EventFilter::GENERIC_ACTIVITIES),
    ("query-provider", EventFilter::QUERY_PROVIDERS),
    ("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
    ("query-blocked", EventFilter::QUERY_BLOCKED),
    ("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
];
74
/// Maps a `ThreadId` to a `u32` for embedding in profiling events.
///
/// FIXME(review): the transmute assumes `ThreadId` is layout-compatible with
/// `u64`, which is an unstable implementation detail of the standard library,
/// and the `as u32` cast further assumes thread ids stay below 2^32 — both
/// hold today but are not guaranteed.
fn thread_id_to_u32(tid: ThreadId) -> u32 {
    unsafe { mem::transmute::<ThreadId, u64>(tid) as u32 }
}
78
/// A reference to the SelfProfiler. It can be cloned and sent across thread
/// boundaries at will.
#[derive(Clone)]
pub struct SelfProfilerRef {
    // This field is `None` if self-profiling is disabled for the current
    // compilation session.
    profiler: Option<Arc<SelfProfiler>>,

    // We store the filter mask directly in the reference because that doesn't
    // cost anything and allows for filtering without checking if the profiler
    // is actually enabled.
    event_filter_mask: EventFilter,

    // Print sparse passes to stdout
    verbose_sparse: bool,

    // Print generic passes to stdout
    verbose_generic: bool,
}
98
impl SelfProfilerRef {
    /// Creates a new reference, deriving the event-filter mask from the
    /// profiler (or `NONE` if profiling is disabled) and additionally
    /// enabling the pass bits when verbose printing is requested.
    pub fn new(
        profiler: Option<Arc<SelfProfiler>>,
        verbose_sparse: bool,
        verbose_generic: bool,
    ) -> SelfProfilerRef {
        // If there is no SelfProfiler then the filter mask is set to NONE,
        // ensuring that nothing ever tries to actually access it.
        let mut event_filter_mask =
            profiler.as_ref().map(|p| p.event_filter_mask).unwrap_or(EventFilter::NONE);

        // `-Z time-passes`-style printing works even without a profiler, so
        // the pass bits may be set while `profiler` is `None`; the pass
        // methods below handle that case via `Option::map`.
        if verbose_sparse {
            event_filter_mask |= EventFilter::SPARSE_PASS;
        }

        if verbose_generic {
            event_filter_mask |= EventFilter::GENERIC_PASS;
        }

        SelfProfilerRef { profiler, event_filter_mask, verbose_sparse, verbose_generic }
    }

    // Runs `f` against the profiler if `event_filter` is enabled, otherwise
    // returns an inert guard. Callers must only use this for filters that
    // imply the profiler exists (see the `unwrap` below).
    #[inline(always)]
    fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
    where
        F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>,
    {
        self.handle_event(
            event_filter,
            || f(self.profiler.as_ref().unwrap()),
            || TimingGuard::none(),
        )
    }

    // This shim makes sure that cold calls only get executed if the filter mask
    // lets them pass. It also contains some trickery to make sure that
    // code is optimized for non-profiling compilation sessions, i.e. anything
    // past the filter check is never inlined so it doesn't clutter the fast
    // path.
    #[inline(always)]
    fn handle_event<R>(
        &self,
        event_filter: EventFilter,
        cold: impl FnOnce() -> R,
        hot: impl FnOnce() -> R,
    ) -> R {
        if unlikely!(self.event_filter_mask.contains(event_filter)) {
            cold_path(|| cold())
        } else {
            hot()
        }
    }

    /// Start profiling a sparse pass. Profiling continues until the
    /// VerboseTimingGuard returned from this call is dropped.
    #[inline(always)]
    pub fn sparse_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
        self.handle_event(
            EventFilter::SPARSE_PASS,
            || {
                // The profiler may be `None` here (verbose-only mode); the
                // guard then just prints timings without recording events.
                VerboseTimingGuard::start(
                    self.profiler
                        .as_ref()
                        .map(|profiler| (&**profiler, profiler.sparse_pass_event_kind)),
                    event_id,
                    self.verbose_sparse,
                )
            },
            || VerboseTimingGuard::none(),
        )
    }

    /// Start profiling a generic pass. Profiling continues until the
    /// VerboseTimingGuard returned from this call is dropped.
    #[inline(always)]
    pub fn generic_pass<'a>(&'a self, event_id: &'a str) -> VerboseTimingGuard<'a> {
        self.handle_event(
            EventFilter::GENERIC_PASS,
            || {
                VerboseTimingGuard::start(
                    self.profiler
                        .as_ref()
                        .map(|profiler| (&**profiler, profiler.generic_pass_event_kind)),
                    event_id,
                    self.verbose_generic,
                )
            },
            || VerboseTimingGuard::none(),
        )
    }

    /// Start profiling a generic activity. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    #[inline(always)]
    pub fn generic_activity(&self, event_id: &str) -> TimingGuard<'_> {
        self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
            // Unlike queries, generic activities have free-form ids that
            // must be interned on every call.
            let event_id = profiler.profiler.alloc_string(event_id);
            TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id)
        })
    }

    /// Start profiling a query provider. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    #[inline(always)]
    pub fn query_provider(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.query_event_kind, event_id)
        })
    }

    /// Record a query in-memory cache hit.
    #[inline(always)]
    pub fn query_cache_hit(&self, query_name: impl QueryName) {
        self.instant_query_event(
            |profiler| profiler.query_cache_hit_event_kind,
            query_name,
            EventFilter::QUERY_CACHE_HITS,
        );
    }

    /// Start profiling a query being blocked on a concurrent execution.
    /// Profiling continues until the TimingGuard returned from this call is
    /// dropped.
    #[inline(always)]
    pub fn query_blocked(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::QUERY_BLOCKED, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.query_blocked_event_kind, event_id)
        })
    }

    /// Start profiling how long it takes to load a query result from the
    /// incremental compilation on-disk cache. Profiling continues until the
    /// TimingGuard returned from this call is dropped.
    #[inline(always)]
    pub fn incr_cache_loading(&self, query_name: impl QueryName) -> TimingGuard<'_> {
        self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            TimingGuard::start(profiler, profiler.incremental_load_result_event_kind, event_id)
        })
    }

    // Records a zero-duration event for the given query, subject to
    // `event_filter`. The `drop` discards the inert guard returned by `exec`.
    #[inline(always)]
    fn instant_query_event(
        &self,
        event_kind: fn(&SelfProfiler) -> StringId,
        query_name: impl QueryName,
        event_filter: EventFilter,
    ) {
        drop(self.exec(event_filter, |profiler| {
            let event_id = SelfProfiler::get_query_name_string_id(query_name);
            let thread_id = thread_id_to_u32(std::thread::current().id());

            profiler.profiler.record_instant_event(event_kind(profiler), event_id, thread_id);

            TimingGuard::none()
        }));
    }

    /// Runs `f` with the underlying profiler, if profiling is enabled.
    /// Used at session start to register all query names.
    pub fn register_queries(&self, f: impl FnOnce(&SelfProfiler)) {
        if let Some(profiler) = &self.profiler {
            f(&profiler)
        }
    }
}
265
/// The actual profiler state: the measureme backend plus the interned
/// `StringId`s for each event kind (allocated once in `new` so recording an
/// event never has to intern its kind string).
pub struct SelfProfiler {
    profiler: Profiler,
    event_filter_mask: EventFilter,
    query_event_kind: StringId,
    sparse_pass_event_kind: StringId,
    generic_pass_event_kind: StringId,
    generic_activity_event_kind: StringId,
    incremental_load_result_event_kind: StringId,
    query_blocked_event_kind: StringId,
    query_cache_hit_event_kind: StringId,
}
277
278 impl SelfProfiler {
279     pub fn new(
280         output_directory: &Path,
281         crate_name: Option<&str>,
282         event_filters: &Option<Vec<String>>,
283     ) -> Result<SelfProfiler, Box<dyn Error>> {
284         fs::create_dir_all(output_directory)?;
285
286         let crate_name = crate_name.unwrap_or("unknown-crate");
287         let filename = format!("{}-{}.rustc_profile", crate_name, process::id());
288         let path = output_directory.join(&filename);
289         let profiler = Profiler::new(&path)?;
290
291         let query_event_kind = profiler.alloc_string("Query");
292         let sparse_pass_event_kind = profiler.alloc_string("SparsePass");
293         let generic_pass_event_kind = profiler.alloc_string("GenericPass");
294         let generic_activity_event_kind = profiler.alloc_string("GenericActivity");
295         let incremental_load_result_event_kind = profiler.alloc_string("IncrementalLoadResult");
296         let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
297         let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
298
299         let mut event_filter_mask = EventFilter::empty();
300
301         if let Some(ref event_filters) = *event_filters {
302             let mut unknown_events = vec![];
303             for item in event_filters {
304                 if let Some(&(_, mask)) =
305                     EVENT_FILTERS_BY_NAME.iter().find(|&(name, _)| name == item)
306                 {
307                     event_filter_mask |= mask;
308                 } else {
309                     unknown_events.push(item.clone());
310                 }
311             }
312
313             // Warn about any unknown event names
314             if unknown_events.len() > 0 {
315                 unknown_events.sort();
316                 unknown_events.dedup();
317
318                 warn!(
319                     "Unknown self-profiler events specified: {}. Available options are: {}.",
320                     unknown_events.join(", "),
321                     EVENT_FILTERS_BY_NAME
322                         .iter()
323                         .map(|&(name, _)| name.to_string())
324                         .collect::<Vec<_>>()
325                         .join(", ")
326                 );
327             }
328         } else {
329             event_filter_mask = EventFilter::DEFAULT;
330         }
331
332         Ok(SelfProfiler {
333             profiler,
334             event_filter_mask,
335             query_event_kind,
336             sparse_pass_event_kind,
337             generic_pass_event_kind,
338             generic_activity_event_kind,
339             incremental_load_result_event_kind,
340             query_blocked_event_kind,
341             query_cache_hit_event_kind,
342         })
343     }
344
345     fn get_query_name_string_id(query_name: impl QueryName) -> StringId {
346         let discriminant =
347             unsafe { mem::transmute::<Discriminant<_>, u64>(query_name.discriminant()) };
348
349         StringId::reserved(discriminant as u32)
350     }
351
352     pub fn register_query_name(&self, query_name: impl QueryName) {
353         let id = SelfProfiler::get_query_name_string_id(query_name);
354         self.profiler.alloc_string_with_reserved_id(id, query_name.as_str());
355     }
356 }
357
/// RAII guard for an interval event; the interval ends when the guard is
/// dropped. `None` means profiling is disabled and nothing is recorded.
#[must_use]
pub struct TimingGuard<'a>(Option<measureme::TimingGuard<'a, SerializationSink>>);
360
impl<'a> TimingGuard<'a> {
    /// Starts recording an interval event of kind `event_kind` with id
    /// `event_id` for the current thread. Recording stops when the returned
    /// guard is dropped.
    #[inline]
    pub fn start(
        profiler: &'a SelfProfiler,
        event_kind: StringId,
        event_id: StringId,
    ) -> TimingGuard<'a> {
        let thread_id = thread_id_to_u32(std::thread::current().id());
        let raw_profiler = &profiler.profiler;
        let timing_guard =
            raw_profiler.start_recording_interval_event(event_kind, event_id, thread_id);
        TimingGuard(Some(timing_guard))
    }

    /// Returns an inert guard that records nothing (profiling disabled).
    #[inline]
    pub fn none() -> TimingGuard<'a> {
        TimingGuard(None)
    }
}
380
/// Like `TimingGuard`, but additionally prints the elapsed wall-clock time
/// (via `print_time_passes_entry`) on drop when `start` is `Some`.
#[must_use]
pub struct VerboseTimingGuard<'a> {
    // Label printed alongside the elapsed time.
    event_id: &'a str,
    // `Some` iff verbose printing was requested when the guard was created.
    start: Option<Instant>,
    _guard: TimingGuard<'a>,
}
387
impl<'a> VerboseTimingGuard<'a> {
    /// Starts timing `event_id`. If `profiler` is `Some`, an interval event
    /// of the given kind is recorded as well; if `verbose` is set, the
    /// elapsed wall-clock time is printed when the guard is dropped.
    pub fn start(
        profiler: Option<(&'a SelfProfiler, StringId)>,
        event_id: &'a str,
        verbose: bool,
    ) -> Self {
        let _guard = profiler.map_or(TimingGuard::none(), |(profiler, event_kind)| {
            let event = profiler.profiler.alloc_string(event_id);
            TimingGuard::start(profiler, event_kind, event)
        });
        VerboseTimingGuard {
            event_id,
            _guard,
            // Only take a timestamp when it will actually be printed.
            start: if verbose { Some(Instant::now()) } else { None },
        }
    }

    /// Runs `f` while this guard is alive, i.e. times the closure.
    #[inline(always)]
    pub fn run<R>(self, f: impl FnOnce() -> R) -> R {
        let _timer = self;
        f()
    }

    /// Returns a guard that neither records nor prints anything.
    fn none() -> Self {
        VerboseTimingGuard { event_id: "", start: None, _guard: TimingGuard::none() }
    }
}
415
416 impl Drop for VerboseTimingGuard<'_> {
417     fn drop(&mut self) {
418         self.start.map(|start| print_time_passes_entry(true, self.event_id, start.elapsed()));
419     }
420 }
421
422 pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
423     if !do_it {
424         return;
425     }
426
427     let mem_string = match get_resident() {
428         Some(n) => {
429             let mb = n as f64 / 1_000_000.0;
430             format!("; rss: {}MB", mb.round() as usize)
431         }
432         None => String::new(),
433     };
434     println!("time: {}{}\t{}", duration_to_secs_str(dur), mem_string, what);
435 }
436
// Hack up our own formatting for the duration to make it easier for scripts
// to parse (always use the same number of decimal places and the same unit).
pub fn duration_to_secs_str(dur: std::time::Duration) -> String {
    // `Duration::as_secs_f64` performs the same seconds-plus-fractional-nanos
    // computation the previous hand-rolled version did, via the standard
    // library.
    format!("{:.3}", dur.as_secs_f64())
}
445
446 // Memory reporting
/// Returns the process's resident set size in bytes, read from
/// `/proc/self/statm` (field 1, in pages). Returns `None` when the file is
/// missing or unparsable — e.g. on unix platforms without procfs.
#[cfg(unix)]
fn get_resident() -> Option<usize> {
    // NOTE(review): assumes 4 KiB pages, as the original did — TODO confirm
    // against the actual system page size.
    const PAGE_SIZE: usize = 4096;
    let statm = String::from_utf8(fs::read("/proc/self/statm").ok()?).ok()?;
    let resident_pages: usize = statm.split_whitespace().nth(1)?.parse().ok()?;
    Some(resident_pages * PAGE_SIZE)
}
456
/// Returns the process's working-set size in bytes via the Win32
/// `GetProcessMemoryInfo` API (psapi). Returns `None` if the call fails.
#[cfg(windows)]
fn get_resident() -> Option<usize> {
    // Hand-rolled Win32 type aliases and struct layout, mirroring
    // <psapi.h>, to avoid a dependency on the winapi crate.
    type BOOL = i32;
    type DWORD = u32;
    type HANDLE = *mut u8;
    use libc::size_t;
    #[repr(C)]
    #[allow(non_snake_case)]
    struct PROCESS_MEMORY_COUNTERS {
        cb: DWORD,
        PageFaultCount: DWORD,
        PeakWorkingSetSize: size_t,
        WorkingSetSize: size_t,
        QuotaPeakPagedPoolUsage: size_t,
        QuotaPagedPoolUsage: size_t,
        QuotaPeakNonPagedPoolUsage: size_t,
        QuotaNonPagedPoolUsage: size_t,
        PagefileUsage: size_t,
        PeakPagefileUsage: size_t,
    }
    #[allow(non_camel_case_types)]
    type PPROCESS_MEMORY_COUNTERS = *mut PROCESS_MEMORY_COUNTERS;
    #[link(name = "psapi")]
    extern "system" {
        fn GetCurrentProcess() -> HANDLE;
        fn GetProcessMemoryInfo(
            Process: HANDLE,
            ppsmemCounters: PPROCESS_MEMORY_COUNTERS,
            cb: DWORD,
        ) -> BOOL;
    }
    // SAFETY: PROCESS_MEMORY_COUNTERS is plain-old-data, so an all-zero
    // value is valid; `cb` must be set to the struct size per the API.
    let mut pmc: PROCESS_MEMORY_COUNTERS = unsafe { mem::zeroed() };
    pmc.cb = mem::size_of_val(&pmc) as DWORD;
    // SAFETY: `pmc` is a valid, writable struct of the size passed in `cb`,
    // and `GetCurrentProcess` returns a pseudo-handle that is always valid.
    match unsafe { GetProcessMemoryInfo(GetCurrentProcess(), &mut pmc, pmc.cb) } {
        // A zero return value signals failure.
        0 => None,
        _ => Some(pmc.WorkingSetSize as usize),
    }
}