1 //! Simple hierarchical profiler
4 collections::{BTreeMap, HashSet},
8 atomic::{AtomicBool, Ordering},
11 time::{Duration, Instant},
14 use once_cell::sync::Lazy;
16 use crate::tree::{Idx, Tree};
19 /// env RA_PROFILE=* // dump everything
20 /// env RA_PROFILE=foo|bar|baz // enabled only selected entries
21 /// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms
23 countme::enable(env::var("RA_COUNT").is_ok());
24 let spec = env::var("RA_PROFILE").unwrap_or_default();
28 pub fn init_from(spec: &str) {
29 let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
33 type Label = &'static str;
35 /// This function starts a profiling scope in the current execution stack with a given description.
36 /// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
37 /// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
38 /// In this case the profiling information will be nested at the output.
39 /// Profiling information is being printed in the stderr.
43 /// profile::init_from("profile1|profile2@2");
44 /// profiling_function1();
46 /// fn profiling_function1() {
47 /// let _p = profile::span("profile1");
48 /// profiling_function2();
51 /// fn profiling_function2() {
52 /// let _p = profile::span("profile2");
55 /// This will print in the stderr the following:
61 pub fn span(label: Label) -> ProfileSpan {
62 debug_assert!(!label.is_empty());
64 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
65 if enabled && with_profile_stack(|stack| stack.push(label)) {
66 ProfileSpan(Some(ProfilerImpl { label, detail: None }))
73 pub fn heartbeat_span() -> HeartbeatSpan {
74 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
75 HeartbeatSpan::new(enabled)
80 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
82 with_profile_stack(|it| it.heartbeat(1));
86 pub struct ProfileSpan(Option<ProfilerImpl>);
90 detail: Option<String>,
94 pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
95 if let Some(profiler) = &mut self.0 {
96 profiler.detail = Some(detail());
102 impl Drop for ProfilerImpl {
105 with_profile_stack(|it| it.pop(self.label, self.detail.take()));
109 pub struct HeartbeatSpan {
115 pub fn new(enabled: bool) -> Self {
117 with_profile_stack(|it| it.heartbeats(true));
123 impl Drop for HeartbeatSpan {
126 with_profile_stack(|it| it.heartbeats(false));
131 static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
132 static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
134 fn with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T {
135 thread_local!(static STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
136 STACK.with(|it| f(&mut *it.borrow_mut()))
139 #[derive(Default, Clone, Debug)]
142 allowed: HashSet<String>,
143 longer_than: Duration,
144 heartbeat_longer_than: Duration,
149 fn disabled() -> Filter {
153 fn from_spec(mut spec: &str) -> Filter {
154 let longer_than = if let Some(idx) = spec.rfind('>') {
155 let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
157 Duration::from_millis(longer_than)
161 let heartbeat_longer_than = longer_than;
163 let depth = if let Some(idx) = spec.rfind('@') {
164 let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
171 if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
172 Filter { depth, allowed, longer_than, heartbeat_longer_than, version: 0 }
175 fn install(mut self) {
176 PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
177 let mut old = FILTER.write().unwrap();
178 self.version = old.version + 1;
183 struct ProfileStack {
186 messages: Tree<Message>,
199 detail: Option<String>,
203 fn new() -> ProfileStack {
206 messages: Tree::default(),
207 filter: Default::default(),
212 fn push(&mut self, label: Label) -> bool {
213 if self.frames.is_empty() {
214 if let Ok(f) = FILTER.try_read() {
215 if f.version > self.filter.version {
216 self.filter = f.clone();
220 if self.frames.len() > self.filter.depth {
223 let allowed = &self.filter.allowed;
224 if self.frames.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
228 self.frames.push(Frame { t: Instant::now(), heartbeats: 0 });
229 self.messages.start();
233 fn pop(&mut self, label: Label, detail: Option<String>) {
234 let frame = self.frames.pop().unwrap();
235 let duration = frame.t.elapsed();
238 self.heartbeat(frame.heartbeats);
239 let avg_span = duration / (frame.heartbeats + 1);
240 if avg_span > self.filter.heartbeat_longer_than {
241 eprintln!("Too few heartbeats {} ({}/{:?})?", label, frame.heartbeats, duration);
245 self.messages.finish(Message { duration, label, detail });
246 if self.frames.is_empty() {
247 let longer_than = self.filter.longer_than;
248 // Convert to millis for comparison to avoid problems with rounding
249 // (otherwise we could print `0ms` despite user's `>0` filter when
250 // `duration` is just a few nanos).
251 if duration.as_millis() > longer_than.as_millis() {
252 if let Some(root) = self.messages.root() {
253 print(&self.messages, root, 0, longer_than, &mut stderr().lock());
256 self.messages.clear();
260 fn heartbeats(&mut self, yes: bool) {
261 self.heartbeats = yes;
263 fn heartbeat(&mut self, n: u32) {
264 if let Some(frame) = self.frames.last_mut() {
265 frame.heartbeats += n;
271 tree: &Tree<Message>,
274 longer_than: Duration,
275 out: &mut impl Write,
277 let current_indent = " ".repeat(level as usize);
278 let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
283 ms(tree[curr].duration),
287 .expect("printing profiling info");
289 let mut accounted_for = Duration::default();
290 let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
291 for child in tree.children(curr) {
292 accounted_for += tree[child].duration;
294 if tree[child].duration.as_millis() > longer_than.as_millis() {
295 print(tree, child, level + 1, longer_than, out);
297 let (total_duration, cnt) =
298 short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
299 *total_duration += tree[child].duration;
304 for (child_msg, (duration, count)) in &short_children {
305 writeln!(out, " {}{} - {} ({} calls)", current_indent, ms(*duration), child_msg, count)
306 .expect("printing profiling info");
309 let unaccounted = tree[curr].duration - accounted_for;
310 if tree.children(curr).next().is_some() && unaccounted > longer_than {
311 writeln!(out, " {}{} - ???", current_indent, ms(unaccounted))
312 .expect("printing profiling info");
316 #[allow(non_camel_case_types)]
319 impl fmt::Display for ms {
320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321 match self.0.as_millis() {
322 0 => f.write_str(" 0 "),
323 n => write!(f, "{:5}ms", n),