1 //! Simple hierarchical profiler
4 collections::{BTreeMap, HashSet},
8 atomic::{AtomicBool, Ordering},
11 time::{Duration, Instant},
14 use once_cell::sync::Lazy;
16 use crate::tree::{Idx, Tree};
19 /// env RA_PROFILE=* // dump everything
20 /// env RA_PROFILE=foo|bar|baz // enable only the selected entries
21 /// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms
23 countme::enable(env::var("RA_COUNT").is_ok());
24 let spec = env::var("RA_PROFILE").unwrap_or_default();
28 pub fn init_from(spec: &str) {
29 let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
/// Name of a profiling span. It is passed to `span`, and `ProfileStack::pop`
/// stores it in the recorded `Message` alongside the measured duration.
33 type Label = &'static str;
35 /// This function starts a profiling scope in the current execution stack with a given description.
36 /// It returns a `ProfileSpan` that measures the time elapsed between this call and the moment the `ProfileSpan` is dropped.
37 /// Nested profiling scopes are supported: this function may be invoked again while an enclosing scope is still active on the execution stack.
38 /// In that case the profiling information is nested in the output.
39 /// Profiling information is printed to stderr.
43 /// profile::init_from("profile1|profile2@2");
44 /// profiling_function1();
46 /// fn profiling_function1() {
47 /// let _p = profile::span("profile1");
48 /// profiling_function2();
51 /// fn profiling_function2() {
52 /// let _p = profile::span("profile2");
55 /// This will print the following to stderr:
61 pub fn span(label: Label) -> ProfileSpan {
62 debug_assert!(!label.is_empty());
64 let enabled = PROFILING_ENABLED.load(Ordering::Relaxed);
65 if enabled && with_profile_stack(|stack| stack.push(label)) {
66 ProfileSpan(Some(ProfilerImpl { label, detail: None }))
/// Handle returned by `span`. It holds `Some(ProfilerImpl)` when profiling is
/// enabled and the thread-local stack accepted the label.
// NOTE(review): presumably holds `None` otherwise, making the span a no-op;
// the `else` branch of `span` is not visible here, so confirm.
72 pub struct ProfileSpan(Option<ProfilerImpl>);
76 detail: Option<String>,
80 pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
81 if let Some(profiler) = &mut self.0 {
82 profiler.detail = Some(detail())
88 impl Drop for ProfilerImpl {
91 with_profile_stack(|it| it.pop(self.label, self.detail.take()));
/// Global on/off switch for profiling. `span` reads it with a relaxed load
/// before touching the thread-local stack; `Filter::install` sets it to
/// `depth > 0`.
95 static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
/// The globally installed filter. `Filter::install` takes the write lock and
/// bumps the version; `ProfileStack::push` uses `try_read` and copies the
/// filter into its thread-local state when the global version is newer.
96 static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
98 fn with_profile_stack<T>(f: impl FnOnce(&mut ProfileStack) -> T) -> T {
99 thread_local!(static STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
100 STACK.with(|it| f(&mut *it.borrow_mut()))
103 #[derive(Default, Clone, Debug)]
106 allowed: HashSet<String>,
107 longer_than: Duration,
112 fn disabled() -> Filter {
116 fn from_spec(mut spec: &str) -> Filter {
117 let longer_than = if let Some(idx) = spec.rfind('>') {
118 let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
120 Duration::from_millis(longer_than)
125 let depth = if let Some(idx) = spec.rfind('@') {
126 let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
133 if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
134 Filter { depth, allowed, longer_than, version: 0 }
137 fn install(mut self) {
138 PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
139 let mut old = FILTER.write().unwrap();
140 self.version = old.version + 1;
145 struct ProfileStack {
146 starts: Vec<Instant>,
148 messages: Tree<Message>,
155 detail: Option<String>,
159 fn new() -> ProfileStack {
160 ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() }
163 fn push(&mut self, label: Label) -> bool {
164 if self.starts.is_empty() {
165 if let Ok(f) = FILTER.try_read() {
166 if f.version > self.filter.version {
167 self.filter = f.clone();
171 if self.starts.len() > self.filter.depth {
174 let allowed = &self.filter.allowed;
175 if self.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
179 self.starts.push(Instant::now());
180 self.messages.start();
184 fn pop(&mut self, label: Label, detail: Option<String>) {
185 let start = self.starts.pop().unwrap();
186 let duration = start.elapsed();
187 self.messages.finish(Message { duration, label, detail });
188 if self.starts.is_empty() {
189 let longer_than = self.filter.longer_than;
190 // Convert to millis for comparison to avoid problems with rounding
191 // (otherwise we could print `0ms` despite user's `>0` filter when
192 // `duration` is just a few nanos).
193 if duration.as_millis() > longer_than.as_millis() {
194 if let Some(root) = self.messages.root() {
195 print(&self.messages, root, 0, longer_than, &mut stderr().lock());
198 self.messages.clear();
204 tree: &Tree<Message>,
207 longer_than: Duration,
208 out: &mut impl Write,
210 let current_indent = " ".repeat(level as usize);
211 let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
216 tree[curr].duration.as_millis(),
220 .expect("printing profiling info");
222 let mut accounted_for = Duration::default();
223 let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
224 for child in tree.children(curr) {
225 accounted_for += tree[child].duration;
227 if tree[child].duration.as_millis() > longer_than.as_millis() {
228 print(tree, child, level + 1, longer_than, out)
230 let (total_duration, cnt) =
231 short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
232 *total_duration += tree[child].duration;
237 for (child_msg, (duration, count)) in short_children.iter() {
238 let millis = duration.as_millis();
239 writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
240 .expect("printing profiling info");
243 let unaccounted = tree[curr].duration - accounted_for;
244 if tree.children(curr).next().is_some() && unaccounted > longer_than {
245 writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted.as_millis())
246 .expect("printing profiling info");