use getopts;
+use json::ToJson;
+use json;
+use serialize::Decodable;
use sort;
-use stats;
use stats::Stats;
+use stats;
use term;
use time::precise_time_ns;
+use treemap::TreeMap;
use std::comm::{stream, SharedChan};
use std::either;
use std::task;
use std::to_str::ToStr;
use std::u64;
+use std::hashmap::HashMap;
+use std::os;
// The name of a test. By convention this follows the rules for rust
testfn: TestFn,
}
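+/// A single measured value together with its uncertainty (see
+/// `MetricMap::insert_metric` for the sign convention on `noise`).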
+#[deriving(Encodable,Decodable,Eq)]
+pub struct Metric {
+ value: f64,
+ noise: f64
+}
+
+pub struct MetricMap(TreeMap<~str,Metric>);
+
+/// Analysis of a single change in a metric between two runs.
+pub enum MetricChange {
+ LikelyNoise,
+ MetricAdded,
+ MetricRemoved,
+ Improvement(f64),
+ Regression(f64)
+}
+
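+/// The result of comparing two `MetricMap`s: one `MetricChange` per
+/// metric name appearing in either map.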
+pub type MetricDiff = TreeMap<~str,MetricChange>;
+
// The default console test runner. It accepts the command line
// arguments and a vector of test_descs.
pub fn test_main(args: &[~str], tests: ~[TestDescAndFn]) {
run_ignored: bool,
run_tests: bool,
run_benchmarks: bool,
- save_results: Option<Path>,
- compare_results: Option<Path>,
+ ratchet_metrics: Option<Path>,
+ save_metrics: Option<Path>,
logfile: Option<Path>
}
let opts = ~[getopts::optflag("ignored"),
getopts::optflag("test"),
getopts::optflag("bench"),
- getopts::optopt("save"),
- getopts::optopt("diff"),
+ getopts::optopt("save-metrics"),
+ getopts::optopt("ratchet-metrics"),
getopts::optopt("logfile")];
let matches =
match getopts::getopts(args_, opts) {
let run_tests = ! run_benchmarks ||
getopts::opt_present(&matches, "test");
- let save_results = getopts::opt_maybe_str(&matches, "save");
- let save_results = save_results.map(|s| Path(*s));
+ let ratchet_metrics = getopts::opt_maybe_str(&matches, "ratchet-metrics");
+ let ratchet_metrics = ratchet_metrics.map(|s| Path(*s));
- let compare_results = getopts::opt_maybe_str(&matches, "diff");
- let compare_results = compare_results.map(|s| Path(*s));
+ let save_metrics = getopts::opt_maybe_str(&matches, "save-metrics");
+ let save_metrics = save_metrics.map(|s| Path(*s));
let test_opts = TestOpts {
filter: filter,
run_ignored: run_ignored,
run_tests: run_tests,
run_benchmarks: run_benchmarks,
- save_results: save_results,
- compare_results: compare_results,
+ ratchet_metrics: ratchet_metrics,
+ save_metrics: save_metrics,
logfile: logfile
};
failed: uint,
ignored: uint,
benchmarked: uint,
+ metrics: MetricMap,
failures: ~[TestDesc]
}
failed: 0u,
ignored: 0u,
benchmarked: 0u,
+ metrics: MetricMap::new(),
failures: ~[]
}
}
self.write_pretty("bench", term::color::CYAN);
}
+
+ pub fn write_added(&self) {
+ self.write_pretty("added", term::color::GREEN);
+ }
+
+ pub fn write_improved(&self) {
+ self.write_pretty("improved", term::color::GREEN);
+ }
+
+ pub fn write_removed(&self) {
+ self.write_pretty("removed", term::color::YELLOW);
+ }
+
+ pub fn write_regressed(&self) {
+ self.write_pretty("regressed", term::color::RED);
+ }
+
pub fn write_pretty(&self,
word: &str,
color: term::color::Color) {
}
}
- pub fn write_run_finish(&self) -> bool {
+ pub fn write_metric_diff(&self, diff: &MetricDiff) {
+ let mut noise = 0;
+ let mut improved = 0;
+ let mut regressed = 0;
+ let mut added = 0;
+ let mut removed = 0;
+
+ for diff.iter().advance() |(k, v)| {
+ match *v {
+ LikelyNoise => noise += 1,
+ MetricAdded => {
+ added += 1;
+ self.write_added();
+ self.out.write_line(fmt!(": %s", *k));
+ }
+ MetricRemoved => {
+ removed += 1;
+ self.write_removed();
+ self.out.write_line(fmt!(": %s", *k));
+ }
+ Improvement(pct) => {
+ improved += 1;
+ self.out.write_str(*k);
+ self.out.write_str(": ");
+ self.write_improved();
+ self.out.write_line(fmt!(" by %.2f%%", pct as float))
+ }
+ Regression(pct) => {
+ regressed += 1;
+ self.out.write_str(*k);
+ self.out.write_str(": ");
+ self.write_regressed();
+ self.out.write_line(fmt!(" by %.2f%%", pct as float))
+ }
+ }
+ }
+ self.out.write_line(fmt!("result of ratchet: %u matrics added, %u removed, \
+ %u improved, %u regressed, %u noise",
+ added, removed, improved, regressed, noise));
+ if regressed == 0 {
+ self.out.write_line("updated ratchet file")
+ } else {
+ self.out.write_line("left ratchet file untouched")
+ }
+ }
+
+ pub fn write_run_finish(&self, ratchet_metrics: &Option<Path>) -> bool {
assert!(self.passed + self.failed + self.ignored + self.benchmarked == self.total);
- let success = self.failed == 0u;
- if !success {
+
+ let ratchet_success = match *ratchet_metrics {
+ None => true,
+ Some(ref pth) => {
+ self.out.write_str(fmt!("\nusing metrics ratchet: %s\n", pth.to_str()));
+ let (diff, ok) = self.metrics.ratchet(pth);
+ self.write_metric_diff(&diff);
+ ok
+ }
+ };
+
+ let test_success = self.failed == 0u;
+ if !test_success {
self.write_failures();
}
- self.out.write_str("\nresult: ");
+ let success = ratchet_success && test_success;
+
+ self.out.write_str("\ntest result: ");
if success {
// There's no parallelism at this point so it's safe to use color
self.write_ok();
match result {
TrOk => st.passed += 1,
TrIgnored => st.ignored += 1,
- TrBench(_) => st.benchmarked += 1,
+ TrBench(bs) => {
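+ // Record the benchmark as a metric: the median time per
+ // iteration, with the observed min-max spread as its noise.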
+ st.metrics.insert_metric(test.name.to_str(),
+ bs.ns_iter_summ.median,
+ bs.ns_iter_summ.max - bs.ns_iter_summ.min);
+ st.benchmarked += 1
+ }
TrFailed => {
st.failed += 1;
st.failures.push(test);
}
let st = @mut ConsoleTestState::new(opts);
run_tests(opts, tests, |x| callback(&x, st));
- return st.write_run_finish();
+ match opts.save_metrics {
+ None => (),
+ Some(ref pth) => {
+ st.metrics.save(pth);
+ st.out.write_str(fmt!("\nmetrics saved to: %s", pth.to_str()));
+ }
+ }
+ return st.write_run_finish(&opts.ratchet_metrics);
}
#[test]
failed: 0u,
ignored: 0u,
benchmarked: 0u,
+ metrics: MetricMap::new(),
failures: ~[test_b, test_a]
};
}
}
+
+impl ToJson for Metric {
+ fn to_json(&self) -> json::Json {
+ let mut map = ~HashMap::new();
+ map.insert(~"value", json::Number(self.value as float));
+ map.insert(~"noise", json::Number(self.noise as float));
+ json::Object(map)
+ }
+}
+
+impl MetricMap {
+
+ fn new() -> MetricMap {
+ MetricMap(TreeMap::new())
+ }
+
+ /// Load a MetricMap from a file.
+ fn load(p: &Path) -> MetricMap {
+ assert!(os::path_exists(p));
+ let f = io::file_reader(p).get();
+ let mut decoder = json::Decoder(json::from_reader(f).get());
+ MetricMap(Decodable::decode(&mut decoder))
+ }
+
+ /// Write the MetricMap to a file as JSON.
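+ ///
+ /// Via the `ToJson` impls above, the output is a JSON object keyed by
+ /// metric name, e.g. (illustrative):
+ ///
+ ///     { "sort.ns_per_iter": { "value": 1200.0, "noise": 40.0 } }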
+ pub fn save(&self, p: &Path) {
+ let f = io::file_writer(p, [io::Create, io::Truncate]).get();
+ json::to_pretty_writer(f, &self.to_json());
+ }
+
+ /// Compare against an old MetricMap, producing a `MetricDiff`.
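+ ///
+ /// For example (illustrative): comparing a new map `{a: 10 +/- 1}`
+ /// against an old map `{a: 12 +/- 1, b: 5 +/- 1}` yields
+ /// `{a: Improvement(20.0), b: MetricRemoved}`, since positive noise
+ /// means lower-is-better and `b` is no longer present.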
+ pub fn compare_to_old(&self, old: MetricMap) -> MetricDiff {
+ let mut diff : MetricDiff = TreeMap::new();
+ for old.iter().advance |(k, vold)| {
+ let r = match self.find(k) {
+ None => MetricRemoved,
+ Some(v) => {
+ let delta = (v.value - vold.value);
+ if delta.abs() < vold.noise.abs() {
+ LikelyNoise
+ } else {
+ let pct = delta.abs() / v.value * 100.0;
+ if vold.noise < 0.0 {
+ // When 'noise' is negative, it means we want
+ // to see deltas that go up over time, and can
+ // only tolerate slight negative movement.
+ if delta < 0.0 {
+ Regression(pct)
+ } else {
+ Improvement(pct)
+ }
+ } else {
+ // When 'noise' is positive, it means we want
+ // to see deltas that go down over time, and
+ // can only tolerate slight positive movements.
+ if delta < 0.0 {
+ Improvement(pct)
+ } else {
+ Regression(pct)
+ }
+ }
+ }
+ }
+ };
+ diff.insert(copy *k, r);
+ }
+ for self.iter().advance |(k, _)| {
+ if !diff.contains_key(k) {
+ diff.insert(copy *k, MetricAdded);
+ }
+ }
+ diff
+ }
+
+ /// Insert a named `value` (+/- `noise`) metric into the map. The value
+ /// must be non-negative. The `noise` indicates the uncertainty of the
+ /// metric, which doubles as the "noise range" of acceptable
+ /// pairwise regressions on this named value when comparing one run's
+ /// metrics against the next using `compare_to_old`.
+ ///
+ /// If `noise` is positive, then it means this metric is of a value
+ /// you want to see grow smaller, so a change larger than `noise` in the
+ /// positive direction represents a regression.
+ ///
+ /// If `noise` is negative, then it means this metric is of a value
+ /// you want to see grow larger, so a change larger than `noise` in the
+ /// negative direction represents a regression.
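+ ///
+ /// An illustrative sketch (the metric names are hypothetical):
+ ///
+ ///     let mut mm = MetricMap::new();
+ ///     // Time per iteration: lower is better, so noise is positive;
+ ///     // a slowdown of more than 40ns/iter counts as a regression.
+ ///     mm.insert_metric("sort.ns_per_iter", 1200.0, 40.0);
+ ///     // Throughput: higher is better, so noise is negative.
+ ///     mm.insert_metric("sort.mb_per_sec", 250.0, -10.0);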
+ pub fn insert_metric(&mut self, name: &str, value: f64, noise: f64) {
+ let m = Metric {
+ value: value,
+ noise: noise
+ };
+ self.insert(name.to_owned(), m);
+ }
+
+ /// Attempt to "ratchet" an external metric file. This involves loading
+ /// metrics from a metric file (if it exists), comparing against
+ /// the metrics in `self` using `compare_to_old`, and rewriting the
+ /// file to contain the metrics in `self` if none of the
+ /// `MetricChange`s are `Regression`. Returns the diff as well
+ /// as a boolean indicating whether the ratchet succeeded.
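+ ///
+ /// Intended use, roughly (the path is hypothetical):
+ ///
+ ///     let (diff, ok) = mm.ratchet(&Path("ratchet.json"));
+ ///     // `ok` is false iff some metric regressed; the file is only
+ ///     // rewritten with the new metrics when `ok` is true.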
+ pub fn ratchet(&self, p: &Path) -> (MetricDiff, bool) {
+ let old = if os::path_exists(p) {
+ MetricMap::load(p)
+ } else {
+ MetricMap::new()
+ };
+
+ let diff : MetricDiff = self.compare_to_old(old);
+ let ok = do diff.iter().all() |(_, v)| {
+ match *v {
+ Regression(_) => false,
+ _ => true
+ }
+ };
+
+ if ok {
+ debug!("rewriting file '%s' with updated metrics");
+ self.save(p);
+ }
+ return (diff, ok)
+ }
+}
+
+
+// Benchmarking
+
impl BenchHarness {
/// Callback for benchmark functions to run in their body.
pub fn iter(&mut self, inner:&fn()) {
f(self);
}
- // This is the Go benchmark algorithm. It produces a single
- // datapoint and always tries to run for 1s.
- pub fn go_bench(&mut self, f: &fn(&mut BenchHarness)) {
-
- // Rounds a number down to the nearest power of 10.
- fn round_down_10(n: u64) -> u64 {
- let mut n = n;
- let mut res = 1;
- while n > 10 {
- n = n / 10;
- res *= 10;
- }
- res
- }
-
- // Rounds x up to a number of the form [1eX, 2eX, 5eX].
- fn round_up(n: u64) -> u64 {
- let base = round_down_10(n);
- if n < (2 * base) {
- 2 * base
- } else if n < (5 * base) {
- 5 * base
- } else {
- 10 * base
- }
- }
+ // This is a more statistics-driven benchmark algorithm
+ pub fn auto_bench(&mut self, f: &fn(&mut BenchHarness)) -> stats::Summary {
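+ // Rough strategy: take winsorized samples at n and 5*n iterations,
+ // and accept the longer run's summary once the two medians agree to
+ // within the longer run's median absolute deviation, or once a total
+ // time budget (~10s) is exhausted.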
// Initial bench run to get ballpark figure.
let mut n = 1_u64;
self.bench_n(n, |x| f(x));
- while n < 1_000_000_000 &&
- self.ns_elapsed() < 1_000_000_000 {
- let last = n;
-
- // Try to estimate iter count for 1s falling back to 1bn
- // iterations if first run took < 1ns.
- if self.ns_per_iter() == 0 {
- n = 1_000_000_000;
- } else {
- n = 1_000_000_000 / self.ns_per_iter();
- }
-
- n = u64::max(u64::min(n+n/2, 100*last), last+1);
- n = round_up(n);
- self.bench_n(n, |x| f(x));
+ // Try to estimate the iteration count for 1ms, falling back to 1m
+ // iterations if the first run took < 1ns.
+ if self.ns_per_iter() == 0 {
+ n = 1_000_000;
+ } else {
+ n = 1_000_000 / self.ns_per_iter();
}
- }
- // This is a more statistics-driven benchmark algorithm. It stops as
- // quickly as 100ms, so long as the statistical properties are
- // satisfactory. If those properties are not met, it may run as long as
- // the Go algorithm.
- pub fn auto_bench(&mut self, f: &fn(&mut BenchHarness)) -> stats::Summary {
-
- let mut magnitude = 1000;
-
- let samples : &mut [f64] = [0.0_f64, ..100];
+ let mut total_run = 0;
+ let samples : &mut [f64] = [0.0_f64, ..50];
loop {
let loop_start = precise_time_ns();
for samples.mut_iter().advance() |p| {
- self.bench_n(magnitude as u64, |x| f(x));
+ self.bench_n(n as u64, |x| f(x));
*p = self.ns_per_iter() as f64;
};
- // Clip top 10% and bottom 10% of outliers
- stats::winsorize(samples, 10.0);
+ // Clip the top and bottom 5% of outliers.
+ stats::winsorize(samples, 5.0);
let summ = stats::Summary::new(samples);
+ for samples.mut_iter().advance() |p| {
+ self.bench_n(5 * n as u64, |x| f(x));
+ *p = self.ns_per_iter() as f64;
+ };
+
+ stats::winsorize(samples, 5.0);
+ let summ5 = stats::Summary::new(samples);
+
debug!("%u samples, median %f, MAD=%f, MADP=%f",
samples.len(),
summ.median as float,
let now = precise_time_ns();
let loop_run = now - loop_start;
- // Stop early if we have a good signal after a 100ms loop.
- if loop_run > 100_000_000 && summ.median_abs_dev_pct < 5.0 {
- return summ;
+ // If we've run for at least 100ms and the medians of the n and
+ // 5*n runs have converged, accept the longer run's summary.
+ if loop_run > 100_000_000 &&
+ summ.median_abs_dev_pct < 1.0 &&
+ summ.median - summ5.median < summ5.median_abs_dev {
+ return summ5;
}
- // Longest we ever run for is 1s.
- if loop_run > 1_000_000_000 {
- return summ;
+ total_run += loop_run;
+ // Longest we ever run for is 10s.
+ if total_run > 10_000_000_000 {
+ return summ5;
}
- magnitude *= 3;
- magnitude /= 2;
+ n *= 2;
}
}
}
pub mod bench {
logfile: option::None,
run_tests: true,
run_benchmarks: false,
- save_results: option::None,
- compare_results: option::None
+ ratchet_metrics: option::None,
+ save_metrics: option::None,
};
let tests = ~[
logfile: option::None,
run_tests: true,
run_benchmarks: false,
- save_results: option::None,
- compare_results: option::None
+ ratchet_metrics: option::None,
+ save_metrics: option::None,
};
let names =