1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 #![allow(non_snake_case)]
12 use std::rand::{Rng, thread_rng};
14 use std::iter::repeat;
16 use regex::{Regex, NoExpand};
// Benchmark helper: runs `re.is_match(text)` inside `b.iter` and panics with
// "no match" whenever the regex fails to match `text`.
//
// `b` is the benchmark driver, `re` the regex under test (taken by value),
// and `text` the haystack that is expected to match.
18 fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
// The timed closure does the match and the check together.
19 b.iter(|| if !re.is_match(text) { panic!("no match") });
// Benchmarks a required match of the pattern `a?` repeated `n` times followed
// by `a` repeated `n` times, against a text made of `n` `a` characters.
// NOTE(review): the binding of `n` is not among the lines visible here;
// confirm its value where it is defined.
// NOTE(review): judging by the name, this presumably guards against
// exponential-time matching on this pattern shape; confirm intent.
23 fn no_exponential(b: &mut Bencher) {
// The pattern is assembled at run time and compiled with `Regex::new`;
// `unwrap` panics if it does not compile.
25 let re = Regex::new(format!("{}{}",
26 repeat("a?").take(n).collect::<String>(),
27 repeat("a").take(n).collect::<String>()).as_slice()).unwrap();
// Haystack: `n` copies of "a".
28 let text = repeat("a").take(n).collect::<String>();
29 bench_assert_match(b, re, text.as_slice());
// Benchmarks a required match against a text of 50 `x` characters followed by
// a single `y`.
// NOTE(review): `re` is not bound in the lines visible here; confirm which
// pattern this benchmark uses.
33 fn literal(b: &mut Bencher) {
// Haystack: "x" * 50 + "y".
35 let text = format!("{}y", repeat("x").take(50).collect::<String>());
36 bench_assert_match(b, re, text.as_slice());
// Benchmarks a required match of `.y` (any character followed by `y`) against
// a text of 50 `x` characters followed by a single `y`.
40 fn not_literal(b: &mut Bencher) {
41 let re = regex!(".y");
// Haystack: "x" * 50 + "y"; the only `y` is the final character.
42 let text = format!("{}y", repeat("x").take(50).collect::<String>());
43 bench_assert_match(b, re, text.as_slice());
// Benchmarks a required match of the character class `[abcdw]` against
// "xxxx" repeated 20 times (80 `x` characters) followed by a single `w`.
47 fn match_class(b: &mut Bencher) {
48 let re = regex!("[abcdw]");
// Only the trailing `w` belongs to the class.
49 let text = format!("{}w", repeat("xxxx").take(20).collect::<String>());
50 bench_assert_match(b, re, text.as_slice());
// Benchmarks a required match of the character class `[ac]` against
// "bbbb" repeated 20 times (80 `b` characters) followed by a single `c`.
54 fn match_class_in_range(b: &mut Bencher) {
55 // 'b' is between 'a' and 'c', so the class range checking doesn't help.
56 let re = regex!("[ac]");
// Only the trailing `c` belongs to the class.
57 let text = format!("{}c", repeat("bbbb").take(20).collect::<String>());
58 bench_assert_match(b, re, text.as_slice());
// Benchmarks `replace_all` on the lowercase alphabet, replacing every match of
// `[cjrw]` with the empty string supplied through `NoExpand("")`.
62 fn replace_all(b: &mut Bencher) {
63 let re = regex!("[cjrw]");
64 let text = "abcdefghijklmnopqrstuvwxyz";
65 // FIXME: This isn't using the $name expand stuff.
66 // It's possible RE2/Go is using it, but currently, the expand in this
67 // crate is actually compiling a regex, so it's incredibly slow.
// The replaced string is the closure's value; nothing is asserted about it.
68 b.iter(|| re.replace_all(text, NoExpand("")));
// Benchmarks `is_match` for the start-anchored pattern `^zbc(d|e)` against the
// 26-letter lowercase alphabet. The text starts with `a`, so the pattern
// cannot match.
72 fn anchored_literal_short_non_match(b: &mut Bencher) {
73 let re = regex!("^zbc(d|e)");
74 let text = "abcdefghijklmnopqrstuvwxyz";
// The boolean result is the closure's value; it is not asserted.
75 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for the start-anchored pattern `^zbc(d|e)` against the
// lowercase alphabet repeated 15 times (390 characters). The text starts with
// `a`, so the pattern cannot match.
79 fn anchored_literal_long_non_match(b: &mut Bencher) {
80 let re = regex!("^zbc(d|e)");
81 let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
// The boolean result is the closure's value; it is not asserted.
82 b.iter(|| re.is_match(text.as_slice()));
// Benchmarks `is_match` for the start-anchored pattern `^.bc(d|e)` against the
// 26-letter lowercase alphabet, whose leading "abcd" satisfies the pattern.
86 fn anchored_literal_short_match(b: &mut Bencher) {
87 let re = regex!("^.bc(d|e)");
88 let text = "abcdefghijklmnopqrstuvwxyz";
// The boolean result is the closure's value; it is not asserted.
89 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for the start-anchored pattern `^.bc(d|e)` against the
// lowercase alphabet repeated 15 times (390 characters); the leading "abcd"
// satisfies the pattern.
93 fn anchored_literal_long_match(b: &mut Bencher) {
94 let re = regex!("^.bc(d|e)");
95 let text = repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>();
// The boolean result is the closure's value; it is not asserted.
96 b.iter(|| re.is_match(text.as_slice()));
// Benchmarks `is_match` for `^.bc(d|e)*$` (anchored at both ends, capturing
// group) against "abcddddddeeeededd".
// NOTE(review): "one_pass" presumably refers to a one-pass matching strategy
// this pattern is eligible for; confirm against the engine.
100 fn one_pass_short_a(b: &mut Bencher) {
101 let re = regex!("^.bc(d|e)*$");
// Haystack: "abc" followed by a run of `d`/`e` characters.
102 let text = "abcddddddeeeededd";
103 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for `.bc(d|e)*$` against "abcddddddeeeededd". This is
// the `one_pass_short_a` pattern without the leading `^` anchor.
// NOTE(review): the `_not` suffix presumably marks the variant that does not
// qualify for a one-pass strategy; confirm against the engine.
107 fn one_pass_short_a_not(b: &mut Bencher) {
108 let re = regex!(".bc(d|e)*$");
109 let text = "abcddddddeeeededd";
110 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for `^.bc(?:d|e)*$` against "abcddddddeeeededd". Same
// shape as `one_pass_short_a`, but the alternation group is non-capturing.
114 fn one_pass_short_b(b: &mut Bencher) {
115 let re = regex!("^.bc(?:d|e)*$");
116 let text = "abcddddddeeeededd";
117 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for `.bc(?:d|e)*$` against "abcddddddeeeededd". This
// is the `one_pass_short_b` pattern without the leading `^` anchor.
// NOTE(review): the `_not` suffix presumably marks the variant that does not
// qualify for a one-pass strategy; confirm against the engine.
121 fn one_pass_short_b_not(b: &mut Bencher) {
122 let re = regex!(".bc(?:d|e)*$");
123 let text = "abcddddddeeeededd";
124 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for `^abcdefghijklmnopqrstuvwxyz.*$` against the
// 26-letter lowercase alphabet; the pattern's literal prefix is the whole
// text.
128 fn one_pass_long_prefix(b: &mut Bencher) {
129 let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
130 let text = "abcdefghijklmnopqrstuvwxyz";
131 b.iter(|| re.is_match(text));
// Benchmarks `is_match` for `^.bcdefghijklmnopqrstuvwxyz.*$` against the
// 26-letter lowercase alphabet. Compared with `one_pass_long_prefix`, the
// first literal character is replaced by `.`, so the pattern no longer starts
// with a literal.
135 fn one_pass_long_prefix_not(b: &mut Bencher) {
136 let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
137 let text = "abcdefghijklmnopqrstuvwxyz";
138 b.iter(|| re.is_match(text));
// Generates a benchmark function named `$name` that builds a text with
// `gen_text($size)` and, on every iteration, panics with "match" if `$regex`
// matches that text (the benchmark expects no match).
// NOTE(review): only part of the macro body is visible here; confirm the
// remaining lines before relying on this summary.
141 macro_rules! throughput {
142 ($name:ident, $regex:expr, $size:expr) => (
144 fn $name(b: &mut Bencher) {
145 let text = gen_text($size);
// `$regex` is expanded inside the closure, so the expression supplied by the
// caller is evaluated on each iteration.
147 b.iter(|| if $regex.is_match(text.as_slice()) { panic!("match") });
152 fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
153 fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
154 fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
155 fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
// Builds an `n`-byte String for the throughput benchmarks, starting from `n`
// random ASCII characters produced by `thread_rng().gen_ascii_chars()`.
// NOTE(review): the body of the `for` loop over `bytes` is not among the lines
// visible here; confirm what it writes into the buffer before the final
// conversion.
157 fn gen_text(n: uint) -> String {
158 let mut rng = thread_rng();
// The closure parameter `n` shadows the function argument inside `map`; there
// it is the generated character, cast to a byte.
159 let mut bytes = rng.gen_ascii_chars().map(|n| n as u8).take(n)
160 .collect::<Vec<u8>>();
// Walks the buffer in place with each byte's index.
161 for (i, b) in bytes.iter_mut().enumerate() {
// `unwrap` panics if the buffer is not valid UTF-8.
166 String::from_utf8(bytes).unwrap()
// Instantiate the throughput benchmarks: each pattern (easy0, easy1, medium,
// hard) is run against generated texts of size 32, 1<<10 (1024) and
// 32<<10 (32768).
169 throughput!{easy0_32, easy0(), 32}
170 throughput!{easy0_1K, easy0(), 1<<10}
171 throughput!{easy0_32K, easy0(), 32<<10}
173 throughput!{easy1_32, easy1(), 32}
174 throughput!{easy1_1K, easy1(), 1<<10}
175 throughput!{easy1_32K, easy1(), 32<<10}
177 throughput!{medium_32, medium(), 32}
178 throughput!{medium_1K, medium(), 1<<10}
179 throughput!{medium_32K,medium(), 32<<10}
181 throughput!{hard_32, hard(), 32}
182 throughput!{hard_1K, hard(), 1<<10}
183 throughput!{hard_32K,hard(), 32<<10}