]> git.lizzy.rs Git - rust.git/blob - src/libregex/test/tests.rs
Fix infinite recursion in the compiler.
[rust.git] / src / libregex / test / tests.rs
1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 // ignore-tidy-linelength
12 // ignore-lexer-test FIXME #15679
13
14 use regex::{Regex, NoExpand};
15
/// Splitting on digit runs with a limit of 2 yields the first field and the
/// unsplit remainder of the input.
#[test]
fn splitn() {
    let digits = regex!(r"\d+");
    let pieces = digits
        .splitn("cauchy123plato456tyler789binx", 2)
        .collect::<Vec<&str>>();
    assert_eq!(pieces, vec!["cauchy", "plato456tyler789binx"]);
}
23
/// Splitting on digit runs without a limit yields every field between them.
#[test]
fn split() {
    let digits = regex!(r"\d+");
    let pieces = digits
        .split("cauchy123plato456tyler789binx")
        .collect::<Vec<&str>>();
    assert_eq!(pieces, vec!["cauchy", "plato", "tyler", "binx"]);
}
31
/// The empty pattern matches exactly once, at offset 0, in an empty haystack.
#[test]
fn empty_regex_empty_match() {
    let empty = regex!("");
    let spans: Vec<(uint, uint)> = empty.find_iter("").collect();
    assert_eq!(spans, vec![(0, 0)]);
}
38
/// The empty pattern matches at every position of "abc", including the
/// position just past the final character.
#[test]
fn empty_regex_nonempty_match() {
    let empty = regex!("");
    let spans: Vec<(uint, uint)> = empty.find_iter("abc").collect();
    assert_eq!(spans, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
}
45
/// Square brackets can be listed inside a character class either as hex
/// escapes or as backslash escapes; both spellings match "[" and "]".
#[test]
fn quoted_bracket_set() {
    // Brackets written as `\x{5b}` and `\x{5d}`.
    let hex_escaped = regex!(r"([\x{5b}\x{5d}])");
    let hex_spans: Vec<(uint, uint)> = hex_escaped.find_iter("[]").collect();
    assert_eq!(hex_spans, vec![(0, 1), (1, 2)]);

    // Brackets written as `\[` and `\]`.
    let backslashed = regex!(r"([\[\]])");
    let backslash_spans: Vec<(uint, uint)> = backslashed.find_iter("[]").collect();
    assert_eq!(backslash_spans, vec![(0, 1), (1, 2)]);
}
55
/// A class whose first range begins with an unescaped `[` (here `[-z`) must
/// parse, and must match both "[" and "]".
#[test]
fn first_range_starts_with_left_bracket() {
    let class = regex!(r"([[-z])");
    let spans: Vec<(uint, uint)> = class.find_iter("[]").collect();
    assert_eq!(spans, vec![(0, 1), (1, 2)]);
}
62
/// A class range may end in an escape sequence (here `\[-\x{5d}`); the
/// resulting class must match both "[" and "]".
#[test]
fn range_ends_with_escape() {
    let class = regex!(r"([\[-\x{5d}])");
    let spans: Vec<(uint, uint)> = class.find_iter("[]").collect();
    assert_eq!(spans, vec![(0, 1), (1, 2)]);
}
69
// Generates a `#[test]` function named `$name` that compiles `$re`, calls the
// replacement method `$which` on it with `$search` and `$replace`, and checks
// that the produced string equals `$result`.
macro_rules! replace {
    ($name:ident, $which:ident, $re:expr,
     $search:expr, $replace:expr, $result:expr) => (
        #[test]
        fn $name() {
            let compiled = regex!($re);
            let actual = compiled.$which($search, $replace);
            let wanted = String::from_str($result);
            assert_eq!(actual, wanted);
        }
    );
}
80
81 replace!{rep_first, replace, r"\d", "age: 26", "Z", "age: Z6"}
82 replace!{rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z"}
83 replace!{rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ"}
84 replace!{rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1"}
85 replace!{rep_double_dollar, replace,
86          r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1"}
87 replace!{rep_no_expand, replace,
88          r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1"}
89 replace!{rep_named, replace_all,
90          r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
91          "w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3"}
92 replace!{rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t  trim me\t   \t",
93          "", "trim me"}
94
// Generates a `#[test]` function named `$name` that panics when `$re` is
// accepted by `Regex::new`; any parse error counts as success.
macro_rules! noparse {
    ($name:ident, $re:expr) => (
        #[test]
        fn $name() {
            let pattern = $re;
            if Regex::new(pattern).is_ok() {
                panic!("Regex '{}' should cause a parse error.", pattern);
            }
        }
    );
}
107
108 noparse!{fail_double_repeat, "a**"}
109 noparse!{fail_no_repeat_arg, "*"}
110 noparse!{fail_no_repeat_arg_begin, "^*"}
111 noparse!{fail_incomplete_escape, "\\"}
112 noparse!{fail_class_incomplete, "[A-"}
113 noparse!{fail_class_not_closed, "[A"}
114 noparse!{fail_class_no_begin, r"[\A]"}
115 noparse!{fail_class_no_end, r"[\z]"}
116 noparse!{fail_class_no_boundary, r"[\b]"}
117 noparse!{fail_open_paren, "("}
118 noparse!{fail_close_paren, ")"}
119 noparse!{fail_invalid_range, "[a-Z]"}
120 noparse!{fail_empty_capture_name, "(?P<>a)"}
121 noparse!{fail_empty_capture_exp, "(?P<name>)"}
122 noparse!{fail_bad_capture_name, "(?P<na-me>)"}
123 noparse!{fail_bad_flag, "(?a)a"}
124 noparse!{fail_empty_alt_before, "|a"}
125 noparse!{fail_empty_alt_after, "a|"}
126 noparse!{fail_counted_big_exact, "a{1001}"}
127 noparse!{fail_counted_big_min, "a{1001,}"}
128 noparse!{fail_counted_no_close, "a{1001"}
129 noparse!{fail_unfinished_cap, "(?"}
130 noparse!{fail_unfinished_escape, "\\"}
131 noparse!{fail_octal_digit, r"\8"}
132 noparse!{fail_hex_digit, r"\xG0"}
133 noparse!{fail_hex_short, r"\xF"}
134 noparse!{fail_hex_long_digits, r"\x{fffg}"}
135 noparse!{fail_flag_bad, "(?a)"}
136 noparse!{fail_flag_empty, "(?)"}
137 noparse!{fail_double_neg, "(?-i-i)"}
138 noparse!{fail_neg_empty, "(?i-)"}
139 noparse!{fail_empty_group, "()"}
140 noparse!{fail_dupe_named, "(?P<a>.)(?P<a>.)"}
141 noparse!{fail_range_end_no_class, "[a-[:lower:]]"}
142 noparse!{fail_range_end_no_begin, r"[a-\A]"}
143 noparse!{fail_range_end_no_end, r"[a-\z]"}
144 noparse!{fail_range_end_no_boundary, r"[a-\b]"}
145 noparse!{fail_repeat_no_expr, r"-|+"}
146
// Generates a `#[test]` function named `$name` that runs `$re` against
// `$text` and compares the capture positions with the expected list given
// by the trailing tokens (`Some((start, end))` or `None` per group).
//
// When the regex does not match at all, the actual result is the
// single-element list `[None]`.
macro_rules! mat {
    ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
        #[test]
        fn $name() {
            let text = $text;
            let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
            let compiled = regex!($re);
            let positions = match compiled.captures(text) {
                None => vec!(None),
                Some(caps) => caps.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
            };
            // The test set sometimes leaves out capture groups, so only the
            // leading groups that the test set lists are compared.
            let all = positions.as_slice();
            let compared = if all.len() > expected.len() {
                &all[..expected.len()]
            } else {
                all
            };
            if expected != compared {
                panic!("For RE '{}' against '{}', expected '{:?}' but got '{:?}'",
                      $re, text, expected, compared);
            }
        }
    );
}
171
172 // Some crazy expressions from regular-expressions.info.
173 mat!{match_ranges,
174      r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
175      "num: 255", Some((5, 8))}
176 mat!{match_ranges_not,
177      r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
178      "num: 256", None}
179 mat!{match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))}
180 mat!{match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))}
181 mat!{match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))}
182 mat!{match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None}
183 mat!{match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
184      "mine is jam.slam@gmail.com ", Some((8, 26))}
185 mat!{match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
186      "mine is jam.slam@gmail ", None}
187 mat!{match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
188      "mine is jam.slam@gmail.com ", Some((8, 26))}
189 mat!{match_date1,
190      r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
191      "1900-01-01", Some((0, 10))}
192 mat!{match_date2,
193      r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
194      "1900-00-01", None}
195 mat!{match_date3,
196      r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
197      "1900-13-01", None}
198
199 // Exercise the flags.
200 mat!{match_flag_case, "(?i)abc", "ABC", Some((0, 3))}
201 mat!{match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3))}
202 mat!{match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None}
203 mat!{match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2))}
204 mat!{match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4))}
205 mat!{match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None}
206 mat!{match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2))}
207 mat!{match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11))}
208 mat!{match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))}
209 mat!{match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))}
210 mat!{match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))}
211
212 // Some Unicode tests.
213 // A couple of these are commented out because something in the guts of macro expansion is creating
214 // invalid byte strings.
215 //mat!{uni_literal, r"Ⅰ", "Ⅰ", Some((0, 3))}
216 mat!{uni_one, r"\pN", "Ⅰ", Some((0, 3))}
217 mat!{uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))}
218 mat!{uni_not, r"\PN+", "abⅠ", Some((0, 2))}
219 mat!{uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))}
220 mat!{uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))}
221 mat!{uni_case, r"(?i)Δ", "δ", Some((0, 2))}
222 //mat!{uni_case_not, r"Δ", "δ", None}
223 mat!{uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))}
224 mat!{uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))}
225 mat!{uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))}
226 mat!{uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))}
227
228 // Test the Unicode friendliness of Perl character classes.
229 mat!{uni_perl_w, r"\w+", "dδd", Some((0, 4))}
230 mat!{uni_perl_w_not, r"\w+", "⥡", None}
231 mat!{uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))}
232 mat!{uni_perl_d, r"\d+", "1२३9", Some((0, 8))}
233 mat!{uni_perl_d_not, r"\d+", "Ⅱ", None}
234 mat!{uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))}
235 mat!{uni_perl_s, r"\s+", " ", Some((0, 3))}
236 mat!{uni_perl_s_not, r"\s+", "☃", None}
237 mat!{uni_perl_s_neg, r"\S+", "☃", Some((0, 3))}
238
239 // And do the same for word boundaries.
240 mat!{uni_boundary_none, r"\d\b", "6δ", None}
241 mat!{uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))}
242
243 // A whole mess of tests from Glenn Fowler's regex test suite.
244 // Generated by the 'src/etc/regex-match-tests' program.
245 mod matches;