1 use std::str::pattern::*;
3 // This macro makes it easier to write
4 // tests that do a series of iterations
// Arguments, in order: haystack, needle, a test name (used as the assertion
// message), a bracketed list of searcher method names, and the expected results.
5 macro_rules! search_asserts {
6 ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
// One searcher is built and reused for every call listed in `$func`.
7 let mut searcher = $needle.into_searcher($haystack);
// Invoke every listed method once, in order, converting each return value into a `Step`.
8 let arr = [$( Step::from(searcher.$func()) ),*];
// Compare the full sequence against the expectation in a single assertion.
9 assert_eq!(&arr[..], &$result, $testname);
13 /// Combined enum for the results of next() and next_match()/next_reject()
14 #[derive(Debug, PartialEq, Eq)]
16 // variant names purposely chosen to
17 // be the same length for easy alignment
18 Matches(usize, usize), // produced from `SearchStep::Match(a, b)`
19 Rejects(usize, usize), // produced from `SearchStep::Reject(a, b)`
20 InRange(usize, usize), // produced from `Some((a, b))`, i.e. an `Option<(usize, usize)>` result
// Converts a `SearchStep` into the test-local `Step`, keeping the two indices unchanged.
26 impl From<SearchStep> for Step {
27 fn from(x: SearchStep) -> Self {
// One-to-one mapping of the three `SearchStep` variants.
29 SearchStep::Match(a, b) => Matches(a, b),
30 SearchStep::Reject(a, b) => Rejects(a, b),
31 SearchStep::Done => Done,
// Converts an `Option<(usize, usize)>` index pair into a `Step`.
// NOTE(review): only the `Some` arm is visible here; the tests expect `Done` once a
// search is exhausted, so `None` presumably maps to `Done` (confirm).
36 impl From<Option<(usize, usize)>> for Step {
37 fn from(x: Option<(usize, usize)>) -> Self {
39 Some((a, b)) => InRange(a, b),
45 // FIXME(Manishearth) these tests focus on single-character searching (CharSearcher)
46 // and on next()/next_match(), not next_reject(). This is because
47 // the memchr changes make next_match() for single chars complex, but next_reject()
48 // continues to use next() under the hood. We should add more test cases for all
49 // of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
52 fn test_simple_iteration() {
// Steps through haystacks with next() / next_back() only, covering
// ASCII, Chinese and mixed ASCII/Chinese strings in both directions.
56 "forward iteration for ASCII string",
57 // a b c d e a b c d EOF
58 [next, next, next, next, next, next, next, next, next, next],
76 "reverse iteration for ASCII string",
77 // d c b a e d c b a EOF
79 next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
99 "forward iteration for Chinese string",
101 [next, next, next, next, next, next],
// Every expected span is 3 bytes wide; the final step is Done.
102 [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done]
108 "forward iteration for mixed string",
109 // 我 的 猫 说 m e o w EOF
110 [next, next, next, next, next, next, next, next, next],
127 "reverse iteration for mixed string",
128 // w o e m 说 猫 的 我 EOF
130 next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
148 fn test_simple_search() {
// Exercises next_match() / next_reject() and their *_back counterparts on ASCII input.
152 "next_match for ASCII string",
153 [next_match, next_match, next_match, next_match],
154 [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done]
160 "next_match_back for ASCII string",
161 [next_match_back, next_match_back, next_match_back, next_match_back],
162 [InRange(10, 11), InRange(5, 6), InRange(0, 1), Done]
168 "next_reject for ASCII string",
// One next_match call is interleaved between the next_reject calls.
169 [next_reject, next_reject, next_match, next_reject, next_reject],
170 [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done]
176 "next_reject_back for ASCII string",
196 // Á, 각, ก, 😁 all end in 0x81
197 // 🁀, ᘀ do not end in 0x81 but contain the byte
198 // ꁁ has 0x81 as its second and third bytes.
200 // The memchr-using implementation of next_match
201 // and next_match_back temporarily violates
202 // the property that the search is always on a unicode boundary,
203 // which is fine as long as this never reaches next() or next_back().
204 // So we test if next() is correct after each next_match() as well.
205 const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
208 fn test_stress_indices() {
209 // This isn't really a test; it documents the byte indices of each character in the stress-test string.
214 "Indices of characters in stress test",
216 next, next, next, next, next, next, next, next, next, next, next, next, next, next,
217 next, next, next, next, next, next, next
225 Rejects(10, 13), // ꁁ
226 Rejects(13, 14), // f
227 Rejects(14, 15), // g
228 Rejects(15, 19), // 😁
229 Rejects(19, 22), // 각
230 Rejects(22, 25), // ก
231 Rejects(25, 28), // ᘀ
232 Rejects(28, 31), // 각
233 Rejects(31, 32), // a
234 Rejects(32, 34), // Á
235 Rejects(34, 37), // 각
236 Rejects(37, 40), // ꁁ
237 Rejects(40, 43), // ก
238 Rejects(43, 47), // 😁
239 Rejects(47, 48), // a
246 fn test_forward_search_shared_bytes() {
// Forward searches for multi-byte needles; several cases interleave next()
// with next_match() and check the span reported by each call.
250 "Forward search for two-byte Latin character",
251 [next_match, next_match, next_match, next_match],
252 [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
258 "Forward search for two-byte Latin character; check if next() still works",
259 [next_match, next, next_match, next, next_match, next, next_match],
274 "Forward search for three-byte Hangul character",
275 [next_match, next, next_match, next_match, next_match],
276 [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
282 "Forward search for three-byte Hangul character; check if next() still works",
283 [next_match, next, next_match, next, next_match, next, next_match],
298 "Forward search for three-byte Thai character",
299 [next_match, next, next_match, next, next_match],
300 [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
306 "Forward search for three-byte Thai character; check if next() still works",
307 [next_match, next, next_match, next, next_match],
308 [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
314 "Forward search for four-byte emoji",
315 [next_match, next, next_match, next, next_match],
316 [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
322 "Forward search for four-byte emoji; check if next() still works",
323 [next_match, next, next_match, next, next_match],
324 [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
330 "Forward search for three-byte Yi character with repeated bytes",
331 [next_match, next, next_match, next, next_match],
332 [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
338 "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
339 [next_match, next, next_match, next, next_match],
340 [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
345 fn test_reverse_search_shared_bytes() {
// Reverse searches for multi-byte needles; several cases interleave next_back()
// with next_match_back() and check the span reported by each call.
349 "Reverse search for two-byte Latin character",
350 [next_match_back, next_match_back, next_match_back, next_match_back],
351 [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
357 "Reverse search for two-byte Latin character; check if next_back() still works",
358 [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
359 [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
365 "Reverse search for three-byte Hangul character",
366 [next_match_back, next_back, next_match_back, next_match_back, next_match_back],
367 [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
373 "Reverse search for three-byte Hangul character; check if next_back() still works",
397 "Reverse search for three-byte Thai character",
398 [next_match_back, next_back, next_match_back, next_back, next_match_back],
399 [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
405 "Reverse search for three-byte Thai character; check if next_back() still works",
406 [next_match_back, next_back, next_match_back, next_back, next_match_back],
407 [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
413 "Reverse search for four-byte emoji",
414 [next_match_back, next_back, next_match_back, next_back, next_match_back],
415 [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
421 "Reverse search for four-byte emoji; check if next_back() still works",
422 [next_match_back, next_back, next_match_back, next_back, next_match_back],
423 [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
429 "Reverse search for three-byte Yi character with repeated bytes",
430 [next_match_back, next_back, next_match_back, next_back, next_match_back],
431 [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
// NOTE(review): the call list and expected results below are identical to the preceding case.
437 "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
438 [next_match_back, next_back, next_match_back, next_back, next_match_back],
439 [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
444 fn double_ended_regression_test() {
445 // https://github.com/rust-lang/rust/issues/47175
446 // Ensures that double ended searching comes to a convergence
// Each case mixes forward and backward calls on the same searcher and ends
// with a call that is expected to yield Done.
450 "alternating double ended search",
451 [next_match, next_match_back, next_match, next_match_back],
452 [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
457 "triple double ended search for a",
458 [next_match, next_match_back, next_match_back, next_match_back],
459 [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
464 "triple double ended search for d",
465 [next_match, next_match_back, next_match_back, next_match_back],
466 [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
471 "Double ended search for two-byte Latin character",
472 [next_match, next_match_back, next_match, next_match_back],
473 [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
478 "Reverse double ended search for three-byte Hangul character",
479 [next_match_back, next_back, next_match, next, next_match_back, next_match],
480 [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
485 "Double ended search for three-byte Thai character",
486 [next_match, next_back, next, next_match_back, next_match],
487 [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
492 "Double ended search for four-byte emoji",
493 [next_match_back, next, next_match, next_back, next_match],
494 [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
499 "Double ended search for three-byte Yi character with repeated bytes",
500 [next_match, next, next_match_back, next_back, next_match],
501 [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]