1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! The string Pattern API.
13 //! For more details, see the traits `Pattern`, `Searcher`,
14 //! `ReverseSearcher` and `DoubleEndedSearcher`.
16 #![unstable(feature = "pattern",
17 reason = "API not fully fleshed out and ready to be stabilized")]
25 /// A `Pattern<'a>` expresses that the implementing type
26 /// can be used as a string pattern for searching in a `&'a str`.
28 /// For example, both `'a'` and `"aa"` are patterns that
29 /// would match at index `1` in the string `"baaaab"`.
31 /// The trait itself acts as a builder for an associated
32 /// `Searcher` type, which does the actual work of finding
33 /// occurrences of the pattern in a string.
34 pub trait Pattern<'a>: Sized {
35 /// Associated searcher for this pattern
36 type Searcher: Searcher<'a>;
38 /// Constructs the associated searcher from
39 /// `self` and the `haystack` to search in.
40 fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
42 /// Checks whether the pattern matches anywhere in the haystack
44 fn is_contained_in(self, haystack: &'a str) -> bool {
45 self.into_searcher(haystack).next_match().is_some()
48 /// Checks whether the pattern matches at the front of the haystack
50 fn is_prefix_of(self, haystack: &'a str) -> bool {
51 match self.into_searcher(haystack).next() {
52 SearchStep::Match(0, _) => true,
57 /// Checks whether the pattern matches at the back of the haystack
59 fn is_suffix_of(self, haystack: &'a str) -> bool
60 where Self::Searcher: ReverseSearcher<'a>
62 match self.into_searcher(haystack).next_back() {
63 SearchStep::Match(_, j) if haystack.len() == j => true,
71 /// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
72 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
74 /// Expresses that a match of the pattern has been found at
77 /// Expresses that `haystack[a..b]` has been rejected as a possible match
80 /// Note that there might be more than one `Reject` between two `Match`es,
81 /// there is no requirement for them to be combined into one.
83 /// Expresses that every byte of the haystack has been visted, ending
88 /// A searcher for a string pattern.
90 /// This trait provides methods for searching for non-overlapping
91 /// matches of a pattern starting from the front (left) of a string.
93 /// It will be implemented by associated `Searcher`
94 /// types of the `Pattern` trait.
96 /// The trait is marked unsafe because the indices returned by the
97 /// `next()` methods are required to lie on valid utf8 boundaries in
98 /// the haystack. This enables consumers of this trait to
99 /// slice the haystack without additional runtime checks.
100 pub unsafe trait Searcher<'a> {
101 /// Getter for the underlaying string to be searched in
103 /// Will always return the same `&str`
104 fn haystack(&self) -> &'a str;
106 /// Performs the next search step starting from the front.
108 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
109 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
110 /// pattern, even partially.
111 /// - Returns `Done` if every byte of the haystack has been visited
113 /// The stream of `Match` and `Reject` values up to a `Done`
114 /// will contain index ranges that are adjacent, non-overlapping,
115 /// covering the whole haystack, and laying on utf8 boundaries.
117 /// A `Match` result needs to contain the whole matched pattern,
118 /// however `Reject` results may be split up into arbitrary
119 /// many adjacent fragments. Both ranges may have zero length.
121 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
122 /// might produce the stream
123 /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
124 fn next(&mut self) -> SearchStep;
126 /// Find the next `Match` result. See `next()`
128 fn next_match(&mut self) -> Option<(usize, usize)> {
131 SearchStep::Match(a, b) => return Some((a, b)),
132 SearchStep::Done => return None,
138 /// Find the next `Reject` result. See `next()`
140 fn next_reject(&mut self) -> Option<(usize, usize)> {
143 SearchStep::Reject(a, b) => return Some((a, b)),
144 SearchStep::Done => return None,
151 /// A reverse searcher for a string pattern.
153 /// This trait provides methods for searching for non-overlapping
154 /// matches of a pattern starting from the back (right) of a string.
156 /// It will be implemented by associated `Searcher`
157 /// types of the `Pattern` trait if the pattern supports searching
158 /// for it from the back.
160 /// The index ranges returned by this trait are not required
161 /// to exactly match those of the forward search in reverse.
163 /// For the reason why this trait is marked unsafe, see them
164 /// parent trait `Searcher`.
165 pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
166 /// Performs the next search step starting from the back.
168 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
169 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
170 /// pattern, even partially.
171 /// - Returns `Done` if every byte of the haystack has been visited
173 /// The stream of `Match` and `Reject` values up to a `Done`
174 /// will contain index ranges that are adjacent, non-overlapping,
175 /// covering the whole haystack, and laying on utf8 boundaries.
177 /// A `Match` result needs to contain the whole matched pattern,
178 /// however `Reject` results may be split up into arbitrary
179 /// many adjacent fragments. Both ranges may have zero length.
181 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
182 /// might produce the stream
183 /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
184 fn next_back(&mut self) -> SearchStep;
186 /// Find the next `Match` result. See `next_back()`
188 fn next_match_back(&mut self) -> Option<(usize, usize)>{
190 match self.next_back() {
191 SearchStep::Match(a, b) => return Some((a, b)),
192 SearchStep::Done => return None,
198 /// Find the next `Reject` result. See `next_back()`
200 fn next_reject_back(&mut self) -> Option<(usize, usize)>{
202 match self.next_back() {
203 SearchStep::Reject(a, b) => return Some((a, b)),
204 SearchStep::Done => return None,
211 /// A marker trait to express that a `ReverseSearcher`
212 /// can be used for a `DoubleEndedIterator` implementation.
214 /// For this, the impl of `Searcher` and `ReverseSearcher` need
215 /// to follow these conditions:
217 /// - All results of `next()` need to be identical
218 /// to the results of `next_back()` in reverse order.
219 /// - `next()` and `next_back()` need to behave as
220 /// the two ends of a range of values, that is they
221 /// can not "walk past each other".
225 /// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
226 /// `char` only requires looking at one at a time, which behaves the same
229 /// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
230 /// the pattern `"aa"` in the haystack `"aaa"` matches as either
231 /// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
232 pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
234 /////////////////////////////////////////////////////////////////////////////
235 // Impl for a CharEq wrapper
236 /////////////////////////////////////////////////////////////////////////////
240 fn matches(&mut self, char) -> bool;
241 fn only_ascii(&self) -> bool;
244 impl CharEq for char {
246 fn matches(&mut self, c: char) -> bool { *self == c }
249 fn only_ascii(&self) -> bool { (*self as u32) < 128 }
252 impl<F> CharEq for F where F: FnMut(char) -> bool {
254 fn matches(&mut self, c: char) -> bool { (*self)(c) }
257 fn only_ascii(&self) -> bool { false }
260 impl<'a> CharEq for &'a [char] {
262 fn matches(&mut self, c: char) -> bool {
263 self.iter().any(|&m| { let mut m = m; m.matches(c) })
267 fn only_ascii(&self) -> bool {
268 self.iter().all(|m| m.only_ascii())
272 struct CharEqPattern<C: CharEq>(C);
275 struct CharEqSearcher<'a, C: CharEq> {
278 char_indices: super::CharIndices<'a>,
283 impl<'a, C: CharEq> Pattern<'a> for CharEqPattern<C> {
284 type Searcher = CharEqSearcher<'a, C>;
287 fn into_searcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
289 ascii_only: self.0.only_ascii(),
292 char_indices: haystack.char_indices(),
297 unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
299 fn haystack(&self) -> &'a str {
304 fn next(&mut self) -> SearchStep {
305 let s = &mut self.char_indices;
306 // Compare lengths of the internal byte slice iterator
307 // to find length of current char
308 let (pre_len, _) = s.iter.iter.size_hint();
309 if let Some((i, c)) = s.next() {
310 let (len, _) = s.iter.iter.size_hint();
311 let char_len = pre_len - len;
312 if self.char_eq.matches(c) {
313 return SearchStep::Match(i, i + char_len);
315 return SearchStep::Reject(i, i + char_len);
322 unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
324 fn next_back(&mut self) -> SearchStep {
325 let s = &mut self.char_indices;
326 // Compare lengths of the internal byte slice iterator
327 // to find length of current char
328 let (pre_len, _) = s.iter.iter.size_hint();
329 if let Some((i, c)) = s.next_back() {
330 let (len, _) = s.iter.iter.size_hint();
331 let char_len = pre_len - len;
332 if self.char_eq.matches(c) {
333 return SearchStep::Match(i, i + char_len);
335 return SearchStep::Reject(i, i + char_len);
342 impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
344 /////////////////////////////////////////////////////////////////////////////
346 /////////////////////////////////////////////////////////////////////////////
348 // Todo: Optimize the naive implementation here
350 /// Associated type for `<&str as Pattern<'a>>::Searcher`.
352 pub struct StrSearcher<'a, 'b> {
360 #[derive(Clone, PartialEq)]
361 enum State { Done, NotDone, Reject(usize, usize) }
363 #[inline] fn done(&self) -> bool { *self == State::Done }
364 #[inline] fn take(&mut self) -> State { ::mem::replace(self, State::NotDone) }
367 /// Non-allocating substring search.
369 /// Will handle the pattern `""` as returning empty matches at each utf8
371 impl<'a, 'b> Pattern<'a> for &'b str {
372 type Searcher = StrSearcher<'a, 'b>;
375 fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
381 state: State::NotDone,
386 unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
388 fn haystack(&self) -> &'a str {
393 fn next(&mut self) -> SearchStep {
394 str_search_step(self,
395 |m: &mut StrSearcher| {
396 // Forward step for empty needle
397 let current_start = m.start;
399 m.start = m.haystack.char_range_at(current_start).next;
400 m.state = State::Reject(current_start, m.start);
402 SearchStep::Match(current_start, current_start)
404 |m: &mut StrSearcher| {
405 // Forward step for nonempty needle
406 let current_start = m.start;
407 // Compare byte window because this might break utf8 boundaries
408 let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
409 if possible_match == m.needle.as_bytes() {
410 m.start += m.needle.len();
411 SearchStep::Match(current_start, m.start)
414 let haystack_suffix = &m.haystack[m.start..];
415 m.start += haystack_suffix.chars().next().unwrap().len_utf8();
416 SearchStep::Reject(current_start, m.start)
422 unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
424 fn next_back(&mut self) -> SearchStep {
425 str_search_step(self,
426 |m: &mut StrSearcher| {
427 // Backward step for empty needle
428 let current_end = m.end;
430 m.end = m.haystack.char_range_at_reverse(current_end).next;
431 m.state = State::Reject(m.end, current_end);
433 SearchStep::Match(current_end, current_end)
435 |m: &mut StrSearcher| {
436 // Backward step for nonempty needle
437 let current_end = m.end;
438 // Compare byte window because this might break utf8 boundaries
439 let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
440 if possible_match == m.needle.as_bytes() {
441 m.end -= m.needle.len();
442 SearchStep::Match(m.end, current_end)
445 let haystack_prefix = &m.haystack[..m.end];
446 m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
447 SearchStep::Reject(m.end, current_end)
453 // Helper function for encapsulating the common control flow
454 // of doing a search step from the front or doing a search step from the back
455 fn str_search_step<F, G>(mut m: &mut StrSearcher,
456 empty_needle_step: F,
457 nonempty_needle_step: G) -> SearchStep
458 where F: FnOnce(&mut StrSearcher) -> SearchStep,
459 G: FnOnce(&mut StrSearcher) -> SearchStep
463 } else if m.needle.is_empty() && m.start <= m.end {
464 // Case for needle == ""
465 if let State::Reject(a, b) = m.state.take() {
466 SearchStep::Reject(a, b)
468 if m.start == m.end {
469 m.state = State::Done;
471 empty_needle_step(&mut m)
473 } else if m.start + m.needle.len() <= m.end {
474 // Case for needle != ""
475 nonempty_needle_step(&mut m)
476 } else if m.start < m.end {
477 // Remaining slice shorter than needle, reject it
478 m.state = State::Done;
479 SearchStep::Reject(m.start, m.end)
481 m.state = State::Done;
486 /////////////////////////////////////////////////////////////////////////////
488 macro_rules! pattern_methods {
489 ($t:ty, $pmap:expr, $smap:expr) => {
493 fn into_searcher(self, haystack: &'a str) -> $t {
494 ($smap)(($pmap)(self).into_searcher(haystack))
498 fn is_contained_in(self, haystack: &'a str) -> bool {
499 ($pmap)(self).is_contained_in(haystack)
503 fn is_prefix_of(self, haystack: &'a str) -> bool {
504 ($pmap)(self).is_prefix_of(haystack)
508 fn is_suffix_of(self, haystack: &'a str) -> bool
509 where $t: ReverseSearcher<'a>
511 ($pmap)(self).is_suffix_of(haystack)
516 macro_rules! searcher_methods {
519 fn haystack(&self) -> &'a str {
523 fn next(&mut self) -> SearchStep {
527 fn next_match(&mut self) -> Option<(usize, usize)> {
531 fn next_reject(&mut self) -> Option<(usize, usize)> {
537 fn next_back(&mut self) -> SearchStep {
541 fn next_match_back(&mut self) -> Option<(usize, usize)> {
542 self.0.next_match_back()
545 fn next_reject_back(&mut self) -> Option<(usize, usize)> {
546 self.0.next_reject_back()
551 /////////////////////////////////////////////////////////////////////////////
553 /////////////////////////////////////////////////////////////////////////////
555 /// Associated type for `<char as Pattern<'a>>::Searcher`.
557 pub struct CharSearcher<'a>(<CharEqPattern<char> as Pattern<'a>>::Searcher);
559 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
560 searcher_methods!(forward);
563 unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
564 searcher_methods!(reverse);
567 impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
569 /// Searches for chars that are equal to a given char
570 impl<'a> Pattern<'a> for char {
571 pattern_methods!(CharSearcher<'a>, CharEqPattern, CharSearcher);
574 /////////////////////////////////////////////////////////////////////////////
576 /////////////////////////////////////////////////////////////////////////////
578 // Todo: Change / Remove due to ambiguity in meaning.
580 /// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
582 pub struct CharSliceSearcher<'a, 'b>(<CharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
584 unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
585 searcher_methods!(forward);
588 unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
589 searcher_methods!(reverse);
592 impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
594 /// Searches for chars that are equal to any of the chars in the array
595 impl<'a, 'b> Pattern<'a> for &'b [char] {
596 pattern_methods!(CharSliceSearcher<'a, 'b>, CharEqPattern, CharSliceSearcher);
599 /////////////////////////////////////////////////////////////////////////////
600 // Impl for F: FnMut(char) -> bool
601 /////////////////////////////////////////////////////////////////////////////
603 /// Associated type for `<F as Pattern<'a>>::Searcher`.
605 pub struct CharPredicateSearcher<'a, F>(<CharEqPattern<F> as Pattern<'a>>::Searcher)
606 where F: FnMut(char) -> bool;
608 unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
609 where F: FnMut(char) -> bool
611 searcher_methods!(forward);
614 unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
615 where F: FnMut(char) -> bool
617 searcher_methods!(reverse);
620 impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F>
621 where F: FnMut(char) -> bool {}
623 /// Searches for chars that match the given predicate
624 impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
625 pattern_methods!(CharPredicateSearcher<'a, F>, CharEqPattern, CharPredicateSearcher);
628 /////////////////////////////////////////////////////////////////////////////
630 /////////////////////////////////////////////////////////////////////////////
632 /// Delegates to the `&str` impl.
633 impl<'a, 'b> Pattern<'a> for &'b &'b str {
634 pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);