1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! String manipulation
13 //! For more details, see std::str
15 #![doc(primitive = "str")]
21 use cmp::{PartialEq, Eq};
22 use container::Container;
24 use iter::{Filter, Map, Iterator};
25 use iter::{DoubleEndedIterator, ExactSize};
28 use option::{None, Option, Some};
30 use slice::ImmutableVector;
35 Section: Creating a string
38 /// Converts a vector to a string slice without performing any allocations.
40 /// Once the slice has been validated as utf-8, it is transmuted in-place and
41 /// returned as a '&str' instead of a '&[u8]'
43 /// Returns None if the slice is not utf-8.
44 pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
46 Some(unsafe { raw::from_utf8(v) })
50 /// Something that can be used to compare against a character
52 /// Determine if the splitter should split at the given character
53 fn matches(&mut self, char) -> bool;
54 /// Indicate if this is only concerned about ASCII characters,
55 /// which can allow for a faster implementation.
56 fn only_ascii(&self) -> bool;
59 impl CharEq for char {
61 fn matches(&mut self, c: char) -> bool { *self == c }
64 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
67 impl<'a> CharEq for |char|: 'a -> bool {
69 fn matches(&mut self, c: char) -> bool { (*self)(c) }
72 fn only_ascii(&self) -> bool { false }
75 impl CharEq for extern "Rust" fn(char) -> bool {
77 fn matches(&mut self, c: char) -> bool { (*self)(c) }
80 fn only_ascii(&self) -> bool { false }
83 impl<'a> CharEq for &'a [char] {
85 fn matches(&mut self, c: char) -> bool {
86 self.iter().any(|&mut m| m.matches(c))
90 fn only_ascii(&self) -> bool {
91 self.iter().all(|m| m.only_ascii())
99 /// External iterator for a string's characters.
100 /// Use with the `std::iter` module.
102 pub struct Chars<'a> {
103 /// The slice remaining to be iterated
// Forward iteration over the chars of the remaining slice.
107 impl<'a> Iterator<char> for Chars<'a> {
109 fn next(&mut self) -> Option<char> {
110 // Decode the next codepoint, then update
111 // the slice to be just the remaining part
112 if self.string.len() != 0 {
113 let CharRange {ch, next} = self.string.char_range_at(0);
// `next` is the byte index at which the following char starts, so the
// unchecked slice below begins on a char boundary and ends at the end of
// the string.
115 self.string = raw::slice_unchecked(self.string, next, self.string.len());
// Each char occupies between one and four bytes, so at least
// ceil(len / 4) and at most len chars remain in the slice.
124 fn size_hint(&self) -> (uint, Option<uint>) {
125 (self.string.len().saturating_add(3)/4, Some(self.string.len()))
129 impl<'a> DoubleEndedIterator<char> for Chars<'a> {
131 fn next_back(&mut self) -> Option<char> {
132 if self.string.len() != 0 {
133 let CharRange {ch, next} = self.string.char_range_at_reverse(self.string.len());
135 self.string = raw::slice_unchecked(self.string, 0, next);
144 /// External iterator for a string's characters and their byte offsets.
145 /// Use with the `std::iter` module.
147 pub struct CharOffsets<'a> {
148 /// The original string to be iterated
153 impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
155 fn next(&mut self) -> Option<(uint, char)> {
156 // Compute the byte offset by using the pointer offset between
157 // the original string slice and the iterator's remaining part
158 let offset = self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
159 self.iter.next().map(|ch| (offset, ch))
163 fn size_hint(&self) -> (uint, Option<uint>) {
164 self.iter.size_hint()
168 impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
170 fn next_back(&mut self) -> Option<(uint, char)> {
171 self.iter.next_back().map(|ch| {
172 let offset = self.iter.string.len() +
173 self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
179 /// External iterator for a string's bytes.
180 /// Use with the `std::iter` module.
182 Map<'a, &'a u8, u8, slice::Items<'a, u8>>;
184 /// An iterator over the substrings of a string, separated by `sep`.
186 pub struct CharSplits<'a, Sep> {
187 /// The slice remaining to be iterated
190 /// Whether an empty string at the end is allowed
191 allow_trailing_empty: bool,
196 /// An iterator over the substrings of a string, separated by `sep`,
197 /// splitting at most `count` times.
199 pub struct CharSplitsN<'a, Sep> {
200 iter: CharSplits<'a, Sep>,
201 /// The number of splits remaining
206 /// An iterator over the words of a string, separated by a sequence of whitespace
208 Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
210 /// An iterator over the lines of a string, separated by either `\n` or `\r\n`.
211 pub type AnyLines<'a> =
212 Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
214 impl<'a, Sep> CharSplits<'a, Sep> {
216 fn get_end(&mut self) -> Option<&'a str> {
217 if !self.finished && (self.allow_trailing_empty || self.string.len() > 0) {
218 self.finished = true;
// Forward iteration: each call cuts the next piece off the front of
// `self.string`.
226 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplits<'a, Sep> {
228 fn next(&mut self) -> Option<&'a str> {
229 if self.finished { return None }
231 let mut next_split = None;
// Byte-wise scan: only bytes below 128 can match, so a separator found
// here is always exactly one byte wide (hence `idx + 1`).
233 for (idx, byte) in self.string.bytes().enumerate() {
234 if self.sep.matches(byte as char) && byte < 128u8 {
235 next_split = Some((idx, idx + 1));
// Char-wise scan: the separator may be several bytes wide, so its end is
// taken from `char_range_at`.
240 for (idx, ch) in self.string.char_indices() {
241 if self.sep.matches(ch) {
242 next_split = Some((idx, self.string.char_range_at(idx).next));
// `a` is the byte index where the separator starts and `b` the index just
// past it: the piece is everything before `a`, and the remaining string
// restarts at `b`.
248 Some((a, b)) => unsafe {
249 let elt = raw::slice_unchecked(self.string, 0, a);
250 self.string = raw::slice_unchecked(self.string, b, self.string.len());
// No separator left: defer to `get_end` for the final piece.
253 None => self.get_end(),
258 impl<'a, Sep: CharEq> DoubleEndedIterator<&'a str>
259 for CharSplits<'a, Sep> {
261 fn next_back(&mut self) -> Option<&'a str> {
262 if self.finished { return None }
264 if !self.allow_trailing_empty {
265 self.allow_trailing_empty = true;
266 match self.next_back() {
267 Some(elt) if !elt.is_empty() => return Some(elt),
268 _ => if self.finished { return None }
271 let len = self.string.len();
272 let mut next_split = None;
275 for (idx, byte) in self.string.bytes().enumerate().rev() {
276 if self.sep.matches(byte as char) && byte < 128u8 {
277 next_split = Some((idx, idx + 1));
282 for (idx, ch) in self.string.char_indices().rev() {
283 if self.sep.matches(ch) {
284 next_split = Some((idx, self.string.char_range_at(idx).next));
290 Some((a, b)) => unsafe {
291 let elt = raw::slice_unchecked(self.string, b, len);
292 self.string = raw::slice_unchecked(self.string, 0, a);
295 None => { self.finished = true; Some(self.string) }
300 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplitsN<'a, Sep> {
302 fn next(&mut self) -> Option<&'a str> {
305 if self.invert { self.iter.next_back() } else { self.iter.next() }
312 /// The internal state of an iterator that searches for matches of a substring
313 /// within a larger string using naive search
315 struct NaiveSearcher {
320 fn new() -> NaiveSearcher {
321 NaiveSearcher { position: 0 }
// Look for the next occurrence of `needle` in `haystack`, starting at
// `self.position` and comparing one window of `needle.len()` bytes at a time.
// A hit is returned as a (start, end) pair of byte indices, `end` exclusive.
324 fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(uint, uint)> {
325 while self.position + needle.len() <= haystack.len() {
326 if haystack.slice(self.position, self.position + needle.len()) == needle {
327 let matchPos = self.position;
// Resume right after this match so that reported matches never overlap.
// The trailing remark records the alternative: stepping by one byte
// instead would also report overlapping matches.
328 self.position += needle.len(); // add 1 for all matches
329 return Some((matchPos, matchPos + needle.len()));
338 /// The internal state of an iterator that searches for matches of a substring
339 /// within a larger string using two-way search
341 struct TwoWaySearcher {
352 impl TwoWaySearcher {
// Precompute the two-way search state for `needle`.
353 fn new(needle: &[u8]) -> TwoWaySearcher {
// Run the maximal-suffix computation once per value of `reversed`; the two
// candidate critical positions are compared just below.
354 let (critPos1, period1) = TwoWaySearcher::maximal_suffix(needle, false);
355 let (critPos2, period2) = TwoWaySearcher::maximal_suffix(needle, true);
359 if critPos1 > critPos2 {
// Build a bitset over the low six bits of every needle byte. `next` tests
// the last haystack byte of each window against it and skips ahead by the
// whole needle length when that bit is not set.
367 let byteset = needle.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a);
369 if needle.slice_to(critPos) == needle.slice_from(needle.len() - critPos) {
381 period: cmp::max(critPos, needle.len() - critPos) + 1,
385 memory: uint::MAX // Dummy value to signify that the period is long
// Find the next occurrence of `needle` in `haystack` at or after
// `self.position` and return its (start, end) byte range, `end` exclusive.
// When `longPeriod` is true, `self.memory` is not read when choosing where
// the two comparison loops start: the right half starts at `critPos` and
// the left half covers everything before `critPos`.
391 fn next(&mut self, haystack: &[u8], needle: &[u8], longPeriod: bool) -> Option<(uint, uint)> {
393 // Check that we have room to search in
394 if self.position + needle.len() > haystack.len() {
398 // Quickly skip by large portions unrelated to our substring
399 if (self.byteset >> (haystack[self.position + needle.len() - 1] & 0x3f)) & 1 == 0 {
400 self.position += needle.len();
404 // See if the right part of the needle matches
405 let start = if longPeriod { self.critPos } else { cmp::max(self.critPos, self.memory) };
406 for i in range(start, needle.len()) {
407 if needle[i] != haystack[self.position + i] {
408 self.position += i - self.critPos + 1;
416 // See if the left part of the needle matches
417 let start = if longPeriod { 0 } else { self.memory };
418 for i in range(start, self.critPos).rev() {
419 if needle[i] != haystack[self.position + i] {
420 self.position += self.period;
422 self.memory = needle.len() - self.period;
428 // We have found a match!
429 let matchPos = self.position;
// Step over the whole match so that reported matches are disjoint; the
// trailing remarks on the next two statements record what to use instead
// if overlapping matches were wanted.
430 self.position += needle.len(); // add self.period for all matches
432 self.memory = 0; // set to needle.len() - self.period for all matches
434 return Some((matchPos, matchPos + needle.len()));
// Returns a pair that `new` binds as (critical position, period).
// `reversed` selects which of the two orderings is used -- presumably by
// swapping the roles of the two bytes read below; the selecting branch is
// not visible in this view.
439 fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
// NOTE(review): `left` is used as a slice index (always as `left + offset`,
// with `offset` starting at 1), so it is presumably a `uint`; seeding it with
// -1 then relies on wrapping arithmetic -- confirm.
440 let mut left = -1; // Corresponds to i in the paper
441 let mut right = 0; // Corresponds to j in the paper
442 let mut offset = 1; // Corresponds to k in the paper
443 let mut period = 1; // Corresponds to p in the paper
445 while right + offset < arr.len() {
// The two assignments pairs below read the same two bytes in opposite roles.
449 a = arr[left + offset];
450 b = arr[right + offset];
452 a = arr[right + offset];
453 b = arr[left + offset];
456 // Suffix is smaller, period is entire prefix so far.
459 period = right - left;
461 // Advance through repetition of the current period.
462 if offset == period {
469 // Suffix is larger, start over from current location.
480 /// The internal state of an iterator that searches for matches of a substring
481 /// within a larger string using a dynamically chosen search algorithm
484 Naive(NaiveSearcher),
485 TwoWay(TwoWaySearcher),
486 TwoWayLong(TwoWaySearcher)
// Pick the search strategy for this haystack/needle pair: the naive searcher
// when the needle is long relative to the haystack, a two-way searcher
// otherwise.
490 fn new(haystack: &[u8], needle: &[u8]) -> Searcher {
// NOTE(review): assuming `len()` returns `uint`, `haystack.len() - 20` is an
// unsigned subtraction and underflows whenever the haystack is shorter than
// 20 bytes (wrapping to a huge value, or trapping where overflow is checked),
// so short haystacks do not take the naive path as apparently intended.
// Writing the test as `needle.len() + 20 > haystack.len()` would avoid the
// underflow -- confirm intent before changing.
492 if needle.len() > haystack.len() - 20 {
493 Naive(NaiveSearcher::new())
495 let searcher = TwoWaySearcher::new(needle);
// `TwoWaySearcher::new` stores `uint::MAX` in `memory` as its long-period
// marker.
496 if searcher.memory == uint::MAX { // If the period is long
505 /// An iterator over the start and end indices of the matches of a
506 /// substring within a larger string
508 pub struct MatchIndices<'a> {
515 /// An iterator over the substrings of a string separated by a given
518 pub struct StrSplits<'a> {
519 it: MatchIndices<'a>,
524 impl<'a> Iterator<(uint, uint)> for MatchIndices<'a> {
526 fn next(&mut self) -> Option<(uint, uint)> {
527 match self.searcher {
528 Naive(ref mut searcher)
529 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
530 TwoWay(ref mut searcher)
531 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
532 TwoWayLong(ref mut searcher)
533 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
538 impl<'a> Iterator<&'a str> for StrSplits<'a> {
540 fn next(&mut self) -> Option<&'a str> {
541 if self.finished { return None; }
543 match self.it.next() {
544 Some((from, to)) => {
545 let ret = Some(self.it.haystack.slice(self.last_end, from));
550 self.finished = true;
551 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
558 Section: Comparing strings
561 // share the implementation of the lang-item vs. non-lang-item
564 fn eq_slice_(a: &str, b: &str) -> bool {
566 extern { fn memcmp(s1: *i8, s2: *i8, n: uint) -> i32; }
567 a.len() == b.len() && unsafe {
568 memcmp(a.as_ptr() as *i8,
574 /// Bytewise slice equality
578 pub fn eq_slice(a: &str, b: &str) -> bool {
582 /// Bytewise slice equality
585 pub fn eq_slice(a: &str, b: &str) -> bool {
593 /// Walk through `iter` checking that it's a valid UTF-8 sequence,
594 /// returning `true` in that case, or, if it is invalid, `false` with
595 /// `iter` reset such that it is pointing at the first byte in the
596 /// invalid sequence.
598 fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
600 // save the current thing we're pointing at.
603 // restore the iterator we had at the start of this codepoint.
604 macro_rules! err ( () => { {*iter = old; return false} });
605 macro_rules! next ( () => {
608 // we needed data, but there was none: error!
613 let first = match iter.next() {
615 // we're at the end of the iterator and a codepoint
616 // boundary at the same time, so this string is valid.
620 // ASCII characters are always valid, so only large
621 // bytes need more examination.
623 let w = utf8_char_width(first);
624 let second = next!();
625 // 2-byte encoding is for codepoints \u0080 to \u07ff
626 // first C2 80 last DF BF
627 // 3-byte encoding is for codepoints \u0800 to \uffff
628 // first E0 A0 80 last EF BF BF
629 // excluding surrogates codepoints \ud800 to \udfff
630 // ED A0 80 to ED BF BF
631 // 4-byte encoding is for codepoints \u10000 to \u10ffff
632 // first F0 90 80 80 last F4 8F BF BF
634 // Use the UTF-8 syntax from the RFC
636 // https://tools.ietf.org/html/rfc3629
638 // UTF8-2 = %xC2-DF UTF8-tail
639 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
640 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
641 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
642 // %xF4 %x80-8F 2( UTF8-tail )
644 2 => if second & 192 != TAG_CONT_U8 {err!()},
646 match (first, second, next!() & 192) {
647 (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) |
648 (0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) |
649 (0xED , 0x80 .. 0x9F, TAG_CONT_U8) |
650 (0xEE .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => {}
655 match (first, second, next!() & 192, next!() & 192) {
656 (0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
657 (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
658 (0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
668 /// Determines if a vector of bytes contains valid UTF-8.
669 pub fn is_utf8(v: &[u8]) -> bool {
670 run_utf8_validation_iterator(&mut v.iter())
673 /// Determines if a vector of `u16` contains valid UTF-16
674 pub fn is_utf16(v: &[u16]) -> bool {
675 let mut it = v.iter();
// Yield the next code unit, or return `$ret` from `is_utf16` once the input
// is exhausted.
676 macro_rules! next ( ($ret:expr) => {
677 match it.next() { Some(u) => *u, None => return $ret }
683 match char::from_u32(u as u32) {
// A second unit is required at this point, so running out of input makes
// the whole sequence invalid (presumably this is the arm taken when
// `char::from_u32` rejected `u` -- the arm header is not visible here).
686 let u2 = next!(false);
// `u` must be a leading surrogate and `u2` a trailing one.
// NOTE(review): leading surrogates span 0xD800..0xDBFF, so the 0xD7FF lower
// bound looks off by one. It appears harmless if this test only ever sees
// values that `char::from_u32` rejected -- confirm.
687 if u < 0xD7FF || u > 0xDBFF ||
688 u2 < 0xDC00 || u2 > 0xDFFF { return false; }
694 /// An iterator that decodes UTF-16 encoded codepoints from a vector
697 pub struct Utf16Items<'a> {
698 iter: slice::Items<'a, u16>
700 /// The possibilities for values decoded from a `u16` stream.
701 #[deriving(PartialEq, Eq, Clone, Show)]
703 /// A valid codepoint.
705 /// An invalid surrogate without its pair.
710 /// Convert `self` to a `char`, taking `LoneSurrogate`s to the
711 /// replacement character (U+FFFD).
713 pub fn to_char_lossy(&self) -> char {
716 LoneSurrogate(_) => '\uFFFD'
// Decodes one item per call. A unit outside 0xD800..0xDFFF is a scalar value
// by itself, a trailing surrogate met first is reported as `LoneSurrogate`,
// and a leading surrogate is combined with the trailing surrogate after it.
721 impl<'a> Iterator<Utf16Item> for Utf16Items<'a> {
722 fn next(&mut self) -> Option<Utf16Item> {
723 let u = match self.iter.next() {
728 if u < 0xD800 || 0xDFFF < u {
730 Some(ScalarValue(unsafe {mem::transmute(u as u32)}))
731 } else if u >= 0xDC00 {
732 // a trailing surrogate
733 Some(LoneSurrogate(u))
735 // preserve state for rewinding.
738 let u2 = match self.iter.next() {
// Input ended right after a leading surrogate.
741 None => return Some(LoneSurrogate(u))
743 if u2 < 0xDC00 || u2 > 0xDFFF {
744 // not a trailing surrogate so we're not a valid
745 // surrogate pair, so rewind to redecode u2 next time.
747 return Some(LoneSurrogate(u))
750 // all ok, so lets decode it.
// The leading surrogate supplies the high ten bits and the trailing one the
// low ten bits; the result is then offset by 0x10000.
751 let c = ((u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
752 Some(ScalarValue(unsafe {mem::transmute(c)}))
757 fn size_hint(&self) -> (uint, Option<uint>) {
758 let (low, high) = self.iter.size_hint();
759 // we could be entirely valid surrogates (2 elements per
760 // char), or entirely non-surrogates (1 element per char)
765 /// Create an iterator over the UTF-16 encoded codepoints in `v`,
766 /// returning invalid surrogates as `LoneSurrogate`s.
772 /// use std::str::{ScalarValue, LoneSurrogate};
774 /// // 𝄞mus<invalid>ic<invalid>
775 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
776 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
779 /// assert_eq!(str::utf16_items(v).collect::<Vec<_>>(),
780 /// vec![ScalarValue('𝄞'),
781 /// ScalarValue('m'), ScalarValue('u'), ScalarValue('s'),
782 /// LoneSurrogate(0xDD1E),
783 /// ScalarValue('i'), ScalarValue('c'),
784 /// LoneSurrogate(0xD834)]);
786 pub fn utf16_items<'a>(v: &'a [u16]) -> Utf16Items<'a> {
787 Utf16Items { iter : v.iter() }
790 /// Return a slice of `v` ending at (and not including) the first NUL
799 /// let mut v = ['a' as u16, 'b' as u16, 'c' as u16, 'd' as u16];
800 /// // no NULs so no change
801 /// assert_eq!(str::truncate_utf16_at_nul(v), v.as_slice());
805 /// assert_eq!(str::truncate_utf16_at_nul(v),
806 /// &['a' as u16, 'b' as u16]);
808 pub fn truncate_utf16_at_nul<'a>(v: &'a [u16]) -> &'a [u16] {
809 match v.iter().position(|c| *c == 0) {
810 // don't include the 0
811 Some(i) => v.slice_to(i),
// Number of bytes in the UTF-8 sequence introduced by each possible first
// byte, indexed by that byte. A 0 entry marks a byte that cannot start a
// sequence: 0x80-0xBF, 0xC0, 0xC1 and 0xF5-0xFF.
816 // https://tools.ietf.org/html/rfc3629
817 static UTF8_CHAR_WIDTH: [u8, ..256] = [
818 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
819 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
820 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
821 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
822 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
823 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
824 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
825 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
826 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
827 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
828 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
829 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
830 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
831 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
832 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
833 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
836 /// Given a first byte, determine how many bytes are in this UTF-8 character
///
/// The width is read from the `UTF8_CHAR_WIDTH` table, so the result is 0
/// for a byte that cannot start a character (for example a continuation
/// byte in 0x80-0xBF) and between 1 and 4 otherwise.
838 pub fn utf8_char_width(b: u8) -> uint {
839 return UTF8_CHAR_WIDTH[b as uint] as uint;
842 /// Struct that contains a `char` and the index of the first byte of
843 /// the next `char` in a string. This can be used as a data structure
844 /// for iterating over the UTF-8 bytes of a string.
845 pub struct CharRange {
848 /// Index of the first byte of the next `char`
852 // Return the initial codepoint accumulator for the first byte.
853 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
854 // for width 3, and 3 bits for width 4
855 macro_rules! utf8_first_byte(
856 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
859 // return the value of $ch updated with continuation byte $byte
860 macro_rules! utf8_acc_cont_byte(
861 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
864 static TAG_CONT_U8: u8 = 128u8;
866 /// Unsafe operations
869 use container::Container;
873 use slice::{ImmutableVector};
874 use str::{is_utf8, StrSlice};
876 /// Converts a slice of bytes to a string slice without checking
877 /// that the string contains valid UTF-8.
878 pub unsafe fn from_utf8<'a>(v: &'a [u8]) -> &'a str {
882 /// Form a slice from a C string. Unsafe because the caller must ensure the
883 /// C string has the static lifetime, or else the return value may be
884 /// invalidated later.
885 pub unsafe fn c_str_to_static_slice(s: *i8) -> &'static str {
891 curr = s.offset(len as int);
893 let v = Slice { data: s, len: len };
894 assert!(is_utf8(::mem::transmute(v)));
898 /// Takes a bytewise (not UTF-8) slice from a string.
900 /// Returns the substring from [`begin`..`end`).
904 /// If begin is greater than end.
905 /// If end is greater than the length of the string.
907 pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
908 assert!(begin <= end);
909 assert!(end <= s.len());
910 slice_unchecked(s, begin, end)
913 /// Takes a bytewise (not UTF-8) slice from a string.
915 /// Returns the substring from [`begin`..`end`).
917 /// Caller must check slice boundaries!
919 pub unsafe fn slice_unchecked<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
920 mem::transmute(Slice {
921 data: s.as_ptr().offset(begin as int),
928 Section: Trait implementations
932 #[allow(missing_doc)]
934 use container::Container;
935 use cmp::{Ord, Ordering, Less, Equal, Greater, PartialEq, PartialOrd, Equiv, Eq};
937 use option::{Some, None};
938 use str::{Str, StrSlice, eq_slice};
// Total ordering on string slices: bytewise comparison, with length as the
// tiebreak.
940 impl<'a> Ord for &'a str {
942 fn cmp(&self, other: & &'a str) -> Ordering {
// Walk both strings in lockstep over their common length and stop at the
// first pair of bytes that differs.
943 for (s_b, o_b) in self.bytes().zip(other.bytes()) {
944 match s_b.cmp(&o_b) {
945 Greater => return Greater,
// One string is a prefix of the other (or they are equal): the shorter one
// orders first.
951 self.len().cmp(&other.len())
955 impl<'a> PartialEq for &'a str {
957 fn eq(&self, other: & &'a str) -> bool {
958 eq_slice((*self), (*other))
961 fn ne(&self, other: & &'a str) -> bool { !(*self).eq(other) }
964 impl<'a> Eq for &'a str {}
966 impl<'a> PartialOrd for &'a str {
968 fn lt(&self, other: & &'a str) -> bool { self.cmp(other) == Less }
971 impl<'a, S: Str> Equiv<S> for &'a str {
973 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
980 /// Any string that can be represented as a slice
982 /// Work with `self` as a slice.
983 fn as_slice<'a>(&'a self) -> &'a str;
986 impl<'a> Str for &'a str {
988 fn as_slice<'a>(&'a self) -> &'a str { *self }
991 impl<'a> Container for &'a str {
993 fn len(&self) -> uint {
998 /// Methods for string slices
999 pub trait StrSlice<'a> {
1000 /// Returns true if one string contains another
1004 /// - needle - The string to look for
1005 fn contains<'a>(&self, needle: &'a str) -> bool;
1007 /// Returns true if a string contains a char.
1011 /// - needle - The char to look for
1012 fn contains_char(&self, needle: char) -> bool;
1014 /// An iterator over the characters of `self`. Note, this iterates
1015 /// over unicode code-points, not unicode graphemes.
1020 /// let v: Vec<char> = "abc åäö".chars().collect();
1021 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1023 fn chars(&self) -> Chars<'a>;
1025 /// An iterator over the bytes of `self`
1026 fn bytes(&self) -> Bytes<'a>;
1028 /// An iterator over the characters of `self` and their byte offsets.
1029 fn char_indices(&self) -> CharOffsets<'a>;
1031 /// An iterator over substrings of `self`, separated by characters
1032 /// matched by `sep`.
1037 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
1038 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1040 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).collect();
1041 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
1043 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
1044 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
1046 /// let v: Vec<&str> = "".split('X').collect();
1047 /// assert_eq!(v, vec![""]);
1049 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
1051 /// An iterator over substrings of `self`, separated by characters
1052 /// matched by `sep`, restricted to splitting at most `count`
1058 /// let v: Vec<&str> = "Mary had a little lambda".splitn(' ', 2).collect();
1059 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
1061 /// let v: Vec<&str> = "abc1def2ghi".splitn(|c: char| c.is_digit(), 1).collect();
1062 /// assert_eq!(v, vec!["abc", "def2ghi"]);
1064 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn('X', 2).collect();
1065 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
1067 /// let v: Vec<&str> = "abcXdef".splitn('X', 0).collect();
1068 /// assert_eq!(v, vec!["abcXdef"]);
1070 /// let v: Vec<&str> = "".splitn('X', 1).collect();
1071 /// assert_eq!(v, vec![""]);
1073 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
1075 /// An iterator over substrings of `self`, separated by characters
1076 /// matched by `sep`.
1078 /// Equivalent to `split`, except that the trailing substring
1079 /// is skipped if empty (terminator semantics).
1084 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
1085 /// assert_eq!(v, vec!["A", "B"]);
1087 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
1088 /// assert_eq!(v, vec!["A", "", "B", ""]);
1090 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
1091 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
1093 /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect();
1094 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
1096 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
1097 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
1099 fn split_terminator<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
1101 /// An iterator over substrings of `self`, separated by characters
1102 /// matched by `sep`, starting from the end of the string.
1103 /// Restricted to splitting at most `count` times.
1108 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(' ', 2).collect();
1109 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
1111 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(|c: char| c.is_digit(), 1).collect();
1112 /// assert_eq!(v, vec!["ghi", "abc1def"]);
1114 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn('X', 2).collect();
1115 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
1117 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
1119 /// An iterator over the start and end indices of the disjoint
1120 /// matches of `sep` within `self`.
1122 /// That is, each returned value `(start, end)` satisfies
1123 /// `self.slice(start, end) == sep`. For matches of `sep` within
1124 /// `self` that overlap, only the indices corresponding to the
1125 /// first match are returned.
1130 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
1131 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
1133 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
1134 /// assert_eq!(v, vec![(1,4), (4,7)]);
1136 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
1137 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
1139 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a>;
1141 /// An iterator over the substrings of `self` separated by `sep`.
1146 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
1147 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
1149 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
1150 /// assert_eq!(v, vec!["1", "", "2"]);
1152 fn split_str(&self, &'a str) -> StrSplits<'a>;
1154 /// An iterator over the lines of a string (subsequences separated
1155 /// by `\n`). This does not include the empty string after a
1161 /// let four_lines = "foo\nbar\n\nbaz\n";
1162 /// let v: Vec<&str> = four_lines.lines().collect();
1163 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1165 fn lines(&self) -> CharSplits<'a, char>;
1167 /// An iterator over the lines of a string, separated by either
1168 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
1169 /// empty trailing line.
1174 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1175 /// let v: Vec<&str> = four_lines.lines_any().collect();
1176 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1178 fn lines_any(&self) -> AnyLines<'a>;
1180 /// An iterator over the words of a string (subsequences separated
1181 /// by any sequence of whitespace). Sequences of whitespace are
1182 /// collapsed, so empty "words" are not included.
1187 /// let some_words = " Mary had\ta little \n\t lamb";
1188 /// let v: Vec<&str> = some_words.words().collect();
1189 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1191 fn words(&self) -> Words<'a>;
1193 /// Returns true if the string contains only whitespace.
1195 /// Whitespace characters are determined by `char::is_whitespace`.
1200 /// assert!(" \t\n".is_whitespace());
1201 /// assert!("".is_whitespace());
1203 /// assert!( !"abc".is_whitespace());
1205 fn is_whitespace(&self) -> bool;
1207 /// Returns true if the string contains only alphanumeric code
1210 /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1215 /// assert!("Löwe老虎Léopard123".is_alphanumeric());
1216 /// assert!("".is_alphanumeric());
1218 /// assert!( !" &*~".is_alphanumeric());
1220 fn is_alphanumeric(&self) -> bool;
1222 /// Returns the number of Unicode code points (`char`) that a
1225 /// This does not perform any normalization, and is `O(n)`, since
1226 /// UTF-8 is a variable width encoding of code points.
1228 /// *Warning*: The number of code points in a string does not directly
1229 /// correspond to the number of visible characters or width of the
1230 /// visible text due to composing characters, and double- and
1231 /// zero-width ones.
1233 /// See also `.len()` for the byte length.
1238 /// // composed forms of `ö` and `é`
1239 /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1240 /// // decomposed forms of `ö` and `é`
1241 /// let d = "Lo\u0308we 老虎 Le\u0301opard";
1243 /// assert_eq!(c.char_len(), 15);
1244 /// assert_eq!(d.char_len(), 17);
1246 /// assert_eq!(c.len(), 21);
1247 /// assert_eq!(d.len(), 23);
1249 /// // the two strings *look* the same
1250 /// println!("{}", c);
1251 /// println!("{}", d);
1253 fn char_len(&self) -> uint;
1255 /// Returns a slice of the given string from the byte range
1256 /// [`begin`..`end`).
1258 /// This operation is `O(1)`.
1260 /// Fails when `begin` or `end` does not point to a valid character
1261 /// or points beyond the last character of the string.
1263 /// See also `slice_to` and `slice_from` for slicing prefixes and
1264 /// suffixes of strings, and `slice_chars` for slicing based on
1265 /// code point counts.
1270 /// let s = "Löwe 老虎 Léopard";
1271 /// assert_eq!(s.slice(0, 1), "L");
1273 /// assert_eq!(s.slice(1, 9), "öwe 老");
1275 /// // these will fail:
1276 /// // byte 2 lies within `ö`:
1277 /// // s.slice(2, 3);
1279 /// // byte 8 lies within `老`
1280 /// // s.slice(1, 8);
1282 /// // byte 100 is outside the string
1283 /// // s.slice(3, 100);
1285 fn slice(&self, begin: uint, end: uint) -> &'a str;
1287 /// Returns a slice of the string from `begin` to its end.
1289 /// Equivalent to `self.slice(begin, self.len())`.
1291 /// Fails when `begin` does not point to a valid character, or is
1294 /// See also `slice`, `slice_to` and `slice_chars`.
1295 fn slice_from(&self, begin: uint) -> &'a str;
1297 /// Returns a slice of the string from the beginning to byte
1300 /// Equivalent to `self.slice(0, end)`.
1302 /// Fails when `end` does not point to a valid character, or is
1305 /// See also `slice`, `slice_from` and `slice_chars`.
1306 fn slice_to(&self, end: uint) -> &'a str;
1308 /// Returns a slice of the string from the character range
1309 /// [`begin`..`end`).
1311 /// That is, start at the `begin`-th code point of the string and
1312 /// continue to the `end`-th code point. This does not detect or
1313 /// handle edge cases such as leaving a combining character as the
1314 /// first code point of the string.
1316 /// Due to the design of UTF-8, this operation is `O(end)`.
1317 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
1318 /// variants that use byte indices rather than code point
1321 /// Fails if `begin` > `end`, or if either `begin` or `end` is
1322 /// beyond the last character of the string.
1327 /// let s = "Löwe 老虎 Léopard";
1328 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1329 /// assert_eq!(s.slice_chars(5, 7), "老虎");
1331 fn slice_chars(&self, begin: uint, end: uint) -> &'a str;
1333 /// Returns true if `needle` is a prefix of the string.
1334 fn starts_with(&self, needle: &str) -> bool;
1336 /// Returns true if `needle` is a suffix of the string.
1337 fn ends_with(&self, needle: &str) -> bool;
1339 /// Returns a string with leading and trailing whitespace removed.
1340 fn trim(&self) -> &'a str;
1342 /// Returns a string with leading whitespace removed.
1343 fn trim_left(&self) -> &'a str;
1345 /// Returns a string with trailing whitespace removed.
1346 fn trim_right(&self) -> &'a str;
1348 /// Returns a string with characters that match `to_trim` removed.
1352 /// * to_trim - a character matcher
1357 /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar")
1358 /// assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar")
1359 /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar")
1361 fn trim_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1363 /// Returns a string with leading characters that match `to_trim` removed.
1367 /// * to_trim - a character matcher
1372 /// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11")
1373 /// assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12")
1374 /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123")
1376 fn trim_left_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1378 /// Returns a string with trailing characters that match `to_trim` removed.
1382 /// * to_trim - a character matcher
1387 /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar")
1388 /// assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar")
1389 /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar")
1391 fn trim_right_chars<C: CharEq>(&self, to_trim: C) -> &'a str;
1393 /// Check that `index`-th byte lies at the start and/or end of a
1394 /// UTF-8 code point sequence.
1396 /// The start and end of the string (when `index == self.len()`)
1397 /// are considered to be boundaries.
1399 /// Returns false if `index` is greater than `self.len()`.
1404 /// let s = "Löwe 老虎 Léopard";
1405 /// assert!(s.is_char_boundary(0));
1407 /// assert!(s.is_char_boundary(6));
1408 /// assert!(s.is_char_boundary(s.len()));
1410 /// // second byte of `ö`
1411 /// assert!(!s.is_char_boundary(2));
1413 /// // third byte of `老`
1414 /// assert!(!s.is_char_boundary(8));
1416 fn is_char_boundary(&self, index: uint) -> bool;
1418 /// Pluck a character out of a string and return the index of the next
1421 /// This function can be used to iterate over the unicode characters of a
1426 /// This example manually iterates through the characters of a
1427 /// string; this should normally be done by `.chars()` or
1428 /// `.char_indices`.
1431 /// use std::str::CharRange;
1433 /// let s = "中华Việt Nam";
1435 /// while i < s.len() {
1436 /// let CharRange {ch, next} = s.char_range_at(i);
1437 /// println!("{}: {}", i, ch);
1459 /// * self - The string
1460 /// * start - The byte offset of the char to extract
1464 /// A record {ch: char, next: uint} containing the char value and the byte
1465 /// index of the next unicode character.
1469 /// If `start` is greater than or equal to the length of the string.
1470 /// If `start` is not the index of the beginning of a valid UTF-8 character.
1471 fn char_range_at(&self, start: uint) -> CharRange;
1473 /// Given a byte position and a str, return the previous char and its position.
1475 /// This function can be used to iterate over a unicode string in reverse.
1477 /// Returns 0 for next index if called on start index 0.
1481 /// If `start` is greater than the length of the string.
1482 /// If `start` is not an index following a valid UTF-8 character.
1483 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1485 /// Plucks the character starting at the `i`th byte of a string.
1489 /// If `i` is greater than or equal to the length of the string.
1490 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1491 fn char_at(&self, i: uint) -> char;
1493 /// Plucks the character ending at the `i`th byte of a string.
1497 /// If `i` is greater than the length of the string.
1498 /// If `i` is not an index following a valid UTF-8 character.
1499 fn char_at_reverse(&self, i: uint) -> char;
1501 /// Work with the byte buffer of a string as a byte slice.
1502 fn as_bytes(&self) -> &'a [u8];
1504 /// Returns the byte index of the first character of `self` that
1505 /// matches `search`.
1509 /// `Some` containing the byte index of the first matching character
1510 /// or `None` if there is no match
1515 /// let s = "Löwe 老虎 Léopard";
1517 /// assert_eq!(s.find('L'), Some(0));
1518 /// assert_eq!(s.find('é'), Some(14));
1520 /// // the first space
1521 /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
1523 /// // neither are found
1524 /// assert_eq!(s.find(&['1', '2']), None);
1526 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1528 /// Returns the byte index of the last character of `self` that
1529 /// matches `search`.
1533 /// `Some` containing the byte index of the last matching character
1534 /// or `None` if there is no match.
1539 /// let s = "Löwe 老虎 Léopard";
1541 /// assert_eq!(s.rfind('L'), Some(13));
1542 /// assert_eq!(s.rfind('é'), Some(14));
1544 /// // the second space
1545 /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
1547 /// // searches for an occurrence of either `1` or `2`, but neither are found
1548 /// assert_eq!(s.rfind(&['1', '2']), None);
1550 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1552 /// Returns the byte index of the first matching substring
1556 /// * `needle` - The string to search for
1560 /// `Some` containing the byte index of the first matching substring
1561 /// or `None` if there is no match.
1566 /// let s = "Löwe 老虎 Léopard";
1568 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1569 /// assert_eq!(s.find_str("muffin man"), None);
1571 fn find_str(&self, &str) -> Option<uint>;
1573 /// Retrieves the first character from a string slice and returns
1574 /// it. This does not allocate a new string; instead, it returns a
1575 /// slice that points one character beyond the character that was
1576 /// shifted. If the string does not contain any characters,
1577 /// a tuple of None and an empty string is returned instead.
1582 /// let s = "Löwe 老虎 Léopard";
1583 /// let (c, s1) = s.slice_shift_char();
1584 /// assert_eq!(c, Some('L'));
1585 /// assert_eq!(s1, "öwe 老虎 Léopard");
1587 /// let (c, s2) = s1.slice_shift_char();
1588 /// assert_eq!(c, Some('ö'));
1589 /// assert_eq!(s2, "we 老虎 Léopard");
1591 fn slice_shift_char(&self) -> (Option<char>, &'a str);
1593 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1595 /// Fails if `inner` is not a direct slice contained within self.
1600 /// let string = "a\nb\nc";
1601 /// let lines: Vec<&str> = string.lines().collect();
1602 /// let lines = lines.as_slice();
1604 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1605 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1606 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1608 fn subslice_offset(&self, inner: &str) -> uint;
1610 /// Return an unsafe pointer to the string's buffer.
1612 /// The caller must ensure that the string outlives this pointer,
1613 /// and that it is not reallocated (e.g. by pushing to the
1615 fn as_ptr(&self) -> *u8;
1618 impl<'a> StrSlice<'a> for &'a str {
// Substring test: true exactly when `find_str` reports a position for
// `needle` inside `self`.
1620 fn contains<'a>(&self, needle: &'a str) -> bool {
1621 self.find_str(needle).is_some()
// Character test: true exactly when `find` reports a position for `needle`.
1625 fn contains_char(&self, needle: char) -> bool {
1626 self.find(needle).is_some()
// Builds a `Chars` iterator whose `string` field is this slice.
1630 fn chars(&self) -> Chars<'a> {
1631 Chars{string: *self}
// Iterates over the slice returned by `as_bytes()`; the `map` closure copies
// each byte out of its reference so the iterator yields `u8` values.
1635 fn bytes(&self) -> Bytes<'a> {
1636 self.as_bytes().iter().map(|&b| b)
// Builds a `CharOffsets` that holds this slice together with a fresh
// `chars()` iterator over it.
1640 fn char_indices(&self) -> CharOffsets<'a> {
1641 CharOffsets{string: *self, iter: self.chars()}
// Builds the `CharSplits` iterator for the separator matcher `sep`.
1645 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep> {
// Ask the matcher once, up front, whether it only concerns ASCII characters.
1648 only_ascii: sep.only_ascii(),
// `split` sets this flag to true; `split_terminator` sets it to false.
1650 allow_trailing_empty: true,
// Wraps the iterator returned by `split(sep)` in a `CharSplitsN`.
// NOTE(review): `count` is presumably the cap on the number of splits that
// `CharSplitsN` enforces; confirm against that type.
1656 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1657 -> CharSplitsN<'a, Sep> {
1659 iter: self.split(sep),
// Returns a `CharSplits` whose `allow_trailing_empty` flag is false.
// NOTE(review): presumably otherwise identical to what `split(sep)`
// produces; confirm.
1666 fn split_terminator<Sep: CharEq>(&self, sep: Sep)
1667 -> CharSplits<'a, Sep> {
1669 allow_trailing_empty: false,
// Wraps the iterator returned by `split(sep)` in a `CharSplitsN`, the same
// inner iterator that `splitn` uses.
// NOTE(review): the reverse direction is presumably selected by another
// field of `CharSplitsN`; confirm.
1675 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1676 -> CharSplitsN<'a, Sep> {
1678 iter: self.split(sep),
// Builds a `MatchIndices` for occurrences of `sep` in this slice.
1685 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a> {
// An empty separator is rejected outright.
1686 assert!(!sep.is_empty())
// The search works on raw bytes of both the haystack and the separator.
1690 searcher: Searcher::new(self.as_bytes(), sep.as_bytes())
// Builds a `StrSplits` driven by `match_indices(sep)`. Because
// `match_indices` asserts that its separator is non-empty, an empty `sep`
// fails here as well.
1695 fn split_str(&self, sep: &'a str) -> StrSplits<'a> {
1697 it: self.match_indices(sep),
// Lines are the pieces produced by `split_terminator` with a single
// line-feed character as the separator.
1704 fn lines(&self) -> CharSplits<'a, char> {
1705 self.split_terminator('\n')
// Maps over `lines()`, post-processing each line so that a trailing
// carriage return is not part of the result.
1708 fn lines_any(&self) -> AnyLines<'a> {
1709 self.lines().map(|line| {
// When the final byte is '\r', return the line without it. The `l > 0`
// check runs first so `l - 1` is never evaluated for 0.
// NOTE(review): `l` is presumably `line.len()`; confirm.
1711 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
// Splits on every character accepted by `char::is_whitespace`, then filters
// out the empty pieces.
1717 fn words(&self) -> Words<'a> {
1718 self.split(char::is_whitespace).filter(|s| !s.is_empty())
// True when every character of the slice satisfies `char::is_whitespace`.
1722 fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
// True when every character of the slice satisfies `char::is_alphanumeric`.
1725 fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
// Number of characters (not bytes), obtained by asking the `chars()`
// iterator for its length.
1728 fn char_len(&self) -> uint { self.chars().len() }
// Byte-indexed slicing with boundary validation.
1731 fn slice(&self, begin: uint, end: uint) -> &'a str {
// Both offsets must sit on character boundaries, otherwise the assertion fails.
1732 assert!(self.is_char_boundary(begin) && self.is_char_boundary(end));
// The offsets were validated above, so the byte range is handed to the
// unsafe raw slicing routine.
1733 unsafe { raw::slice_bytes(*self, begin, end) }
// Delegates to `slice`, using `self.len()` as the end offset.
1737 fn slice_from(&self, begin: uint) -> &'a str {
1738 self.slice(begin, self.len())
// Slices from byte 0 up to `end`.
1742 fn slice_to(&self, end: uint) -> &'a str {
// Only `end` is asserted to be a character boundary; the start is fixed at 0.
1743 assert!(self.is_char_boundary(end));
1744 unsafe { raw::slice_bytes(*self, 0, end) }
// Character-indexed slicing: translates the character indices `begin` and
// `end` into byte offsets by walking `char_indices()`, then slices by bytes.
// NOTE(review): `count` presumably tracks the index of the character
// currently being visited; confirm.
1747 fn slice_chars(&self, begin: uint, end: uint) -> &'a str {
// An inverted range is rejected before any scanning happens.
1748 assert!(begin <= end);
// Byte offsets for the two character indices; `None` until found.
1750 let mut begin_byte = None;
1751 let mut end_byte = None;
1753 // This could be even more efficient by not decoding,
1754 // only finding the char boundaries
1755 for (idx, _) in self.char_indices() {
1756 if count == begin { begin_byte = Some(idx); }
// Once the end offset is known there is nothing left to look for.
1757 if count == end { end_byte = Some(idx); break; }
// After the walk: an index equal to `count` that was not assigned inside
// the loop maps to the end of the string, `self.len()`.
1760 if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
1761 if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
// An offset that is still `None` means that index was never reached.
1763 match (begin_byte, end_byte) {
1764 (None, _) => fail!("slice_chars: `begin` is beyond end of string"),
1765 (_, None) => fail!("slice_chars: `end` is beyond end of string"),
// Both offsets came from `char_indices()` or `self.len()`.
1766 (Some(a), Some(b)) => unsafe { raw::slice_bytes(*self, a, b) }
// Byte-wise prefix comparison.
1771 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1772 let n = needle.len();
// The length check comes first, so `slice_to(n)` is only evaluated when
// `self` has at least `n` bytes.
1773 self.len() >= n && needle.as_bytes() == self.as_bytes().slice_to(n)
// Byte-wise suffix comparison.
1777 fn ends_with(&self, needle: &str) -> bool {
// m = length of `self`, n = length of `needle`, both in bytes.
1778 let (m, n) = (self.len(), needle.len());
// The `m >= n` guard comes first, so `m - n` is only computed when it
// cannot underflow.
1779 m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
// Applies `trim_left` first and `trim_right` to its result.
1783 fn trim(&self) -> &'a str {
1784 self.trim_left().trim_right()
// Delegates to `trim_left_chars` with `char::is_whitespace` as the matcher.
1788 fn trim_left(&self) -> &'a str {
1789 self.trim_left_chars(char::is_whitespace)
// Delegates to `trim_right_chars` with `char::is_whitespace` as the matcher.
1793 fn trim_right(&self) -> &'a str {
1794 self.trim_right_chars(char::is_whitespace)
// Trims matching characters from both ends in two passes: first from the
// front, then from the back of what remains.
1798 fn trim_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
// Front pass: locate the first character that `to_trim` does NOT match.
1799 let cur = match self.find(|c: char| !to_trim.matches(c)) {
// `cur` runs from that character to the end of the string.
1801 Some(i) => unsafe { raw::slice_bytes(*self, i, self.len()) }
// Back pass: locate the last character in `cur` that `to_trim` does NOT match.
1803 match cur.rfind(|c: char| !to_trim.matches(c)) {
// `rfind` gives the start of that character; `next` is the byte offset
// just after it, which becomes the exclusive end of the result.
1806 let right = cur.char_range_at(i).next;
1807 unsafe { raw::slice_bytes(cur, 0, right) }
// Trims matching characters from the front only.
1813 fn trim_left_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
// Locate the first character that `to_trim` does NOT match.
1814 match self.find(|c: char| !to_trim.matches(c)) {
// Keep everything from that character through the end of the string.
1816 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
// Trims matching characters from the back only.
1821 fn trim_right_chars<C: CharEq>(&self, mut to_trim: C) -> &'a str {
// Locate the last character that `to_trim` does NOT match.
1822 match self.rfind(|c: char| !to_trim.matches(c)) {
// `last` is where that character starts; `next` is the byte offset just
// after it, so the kept range includes the whole character.
1825 let next = self.char_range_at(last).next;
1826 unsafe { raw::slice_bytes(*self, 0u, next) }
// Reports whether byte offset `index` is a character boundary.
1832 fn is_char_boundary(&self, index: uint) -> bool {
// The end of the string counts as a boundary.
1833 if index == self.len() { return true; }
// Anything past the end is reported as "not a boundary" rather than failing.
1834 if index > self.len() { return false; }
1835 let b = self[index];
// Bytes in 128..192 have the `10xxxxxx` continuation pattern and therefore
// sit inside a multi-byte sequence; every other byte value starts a character.
1836 return b < 128u8 || b >= 192u8;
// Decodes the character that starts at byte `i` and reports the byte offset
// that follows it.
1840 fn char_range_at(&self, i: uint) -> CharRange {
// ASCII fast path: the byte is the character and occupies exactly one byte.
1841 if self[i] < 128u8 {
1842 return CharRange {ch: self[i] as char, next: i + 1 };
1845 // Multibyte case is a fn to allow char_range_at to inline cleanly
1846 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
// The lead byte selects the sequence width `w` through the lookup table.
1847 let mut val = s[i] as u32;
1848 let w = UTF8_CHAR_WIDTH[val as uint] as uint;
// Start from the lead byte, then accumulate one continuation byte per
// remaining position; the third and fourth bytes are read only for wider
// sequences.
1851 val = utf8_first_byte!(val, w);
1852 val = utf8_acc_cont_byte!(val, s[i + 1]);
1853 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1854 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
// The accumulated value is reinterpreted as a `char` via transmute; the
// next character starts `w` bytes after `i`.
1856 return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
1859 return multibyte_char_range_at(*self, i);
// Decodes the character that ends just before byte `start`; the returned
// `next` is the byte offset at which that character begins.
1863 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1864 let mut prev = start;
// Step back one byte; `saturating_sub` keeps a `start` of 0 at 0.
1866 prev = prev.saturating_sub(1);
// ASCII fast path: the byte at `prev` is the whole character.
1867 if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
1869 // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
1870 fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
1871 // while there is a previous byte == 10......
// NOTE(review): the loop body presumably decrements `i` until it reaches
// the lead byte of the sequence; confirm.
1872 while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
// From here on `i` is treated as the lead byte, and the sequence is decoded
// forwards exactly as in `char_range_at`.
1876 let mut val = s[i] as u32;
1877 let w = UTF8_CHAR_WIDTH[val as uint] as uint;
1880 val = utf8_first_byte!(val, w);
1881 val = utf8_acc_cont_byte!(val, s[i + 1]);
1882 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1883 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
// Unlike the forward variant, `next` is the start of the decoded character.
1885 return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
1888 return multibyte_char_range_at_reverse(*self, prev);
// Returns only the `ch` field of `char_range_at(i)`, discarding `next`.
1892 fn char_at(&self, i: uint) -> char {
1893 self.char_range_at(i).ch
// Returns only the `ch` field of `char_range_at_reverse(i)`, discarding `next`.
1897 fn char_at_reverse(&self, i: uint) -> char {
1898 self.char_range_at_reverse(i).ch
// Reinterprets the `&str` as a `&[u8]` with the same lifetime via
// `mem::transmute`.
1902 fn as_bytes(&self) -> &'a [u8] {
1903 unsafe { mem::transmute(*self) }
// Forward search for the first character accepted by `search`; the result
// is a byte index.
1906 fn find<C: CharEq>(&self, mut search: C) -> Option<uint> {
// A matcher that declares itself ASCII-only is run over raw bytes; the
// position in the byte iterator is then the byte index.
1907 if search.only_ascii() {
1908 self.bytes().position(|b| search.matches(b as char))
// Otherwise decode characters and report the byte index of the first match.
1910 for (index, c) in self.char_indices() {
1911 if search.matches(c) { return Some(index); }
// Backward search for the last character accepted by `search`; the result
// is a byte index.
1917 fn rfind<C: CharEq>(&self, mut search: C) -> Option<uint> {
// A matcher that declares itself ASCII-only is run over raw bytes, scanning
// from the back.
1918 if search.only_ascii() {
1919 self.bytes().rposition(|b| search.matches(b as char))
// Otherwise walk the decoded characters in reverse; the first hit is the
// last match in the string.
1921 for (index, c) in self.char_indices().rev() {
1922 if search.matches(c) { return Some(index); }
// Substring search returning a byte index.
1928 fn find_str(&self, needle: &str) -> Option<uint> {
// The empty needle is special-cased before searching; `match_indices`
// asserts that its separator is non-empty.
1929 if needle.is_empty() {
// Otherwise the answer is derived from `match_indices`, keeping only the
// start offset of a match.
// NOTE(review): presumably the first match is the one taken; confirm.
1932 self.match_indices(needle)
1934 .map(|(start, _end)| start)
// Splits off the first character, returning it with the remaining slice.
1939 fn slice_shift_char(&self) -> (Option<char>, &'a str) {
// Nothing to shift: return `None` together with the (empty) input.
1940 if self.is_empty() {
1941 return (None, *self);
// Decode the character at byte 0; `next` is where the remainder begins.
1943 let CharRange {ch, next} = self.char_range_at(0u);
1944 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1945 return (Some(ch), next_s);
// Relates `inner` to `self` by comparing their address ranges as integers.
// NOTE(review): the returned offset is presumably `b_start - a_start`; confirm.
1949 fn subslice_offset(&self, inner: &str) -> uint {
// [a_start, a_end) is the address range of `self`.
1950 let a_start = self.as_ptr() as uint;
1951 let a_end = a_start + self.len();
// [b_start, b_end) is the address range of `inner`.
1952 let b_start = inner.as_ptr() as uint;
1953 let b_end = b_start + inner.len();
// Together these require `inner` to lie entirely within `self`.
1955 assert!(a_start <= b_start);
1956 assert!(b_end <= a_end);
1961 fn as_ptr(&self) -> *u8 {
// The default value of a string slice is the empty string.
1966 impl<'a> Default for &'a str {
1967 fn default() -> &'a str { "" }