1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided. For
17 * some heavy-duty uses, try std::rope.
27 use option::{None, Option, Some};
36 Section: Creating a string
40 * Convert a vector of bytes to a UTF-8 string
44 * Fails if invalid UTF-8
46 pub pure fn from_bytes(vv: &[const u8]) -> ~str {
48 return unsafe { raw::from_bytes(vv) };
51 /// Copy a slice into a new unique str
52 pub pure fn from_slice(s: &str) -> ~str {
53 unsafe { raw::slice_bytes(s, 0, len(s)) }
57 * Convert a byte to a UTF-8 string
61 * Fails if invalid UTF-8
63 pub pure fn from_byte(b: u8) -> ~str {
65 unsafe { ::cast::transmute(~[b, 0u8]) }
68 /// Appends a character at the end of a string
69 pub fn push_char(s: &mut ~str, ch: char) {
71 let code = ch as uint;
72 let nb = if code < max_one_b { 1u }
73 else if code < max_two_b { 2u }
74 else if code < max_three_b { 3u }
75 else if code < max_four_b { 4u }
76 else if code < max_five_b { 5u }
79 let new_len = len + nb;
80 reserve_at_least(&mut *s, new_len);
82 do as_buf(*s) |buf, _len| {
83 let buf: *mut u8 = ::cast::reinterpret_cast(&buf);
85 *ptr::mut_offset(buf, off) =
88 *ptr::mut_offset(buf, off) =
89 (code >> 6u & 31u | tag_two_b) as u8;
90 *ptr::mut_offset(buf, off + 1u) =
91 (code & 63u | tag_cont) as u8;
93 *ptr::mut_offset(buf, off) =
94 (code >> 12u & 15u | tag_three_b) as u8;
95 *ptr::mut_offset(buf, off + 1u) =
96 (code >> 6u & 63u | tag_cont) as u8;
97 *ptr::mut_offset(buf, off + 2u) =
98 (code & 63u | tag_cont) as u8;
100 *ptr::mut_offset(buf, off) =
101 (code >> 18u & 7u | tag_four_b) as u8;
102 *ptr::mut_offset(buf, off + 1u) =
103 (code >> 12u & 63u | tag_cont) as u8;
104 *ptr::mut_offset(buf, off + 2u) =
105 (code >> 6u & 63u | tag_cont) as u8;
106 *ptr::mut_offset(buf, off + 3u) =
107 (code & 63u | tag_cont) as u8;
109 *ptr::mut_offset(buf, off) =
110 (code >> 24u & 3u | tag_five_b) as u8;
111 *ptr::mut_offset(buf, off + 1u) =
112 (code >> 18u & 63u | tag_cont) as u8;
113 *ptr::mut_offset(buf, off + 2u) =
114 (code >> 12u & 63u | tag_cont) as u8;
115 *ptr::mut_offset(buf, off + 3u) =
116 (code >> 6u & 63u | tag_cont) as u8;
117 *ptr::mut_offset(buf, off + 4u) =
118 (code & 63u | tag_cont) as u8;
120 *ptr::mut_offset(buf, off) =
121 (code >> 30u & 1u | tag_six_b) as u8;
122 *ptr::mut_offset(buf, off + 1u) =
123 (code >> 24u & 63u | tag_cont) as u8;
124 *ptr::mut_offset(buf, off + 2u) =
125 (code >> 18u & 63u | tag_cont) as u8;
126 *ptr::mut_offset(buf, off + 3u) =
127 (code >> 12u & 63u | tag_cont) as u8;
128 *ptr::mut_offset(buf, off + 4u) =
129 (code >> 6u & 63u | tag_cont) as u8;
130 *ptr::mut_offset(buf, off + 5u) =
131 (code & 63u | tag_cont) as u8;
135 raw::set_len(s, new_len);
139 /// Convert a char to a string
140 pub pure fn from_char(ch: char) -> ~str {
142 unsafe { push_char(&mut buf, ch); }
146 /// Convert a vector of chars to a string
147 pub pure fn from_chars(chs: &[char]) -> ~str {
150 reserve(&mut buf, chs.len());
151 for vec::each(chs) |ch| {
152 push_char(&mut buf, *ch);
158 /// Appends a string slice to the back of a string, without overallocating
160 pub fn push_str_no_overallocate(lhs: &mut ~str, rhs: &str) {
162 let llen = lhs.len();
163 let rlen = rhs.len();
164 reserve(&mut *lhs, llen + rlen);
165 do as_buf(*lhs) |lbuf, _llen| {
166 do as_buf(rhs) |rbuf, _rlen| {
167 let dst = ptr::offset(lbuf, llen);
168 let dst = ::cast::transmute_mut_unsafe(dst);
169 ptr::copy_memory(dst, rbuf, rlen);
172 raw::set_len(lhs, llen + rlen);
175 /// Appends a string slice to the back of a string
177 pub fn push_str(lhs: &mut ~str, rhs: &str) {
179 let llen = lhs.len();
180 let rlen = rhs.len();
181 reserve_at_least(&mut *lhs, llen + rlen);
182 do as_buf(*lhs) |lbuf, _llen| {
183 do as_buf(rhs) |rbuf, _rlen| {
184 let dst = ptr::offset(lbuf, llen);
185 let dst = ::cast::transmute_mut_unsafe(dst);
186 ptr::copy_memory(dst, rbuf, rlen);
189 raw::set_len(lhs, llen + rlen);
193 /// Concatenate two strings together
195 pub pure fn append(lhs: ~str, rhs: &str) -> ~str {
198 push_str_no_overallocate(&mut v, rhs);
204 /// Concatenate a vector of strings
205 pub pure fn concat(v: &[~str]) -> ~str {
206 let mut s: ~str = ~"";
207 for vec::each(v) |ss| {
208 unsafe { push_str(&mut s, *ss) };
213 /// Concatenate a vector of strings, placing a given separator between each
214 pub pure fn connect(v: &[~str], sep: &str) -> ~str {
215 let mut s = ~"", first = true;
216 for vec::each(v) |ss| {
217 if first { first = false; } else { unsafe { push_str(&mut s, sep); } }
218 unsafe { push_str(&mut s, *ss) };
223 /// Concatenate a vector of strings, placing a given separator between each
224 pub pure fn connect_slices(v: &[&str], sep: &str) -> ~str {
225 let mut s = ~"", first = true;
226 for vec::each(v) |ss| {
227 if first { first = false; } else { unsafe { push_str(&mut s, sep); } }
228 unsafe { push_str(&mut s, *ss) };
233 /// Given a string, make a new string with repeated copies of it
234 pub pure fn repeat(ss: &str, nn: uint) -> ~str {
236 for nn.times { acc += ss; }
241 Section: Adding to and removing from a string
245 * Remove the final character from a string and return it
249 * If the string does not contain any characters
251 pub fn pop_char(s: &mut ~str) -> char {
254 let CharRange {ch, next} = char_range_at_reverse(*s, end);
255 unsafe { raw::set_len(s, next); }
260 * Remove the first character from a string and return it
264 * If the string does not contain any characters
266 pub fn shift_char(s: &mut ~str) -> char {
267 let CharRange {ch, next} = char_range_at(*s, 0u);
268 *s = unsafe { raw::slice_bytes(*s, next, len(*s)) };
273 * Removes the first character from a string slice and returns it. This does
274 * not allocate a new string; instead, it mutates a slice to point one
275 * character beyond the character that was shifted.
279 * If the string does not contain any characters
282 pub fn view_shift_char(s: &a/str) -> (char, &a/str) {
283 let CharRange {ch, next} = char_range_at(s, 0u);
284 let next_s = unsafe { raw::view_bytes(s, next, len(s)) };
288 /// Prepend a char to a string
289 pub fn unshift_char(s: &mut ~str, ch: char) {
290 *s = from_char(ch) + *s;
294 * Returns a string with leading `chars_to_trim` removed.
299 * * chars_to_trim - A vector of chars
302 pub pure fn trim_left_chars(s: &str, chars_to_trim: &[char]) -> ~str {
303 if chars_to_trim.is_empty() { return from_slice(s); }
305 match find(s, |c| !chars_to_trim.contains(&c)) {
307 Some(first) => unsafe { raw::slice_bytes(s, first, s.len()) }
312 * Returns a string with trailing `chars_to_trim` removed.
317 * * chars_to_trim - A vector of chars
320 pub pure fn trim_right_chars(s: &str, chars_to_trim: &[char]) -> ~str {
321 if chars_to_trim.is_empty() { return str::from_slice(s); }
323 match rfind(s, |c| !chars_to_trim.contains(&c)) {
326 let next = char_range_at(s, last).next;
327 unsafe { raw::slice_bytes(s, 0u, next) }
333 * Returns a string with leading and trailing `chars_to_trim` removed.
338 * * chars_to_trim - A vector of chars
341 pub pure fn trim_chars(s: &str, chars_to_trim: &[char]) -> ~str {
342 trim_left_chars(trim_right_chars(s, chars_to_trim), chars_to_trim)
345 /// Returns a string with leading whitespace removed
346 pub pure fn trim_left(s: &str) -> ~str {
347 match find(s, |c| !char::is_whitespace(c)) {
349 Some(first) => unsafe { raw::slice_bytes(s, first, len(s)) }
353 /// Returns a string with trailing whitespace removed
354 pub pure fn trim_right(s: &str) -> ~str {
355 match rfind(s, |c| !char::is_whitespace(c)) {
358 let next = char_range_at(s, last).next;
359 unsafe { raw::slice_bytes(s, 0u, next) }
364 /// Returns a string with leading and trailing whitespace removed
365 pub pure fn trim(s: &str) -> ~str { trim_left(trim_right(s)) }
368 Section: Transforming strings
372 * Converts a string to a vector of bytes
374 * The result vector is not null-terminated.
376 pub pure fn to_bytes(s: &str) -> ~[u8] {
378 let mut v: ~[u8] = ::cast::transmute(from_slice(s));
379 vec::raw::set_len(&mut v, len(s));
384 /// Work with the string as a byte slice, not including trailing null.
386 pub pure fn byte_slice<T>(s: &str, f: fn(v: &[u8]) -> T) -> T {
388 unsafe { vec::raw::buf_as_slice(p, n-1u, f) }
392 /// Convert a string to a vector of characters
393 pub pure fn chars(s: &str) -> ~[char] {
394 let mut buf = ~[], i = 0;
397 let CharRange {ch, next} = char_range_at(s, i);
398 unsafe { buf.push(ch); }
405 * Take a substring of another.
407 * Returns a string containing `n` characters starting at byte offset
410 pub pure fn substr(s: &str, begin: uint, n: uint) -> ~str {
411 slice(s, begin, begin + count_bytes(s, begin, n))
415 * Returns a slice of the given string from the byte range [`begin`..`end`)
417 * Fails when `begin` and `end` do not point to valid characters or
418 * beyond the last character of the string
420 pub pure fn slice(s: &str, begin: uint, end: uint) -> ~str {
421 assert is_char_boundary(s, begin);
422 assert is_char_boundary(s, end);
423 unsafe { raw::slice_bytes(s, begin, end) }
427 * Returns a view of the given string from the byte range [`begin`..`end`)
429 * Fails when `begin` and `end` do not point to valid characters or beyond
430 * the last character of the string
432 pub pure fn view(s: &a/str, begin: uint, end: uint) -> &a/str {
433 assert is_char_boundary(s, begin);
434 assert is_char_boundary(s, end);
435 unsafe { raw::view_bytes(s, begin, end) }
438 /// Splits a string into substrings at each occurrence of a given character
439 pub pure fn split_char(s: &str, sep: char) -> ~[~str] {
440 split_char_inner(s, sep, len(s), true)
444 * Splits a string into substrings at each occurrence of a given
445 * character up to 'count' times
447 * The separator may be any character, not only an ASCII byte
449 pub pure fn splitn_char(s: &str, sep: char, count: uint) -> ~[~str] {
450 split_char_inner(s, sep, count, true)
453 /// Like `split_char`, but omits empty strings from the returned vector
454 pub pure fn split_char_nonempty(s: &str, sep: char) -> ~[~str] {
455 split_char_inner(s, sep, len(s), false)
458 pure fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool)
460 if sep < 128u as char {
461 let b = sep as u8, l = len(s);
462 let mut result = ~[], done = 0u;
463 let mut i = 0u, start = 0u;
464 while i < l && done < count {
466 if allow_empty || start < i {
468 result.push(raw::slice_bytes(s, start, i));
476 if allow_empty || start < l {
477 unsafe { result.push(raw::slice_bytes(s, start, l) ) };
481 splitn(s, |cur| cur == sep, count)
486 /// Splits a string into substrings using a character function
487 pub pure fn split(s: &str, sepfn: fn(char) -> bool) -> ~[~str] {
488 split_inner(s, sepfn, len(s), true)
492 * Splits a string into substrings using a character function, cutting at
493 * most `count` times.
495 pub pure fn splitn(s: &str, sepfn: fn(char) -> bool, count: uint) -> ~[~str] {
496 split_inner(s, sepfn, count, true)
499 /// Like `split`, but omits empty strings from the returned vector
500 pub pure fn split_nonempty(s: &str, sepfn: fn(char) -> bool) -> ~[~str] {
501 split_inner(s, sepfn, len(s), false)
504 pure fn split_inner(s: &str, sepfn: fn(cc: char) -> bool, count: uint,
505 allow_empty: bool) -> ~[~str] {
507 let mut result = ~[], i = 0u, start = 0u, done = 0u;
508 while i < l && done < count {
509 let CharRange {ch, next} = char_range_at(s, i);
511 if allow_empty || start < i {
513 result.push(raw::slice_bytes(s, start, i));
521 if allow_empty || start < l {
523 result.push(raw::slice_bytes(s, start, l));
529 // See Issue #1932 for why this is a naive search
530 pure fn iter_matches(s: &a/str, sep: &b/str, f: fn(uint, uint)) {
531 let sep_len = len(sep), l = len(s);
533 let mut i = 0u, match_start = 0u, match_i = 0u;
536 if s[i] == sep[match_i] {
537 if match_i == 0u { match_start = i; }
540 if match_i == sep_len {
541 f(match_start, i + 1u);
546 // Failed match, backtrack
549 i = match_start + 1u;
557 pure fn iter_between_matches(s: &a/str, sep: &b/str, f: fn(uint, uint)) {
558 let mut last_end = 0u;
559 do iter_matches(s, sep) |from, to| {
567 * Splits a string into a vector of the substrings separated by a given string
572 * assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")
575 pub pure fn split_str(s: &a/str, sep: &b/str) -> ~[~str] {
576 let mut result = ~[];
577 do iter_between_matches(s, sep) |from, to| {
578 unsafe { result.push(raw::slice_bytes(s, from, to)); }
583 pub pure fn split_str_nonempty(s: &a/str, sep: &b/str) -> ~[~str] {
584 let mut result = ~[];
585 do iter_between_matches(s, sep) |from, to| {
587 unsafe { result.push(raw::slice_bytes(s, from, to)); }
593 /// Levenshtein Distance between two strings
594 pub fn levdistance(s: &str, t: &str) -> uint {
// NOTE(review): `slen`/`tlen` come from `str::len`, which this file documents
// as a length in bytes, while the loops below index `dcol` with the positions
// yielded by `each_chari`, which appear to be character (not byte) positions.
// The two only coincide for ASCII input -- confirm the intended behaviour for
// multi-byte strings.
596 let slen = str::len(s);
597 let tlen = str::len(t);
// If either string is empty the distance is simply the length of the other.
599 if slen == 0 { return tlen; }
600 if tlen == 0 { return slen; }
// One slot per position of `t` plus one; presumably initialised to
// 0, 1, ..., tlen by `vec::from_fn` and then rewritten in place while
// walking the characters of `s`.
602 let mut dcol = vec::from_fn(tlen + 1, |x| x);
604 for str::each_chari(s) |i, sc| {
607 dcol[0] = current + 1;
609 for str::each_chari(t) |j, tc| {
611 let mut next = dcol[j + 1];
614 dcol[j + 1] = current;
// Otherwise take the smallest of the three neighbouring costs and add one.
616 dcol[j + 1] = ::cmp::min(current, next);
617 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
628 * Splits a string into a vector of the substrings separated by LF ('\n')
630 pub pure fn lines(s: &str) -> ~[~str] { split_char(s, '\n') }
633 * Splits a string into a vector of the substrings separated by LF ('\n')
634 * and/or CR LF ("\r\n")
636 pub pure fn lines_any(s: &str) -> ~[~str] {
637 vec::map(lines(s), |s| {
639 let mut cp = copy *s;
640 if l > 0u && s[l - 1u] == '\r' as u8 {
641 unsafe { raw::set_len(&mut cp, l - 1u); }
647 /// Splits a string into a vector of the substrings separated by whitespace
648 pub pure fn words(s: &str) -> ~[~str] {
649 split_nonempty(s, |c| char::is_whitespace(c))
652 /** Split a string into a vector of substrings,
653 * each of which is less than a limit
655 pub fn split_within(ss: &str, lim: uint) -> ~[~str] {
// Packs the whitespace-separated words of `ss` greedily into rows, joining
// the words of one row with a single space. A new row is started whenever
// appending the next word (plus one separator byte) would exceed `lim`.
// Returns an empty vector when `ss` contains no words.
656 let words = str::words(ss);
659 if words == ~[] { return ~[]; }
661 let mut rows : ~[~str] = ~[];
662 let mut row : ~str = ~"";
664 for words.each |wptr| {
665 let word = copy *wptr;
667 // if adding this word to the row would go over the limit,
668 // then start a new row
669 if row.len() + word.len() + 1 > lim {
// Only flush a row that has content. When the very first word already trips
// the limit (`word.len() + 1 > lim`) `row` is still empty, and pushing it
// unconditionally put a spurious "" at the front of the result.
670 if row.len() > 0 { rows.push(copy row); } // save previous row
671 row = word; // start a new one
673 if row.len() > 0 { row += ~" " } // separate words
674 row += word; // append to this row
// Flush whatever is left in the final, partially filled row.
679 if row != ~"" { rows.push(row); }
686 /// Convert a string to lowercase. ASCII only
687 pub pure fn to_lower(s: &str) -> ~str {
689 |c| unsafe{(libc::tolower(c as libc::c_char)) as char}
693 /// Convert a string to uppercase. ASCII only
694 pub pure fn to_upper(s: &str) -> ~str {
696 |c| unsafe{(libc::toupper(c as libc::c_char)) as char}
701 * Replace all occurrences of one string with another
705 * * s - The string containing substrings to replace
706 * * from - The string to replace
707 * * to - The replacement string
711 * The original string with all occurrences of `from` replaced with `to`
713 pub pure fn replace(s: &str, from: &str, to: &str) -> ~str {
714 let mut result = ~"", first = true;
715 do iter_between_matches(s, from) |start, end| {
719 unsafe { push_str(&mut result, to); }
721 unsafe { push_str(&mut result, raw::slice_bytes(s, start, end)); }
727 Section: Comparing strings
730 /// Bytewise slice equality
733 pub pure fn eq_slice(a: &str, b: &str) -> bool {
734 do as_buf(a) |ap, alen| {
735 do as_buf(b) |bp, blen| {
736 if (alen != blen) { false }
739 libc::memcmp(ap as *libc::c_void,
741 (alen - 1) as libc::size_t) == 0
749 pub pure fn eq_slice(a: &str, b: &str) -> bool {
750 do as_buf(a) |ap, alen| {
751 do as_buf(b) |bp, blen| {
752 if (alen != blen) { false }
755 libc::memcmp(ap as *libc::c_void,
757 (alen - 1) as libc::size_t) == 0
764 /// Bytewise string equality
766 #[lang="uniq_str_eq"]
767 pub pure fn eq(a: &~str, b: &~str) -> bool {
772 pub pure fn eq(a: &~str, b: &~str) -> bool {
776 /// Bytewise slice less than
777 pure fn lt(a: &str, b: &str) -> bool {
778 let (a_len, b_len) = (a.len(), b.len());
779 let mut end = uint::min(a_len, b_len);
783 let (c_a, c_b) = (a[i], b[i]);
784 if c_a < c_b { return true; }
785 if c_a > c_b { return false; }
789 return a_len < b_len;
792 /// Bytewise less than or equal
793 pub pure fn le(a: &str, b: &str) -> bool {
797 /// Bytewise greater than or equal
798 pure fn ge(a: &str, b: &str) -> bool {
802 /// Bytewise greater than
803 pure fn gt(a: &str, b: &str) -> bool {
810 pure fn eq(&self, other: & &self/str) -> bool {
811 eq_slice((*self), (*other))
814 pure fn ne(&self, other: & &self/str) -> bool { !(*self).eq(other) }
820 pure fn eq(&self, other: &~str) -> bool {
821 eq_slice((*self), (*other))
824 pure fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
830 pure fn eq(&self, other: &@str) -> bool {
831 eq_slice((*self), (*other))
834 pure fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
840 pure fn lt(&self, other: &~str) -> bool { lt((*self), (*other)) }
842 pure fn le(&self, other: &~str) -> bool { le((*self), (*other)) }
844 pure fn ge(&self, other: &~str) -> bool { ge((*self), (*other)) }
846 pure fn gt(&self, other: &~str) -> bool { gt((*self), (*other)) }
852 pure fn lt(&self, other: & &self/str) -> bool { lt((*self), (*other)) }
854 pure fn le(&self, other: & &self/str) -> bool { le((*self), (*other)) }
856 pure fn ge(&self, other: & &self/str) -> bool { ge((*self), (*other)) }
858 pure fn gt(&self, other: & &self/str) -> bool { gt((*self), (*other)) }
864 pure fn lt(&self, other: &@str) -> bool { lt((*self), (*other)) }
866 pure fn le(&self, other: &@str) -> bool { le((*self), (*other)) }
868 pure fn ge(&self, other: &@str) -> bool { ge((*self), (*other)) }
870 pure fn gt(&self, other: &@str) -> bool { gt((*self), (*other)) }
874 Section: Iterating through strings
878 * Return true if a predicate matches all characters or if the string
879 * contains no characters
881 pub pure fn all(s: &str, it: fn(char) -> bool) -> bool {
882 all_between(s, 0u, len(s), it)
886 * Return true if a predicate matches any character (and false if it
887 * matches none or there are no characters)
889 pub pure fn any(ss: &str, pred: fn(char) -> bool) -> bool {
890 !all(ss, |cc| !pred(cc))
893 /// Apply a function to each character
894 pub pure fn map(ss: &str, ff: fn(char) -> char) -> ~str {
895 let mut result = ~"";
897 reserve(&mut result, len(ss));
898 for chars_each(ss) |cc| {
899 str::push_char(&mut result, ff(cc));
905 /// Iterate over the bytes in a string
906 pub pure fn bytes_each(ss: &str, it: fn(u8) -> bool) {
911 if !it(ss[pos]) { return; }
916 /// Iterate over the bytes in a string
918 pub pure fn each(s: &str, it: fn(u8) -> bool) {
919 eachi(s, |_i, b| it(b) )
922 /// Iterate over the bytes in a string, with indices
924 pub pure fn eachi(s: &str, it: fn(uint, u8) -> bool) {
925 let mut i = 0u, l = len(s);
927 if !it(i, s[i]) { break; }
932 /// Iterates over the chars in a string
934 pub pure fn each_char(s: &str, it: fn(char) -> bool) {
935 each_chari(s, |_i, c| it(c))
938 /// Iterates over the chars in a string, with indices
940 pub pure fn each_chari(s: &str, it: fn(uint, char) -> bool) {
941 let mut pos = 0u, ch_pos = 0u;
944 let CharRange {ch, next} = char_range_at(s, pos);
946 if !it(ch_pos, ch) { break; }
951 /// Iterate over the characters in a string
952 pub pure fn chars_each(s: &str, it: fn(char) -> bool) {
956 let CharRange {ch, next} = char_range_at(s, pos);
958 if !it(ch) { return; }
962 /// Apply a function to each substring after splitting by character
963 pub pure fn split_char_each(ss: &str, cc: char, ff: fn(v: &str) -> bool) {
964 vec::each(split_char(ss, cc), |s| ff(*s))
968 * Apply a function to each substring after splitting by character, up to
971 pub pure fn splitn_char_each(ss: &str, sep: char, count: uint,
972 ff: fn(v: &str) -> bool) {
973 vec::each(splitn_char(ss, sep, count), |s| ff(*s))
976 /// Apply a function to each word
977 pub pure fn words_each(ss: &str, ff: fn(v: &str) -> bool) {
978 vec::each(words(ss), |s| ff(*s))
982 * Apply a function to each line (by '\n')
984 pub pure fn lines_each(ss: &str, ff: fn(v: &str) -> bool) {
985 vec::each(lines(ss), |s| ff(*s))
993 * Returns the byte index of the first matching character
997 * * `s` - The string to search
998 * * `c` - The character to search for
1002 * An `option` containing the byte index of the first matching character
1003 * or `none` if there is no match
1005 pub pure fn find_char(s: &str, c: char) -> Option<uint> {
1006 find_char_between(s, c, 0u, len(s))
1010 * Returns the byte index of the first matching character beginning
1011 * from a given byte offset
1015 * * `s` - The string to search
1016 * * `c` - The character to search for
1017 * * `start` - The byte index to begin searching at, inclusive
1021 * An `option` containing the byte index of the first matching character
1022 * or `none` if there is no match
1026 * `start` must be less than or equal to `len(s)`. `start` must be the
1027 * index of a character boundary, as defined by `is_char_boundary`.
1029 pub pure fn find_char_from(s: &str, c: char, start: uint) -> Option<uint> {
1030 find_char_between(s, c, start, len(s))
1034 * Returns the byte index of the first matching character within a given range
1038 * * `s` - The string to search
1039 * * `c` - The character to search for
1040 * * `start` - The byte index to begin searching at, inclusive
1041 * * `end` - The byte index to end searching at, exclusive
1045 * An `option` containing the byte index of the first matching character
1046 * or `none` if there is no match
1050 * `start` must be less than or equal to `end` and `end` must be less than
1051 * or equal to `len(s)`. `start` must be the index of a character boundary,
1052 * as defined by `is_char_boundary`.
1054 pub pure fn find_char_between(s: &str, c: char, start: uint, end: uint)
1056 if c < 128u as char {
1057 assert start <= end;
1058 assert end <= len(s);
1062 if s[i] == b { return Some(i); }
1067 find_between(s, start, end, |x| x == c)
1072 * Returns the byte index of the last matching character
1076 * * `s` - The string to search
1077 * * `c` - The character to search for
1081 * An `option` containing the byte index of the last matching character
1082 * or `none` if there is no match
1084 pub pure fn rfind_char(s: &str, c: char) -> Option<uint> {
1085 rfind_char_between(s, c, len(s), 0u)
1089 * Returns the byte index of the last matching character beginning
1090 * from a given byte offset
1094 * * `s` - The string to search
1095 * * `c` - The character to search for
1096 * * `start` - The byte index to begin searching at, exclusive
1100 * An `option` containing the byte index of the last matching character
1101 * or `none` if there is no match
1105 * `start` must be less than or equal to `len(s)`. `start` must be
1106 * the index of a character boundary, as defined by `is_char_boundary`.
1108 pub pure fn rfind_char_from(s: &str, c: char, start: uint) -> Option<uint> {
1109 rfind_char_between(s, c, start, 0u)
1113 * Returns the byte index of the last matching character within a given range
1117 * * `s` - The string to search
1118 * * `c` - The character to search for
1119 * * `start` - The byte index to begin searching at, exclusive
1120 * * `end` - The byte index to end searching at, inclusive
1124 * An `option` containing the byte index of the last matching character
1125 * or `none` if there is no match
1129 * `end` must be less than or equal to `start` and `start` must be less than
1130 * or equal to `len(s)`. `start` must be the index of a character boundary,
1131 * as defined by `is_char_boundary`.
1133 pub pure fn rfind_char_between(s: &str, c: char, start: uint, end: uint)
1135 if c < 128u as char {
1136 assert start >= end;
1137 assert start <= len(s);
1142 if s[i] == b { return Some(i); }
1146 rfind_between(s, start, end, |x| x == c)
1151 * Returns the byte index of the first character that satisfies
1152 * the given predicate
1156 * * `s` - The string to search
1157 * * `f` - The predicate to satisfy
1161 * An `option` containing the byte index of the first matching character
1162 * or `none` if there is no match
1164 pub pure fn find(s: &str, f: fn(char) -> bool) -> Option<uint> {
1165 find_between(s, 0u, len(s), f)
1169 * Returns the byte index of the first character that satisfies
1170 * the given predicate, beginning from a given byte offset
1174 * * `s` - The string to search
1175 * * `start` - The byte index to begin searching at, inclusive
1176 * * `f` - The predicate to satisfy
1180 * An `option` containing the byte index of the first matching character
1181 * or `none` if there is no match
1185 * `start` must be less than or equal to `len(s)`. `start` must be the
1186 * index of a character boundary, as defined by `is_char_boundary`.
1188 pub pure fn find_from(s: &str, start: uint, f: fn(char)
1189 -> bool) -> Option<uint> {
1190 find_between(s, start, len(s), f)
1194 * Returns the byte index of the first character that satisfies
1195 * the given predicate, within a given range
1199 * * `s` - The string to search
1200 * * `start` - The byte index to begin searching at, inclusive
1201 * * `end` - The byte index to end searching at, exclusive
1202 * * `f` - The predicate to satisfy
1206 * An `option` containing the byte index of the first matching character
1207 * or `none` if there is no match
1211 * `start` must be less than or equal to `end` and `end` must be less than
1212 * or equal to `len(s)`. `start` must be the index of a character
1213 * boundary, as defined by `is_char_boundary`.
1215 pub pure fn find_between(s: &str, start: uint, end: uint, f: fn(char) -> bool)
1217 assert start <= end;
1218 assert end <= len(s);
1219 assert is_char_boundary(s, start);
1222 let CharRange {ch, next} = char_range_at(s, i);
1223 if f(ch) { return Some(i); }
1230 * Returns the byte index of the last character that satisfies
1231 * the given predicate
1235 * * `s` - The string to search
1236 * * `f` - The predicate to satisfy
1240 * An option containing the byte index of the last matching character
1241 * or `none` if there is no match
1243 pub pure fn rfind(s: &str, f: fn(char) -> bool) -> Option<uint> {
1244 rfind_between(s, len(s), 0u, f)
1248 * Returns the byte index of the last character that satisfies
1249 * the given predicate, beginning from a given byte offset
1253 * * `s` - The string to search
1254 * * `start` - The byte index to begin searching at, exclusive
1255 * * `f` - The predicate to satisfy
1259 * An `option` containing the byte index of the last matching character
1260 * or `none` if there is no match
1264 * `start` must be less than or equal to `len(s)`. `start` must be the
1265 * index of a character boundary, as defined by `is_char_boundary`
1267 pub pure fn rfind_from(s: &str, start: uint, f: fn(char) -> bool)
1269 rfind_between(s, start, 0u, f)
1273 * Returns the byte index of the last character that satisfies
1274 * the given predicate, within a given range
1278 * * `s` - The string to search
1279 * * `start` - The byte index to begin searching at, exclusive
1280 * * `end` - The byte index to end searching at, inclusive
1281 * * `f` - The predicate to satisfy
1285 * An `option` containing the byte index of the last matching character
1286 * or `none` if there is no match
1290 * `end` must be less than or equal to `start` and `start` must be less
1291 * than or equal to `len(s)`. `start` must be the index of a character
1292 * boundary, as defined by `is_char_boundary`
1294 pub pure fn rfind_between(s: &str, start: uint, end: uint,
1295 f: fn(char) -> bool)
1297 assert start >= end;
1298 assert start <= len(s);
1299 assert is_char_boundary(s, start);
1302 let CharRange {ch, next: prev} = char_range_at_reverse(s, i);
1303 if f(ch) { return Some(prev); }
1309 // Utility used by various searching functions
1310 pure fn match_at(haystack: &a/str, needle: &b/str, at: uint) -> bool {
1312 for each(needle) |c| { if haystack[i] != c { return false; } i += 1u; }
1317 * Returns the byte index of the first matching substring
1321 * * `haystack` - The string to search
1322 * * `needle` - The string to search for
1326 * An `option` containing the byte index of the first matching substring
1327 * or `none` if there is no match
1329 pub pure fn find_str(haystack: &a/str, needle: &b/str) -> Option<uint> {
1330 find_str_between(haystack, needle, 0u, len(haystack))
1334 * Returns the byte index of the first matching substring beginning
1335 * from a given byte offset
1339 * * `haystack` - The string to search
1340 * * `needle` - The string to search for
1341 * * `start` - The byte index to begin searching at, inclusive
1345 * An `option` containing the byte index of the first matching substring
1346 * or `none` if there is no match
1350 * `start` must be less than or equal to `len(haystack)`
1352 pub pure fn find_str_from(haystack: &a/str, needle: &b/str, start: uint)
1354 find_str_between(haystack, needle, start, len(haystack))
1358 * Returns the byte index of the first matching substring within a given range
1362 * * `haystack` - The string to search
1363 * * `needle` - The string to search for
1364 * * `start` - The byte index to begin searching at, inclusive
1365 * * `end` - The byte index to end searching at, exclusive
1369 * An `option` containing the byte index of the first matching substring
1370 * or `none` if there is no match
1374 * `start` must be less than or equal to `end` and `end` must be less than
1375 * or equal to `len(haystack)`.
1377 pub pure fn find_str_between(haystack: &a/str, needle: &b/str, start: uint,
1380 // See Issue #1932 for why this is a naive search
// NOTE(review): the documentation also requires `start <= end`, but only the
// upper bound on `end` is asserted in the lines visible here -- confirm.
1381 assert end <= len(haystack);
1382 let needle_len = len(needle);
// An empty needle matches immediately, at `start`.
1383 if needle_len == 0u { return Some(start); }
// A needle longer than `end` cannot fit in the range; this guard also keeps
// the subtraction below from underflowing.
1384 if needle_len > end { return None; }
// `e` is the last index at which a full-length match still ends by `end`.
1387 let e = end - needle_len;
// Byte-wise comparison of `needle` against `haystack` at candidate index `i`.
1389 if match_at(haystack, needle, i) { return Some(i); }
1396 * Returns true if one string contains another
1400 * * haystack - The string to look in
1401 * * needle - The string to look for
1403 pub pure fn contains(haystack: &a/str, needle: &b/str) -> bool {
1404 find_str(haystack, needle).is_some()
1408 * Returns true if a string contains a char.
1412 * * haystack - The string to look in
1413 * * needle - The char to look for
1415 pub pure fn contains_char(haystack: &str, needle: char) -> bool {
1416 find_char(haystack, needle).is_some()
1420 * Returns true if one string starts with another
1424 * * haystack - The string to look in
1425 * * needle - The string to look for
1427 pub pure fn starts_with(haystack: &a/str, needle: &b/str) -> bool {
1428 let haystack_len = len(haystack), needle_len = len(needle);
1429 if needle_len == 0u { true }
1430 else if needle_len > haystack_len { false }
1431 else { match_at(haystack, needle, 0u) }
1435 * Returns true if one string ends with another
1439 * * haystack - The string to look in
1440 * * needle - The string to look for
1442 pub pure fn ends_with(haystack: &a/str, needle: &b/str) -> bool {
1443 let haystack_len = len(haystack), needle_len = len(needle);
1444 if needle_len == 0u { true }
1445 else if needle_len > haystack_len { false }
1446 else { match_at(haystack, needle, haystack_len - needle_len) }
1450 Section: String properties
1453 /// Determines if a string contains only ASCII characters
1454 pub pure fn is_ascii(s: &str) -> bool {
1455 let mut i: uint = len(s);
1456 while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } }
1460 /// Returns true if the string has length 0
1461 pub pure fn is_empty(s: &str) -> bool { len(s) == 0u }
1464 * Returns true if the string contains only whitespace
1466 * Whitespace characters are determined by `char::is_whitespace`
1468 pub pure fn is_whitespace(s: &str) -> bool {
1469 return all(s, char::is_whitespace);
1473 * Returns true if the string contains only alphanumerics
1475 * Alphanumeric characters are determined by `char::is_alphanumeric`
1477 pure fn is_alphanumeric(s: &str) -> bool {
1478 return all(s, char::is_alphanumeric);
1481 /// Returns the string length/size in bytes not counting the null terminator
1482 pub pure fn len(s: &str) -> uint {
1483 do as_buf(s) |_p, n| { n - 1u }
1486 /// Returns the number of characters that a string holds
1487 pub pure fn char_len(s: &str) -> uint { count_chars(s, 0u, len(s)) }
1493 /// Determines if a vector of bytes contains valid UTF-8
1494 pub pure fn is_utf8(v: &[const u8]) -> bool {
1496 let total = vec::len::<u8>(v);
1498 let mut chsize = utf8_char_width(v[i]);
1499 if chsize == 0u { return false; }
1500 if i + chsize > total { return false; }
1503 if v[i] & 192u8 != tag_cont_u8 { return false; }
1511 /// Determines if a vector of `u16` contains valid UTF-16
1512 pub pure fn is_utf16(v: &[u16]) -> bool {
1513 let len = vec::len(v);
1518 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
1522 if i+1u < len { return false; }
1524 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
1525 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
1532 /// Converts to a vector of `u16` encoded as UTF-16
1533 pub pure fn to_utf16(s: &str) -> ~[u16] {
1535 for chars_each(s) |cch| {
1536 // Arithmetic with u32 literals is easier on the eyes than chars.
1537 let mut ch = cch as u32;
1540 if (ch & 0xFFFF_u32) == ch {
1541 // The BMP falls through (assuming non-surrogate, as it
1543 assert ch <= 0xD7FF_u32 || ch >= 0xE000_u32;
1546 // Supplementary planes break into surrogates.
1547 assert ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32;
1549 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1550 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1551 u.push_all(~[w1, w2])
1558 pub pure fn utf16_chars(v: &[u16], f: fn(char)) {
1559 let len = vec::len(v);
1561 while (i < len && v[i] != 0u16) {
1564 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
1570 assert u >= 0xD800_u16 && u <= 0xDBFF_u16;
1571 assert u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16;
1572 let mut c = (u - 0xD800_u16) as char;
1574 c |= (u2 - 0xDC00_u16) as char;
1575 c |= 0x1_0000_u32 as char;
1583 pub pure fn from_utf16(v: &[u16]) -> ~str {
1586 reserve(&mut buf, vec::len(v));
1587 utf16_chars(v, |ch| push_char(&mut buf, ch));
1592 pub pure fn with_capacity(capacity: uint) -> ~str {
1594 unsafe { reserve(&mut buf, capacity); }
1599 * As char_len but for a slice of a string
1603 * * s - A valid string
1604 * * start - The position inside `s` where to start counting in bytes
1605 * * end - The position where to stop counting
1609 * The number of Unicode characters in `s` between the given indices.
1611 pub pure fn count_chars(s: &str, start: uint, end: uint) -> uint {
1612 assert is_char_boundary(s, start);
1613 assert is_char_boundary(s, end);
1614 let mut i = start, len = 0u;
1616 let next = char_range_at(s, i).next;
1623 /// Counts the number of bytes taken by the `n` in `s` starting from `start`.
1624 pub pure fn count_bytes(s: &b/str, start: uint, n: uint) -> uint {
1625 assert is_char_boundary(s, start);
1626 let mut end = start, cnt = n;
1630 let next = char_range_at(s, end).next;
1637 /// Given a first byte, determine how many bytes are in this UTF-8 character
1638 pub pure fn utf8_char_width(b: u8) -> uint {
1639 let byte: uint = b as uint;
1640 if byte < 128u { return 1u; }
1641 // Not a valid start byte
1642 if byte < 192u { return 0u; }
1643 if byte < 224u { return 2u; }
1644 if byte < 240u { return 3u; }
1645 if byte < 248u { return 4u; }
1646 if byte < 252u { return 5u; }
1651 * Returns false if the index points into the middle of a multi-byte
1652 * character sequence.
1654 pub pure fn is_char_boundary(s: &str, index: uint) -> bool {
1655 if index == len(s) { return true; }
1657 return b < 128u8 || b >= 192u8;
1661 * Pluck a character out of a string and return the index of the next
1664 * This function can be used to iterate over the unicode characters of a
1670 * let s = "中华Việt Nam";
1672 * while i < str::len(s) {
1673 * let CharRange {ch, next} = str::char_range_at(s, i);
1674 * std::io::println(fmt!("%u: %c",i,ch));
1697 * * i - The byte offset of the char to extract
1701 * A record {ch: char, next: uint} containing the char value and the byte
1702 * index of the next unicode character.
1706 * If `i` is greater than or equal to the length of the string.
1707 * If `i` is not the index of the beginning of a valid UTF-8 character.
1709 pub pure fn char_range_at(s: &str, i: uint) -> CharRange {
1711 let w = utf8_char_width(b0);
1713 if w == 1u { return CharRange {ch: b0 as char, next: i + 1u}; }
1719 assert (byte & 192u8 == tag_cont_u8);
1721 val += (byte & 63u8) as uint;
1724 // Clunky way to get the right bits from the first byte. Uses two shifts,
1725 // the first to clip off the marker bits at the left of the byte, and then
1726 // a second (as uint) to get it to the right position.
1727 val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
1728 return CharRange {ch: val as char, next: i};
1731 /// Pluck a character out of a string
1732 pub pure fn char_at(s: &str, i: uint) -> char {
1733 return char_range_at(s, i).ch;
1736 pub struct CharRange {
1742 * Given a byte position and a str, return the previous char and its position
1744 * This function can be used to iterate over a unicode string in reverse.
1746 pure fn char_range_at_reverse(ss: &str, start: uint) -> CharRange {
1747 let mut prev = start;
1749 // while there is a previous byte == 10......
1750 while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
1754 // now refer to the initial byte of previous char
1757 let ch = char_at(ss, prev);
1758 return CharRange {ch:ch, next:prev};
1762 * Loop through a substring, char by char
1766 * * This function does not check whether the substring is valid.
1767 * * This function fails if `start` or `end` do not
1768 * represent valid positions inside `s`
1772 * * s - A string to traverse. It may be empty.
1773 * * start - The byte offset at which to start in the string.
1774 * * end - The end of the range to traverse
1775 * * it - A block to execute with each consecutive character of `s`.
1776 * Return `true` to continue, `false` to stop.
1780 * `true` If execution proceeded correctly, `false` if it was interrupted,
1781 * that is if `it` returned `false` at any point.
1783 pub pure fn all_between(s: &str, start: uint, end: uint,
1784 it: fn(char) -> bool) -> bool {
1785 assert is_char_boundary(s, start);
1788 let CharRange {ch, next} = char_range_at(s, i);
1789 if !it(ch) { return false; }
1796 * Loop through a substring, char by char
1800 * * This function does not check whether the substring is valid.
1801 * * This function fails if `start` or `end` do not
1802 * represent valid positions inside `s`
1806 * * s - A string to traverse. It may be empty.
1807 * * start - The byte offset at which to start in the string.
1808 * * end - The end of the range to traverse
1809 * * it - A block to execute with each consecutive character of `s`.
1810 * Return `true` to continue, `false` to stop.
1814 * `true` if `it` returns `true` for any character
1816 pub pure fn any_between(s: &str, start: uint, end: uint,
1817 it: fn(char) -> bool) -> bool {
1818 !all_between(s, start, end, |c| !it(c))
1821 // UTF-8 tags and ranges
1822 const tag_cont_u8: u8 = 128u8;
1823 const tag_cont: uint = 128u;
1824 const max_one_b: uint = 128u;
1825 const tag_two_b: uint = 192u;
1826 const max_two_b: uint = 2048u;
1827 const tag_three_b: uint = 224u;
1828 const max_three_b: uint = 65536u;
1829 const tag_four_b: uint = 240u;
1830 const max_four_b: uint = 2097152u;
1831 const tag_five_b: uint = 248u;
1832 const max_five_b: uint = 67108864u;
1833 const tag_six_b: uint = 252u;
1837 * Work with the byte buffer of a string.
1839 * Allows for unsafe manipulation of strings, which is useful for foreign
1845 * let i = str::as_bytes("Hello World") { |bytes| vec::len(bytes) };
1848 pub pure fn as_bytes<T>(s: &const ~str, f: fn(&~[u8]) -> T) -> T {
1850 let v: *~[u8] = cast::transmute(copy s);
1856 * Work with the byte buffer of a string as a byte slice.
1858 * The byte slice does not include the null terminator.
1860 pub pure fn as_bytes_slice(s: &a/str) -> &a/[u8] {
1862 let (ptr, len): (*u8, uint) = ::cast::reinterpret_cast(&s);
1863 let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1864 return ::cast::reinterpret_cast(&outgoing_tuple);
1869 * Work with the byte buffer of a string as a null-terminated C string.
1871 * Allows for unsafe manipulation of strings, which is useful for foreign
1872 * interop. This is similar to `str::as_buf`, but guarantees null-termination.
1873 * If the given slice is not already null-terminated, this function will
1874 * allocate a temporary, copy the slice, null terminate it, and pass
1880 * let s = str::as_c_str("PATH", { |path| libc::getenv(path) });
1883 pub pure fn as_c_str<T>(s: &str, f: fn(*libc::c_char) -> T) -> T {
1884 do as_buf(s) |buf, len| {
1885 // NB: len includes the trailing null.
1887 if unsafe { *(ptr::offset(buf,len-1)) != 0 } {
1888 as_c_str(from_slice(s), f)
1890 f(buf as *libc::c_char)
1897 * Work with the byte buffer and length of a slice.
1899 * The given length is one byte longer than the 'official' indexable
1900 * length of the string. This is to permit probing the byte past the
1901 * indexable area for a null byte, as is the case in slices pointing
1902 * to full strings, or suffixes of them.
1905 pub pure fn as_buf<T>(s: &str, f: fn(*u8, uint) -> T) -> T {
1907 let v : *(*u8,uint) = ::cast::reinterpret_cast(&ptr::addr_of(&s));
1914 * Reserves capacity for exactly `n` bytes in the given string, not including
1915 * the null terminator.
1917 * Assuming single-byte characters, the resulting string will be large
1918 * enough to hold a string of length `n`. To account for the null terminator,
1919 * the underlying buffer will have the size `n` + 1.
1921 * If the capacity for `s` is already equal to or greater than the requested
1922 * capacity, then no action is taken.
1927 * * n - The number of bytes to reserve space for
1929 pub fn reserve(s: &mut ~str, n: uint) {
1931 let v: *mut ~[u8] = cast::transmute(s);
1932 vec::reserve(&mut *v, n + 1);
1937 * Reserves capacity for at least `n` bytes in the given string, not including
1938 * the null terminator.
1940 * Assuming single-byte characters, the resulting string will be large
1941 * enough to hold a string of length `n`. To account for the null terminator,
1942 * the underlying buffer will have the size `n` + 1.
1944 * This function will over-allocate in order to amortize the allocation costs
1945 * in scenarios where the caller may need to repeatedly reserve additional
1948 * If the capacity for `s` is already equal to or greater than the requested
1949 * capacity, then no action is taken.
1954 * * n - The number of bytes to reserve space for
1956 pub fn reserve_at_least(s: &mut ~str, n: uint) {
1957 reserve(s, uint::next_power_of_two(n + 1u) - 1u)
1961 * Returns the number of single-byte characters the string can hold without
1964 pub pure fn capacity(s: &const ~str) -> uint {
1965 do as_bytes(s) |buf| {
1966 let vcap = vec::capacity(buf);
1972 /// Escape each char in `s` with char::escape_default.
1973 pub pure fn escape_default(s: &str) -> ~str {
1974 let mut out: ~str = ~"";
1976 reserve_at_least(&mut out, str::len(s));
1977 for chars_each(s) |c| {
1978 push_str(&mut out, char::escape_default(c));
1984 /// Escape each char in `s` with char::escape_unicode.
1985 pub pure fn escape_unicode(s: &str) -> ~str {
1986 let mut out: ~str = ~"";
1988 reserve_at_least(&mut out, str::len(s));
1989 for chars_each(s) |c| {
1990 push_str(&mut out, char::escape_unicode(c));
1996 /// Unsafe operations
2002 use str::{as_buf, is_utf8, len, reserve_at_least};
2005 /// Create a Rust string from a null-terminated *u8 buffer
2006 pub unsafe fn from_buf(buf: *u8) -> ~str {
2007 let mut curr = buf, i = 0u;
2008 while *curr != 0u8 {
2010 curr = ptr::offset(buf, i);
2012 return from_buf_len(buf, i);
2015 /// Create a Rust string from a *u8 buffer of the given length
2016 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> ~str {
2017 let mut v: ~[u8] = vec::with_capacity(len + 1);
2018 vec::as_mut_buf(v, |vbuf, _len| {
2019 ptr::copy_memory(vbuf, buf as *u8, len)
2021 vec::raw::set_len(&mut v, len);
2025 return ::cast::transmute(v);
2028 /// Create a Rust string from a null-terminated C string
2029 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
2030 from_buf(::cast::reinterpret_cast(&c_str))
2033 /// Create a Rust string from a `*c_char` buffer of the given length
2034 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
2035 from_buf_len(::cast::reinterpret_cast(&c_str), len)
2038 /// Converts a vector of bytes to a string.
2039 pub unsafe fn from_bytes(v: &[const u8]) -> ~str {
2040 do vec::as_const_buf(v) |buf, len| {
2041 from_buf_len(buf, len)
2045 /// Converts a byte to a string.
2046 pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
2048 /// Form a slice from a *u8 buffer of the given length without copying.
2049 pub unsafe fn buf_as_slice<T>(buf: *u8, len: uint,
2050 f: fn(v: &str) -> T) -> T {
2051 let v = (buf, len + 1);
2052 assert is_utf8(::cast::reinterpret_cast(&v));
2053 f(::cast::transmute(v))
2057 * Takes a bytewise (not UTF-8) slice from a string.
2059 * Returns the substring from [`begin`..`end`).
2063 * If begin is greater than end.
2064 * If end is greater than the length of the string.
2066 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> ~str {
2067 do as_buf(s) |sbuf, n| {
2068 assert (begin <= end);
2071 let mut v = vec::with_capacity(end - begin + 1u);
2073 do vec::as_imm_buf(v) |vbuf, _vlen| {
2074 let vbuf = ::cast::transmute_mut_unsafe(vbuf);
2075 let src = ptr::offset(sbuf, begin);
2076 ptr::copy_memory(vbuf, src, end - begin);
2078 vec::raw::set_len(&mut v, end - begin);
2080 ::cast::transmute(v)
2086 * Takes a bytewise (not UTF-8) view from a string.
2088 * Returns the substring from [`begin`..`end`).
2092 * If begin is greater than end.
2093 * If end is greater than the length of the string.
2096 pub unsafe fn view_bytes(s: &str, begin: uint, end: uint) -> &str {
2097 do as_buf(s) |sbuf, n| {
2098 assert (begin <= end);
2101 let tuple = (ptr::offset(sbuf, begin), end - begin + 1);
2102 ::cast::reinterpret_cast(&tuple)
2106 /// Appends a byte to a string. (Not UTF-8 safe).
2107 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
2108 let new_len = s.len() + 1;
2109 reserve_at_least(&mut *s, new_len);
2110 do as_buf(*s) |buf, len| {
2111 let buf: *mut u8 = ::cast::reinterpret_cast(&buf);
2112 *ptr::mut_offset(buf, len) = b;
2114 set_len(&mut *s, new_len);
2117 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
2118 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
2119 let new_len = s.len() + bytes.len();
2120 reserve_at_least(&mut *s, new_len);
2121 for vec::each(bytes) |byte| { push_byte(&mut *s, *byte); }
2124 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
2125 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
2128 let b = s[len - 1u];
2129 unsafe { set_len(s, len - 1u) };
2133 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
2134 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
2138 *s = unsafe { raw::slice_bytes(*s, 1u, len) };
2142 /// Sets the length of the string and adds the null terminator
2143 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
2144 let v: **vec::raw::VecRepr = cast::transmute(v);
2145 let repr: *vec::raw::VecRepr = *v;
2146 (*repr).unboxed.fill = new_len + 1u;
2147 let null = ptr::mut_offset(cast::transmute(&((*repr).unboxed.data)),
2153 fn test_from_buf_len() {
2155 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2156 let b = vec::raw::to_ptr(a);
2157 let c = from_buf_len(b, 3u);
2158 assert (c == ~"AAA");
2164 pub trait Trimmable {
2165 pure fn trim() -> Self;
2166 pure fn trim_left() -> Self;
2167 pure fn trim_right() -> Self;
2170 /// Extension methods for strings
2171 impl Trimmable for ~str {
2172 /// Returns a string with leading and trailing whitespace removed
2174 pure fn trim() -> ~str { trim(self) }
2175 /// Returns a string with leading whitespace removed
2177 pure fn trim_left() -> ~str { trim_left(self) }
2178 /// Returns a string with trailing whitespace removed
2180 pure fn trim_right() -> ~str { trim_right(self) }
2188 impl Add<&str,~str> for ~str {
2190 pure fn add(&self, rhs: & &self/str) -> ~str {
2191 append(copy *self, (*rhs))
2199 pub trait StrSlice {
2200 pure fn all(it: fn(char) -> bool) -> bool;
2201 pure fn any(it: fn(char) -> bool) -> bool;
2202 pure fn contains(needle: &a/str) -> bool;
2203 pure fn contains_char(needle: char) -> bool;
2204 pure fn each(it: fn(u8) -> bool);
2205 pure fn eachi(it: fn(uint, u8) -> bool);
2206 pure fn each_char(it: fn(char) -> bool);
2207 pure fn each_chari(it: fn(uint, char) -> bool);
2208 pure fn ends_with(needle: &str) -> bool;
2209 pure fn is_empty() -> bool;
2210 pure fn is_whitespace() -> bool;
2211 pure fn is_alphanumeric() -> bool;
2212 pure fn len() -> uint;
2213 pure fn slice(begin: uint, end: uint) -> ~str;
2214 pure fn split(sepfn: fn(char) -> bool) -> ~[~str];
2215 pure fn split_char(sep: char) -> ~[~str];
2216 pure fn split_str(sep: &a/str) -> ~[~str];
2217 pure fn starts_with(needle: &a/str) -> bool;
2218 pure fn substr(begin: uint, n: uint) -> ~str;
2219 pure fn to_lower() -> ~str;
2220 pure fn to_upper() -> ~str;
2221 pure fn escape_default() -> ~str;
2222 pure fn escape_unicode() -> ~str;
2223 pure fn trim() -> ~str;
2224 pure fn trim_left() -> ~str;
2225 pure fn trim_right() -> ~str;
2226 pure fn to_owned() -> ~str;
2227 pure fn to_managed() -> @str;
2228 pure fn char_at(i: uint) -> char;
2231 /// Extension methods for strings
2232 impl StrSlice for &str {
2234 * Return true if a predicate matches all characters or if the string
2235 * contains no characters
2238 pure fn all(it: fn(char) -> bool) -> bool { all(self, it) }
2240 * Return true if a predicate matches any character (and false if it
2241 * matches none or there are no characters)
2244 pure fn any(it: fn(char) -> bool) -> bool { any(self, it) }
2245 /// Returns true if one string contains another
2247 pure fn contains(needle: &a/str) -> bool { contains(self, needle) }
2248 /// Returns true if a string contains a char
2250 pure fn contains_char(needle: char) -> bool {
2251 contains_char(self, needle)
2253 /// Iterate over the bytes in a string
2255 pure fn each(it: fn(u8) -> bool) { each(self, it) }
2256 /// Iterate over the bytes in a string, with indices
2258 pure fn eachi(it: fn(uint, u8) -> bool) { eachi(self, it) }
2259 /// Iterate over the chars in a string
2261 pure fn each_char(it: fn(char) -> bool) { each_char(self, it) }
2262 /// Iterate over the chars in a string, with indices
2264 pure fn each_chari(it: fn(uint, char) -> bool) { each_chari(self, it) }
2265 /// Returns true if one string ends with another
2267 pure fn ends_with(needle: &str) -> bool { ends_with(self, needle) }
2268 /// Returns true if the string has length 0
2270 pure fn is_empty() -> bool { is_empty(self) }
2272 * Returns true if the string contains only whitespace
2274 * Whitespace characters are determined by `char::is_whitespace`
2277 pure fn is_whitespace() -> bool { is_whitespace(self) }
2279 * Returns true if the string contains only alphanumerics
2281 * Alphanumeric characters are determined by `char::is_alphanumeric`
2284 pure fn is_alphanumeric() -> bool { is_alphanumeric(self) }
2286 /// Returns the size in bytes not counting the null terminator
2287 pure fn len() -> uint { len(self) }
2289 * Returns a slice of the given string from the byte range
2292 * Fails when `begin` and `end` do not point to valid characters or
2293 * beyond the last character of the string
2296 pure fn slice(begin: uint, end: uint) -> ~str { slice(self, begin, end) }
2297 /// Splits a string into substrings using a character function
2299 pure fn split(sepfn: fn(char) -> bool) -> ~[~str] { split(self, sepfn) }
2301 * Splits a string into substrings at each occurrence of a given character
2304 pure fn split_char(sep: char) -> ~[~str] { split_char(self, sep) }
2306 * Splits a string into a vector of the substrings separated by a given
2310 pure fn split_str(sep: &a/str) -> ~[~str] { split_str(self, sep) }
2311 /// Returns true if one string starts with another
2313 pure fn starts_with(needle: &a/str) -> bool { starts_with(self, needle) }
2315 * Take a substring of another.
2317 * Returns a string containing `n` characters starting at byte offset
2321 pure fn substr(begin: uint, n: uint) -> ~str { substr(self, begin, n) }
2322 /// Convert a string to lowercase
2324 pure fn to_lower() -> ~str { to_lower(self) }
2325 /// Convert a string to uppercase
2327 pure fn to_upper() -> ~str { to_upper(self) }
2328 /// Escape each char in `s` with char::escape_default.
2330 pure fn escape_default() -> ~str { escape_default(self) }
2331 /// Escape each char in `s` with char::escape_unicode.
2333 pure fn escape_unicode() -> ~str { escape_unicode(self) }
2335 /// Returns a string with leading and trailing whitespace removed
2337 pure fn trim() -> ~str { trim(self) }
2338 /// Returns a string with leading whitespace removed
2340 pure fn trim_left() -> ~str { trim_left(self) }
2341 /// Returns a string with trailing whitespace removed
2343 pure fn trim_right() -> ~str { trim_right(self) }
2346 pure fn to_owned() -> ~str { self.slice(0, self.len()) }
2349 pure fn to_managed() -> @str {
2350 let v = at_vec::from_fn(self.len() + 1, |i| {
2351 if i == self.len() { 0 } else { self[i] }
2353 unsafe { ::cast::transmute(v) }
2357 pure fn char_at(i: uint) -> char { char_at(self, i) }
2360 pub trait OwnedStr {
2361 fn push_str(&mut self, v: &str);
2362 fn push_char(&mut self, c: char);
2365 pub impl OwnedStr for ~str {
2366 fn push_str(&mut self, v: &str) {
2370 fn push_char(&mut self, c: char) {
2387 assert (eq(&~"", &~""));
2388 assert (eq(&~"foo", &~"foo"));
2389 assert (!eq(&~"foo", &~"bar"));
2393 fn test_eq_slice() {
2394 assert (eq_slice(view("foobar", 0, 3), "foo"));
2395 assert (eq_slice(view("barfoo", 3, 6), "foo"));
2396 assert (!eq_slice("foo1", "foo2"));
2401 assert (le(&"", &""));
2402 assert (le(&"", &"foo"));
2403 assert (le(&"foo", &"foo"));
2404 assert (!eq(&~"foo", &~"bar"));
2409 assert (len(~"") == 0u);
2410 assert (len(~"hello world") == 11u);
2411 assert (len(~"\x63") == 1u);
2412 assert (len(~"\xa2") == 2u);
2413 assert (len(~"\u03c0") == 2u);
2414 assert (len(~"\u2620") == 3u);
2415 assert (len(~"\U0001d11e") == 4u);
2417 assert (char_len(~"") == 0u);
2418 assert (char_len(~"hello world") == 11u);
2419 assert (char_len(~"\x63") == 1u);
2420 assert (char_len(~"\xa2") == 1u);
2421 assert (char_len(~"\u03c0") == 1u);
2422 assert (char_len(~"\u2620") == 1u);
2423 assert (char_len(~"\U0001d11e") == 1u);
2424 assert (char_len(~"ประเทศไทย中华Việt Nam") == 19u);
2428 fn test_rfind_char() {
2429 assert rfind_char(~"hello", 'l') == Some(3u);
2430 assert rfind_char(~"hello", 'o') == Some(4u);
2431 assert rfind_char(~"hello", 'h') == Some(0u);
2432 assert rfind_char(~"hello", 'z').is_none();
2433 assert rfind_char(~"ประเทศไทย中华Việt Nam", '华') == Some(30u);
2437 fn test_pop_char() {
2438 let mut data = ~"ประเทศไทย中华";
2439 let cc = pop_char(&mut data);
2440 assert ~"ประเทศไทย中" == data;
2445 fn test_pop_char_2() {
2446 let mut data2 = ~"华";
2447 let cc2 = pop_char(&mut data2);
2448 assert ~"" == data2;
2454 #[ignore(cfg(windows))]
2455 fn test_pop_char_fail() {
2457 let _cc3 = pop_char(&mut data);
2461 fn test_split_char() {
2462 fn t(s: &str, c: char, u: &[~str]) {
2463 log(debug, ~"split_byte: " + s);
2464 let v = split_char(s, c);
2465 debug!("split_byte to: %?", v);
2466 assert vec::all2(v, u, |a,b| a == b);
2468 t(~"abc.hello.there", '.', ~[~"abc", ~"hello", ~"there"]);
2469 t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]);
2470 t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there", ~""]);
2472 assert ~[~"", ~"", ~"", ~"hello", ~"there", ~""]
2473 == split_char(~"...hello.there.", '.');
2475 assert ~[~""] == split_char(~"", 'z');
2476 assert ~[~"",~""] == split_char(~"z", 'z');
2477 assert ~[~"ok"] == split_char(~"ok", 'z');
2481 fn test_split_char_2() {
2482 let data = ~"ประเทศไทย中华Việt Nam";
2483 assert ~[~"ประเทศไทย中华", ~"iệt Nam"]
2484 == split_char(data, 'V');
2485 assert ~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]
2486 == split_char(data, 'ท');
2490 fn test_splitn_char() {
2491 fn t(s: &str, c: char, n: uint, u: &[~str]) {
2492 log(debug, ~"splitn_byte: " + s);
2493 let v = splitn_char(s, c, n);
2494 debug!("split_byte to: %?", v);
2495 debug!("comparing vs. %?", u);
2496 assert vec::all2(v, u, |a,b| a == b);
2498 t(~"abc.hello.there", '.', 0u, ~[~"abc.hello.there"]);
2499 t(~"abc.hello.there", '.', 1u, ~[~"abc", ~"hello.there"]);
2500 t(~"abc.hello.there", '.', 2u, ~[~"abc", ~"hello", ~"there"]);
2501 t(~"abc.hello.there", '.', 3u, ~[~"abc", ~"hello", ~"there"]);
2502 t(~".hello.there", '.', 0u, ~[~".hello.there"]);
2503 t(~".hello.there", '.', 1u, ~[~"", ~"hello.there"]);
2504 t(~"...hello.there.", '.', 3u, ~[~"", ~"", ~"", ~"hello.there."]);
2505 t(~"...hello.there.", '.', 5u,
2506 ~[~"", ~"", ~"", ~"hello", ~"there", ~""]);
2508 assert ~[~""] == splitn_char(~"", 'z', 5u);
2509 assert ~[~"",~""] == splitn_char(~"z", 'z', 5u);
2510 assert ~[~"ok"] == splitn_char(~"ok", 'z', 5u);
2511 assert ~[~"z"] == splitn_char(~"z", 'z', 0u);
2512 assert ~[~"w.x.y"] == splitn_char(~"w.x.y", '.', 0u);
2513 assert ~[~"w",~"x.y"] == splitn_char(~"w.x.y", '.', 1u);
2517 fn test_splitn_char_2 () {
2518 let data = ~"ประเทศไทย中华Việt Nam";
2519 assert ~[~"ประเทศไทย中", ~"Việt Nam"]
2520 == splitn_char(data, '华', 1u);
2522 assert ~[~"", ~"", ~"XXX", ~"YYYzWWWz"]
2523 == splitn_char(~"zzXXXzYYYzWWWz", 'z', 3u);
2524 assert ~[~"",~""] == splitn_char(~"z", 'z', 5u);
2525 assert ~[~""] == splitn_char(~"", 'z', 5u);
2526 assert ~[~"ok"] == splitn_char(~"ok", 'z', 5u);
2531 fn test_splitn_char_3() {
2532 let data = ~"ประเทศไทย中华Việt Nam";
2533 assert ~[~"ประเทศไทย中华", ~"iệt Nam"]
2534 == splitn_char(data, 'V', 1u);
2535 assert ~[~"ประเ", ~"ศไทย中华Việt Nam"]
2536 == splitn_char(data, 'ท', 1u);
2541 fn test_split_str() {
2542 fn t(s: &str, sep: &a/str, i: int, k: &str) {
2543 fn borrow(x: &a/str) -> &a/str { x }
2544 let v = split_str(s, sep);
2545 assert borrow(v[i]) == k;
2548 t(~"--1233345--", ~"12345", 0, ~"--1233345--");
2549 t(~"abc::hello::there", ~"::", 0, ~"abc");
2550 t(~"abc::hello::there", ~"::", 1, ~"hello");
2551 t(~"abc::hello::there", ~"::", 2, ~"there");
2552 t(~"::hello::there", ~"::", 0, ~"");
2553 t(~"hello::there::", ~"::", 2, ~"");
2554 t(~"::hello::there::", ~"::", 3, ~"");
2556 let data = ~"ประเทศไทย中华Việt Nam";
2557 assert ~[~"ประเทศไทย", ~"Việt Nam"]
2558 == split_str (data, ~"中华");
2560 assert ~[~"", ~"XXX", ~"YYY", ~""]
2561 == split_str(~"zzXXXzzYYYzz", ~"zz");
2563 assert ~[~"zz", ~"zYYYz"]
2564 == split_str(~"zzXXXzYYYz", ~"XXX");
2567 assert ~[~"", ~"XXX", ~"YYY", ~""] == split_str(~".XXX.YYY.", ~".");
2568 assert ~[~""] == split_str(~"", ~".");
2569 assert ~[~"",~""] == split_str(~"zz", ~"zz");
2570 assert ~[~"ok"] == split_str(~"ok", ~"z");
2571 assert ~[~"",~"z"] == split_str(~"zzz", ~"zz");
2572 assert ~[~"",~"",~"z"] == split_str(~"zzzzz", ~"zz");
2578 let data = ~"ประเทศไทย中华Việt Nam";
2579 assert ~[~"ประเทศไทย中", ~"Việt Nam"]
2580 == split (data, |cc| cc == '华');
2582 assert ~[~"", ~"", ~"XXX", ~"YYY", ~""]
2583 == split(~"zzXXXzYYYz", char::is_lowercase);
2585 assert ~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]
2586 == split(~"zzXXXzYYYz", char::is_uppercase);
2588 assert ~[~"",~""] == split(~"z", |cc| cc == 'z');
2589 assert ~[~""] == split(~"", |cc| cc == 'z');
2590 assert ~[~"ok"] == split(~"ok", |cc| cc == 'z');
2595 let lf = ~"\nMary had a little lamb\nLittle lamb\n";
2596 let crlf = ~"\r\nMary had a little lamb\r\nLittle lamb\r\n";
2598 assert ~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
2601 assert ~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
2604 assert ~[~"\r", ~"Mary had a little lamb\r", ~"Little lamb\r", ~""]
2607 assert ~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
2610 assert ~[~""] == lines (~"");
2611 assert ~[~""] == lines_any(~"");
2612 assert ~[~"",~""] == lines (~"\n");
2613 assert ~[~"",~""] == lines_any(~"\n");
2614 assert ~[~"banana"] == lines (~"banana");
2615 assert ~[~"banana"] == lines_any(~"banana");
2620 let data = ~"\nMary had a little lamb\nLittle lamb\n";
2621 assert ~[~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]
2624 assert ~[~"ok"] == words(~"ok");
2625 assert ~[] == words(~"");
2629 fn test_split_within() {
2630 assert split_within(~"", 0) == ~[];
2631 assert split_within(~"", 15) == ~[];
2632 assert split_within(~"hello", 15) == ~[~"hello"];
2634 let data = ~"\nMary had a little lamb\nLittle lamb\n";
2635 error!("~~~~ %?", split_within(data, 15));
2636 assert split_within(data, 15) == ~[~"Mary had a",
2642 fn test_find_str() {
2644 assert find_str(~"banana", ~"apple pie").is_none();
2645 assert find_str(~"", ~"") == Some(0u);
2647 let data = ~"ประเทศไทย中华Việt Nam";
2648 assert find_str(data, ~"") == Some(0u);
2649 assert find_str(data, ~"ประเ") == Some( 0u);
2650 assert find_str(data, ~"ะเ") == Some( 6u);
2651 assert find_str(data, ~"中华") == Some(27u);
2652 assert find_str(data, ~"ไท华").is_none();
2656 fn test_find_str_between() {
2658 assert find_str_between(~"", ~"", 0u, 0u) == Some(0u);
2660 let data = ~"abcabc";
2661 assert find_str_between(data, ~"ab", 0u, 6u) == Some(0u);
2662 assert find_str_between(data, ~"ab", 2u, 6u) == Some(3u);
2663 assert find_str_between(data, ~"ab", 2u, 4u).is_none();
2665 let mut data = ~"ประเทศไทย中华Việt Nam";
2667 assert find_str_between(data, ~"", 0u, 43u) == Some(0u);
2668 assert find_str_between(data, ~"", 6u, 43u) == Some(6u);
2670 assert find_str_between(data, ~"ประ", 0u, 43u) == Some( 0u);
2671 assert find_str_between(data, ~"ทศไ", 0u, 43u) == Some(12u);
2672 assert find_str_between(data, ~"ย中", 0u, 43u) == Some(24u);
2673 assert find_str_between(data, ~"iệt", 0u, 43u) == Some(34u);
2674 assert find_str_between(data, ~"Nam", 0u, 43u) == Some(40u);
2676 assert find_str_between(data, ~"ประ", 43u, 86u) == Some(43u);
2677 assert find_str_between(data, ~"ทศไ", 43u, 86u) == Some(55u);
2678 assert find_str_between(data, ~"ย中", 43u, 86u) == Some(67u);
2679 assert find_str_between(data, ~"iệt", 43u, 86u) == Some(77u);
2680 assert find_str_between(data, ~"Nam", 43u, 86u) == Some(83u);
2685 fn t(a: &str, b: &str, start: int) {
2686 assert substr(a, start as uint, len(b)) == b.to_str();
2688 t(~"hello", ~"llo", 2);
2689 t(~"hello", ~"el", 1);
2690 assert ~"ะเทศไท" == substr(~"ประเทศไทย中华Việt Nam", 6u, 6u);
2695 fn t(v: &[~str], s: &str) {
2696 assert concat(v) == s.to_str();
2698 t(~[~"you", ~"know", ~"I'm", ~"no", ~"good"], ~"youknowI'mnogood");
2699 let v: ~[~str] = ~[];
2706 fn t(v: &[~str], sep: &str, s: &str) {
2707 assert connect(v, sep) == s.to_str();
2709 t(~[~"you", ~"know", ~"I'm", ~"no", ~"good"],
2710 ~" ", ~"you know I'm no good");
2711 let v: ~[~str] = ~[];
2713 t(~[~"hi"], ~" ", ~"hi");
2717 fn test_connect_slices() {
2718 fn t(v: &[&str], sep: &str, s: &str) {
2719 assert connect_slices(v, sep) == s.to_str();
2721 t(["you", "know", "I'm", "no", "good"],
2722 " ", "you know I'm no good");
2724 t(["hi"], " ", "hi");
2729 assert repeat(~"x", 4) == ~"xxxx";
2730 assert repeat(~"hi", 4) == ~"hihihihi";
2731 assert repeat(~"ไท华", 3) == ~"ไท华ไท华ไท华";
2732 assert repeat(~"", 4) == ~"";
2733 assert repeat(~"hi", 0) == ~"";
// Checks that to_upper upper-cases the ASCII letters of the input and leaves
// the punctuation ":.;" unchanged.
2737 fn test_to_upper() {
2738 // libc::toupper, and hence str::to_upper
2739 // are culturally insensitive: they only work for ASCII
2740 // (see Issue #1347)
// `unicode` is deliberately empty for now; the commented-out literal is the
// non-ASCII sample to restore later.
2741 let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works
2742 let input = ~"abcDEF" + unicode + ~"xyz:.;";
2743 let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
2744 let actual = to_upper(input);
2745 assert expected == actual;
// Lower-cases strings by applying libc::tolower to each character via `map`.
// NOTE(review): despite the test's name, the visible assertions do not call
// a to_lower function directly; one line of this test is not visible here.
2749 fn test_to_lower() {
2751 assert ~"" == map(~"", |c| libc::tolower(c as c_char) as char);
2752 assert ~"ymca" == map(~"YMCA",
2753 |c| libc::tolower(c as c_char) as char);
// Exercises raw::slice_bytes with byte offsets: a prefix, a suffix, an empty
// range, and finally a 500000-byte prefix of a large generated string.
2758 fn test_unsafe_slice() {
2760 assert ~"ab" == raw::slice_bytes(~"abc", 0, 2);
2761 assert ~"bc" == raw::slice_bytes(~"abc", 1, 3);
2762 assert ~"" == raw::slice_bytes(~"abc", 1, 1);
// Builds the large input: 100000 appends of ten 'a's each.
// NOTE(review): the declarations of `i` and `rs` are not visible here.
2763 fn a_million_letter_a() -> ~str {
2766 while i < 100000 { push_str(&mut rs, ~"aaaaaaaaaa"); i += 1; }
// Builds the expected result: 100000 appends of five 'a's each.
2769 fn half_a_million_letter_a() -> ~str {
2772 while i < 100000 { push_str(&mut rs, ~"aaaaa"); i += 1; }
2775 assert half_a_million_letter_a() ==
2776 raw::slice_bytes(a_million_letter_a(), 0u, 500000);
// Checks starts_with: the empty prefix matches any string (including the
// empty one), a one-character prefix matches, and a prefix longer than the
// string never matches.
2781 fn test_starts_with() {
2782 assert (starts_with(~"", ~""));
2783 assert (starts_with(~"abc", ~""));
2784 assert (starts_with(~"abc", ~"a"));
2785 assert (!starts_with(~"a", ~"abc"));
2786 assert (!starts_with(~"", ~"abc"));
// Checks ends_with: the empty suffix matches any string (including the empty
// one), a one-character suffix matches, and a suffix longer than the string
// never matches.
2790 fn test_ends_with() {
2791 assert (ends_with(~"", ~""));
2792 assert (ends_with(~"abc", ~""));
2793 assert (ends_with(~"abc", ~"c"));
2794 assert (!ends_with(~"a", ~"abc"));
2795 assert (!ends_with(~"", ~"abc"));
// Checks is_empty: true for the empty string, false for a one-character one.
2799 fn test_is_empty() {
2800 assert (is_empty(~""));
2801 assert (!is_empty(~"a"));
// Assertions on replace(string, pattern, replacement): an empty input, a
// whole-string match, a partial match, multiple occurrences, and replacement
// with the empty string.
// NOTE(review): the enclosing test fn header and the definitions of `a` and
// `test` are not visible in this listing.
2807 assert replace(~"", a, ~"b") == ~"";
2808 assert replace(~"a", a, ~"b") == ~"b";
2809 assert replace(~"ab", a, ~"b") == ~"bb";
2811 assert replace(~" test test ", test, ~"toast") == ~" toast toast ";
2812 assert replace(~" test test ", test, ~"") == ~" ";
// Multi-byte replace, case A: the expected string starts with the
// replacement text, i.e. the match is at the beginning of `data`.
// NOTE(review): the definition of the pattern `a` is not visible here.
2816 fn test_replace_2a() {
2817 let data = ~"ประเทศไทย中华";
2818 let repl = ~"دولة الكويت";
2821 let A = ~"دولة الكويتทศไทย中华";
2822 assert (replace(data, a, repl) == A);
// Multi-byte replace, case B: the expected string has the replacement text
// in the middle, with original characters kept on both sides.
// NOTE(review): the definition of the pattern `b` is not visible here.
2826 fn test_replace_2b() {
2827 let data = ~"ประเทศไทย中华";
2828 let repl = ~"دولة الكويت";
2831 let B = ~"ปรدولة الكويتทศไทย中华";
2832 assert (replace(data, b, repl) == B);
// Multi-byte replace, case C: the expected string ends with the replacement
// text, i.e. the match is at the end of `data`.
// NOTE(review): the definition of the pattern `c` is not visible here.
2836 fn test_replace_2c() {
2837 let data = ~"ประเทศไทย中华";
2838 let repl = ~"دولة الكويت";
2841 let C = ~"ประเทศไทยدولة الكويت";
2842 assert (replace(data, c, repl) == C);
// Multi-byte replace, case D: replace must return the input unchanged.
// NOTE(review): the definition of the pattern `d` is not visible here;
// presumably it does not occur in `data`.
2846 fn test_replace_2d() {
2847 let data = ~"ประเทศไทย中华";
2848 let repl = ~"دولة الكويت";
2851 assert (replace(data, d, repl) == data);
// Assertions on slice with byte offsets: ASCII prefix/suffix/empty ranges,
// then ranges that land on the boundaries of 3-byte characters.
// NOTE(review): the enclosing test fn header is not visible in this listing.
2856 assert ~"ab" == slice(~"abc", 0, 2);
2857 assert ~"bc" == slice(~"abc", 1, 3);
2858 assert ~"" == slice(~"abc", 1, 1);
2859 assert ~"\u65e5" == slice(~"\u65e5\u672c", 0, 3);
2861 let data = ~"ประเทศไทย中华";
2862 assert ~"ป" == slice(data, 0, 3);
2863 assert ~"ร" == slice(data, 3, 6);
2864 assert ~"" == slice(data, 3, 3);
2865 assert ~"华" == slice(data, 30, 33);
// Builds the large input by appending ten copies of a 3-byte character at a
// time. NOTE(review): the loop header and the locals are not visible here.
2867 fn a_million_letter_X() -> ~str {
2871 push_str(&mut rs, ~"华华华华华华华华华华");
// Builds the expected result: 100000 appends of five characters each.
2876 fn half_a_million_letter_X() -> ~str {
2879 while i < 100000 { push_str(&mut rs, ~"华华华华华"); i += 1; }
// The end offset is in bytes: 500000 characters at 3 bytes each.
2882 assert half_a_million_letter_X() ==
2883 slice(a_million_letter_X(), 0u, 3u * 500000u);
// Further slice assertions on a string mixing 3-byte and 1-byte characters,
// interleaved with the plain ASCII cases.
// NOTE(review): the enclosing test fn header is not visible in this listing.
2888 let ss = ~"中华Việt Nam";
2890 assert ~"华" == slice(ss, 3u, 6u);
2891 assert ~"Việt Nam" == slice(ss, 6u, 16u);
2893 assert ~"ab" == slice(~"abc", 0u, 2u);
2894 assert ~"bc" == slice(~"abc", 1u, 3u);
2895 assert ~"" == slice(~"abc", 1u, 1u);
2897 assert ~"中" == slice(ss, 0u, 3u);
2898 assert ~"华V" == slice(ss, 3u, 7u);
2899 assert ~"" == slice(ss, 3u, 3u);
// Slices with an end offset of 2, which falls inside the first character
// (a 3-byte UTF-8 sequence). The test is ignored on Windows.
// NOTE(review): presumably this is expected to fail; any attribute saying so
// is not visible in this listing.
2914 #[ignore(cfg(windows))]
2915 fn test_slice_fail() {
2916 slice(~"中华Việt Nam", 0u, 2u);
// Checks trim_left_chars: an empty character set changes nothing, characters
// from the set are stripped from the left only, a string made entirely of
// set characters becomes empty, and a string that does not start with a set
// character is returned unchanged.
2920 fn test_trim_left_chars() {
2921 assert trim_left_chars(~" *** foo *** ", ~[]) == ~" *** foo *** ";
2922 assert trim_left_chars(~" *** foo *** ", ~['*', ' ']) == ~"foo *** ";
2923 assert trim_left_chars(~" *** *** ", ~['*', ' ']) == ~"";
2924 assert trim_left_chars(~"foo *** ", ~['*', ' ']) == ~"foo *** ";
// Checks trim_right_chars: an empty character set changes nothing,
// characters from the set are stripped from the right only, a string made
// entirely of set characters becomes empty, and a string that does not end
// with a set character is returned unchanged.
2928 fn test_trim_right_chars() {
2929 assert trim_right_chars(~" *** foo *** ", ~[]) == ~" *** foo *** ";
2930 assert trim_right_chars(~" *** foo *** ", ~['*', ' ']) == ~" *** foo";
2931 assert trim_right_chars(~" *** *** ", ~['*', ' ']) == ~"";
2932 assert trim_right_chars(~" *** foo", ~['*', ' ']) == ~" *** foo";
// Checks trim_chars: an empty character set changes nothing, characters from
// the set are stripped from both ends, a string made entirely of set
// characters becomes empty, and a string with nothing to strip is unchanged.
2936 fn test_trim_chars() {
2937 assert trim_chars(~" *** foo *** ", ~[]) == ~" *** foo *** ";
2938 assert trim_chars(~" *** foo *** ", ~['*', ' ']) == ~"foo";
2939 assert trim_chars(~" *** *** ", ~['*', ' ']) == ~"";
2940 assert trim_chars(~"foo", ~['*', ' ']) == ~"foo";
// Checks trim_left: empty and whitespace-free inputs are unchanged, an
// all-whitespace input becomes empty, leading whitespace (including U+3000)
// is removed, and trailing whitespace is preserved.
2944 fn test_trim_left() {
2945 assert (trim_left(~"") == ~"");
2946 assert (trim_left(~"a") == ~"a");
2947 assert (trim_left(~" ") == ~"");
2948 assert (trim_left(~" blah") == ~"blah");
2949 assert (trim_left(~" \u3000 wut") == ~"wut");
2950 assert (trim_left(~"hey ") == ~"hey ");
// Checks trim_right: empty and whitespace-free inputs are unchanged, an
// all-whitespace input becomes empty, trailing whitespace (including U+3000)
// is removed, and leading whitespace is preserved.
2954 fn test_trim_right() {
2955 assert (trim_right(~"") == ~"");
2956 assert (trim_right(~"a") == ~"a");
2957 assert (trim_right(~" ") == ~"");
2958 assert (trim_right(~"blah ") == ~"blah");
2959 assert (trim_right(~"wut \u3000 ") == ~"wut");
2960 assert (trim_right(~" hey") == ~" hey");
// Assertions on trim: whitespace is removed from both ends (including a
// newline and U+3000), while whitespace between words is kept.
// NOTE(review): the enclosing test fn header is not visible in this listing.
2965 assert (trim(~"") == ~"");
2966 assert (trim(~"a") == ~"a");
2967 assert (trim(~" ") == ~"");
2968 assert (trim(~" blah ") == ~"blah");
2969 assert (trim(~"\nwut \u3000 ") == ~"wut");
2970 assert (trim(~" hey dude ") == ~"hey dude");
// Checks is_whitespace: true for the empty string and for strings made only
// of whitespace (space, newline, tab, U+2009); false as soon as one
// non-whitespace character is present.
2974 fn test_is_whitespace() {
2975 assert (is_whitespace(~""));
2976 assert (is_whitespace(~" "));
2977 assert (is_whitespace(~"\u2009")); // Thin space
2978 assert (is_whitespace(~" \n\t "));
2979 assert (!is_whitespace(~" _ "));
// Checks is_ascii: true for the empty string and for "a", false for a string
// containing the non-ASCII character U+2009.
2983 fn test_is_ascii() {
2984 assert (is_ascii(~""));
2985 assert (is_ascii(~"a"));
2986 assert (!is_ascii(~"\u2009"));
// Calls raw::shift_byte on `s` and checks that `s` is "BC" afterwards.
// NOTE(review): the initial value of `s` and any check on the returned byte
// `b` are not visible in this listing.
2990 fn test_shift_byte() {
2992 let b = unsafe { raw::shift_byte(&mut s) };
2993 assert (s == ~"BC");
// Calls raw::pop_byte on `s` and checks that `s` is "AB" afterwards.
// NOTE(review): the initial value of `s` and any check on the returned byte
// `b` are not visible in this listing.
2998 fn test_pop_byte() {
3000 let b = unsafe { raw::pop_byte(&mut s) };
3001 assert (s == ~"AB");
// Checks that raw::from_bytes turns seven bytes of value 65 (ASCII 'A') into
// the string "AAAAAAA".
3006 fn test_unsafe_from_bytes() {
3007 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
3008 let b = unsafe { raw::from_bytes(a) };
3009 assert (b == ~"AAAAAAA");
// Checks that from_bytes rebuilds the string `ss` from a hand-written byte
// vector `bb`.
// NOTE(review): the final element(s) of `bb` are not visible in this listing.
3013 fn test_from_bytes() {
3014 let ss = ~"ศไทย中华Việt Nam";
3015 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
3016 0xe0_u8, 0xb9_u8, 0x84_u8,
3017 0xe0_u8, 0xb8_u8, 0x97_u8,
3018 0xe0_u8, 0xb8_u8, 0xa2_u8,
3019 0xe4_u8, 0xb8_u8, 0xad_u8,
3020 0xe5_u8, 0x8d_u8, 0x8e_u8,
3021 0x56_u8, 0x69_u8, 0xe1_u8,
3022 0xbb_u8, 0x87_u8, 0x74_u8,
3023 0x20_u8, 0x4e_u8, 0x61_u8,
3026 assert ss == from_bytes(bb);
// Feeds from_bytes a byte vector whose first byte is 0xff, which is never
// valid in UTF-8; from_bytes is documented to fail on invalid UTF-8.
// The test is ignored on Windows.
// NOTE(review): the final element(s) of `bb` and any should-fail attribute
// are not visible in this listing.
3031 #[ignore(cfg(windows))]
3032 fn test_from_bytes_fail() {
3033 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
3034 0xe0_u8, 0xb9_u8, 0x84_u8,
3035 0xe0_u8, 0xb8_u8, 0x97_u8,
3036 0xe0_u8, 0xb8_u8, 0xa2_u8,
3037 0xe4_u8, 0xb8_u8, 0xad_u8,
3038 0xe5_u8, 0x8d_u8, 0x8e_u8,
3039 0x56_u8, 0x69_u8, 0xe1_u8,
3040 0xbb_u8, 0x87_u8, 0x74_u8,
3041 0x20_u8, 0x4e_u8, 0x61_u8,
3044 let _x = from_bytes(bb);
// Checks that raw::from_buf, given a pointer to seven 'A' bytes followed by
// a zero byte, yields "AAAAAAA" (the zero byte is not part of the result).
3048 fn test_from_buf() {
3050 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
3051 let b = vec::raw::to_ptr(a);
3052 let c = raw::from_buf(b);
3053 assert (c == ~"AAAAAAA");
// Calls as_bytes with a closure that fails immediately; per the comment
// below, the point is that failing inside the closure must not cause a
// double free. The test is ignored on Windows.
3058 #[ignore(cfg(windows))]
3060 fn test_as_bytes_fail() {
3061 // Don't double free
3062 as_bytes::<()>(&~"", |_bytes| fail!() );
// Passes `a` to as_buf and asserts that the first byte of the buffer is 65.
// NOTE(review): the enclosing test fn header, the definition of `a`, and the
// rest of the closure are not visible in this listing.
3068 let b = as_buf(a, |buf, _l| {
3069 assert unsafe { *buf } == 65u8;
// Passes `a` to as_buf and asserts that the first byte of the buffer is 65.
// NOTE(review): the definition of `a` and the rest of the closure are not
// visible in this listing.
3076 fn test_as_buf_small() {
3078 let b = as_buf(a, |buf, _l| {
3079 assert unsafe { *buf } == 65u8;
// Takes the buffer pointer of `s` out of as_buf and builds a new string
// from it with raw::from_buf.
// NOTE(review): the enclosing test fn header, the definition of `s`, and the
// assertion on `s_cstr` are not visible in this listing.
3089 let sb = as_buf(s, |b, _l| b);
3090 let s_cstr = raw::from_buf(sb);
// Inspects the raw buffer handed out by as_buf: the first byte is 'h', the
// byte at offset 4 is 'o', and the byte at offset 5 is the zero terminator.
// NOTE(review): the definition of `a` and any check on `len` are not visible
// in this listing.
3096 fn test_as_buf_3() {
3098 do as_buf(a) |buf, len| {
3100 assert a[0] == 'h' as u8;
3101 assert *buf == 'h' as u8;
3103 assert *ptr::offset(buf,4u) == 'o' as u8;
3104 assert *ptr::offset(buf,5u) == 0u8;
// Converts a string to a byte vector with to_bytes and back with from_bytes,
// then records the string length `n1` and the vector length `n2`.
// NOTE(review): the comparisons that use `i`, `n1`, `n2` and `s2` are not
// visible in this listing.
3110 fn vec_str_conversions() {
3111 let s1: ~str = ~"All mimsy were the borogoves";
3113 let v: ~[u8] = to_bytes(s1);
3114 let s2: ~str = from_bytes(v);
3115 let mut i: uint = 0u;
3116 let n1: uint = len(s1);
3117 let n2: uint = vec::len::<u8>(v);
// Checks contains: substrings in the middle, at the start and at the end
// are found; the empty needle is always found (even in the empty string);
// needles that run past the end or are absent are not found.
3130 fn test_contains() {
3131 assert contains(~"abcde", ~"bcd");
3132 assert contains(~"abcde", ~"abcd");
3133 assert contains(~"abcde", ~"bcde");
3134 assert contains(~"abcde", ~"");
3135 assert contains(~"", ~"");
3136 assert !contains(~"abcde", ~"def");
3137 assert !contains(~"", ~"a");
// Same checks on multi-byte text. The last needle is built from characters
// that all occur in `data`, but not adjacent to each other.
3139 let data = ~"ประเทศไทย中华Việt Nam";
3140 assert contains(data, ~"ประเ");
3141 assert contains(data, ~"ะเ");
3142 assert contains(data, ~"中华");
3143 assert !contains(data, ~"ไท华");
// Checks contains_char: present characters are found, absent ones are not,
// and nothing is found in the empty string.
3147 fn test_contains_char() {
3148 assert contains_char(~"abc", 'b');
3149 assert contains_char(~"a", 'a');
3150 assert !contains_char(~"abc", 'd');
3151 assert !contains_char(~"", 'a');
// Iterates "x\u03c0y" with chars_each and checks each character by position;
// a position beyond 2 makes the test fail.
// NOTE(review): the position counter and the `match` header are not visible
// in this listing.
3155 fn test_chars_each() {
3157 for chars_each(~"x\u03c0y") |ch| {
3159 0 => assert ch == 'x',
3160 1 => assert ch == '\u03c0',
3161 2 => assert ch == 'y',
3162 _ => fail!(~"test_chars_each failed")
// On the empty string the closure must never be invoked.
3167 chars_each(~"", |_ch| fail!() ); // should not fail
// Iterates "xyz" with bytes_each and checks each byte by position; a
// position beyond 2 makes the test fail. A second loop runs over the empty
// string.
// NOTE(review): the position counter, the `match` header and the body of the
// second loop are not visible in this listing.
3171 fn test_bytes_each() {
3174 for bytes_each(~"xyz") |bb| {
3176 0 => assert bb == 'x' as u8,
3177 1 => assert bb == 'y' as u8,
3178 2 => assert bb == 'z' as u8,
3179 _ => fail!(~"test_bytes_each failed")
3184 for bytes_each(~"") |bb| {
// Splits `data` on ' ' with split_char_each and checks the first four pieces
// by position. The leading newline stays attached to the first piece, since
// only spaces separate.
// NOTE(review): the position counter, the `match` header and the remaining
// arms are not visible in this listing.
3190 fn test_split_char_each() {
3191 let data = ~"\nMary had a little lamb\nLittle lamb\n";
3195 for split_char_each(data, ' ') |xx| {
3197 0 => assert "\nMary" == xx,
3198 1 => assert "had" == xx,
3199 2 => assert "a" == xx,
3200 3 => assert "little" == xx,
// Splits `data` on ' ' with splitn_char_each and a count of 2: the expected
// pieces are the first two fields, then the whole unsplit remainder as a
// third piece.
// NOTE(review): the position counter, the `match` header and the remaining
// arms are not visible in this listing.
3208 fn test_splitn_char_each() {
3209 let data = ~"\nMary had a little lamb\nLittle lamb\n";
3213 for splitn_char_each(data, ' ', 2u) |xx| {
3215 0 => assert "\nMary" == xx,
3216 1 => assert "had" == xx,
3217 2 => assert "a little lamb\nLittle lamb\n" == xx,
// Iterates the words of `data` with words_each and checks the first four by
// position. The leading newline is not part of the first word.
// NOTE(review): the position counter, the `match` header and the remaining
// arms are not visible in this listing.
3225 fn test_words_each() {
3226 let data = ~"\nMary had a little lamb\nLittle lamb\n";
3230 for words_each(data) |ww| {
3232 0 => assert "Mary" == ww,
3233 1 => assert "had" == ww,
3234 2 => assert "a" == ww,
3235 3 => assert "little" == ww,
// On the empty string the closure must never be invoked.
3241 words_each(~"", |_x| fail!()); // should not fail
// Iterates the lines of `lf` with lines_each and checks them by position:
// the leading and trailing newlines each produce an empty line.
// NOTE(review): the position counter, the `match` header and any fallback
// arm are not visible in this listing.
3245 fn test_lines_each () {
3246 let lf = ~"\nMary had a little lamb\nLittle lamb\n";
3250 for lines_each(lf) |x| {
3252 0 => assert "" == x,
3253 1 => assert "Mary had a little lamb" == x,
3254 2 => assert "Little lamb" == x,
3255 3 => assert "" == x,
// Assertions on map: applying libc::toupper to every character leaves the
// empty string empty and turns "ymca" into "YMCA".
// NOTE(review): the enclosing test fn header is not visible in this listing.
3265 assert ~"" == map(~"", |c| libc::toupper(c as c_char) as char);
3266 assert ~"YMCA" == map(~"ymca",
3267 |c| libc::toupper(c as c_char) as char);
// Assertions on all with char::is_uppercase: true for the empty string and
// for an all-uppercase string, false when any character is lowercase
// (whether first, last, or every one).
// NOTE(review): the enclosing test fn header is not visible in this listing.
3273 assert true == all(~"", char::is_uppercase);
3274 assert false == all(~"ymca", char::is_uppercase);
3275 assert true == all(~"YMCA", char::is_uppercase);
3276 assert false == all(~"yMCA", char::is_uppercase);
3277 assert false == all(~"YMCy", char::is_uppercase);
// Assertions on any with char::is_uppercase: false for the empty string and
// for an all-lowercase string, true as soon as one character is uppercase.
// NOTE(review): the enclosing test fn header is not visible in this listing.
3282 assert false == any(~"", char::is_uppercase);
3283 assert false == any(~"ymca", char::is_uppercase);
3284 assert true == any(~"YMCA", char::is_uppercase);
3285 assert true == any(~"yMCA", char::is_uppercase);
3286 assert true == any(~"Ymcy", char::is_uppercase);
// Compares a vector holding the individual characters of `ss` against a
// value computed from `ss`.
// NOTE(review): the enclosing test fn header and the right-hand side of the
// comparison are not visible in this listing.
3291 let ss = ~"ศไทย中华Việt Nam";
3292 assert ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m']
// Table of (string, UTF-16 code units) pairs followed by the loop that
// checks them. Characters outside the Basic Multilingual Plane appear as
// two-unit surrogate pairs (0xd800/0xd801 followed by a 0xdcxx/0xdfxx unit).
// NOTE(review): the enclosing test fn header, the `let pairs = ...` line,
// the strings of the first two entries and a few array lines are not visible
// in this listing.
3300 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3301 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3302 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3303 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3306 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3307 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3308 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3309 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3310 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3313 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3314 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3315 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3316 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3317 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3318 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3319 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3320 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3322 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3323 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3324 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3325 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3326 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3327 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3328 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3329 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3330 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3331 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3332 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
// For every pair: encoding the string gives the units, decoding the units
// gives the string, and both round trips return the starting value.
3335 for vec::each(pairs) |p| {
3336 let (s, u) = copy *p;
3337 assert to_utf16(s) == u;
3338 assert from_utf16(u) == s;
3339 assert from_utf16(to_utf16(s)) == s;
3340 assert to_utf16(from_utf16(u)) == u;
// Iterates `s` with each_char, tracking a `found_b` flag that starts false.
// NOTE(review): the definition of `s`, the loop body and the final assertion
// are not visible in this listing.
3345 fn test_each_char() {
3347 let mut found_b = false;
3348 for each_char(s) |ch| {
// Checks escape_unicode: every character is escaped, including printable
// ASCII. Per the assertions, code points up to 0xff use the two-digit \x
// form, code points up to 0xffff the four-digit \u form, and larger ones the
// eight-digit \U form.
3358 fn test_escape_unicode() {
3359 assert escape_unicode(~"abc") == ~"\\x61\\x62\\x63";
3360 assert escape_unicode(~"a c") == ~"\\x61\\x20\\x63";
3361 assert escape_unicode(~"\r\n\t") == ~"\\x0d\\x0a\\x09";
3362 assert escape_unicode(~"'\"\\") == ~"\\x27\\x22\\x5c";
3363 assert escape_unicode(~"\x00\x01\xfe\xff") == ~"\\x00\\x01\\xfe\\xff";
3364 assert escape_unicode(~"\u0100\uffff") == ~"\\u0100\\uffff";
3365 assert escape_unicode(~"\U00010000\U0010ffff") ==
3366 ~"\\U00010000\\U0010ffff";
3367 assert escape_unicode(~"ab\ufb00") == ~"\\x61\\x62\\ufb00";
3368 assert escape_unicode(~"\U0001d4ea\r") == ~"\\U0001d4ea\\x0d";
// Checks escape_default: letters and spaces pass through unchanged; \r, \n,
// \t, quotes and the backslash get their short backslash escapes; non-ASCII
// code points use the \u (four-digit) or \U (eight-digit) form.
3372 fn test_escape_default() {
3373 assert escape_default(~"abc") == ~"abc";
3374 assert escape_default(~"a c") == ~"a c";
3375 assert escape_default(~"\r\n\t") == ~"\\r\\n\\t";
3376 assert escape_default(~"'\"\\") == ~"\\'\\\"\\\\";
3377 assert escape_default(~"\u0100\uffff") == ~"\\u0100\\uffff";
3378 assert escape_default(~"\U00010000\U0010ffff") ==
3379 ~"\\U00010000\\U0010ffff";
3380 assert escape_default(~"ab\ufb00") == ~"ab\\ufb00";
3381 assert escape_default(~"\U0001d4ea\r") == ~"\\U0001d4ea\\r";
// Checks to_managed on an owned string and on a borrowed view of a string
// literal: both must compare equal to the corresponding managed (@) string.
3385 fn test_to_managed() {
3386 assert (~"abc").to_managed() == @"abc";
3387 assert view("abcdef", 1, 5).to_managed() == @"bcde";