1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 An example of creating and using a C string would be:
45 fn puts(s: *libc::c_char);
49 let my_string = "Hello, world!";
51 // Allocate the C string with an explicit local that owns the string. The
52 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
53 let my_c_string = my_string.to_c_str();
54 my_c_string.with_ref(|c_buffer| {
55 unsafe { puts(c_buffer); }
58 // Don't save off the allocation of the C string, the `c_buffer` will be
59 // deallocated when this block returns!
60 my_string.with_c_str(|c_buffer| {
61 unsafe { puts(c_buffer); }
69 use container::Container;
70 use iter::{Iterator, range};
77 use option::{Option, Some, None};
82 use slice::{ImmutableVector, MutableVector};
84 use rt::global_heap::malloc_raw;
87 /// The representation of a C String.
89 /// This structure wraps a `*libc::c_char`, and will automatically free the
90 /// memory it is pointing to when it goes out of scope.
96 impl Clone for CString {
97 /// Clone this CString into a new, uniquely owned CString. For safety
98 /// reasons, this is always a deep clone, rather than the usual shallow
100 fn clone(&self) -> CString {
// A null CString has no buffer to copy, so the null pointer and the
// ownership flag are carried over unchanged.
101 if self.buf.is_null() {
102 CString { buf: self.buf, owns_buffer_: self.owns_buffer_ }
// `len()` does not count the NUL terminator, so add one byte to copy it too.
104 let len = self.len() + 1;
105 let buf = unsafe { malloc_raw(len) } as *mut libc::c_char;
106 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
// The copy lives in a freshly allocated buffer, so the clone is always
// marked as owning it, whatever the ownership flag of `self` is.
107 CString { buf: buf as *libc::c_char, owns_buffer_: true }
112 impl Eq for CString {
/// Compares two CStrings, first by buffer address and then by content.
113 fn eq(&self, other: &CString) -> bool {
// Fast path: both wrappers point at the same address (this also covers
// the case where both are null).
114 if self.buf as uint == other.buf as uint {
// The addresses differ, so at most one of them can be null here; a null
// pointer is never handed to `strcmp` below.
116 } else if self.buf.is_null() || other.buf.is_null() {
// Content comparison of the two NUL-terminated buffers: they are equal
// exactly when `strcmp` returns 0.
120 libc::strcmp(self.buf, other.buf) == 0
127 /// Create a C String from a pointer.
///
/// The pointer is stored as-is. `owns_buffer` is recorded in the wrapper;
/// when it is true, the `Drop` impl releases `buf` with `libc::free` once
/// the `CString` goes out of scope.
128 pub unsafe fn new(buf: *libc::c_char, owns_buffer: bool) -> CString {
129 CString { buf: buf, owns_buffer_: owns_buffer }
132 /// Unwraps the wrapped `*libc::c_char` from the `CString` wrapper.
133 /// Any ownership of the buffer by the `CString` wrapper is forgotten.
134 pub unsafe fn unwrap(self) -> *libc::c_char {
135 let mut c_str = self;
// Clear the ownership flag so that dropping `c_str` when this function
// returns does not free the buffer.
136 c_str.owns_buffer_ = false;
140 /// Calls a closure with a reference to the underlying `*libc::c_char`.
144 /// Fails if the CString is null.
145 pub fn with_ref<T>(&self, f: |*libc::c_char| -> T) -> T {
146 if self.buf.is_null() { fail!("CString is null!"); }
150 /// Calls a closure with a mutable reference to the underlying `*libc::c_char`.
154 /// Fails if the CString is null.
155 pub fn with_mut_ref<T>(&mut self, f: |*mut libc::c_char| -> T) -> T {
156 if self.buf.is_null() { fail!("CString is null!"); }
// `buf` is held as an immutable `*libc::c_char`; reinterpret it as a
// `*mut` pointer for the closure and return whatever the closure returns.
157 f(unsafe { cast::transmute_mut_unsafe(self.buf) })
160 /// Returns true if the CString is null.
161 pub fn is_null(&self) -> bool {
165 /// Returns true if the CString is not null.
166 pub fn is_not_null(&self) -> bool {
167 self.buf.is_not_null()
170 /// Returns whether or not the `CString` owns the buffer.
171 pub fn owns_buffer(&self) -> bool {
175 /// Converts the CString into a `&[u8]` without copying.
176 /// Includes the terminating NUL byte.
180 /// Fails if the CString is null.
182 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
183 if self.buf.is_null() { fail!("CString is null!"); }
// Build the slice directly over the C buffer instead of copying it.
// `len()` excludes the terminator, so `+ 1` extends the slice over the NUL.
185 cast::transmute(Slice { data: self.buf, len: self.len() + 1 })
189 /// Converts the CString into a `&[u8]` without copying.
190 /// Does not include the terminating NUL byte.
194 /// Fails if the CString is null.
196 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
197 if self.buf.is_null() { fail!("CString is null!"); }
// Build the slice directly over the C buffer instead of copying it.
// Using `len()` unchanged stops the slice just before the terminating NUL.
199 cast::transmute(Slice { data: self.buf, len: self.len() })
203 /// Converts the CString into a `&str` without copying.
204 /// Returns None if the CString is not UTF-8.
208 /// Fails if the CString is null.
210 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
211 let buf = self.as_bytes_no_nul();
215 /// Return a CString iterator.
219 /// Fails if the CString is null.
220 pub fn iter<'a>(&'a self) -> CChars<'a> {
221 if self.buf.is_null() { fail!("CString is null!"); }
224 marker: marker::ContravariantLifetime,
229 impl Drop for CString {
// Only a buffer this wrapper owns is released; when `owns_buffer_` is false
// the pointer is left untouched.
231 if self.owns_buffer_ {
233 libc::free(self.buf as *mut libc::c_void)
239 impl Container for CString {
240 /// Return the number of bytes in the CString (not including the NUL terminator).
244 /// Fails if the CString is null.
246 fn len(&self) -> uint {
247 if self.buf.is_null() { fail!("CString is null!"); }
// The length is not stored anywhere: it is found by searching from the
// start of the buffer for the first zero byte on every call.
249 ptr::position(self.buf, |c| *c == 0)
254 /// A generic trait for converting a value to a CString.
256 /// Copy the receiver into a CString.
260 /// Fails the task if the receiver has an interior null.
261 fn to_c_str(&self) -> CString;
263 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
264 unsafe fn to_c_str_unchecked(&self) -> CString;
266 /// Work with a temporary CString constructed from the receiver.
267 /// The provided `*libc::c_char` will be freed immediately upon return.
272 /// extern crate libc;
275 /// let s = "PATH".with_c_str(|path| unsafe {
276 /// libc::getenv(path)
283 /// Fails the task if the receiver has an interior null.
285 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
286 self.to_c_str().with_ref(f)
289 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
291 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
292 self.to_c_str_unchecked().with_ref(f)
// Every method below forwards to the `&[u8]` implementation through
// `as_bytes()`, so string slices and byte slices share one conversion path.
296 impl<'a> ToCStr for &'a str {
298 fn to_c_str(&self) -> CString {
299 self.as_bytes().to_c_str()
303 unsafe fn to_c_str_unchecked(&self) -> CString {
304 self.as_bytes().to_c_str_unchecked()
// Forwarded explicitly so that the byte-slice version of `with_c_str` is
// the one that runs.
308 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
309 self.as_bytes().with_c_str(f)
313 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
314 self.as_bytes().with_c_str_unchecked(f)
318 // The length, in bytes, of the stack allocated buffer for `vec.with_c_str()`.
// Only slices with `len() < BUF_LEN` are copied into that buffer.
319 static BUF_LEN: uint = 128;
321 impl<'a> ToCStr for &'a [u8] {
// Checked conversion: build the CString without checking, then run
// `check_for_null` over the copied bytes.
322 fn to_c_str(&self) -> CString {
323 let mut cs = unsafe { self.to_c_str_unchecked() };
324 cs.with_mut_ref(|buf| check_for_null(*self, buf));
// Unchecked conversion: the bytes are copied verbatim into a new buffer.
328 unsafe fn to_c_str_unchecked(&self) -> CString {
329 let self_len = self.len();
// One extra byte for the NUL terminator.
330 let buf = malloc_raw(self_len + 1);
332 ptr::copy_memory(buf, self.as_ptr(), self_len);
// Write the terminator right after the copied bytes.
333 *buf.offset(self_len as int) = 0;
// `true`: the returned CString owns the buffer allocated above.
335 CString::new(buf as *libc::c_char, true)
// Both `with_c_str` variants go through the free function `with_c_str`;
// the boolean selects the checked (`true`) or unchecked (`false`) behaviour.
338 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
339 unsafe { with_c_str(*self, true, f) }
342 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
343 with_c_str(*self, false, f)
347 // Unsafe function that handles possibly copying the &[u8] into a stack array.
// `checked` is passed as `true` by `with_c_str` and `false` by
// `with_c_str_unchecked` in the `&[u8]` impl; `f` receives the C pointer.
348 unsafe fn with_c_str<T>(v: &[u8], checked: bool, f: |*libc::c_char| -> T) -> T {
// Strictly less than `BUF_LEN`, so at least one byte of the stack buffer
// stays free after the copy (presumably for the NUL terminator; that write
// is not visible in this listing, TODO confirm).
349 if v.len() < BUF_LEN {
350 let mut buf: [u8, .. BUF_LEN] = mem::uninit();
351 slice::bytes::copy_memory(buf, v);
354 let buf = buf.as_mut_ptr();
// NOTE(review): presumably only executed when `checked` is true; confirm
// against the full source.
356 check_for_null(v, buf as *mut libc::c_char);
359 f(buf as *libc::c_char)
// Otherwise a CString is created from `v`: the checked constructor in one
// case and the unchecked constructor in the other.
361 v.to_c_str().with_ref(f)
363 v.to_c_str_unchecked().with_ref(f)
// Visits each of the first `v.len()` positions of `buf`, one pointer per
// index. NOTE(review): the check applied to each position is not visible in
// this listing; presumably it fails the task on an interior NUL, as the
// `to_c_str` docs describe. Confirm against the full source.
368 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
369 for i in range(0, v.len()) {
// Pointer to the i-th byte of the C buffer.
371 let p = buf.offset(i as int);
377 /// External iterator for a CString's bytes.
379 /// Use with the `std::iter` module.
380 pub struct CChars<'a> {
382 marker: marker::ContravariantLifetime<'a>,
385 impl<'a> Iterator<libc::c_char> for CChars<'a> {
386 fn next(&mut self) -> Option<libc::c_char> {
// Read the byte currently under the cursor.
387 let ch = unsafe { *self.ptr };
// Move the cursor forward by one byte.
391 self.ptr = unsafe { self.ptr.offset(1) };
397 /// Parses a C "multistring", e.g. Windows environment values or
398 /// the req->ptr result in a uv_fs_readdir() call.
400 /// Optionally, a `count` can be passed in, limiting the
401 /// parsing to at most `count` strings.
403 /// The specified closure is invoked with each string that
404 /// is found, and the number of strings found is returned.
405 pub unsafe fn from_c_multistring(buf: *libc::c_char,
407 f: |&CString|) -> uint {
// The cursor is kept as an integer address so it can be advanced with
// plain arithmetic.
409 let mut curr_ptr: uint = buf as uint;
// Split the optional `count` into a flag plus the limit itself.
411 let (limited_count, limit) = match count {
412 Some(limit) => (true, limit),
// Continue while the limit (if any) has not been reached and the byte
// under the cursor is not NUL; a NUL here ends the parse.
415 while ((limited_count && ctr < limit) || !limited_count)
416 && *(curr_ptr as *libc::c_char) != 0 as libc::c_char {
// Wrap the current string without taking ownership (`false`), so dropping
// `cstr` does not free memory that belongs to the caller's buffer.
417 let cstr = CString::new(curr_ptr as *libc::c_char, false);
// Skip the string and its NUL terminator to reach the next string.
419 curr_ptr += cstr.len() + 1;
433 fn test_str_multistring_parsing() {
435 let input = bytes!("zero", "\x00", "one", "\x00", "\x00");
436 let ptr = input.as_ptr();
437 let expected = ["zero", "one"];
438 let mut it = expected.iter();
439 let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
440 let cbytes = c.as_bytes_no_nul();
441 assert_eq!(cbytes, it.next().unwrap().as_bytes());
443 assert_eq!(result, 2);
444 assert!(it.next().is_none());
449 fn test_str_to_c_str() {
450 "".to_c_str().with_ref(|buf| {
452 assert_eq!(*buf.offset(0), 0);
456 "hello".to_c_str().with_ref(|buf| {
458 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
459 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
460 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
461 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
462 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
463 assert_eq!(*buf.offset(5), 0);
469 fn test_vec_to_c_str() {
471 b.to_c_str().with_ref(|buf| {
473 assert_eq!(*buf.offset(0), 0);
477 let _ = bytes!("hello").to_c_str().with_ref(|buf| {
479 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
480 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
481 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
482 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
483 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
484 assert_eq!(*buf.offset(5), 0);
488 let _ = bytes!("foo", 0xff).to_c_str().with_ref(|buf| {
490 assert_eq!(*buf.offset(0), 'f' as libc::c_char);
491 assert_eq!(*buf.offset(1), 'o' as libc::c_char);
492 assert_eq!(*buf.offset(2), 'o' as libc::c_char);
493 assert_eq!(*buf.offset(3), 0xff as i8);
494 assert_eq!(*buf.offset(4), 0);
501 let c_str = unsafe { CString::new(ptr::null(), false) };
502 assert!(c_str.is_null());
503 assert!(!c_str.is_not_null());
508 let c_str = "hello".to_c_str();
509 unsafe { libc::free(c_str.unwrap() as *mut libc::c_void) }
514 let c_str = "hello".to_c_str();
515 let len = unsafe { c_str.with_ref(|buf| libc::strlen(buf)) };
516 assert!(!c_str.is_null());
517 assert!(c_str.is_not_null());
523 fn test_with_ref_empty_fail() {
524 let c_str = unsafe { CString::new(ptr::null(), false) };
525 c_str.with_ref(|_| ());
530 let c_str = "".to_c_str();
531 let mut iter = c_str.iter();
532 assert_eq!(iter.next(), None);
534 let c_str = "hello".to_c_str();
535 let mut iter = c_str.iter();
536 assert_eq!(iter.next(), Some('h' as libc::c_char));
537 assert_eq!(iter.next(), Some('e' as libc::c_char));
538 assert_eq!(iter.next(), Some('l' as libc::c_char));
539 assert_eq!(iter.next(), Some('l' as libc::c_char));
540 assert_eq!(iter.next(), Some('o' as libc::c_char));
541 assert_eq!(iter.next(), None);
545 fn test_to_c_str_fail() {
547 assert!(task::try(proc() { "he\x00llo".to_c_str() }).is_err());
551 fn test_to_c_str_unchecked() {
553 "he\x00llo".to_c_str_unchecked().with_ref(|buf| {
554 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
555 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
556 assert_eq!(*buf.offset(2), 0);
557 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
558 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
559 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
560 assert_eq!(*buf.offset(6), 0);
567 let c_str = "hello".to_c_str();
568 assert_eq!(c_str.as_bytes(), bytes!("hello", 0));
569 let c_str = "".to_c_str();
570 assert_eq!(c_str.as_bytes(), bytes!(0));
571 let c_str = bytes!("foo", 0xff).to_c_str();
572 assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
576 fn test_as_bytes_no_nul() {
577 let c_str = "hello".to_c_str();
578 assert_eq!(c_str.as_bytes_no_nul(), bytes!("hello"));
579 let c_str = "".to_c_str();
581 assert_eq!(c_str.as_bytes_no_nul(), exp);
582 let c_str = bytes!("foo", 0xff).to_c_str();
583 assert_eq!(c_str.as_bytes_no_nul(), bytes!("foo", 0xff));
588 fn test_as_bytes_fail() {
589 let c_str = unsafe { CString::new(ptr::null(), false) };
595 fn test_as_bytes_no_nul_fail() {
596 let c_str = unsafe { CString::new(ptr::null(), false) };
597 c_str.as_bytes_no_nul();
602 let c_str = "hello".to_c_str();
603 assert_eq!(c_str.as_str(), Some("hello"));
604 let c_str = "".to_c_str();
605 assert_eq!(c_str.as_str(), Some(""));
606 let c_str = bytes!("foo", 0xff).to_c_str();
607 assert_eq!(c_str.as_str(), None);
612 fn test_as_str_fail() {
613 let c_str = unsafe { CString::new(ptr::null(), false) };
620 let c_str = unsafe { CString::new(ptr::null(), false) };
626 fn test_iter_fail() {
627 let c_str = unsafe { CString::new(ptr::null(), false) };
633 let a = "hello".to_c_str();
639 fn test_clone_noleak() {
640 fn foo(f: |c: &CString|) {
642 let c = s.to_c_str();
643 // give the closure a non-owned CString
644 let mut c_ = c.with_ref(|c| unsafe { CString::new(c, false) } );
646 // muck with the buffer for later printing
647 c_.with_mut_ref(|c| unsafe { *c = 'X' as libc::c_char } );
650 let mut c_: Option<CString> = None;
652 c_ = Some(c.clone());
654 // force a copy, reading the memory
655 c.as_bytes().to_owned();
657 let c_ = c_.unwrap();
658 // force a copy, reading the memory
659 c_.as_bytes().to_owned();
663 fn test_clone_eq_null() {
664 let x = unsafe { CString::new(ptr::null(), false) };
673 use self::test::Bencher;
678 fn check(s: &str, c_str: *libc::c_char) {
679 let s_buf = s.as_ptr();
680 for i in range(0, s.len()) {
683 *s_buf.offset(i as int) as libc::c_char,
684 *c_str.offset(i as int));
// Benchmark inputs of increasing size. `s_short` (4 bytes) and `s_medium`
// (22 bytes) are shorter than `BUF_LEN` (128).
689 static s_short: &'static str = "Mary";
690 static s_medium: &'static str = "Mary had a little lamb";
// More than 200 bytes, i.e. longer than `BUF_LEN` (128).
691 static s_long: &'static str = "\
692 Mary had a little lamb, Little lamb
693 Mary had a little lamb, Little lamb
694 Mary had a little lamb, Little lamb
695 Mary had a little lamb, Little lamb
696 Mary had a little lamb, Little lamb
697 Mary had a little lamb, Little lamb";
699 fn bench_to_str(b: &mut Bencher, s: &str) {
701 let c_str = s.to_c_str();
702 c_str.with_ref(|c_str_buf| check(s, c_str_buf))
707 fn bench_to_c_str_short(b: &mut Bencher) {
708 bench_to_str(b, s_short)
712 fn bench_to_c_str_medium(b: &mut Bencher) {
713 bench_to_str(b, s_medium)
717 fn bench_to_c_str_long(b: &mut Bencher) {
718 bench_to_str(b, s_long)
721 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) {
723 let c_str = unsafe { s.to_c_str_unchecked() };
724 c_str.with_ref(|c_str_buf| check(s, c_str_buf))
729 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
730 bench_to_c_str_unchecked(b, s_short)
734 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
735 bench_to_c_str_unchecked(b, s_medium)
739 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
740 bench_to_c_str_unchecked(b, s_long)
743 fn bench_with_c_str(b: &mut Bencher, s: &str) {
745 s.with_c_str(|c_str_buf| check(s, c_str_buf))
750 fn bench_with_c_str_short(b: &mut Bencher) {
751 bench_with_c_str(b, s_short)
755 fn bench_with_c_str_medium(b: &mut Bencher) {
756 bench_with_c_str(b, s_medium)
760 fn bench_with_c_str_long(b: &mut Bencher) {
761 bench_with_c_str(b, s_long)
764 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) {
767 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
773 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
774 bench_with_c_str_unchecked(b, s_short)
778 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
779 bench_with_c_str_unchecked(b, s_medium)
783 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
784 bench_with_c_str_unchecked(b, s_long)