1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null-terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 An example of creating and using a C string would be:
45 fn puts(s: *const libc::c_char);
49 let my_string = "Hello, world!";
51 // Allocate the C string with an explicit local that owns the string. The
52 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
53 let my_c_string = my_string.to_c_str();
55 puts(my_c_string.as_ptr());
58 // Don't save/return the pointer to the C string, the `c_buffer` will be
59 // deallocated when this block returns!
60 my_string.with_c_str(|c_buffer| {
61 unsafe { puts(c_buffer); }
70 use alloc::libc_heap::malloc_raw;
71 use collections::string::String;
72 use collections::hash;
73 use core::kinds::marker;
81 /// The representation of a C String.
83 /// This structure wraps a `*const libc::c_char`, and will automatically free the
84 /// memory it is pointing to when it goes out of scope.
// The memory is only released when the wrapper owns it: the `Drop` impl
// checks `owns_buffer_` before calling `libc::free`.
// Raw pointer to the string data. It may be null; the accessors that need
// the data guard against that and fail with "CString is null!".
86 buf: *const libc::c_char,
90 impl Clone for CString {
91 /// Clone this CString into a new, uniquely owned CString. For safety
92 /// reasons, this is always a deep clone, rather than the usual shallow
94 fn clone(&self) -> CString {
// A null CString has no data to copy: the clone carries the same (null)
// pointer and the same ownership flag.
95 if self.buf.is_null() {
96 CString { buf: self.buf, owns_buffer_: self.owns_buffer_ }
// `len()` excludes the terminator, so add one to copy the NUL as well.
98 let len = self.len() + 1;
// Fresh buffer from `malloc_raw`, filled with the string bytes plus NUL.
99 let buf = unsafe { malloc_raw(len) } as *mut libc::c_char;
100 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
// The clone always owns its new buffer, whether or not the source owned its own.
101 CString { buf: buf as *const libc::c_char, owns_buffer_: true }
// Equality on the wrapped C strings: pointer identity first, then a
// null check, and only then a byte-wise `strcmp`.
106 impl PartialEq for CString {
107 fn eq(&self, other: &CString) -> bool {
// Same address on both sides (this also covers two null pointers).
108 if self.buf as uint == other.buf as uint {
// The addresses differ, so at most one side is null here; this branch
// keeps a null pointer from ever reaching `strcmp`.
110 } else if self.buf.is_null() || other.buf.is_null() {
// Both pointers are non-null: equal when `strcmp` reports no difference.
114 libc::strcmp(self.buf, other.buf) == 0
// Ordering is the ordering of the byte slices returned by `as_bytes()`
// (which include the terminating NUL).
// NOTE(review): `as_bytes()` fails on a null CString, whereas `eq` has an
// explicit branch for null pointers, so comparing a null CString with `<`
// or `>` fails the task. Confirm this asymmetry is intended.
120 impl PartialOrd for CString {
122 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
123 self.as_bytes().partial_cmp(&other.as_bytes())
// Marker impl with an empty body; the comparison itself is the `eq`
// defined in the `PartialEq` impl for `CString`.
127 impl Eq for CString {}
// Hashes the bytes returned by `as_bytes()`, i.e. the string data
// including its terminating NUL. Because `as_bytes()` fails on a null
// CString, hashing a null CString fails as well.
129 impl<S: hash::Writer> hash::Hash<S> for CString {
131 fn hash(&self, state: &mut S) {
132 self.as_bytes().hash(state)
137 /// Create a C String from a pointer.
/// The pointer is stored as given: nothing is copied and nothing is
/// checked, so `buf` may be null.
/// When `owns_buffer` is true, the `Drop` impl passes `buf` to
/// `libc::free` once the `CString` goes away, so the caller must only set
/// it for a pointer that may be released that way.
138 pub unsafe fn new(buf: *const libc::c_char, owns_buffer: bool) -> CString {
139 CString { buf: buf, owns_buffer_: owns_buffer }
142 /// Return a pointer to the NUL-terminated string data.
144 /// `.as_ptr` returns an internal pointer into the `CString`, and
145 /// may be invalidated when the `CString` falls out of scope (the
146 /// destructor will run, freeing the allocation if there is
150 /// let foo = "some string";
153 /// let x = foo.to_c_str();
154 /// let p = x.as_ptr();
156 /// // wrong (the CString will be freed, invalidating `p`)
157 /// let p = foo.to_c_str().as_ptr();
162 /// Fails with the message "CString is null!" if the wrapped pointer is null.
167 /// extern crate libc;
170 /// let c_str = "foo bar".to_c_str();
172 /// libc::puts(c_str.as_ptr());
176 pub fn as_ptr(&self) -> *const libc::c_char {
// Guard first: a null CString fails the task here.
177 if self.buf.is_null() { fail!("CString is null!"); }
182 /// Return a mutable pointer to the NUL-terminated string data.
184 /// `.as_mut_ptr` returns an internal pointer into the `CString`, and
185 /// may be invalidated when the `CString` falls out of scope (the
186 /// destructor will run, freeing the allocation if there is
190 /// let foo = "some string";
193 /// let mut x = foo.to_c_str();
194 /// let p = x.as_mut_ptr();
196 /// // wrong (the CString will be freed, invalidating `p`)
197 /// let p = foo.to_c_str().as_mut_ptr();
202 /// Fails with the message "CString is null!" if the wrapped pointer is null.
203 pub fn as_mut_ptr(&mut self) -> *mut libc::c_char {
// Same guard as `as_ptr`: a null CString fails the task here.
204 if self.buf.is_null() { fail!("CString is null!") }
209 /// Calls a closure with the underlying `*const libc::c_char`.
213 /// Fails if the CString is null.
/// Deprecated: the attribute below points callers at `.as_ptr()` instead.
214 #[deprecated="use `.as_ptr()`"]
215 pub fn with_ref<T>(&self, f: |*const libc::c_char| -> T) -> T {
// The closure is never handed a null pointer; a null CString fails here.
216 if self.buf.is_null() { fail!("CString is null!"); }
220 /// Calls a closure with the underlying buffer as a `*mut libc::c_char`.
/// The closure's result is returned to the caller.
224 /// Fails if the CString is null.
/// Deprecated: the attribute below points callers at `.as_mut_ptr()` instead.
225 #[deprecated="use `.as_mut_ptr()`"]
226 pub fn with_mut_ref<T>(&mut self, f: |*mut libc::c_char| -> T) -> T {
// The closure is never handed a null pointer; a null CString fails here.
227 if self.buf.is_null() { fail!("CString is null!"); }
// `buf` is stored as `*const`; cast it to `*mut` for the closure.
228 f(self.buf as *mut libc::c_char)
231 /// Returns true if the CString is null.
/// Unlike most accessors on `CString`, this is documented to return a
/// value rather than fail on a null string.
232 pub fn is_null(&self) -> bool {
236 /// Returns true if the CString is not null.
/// This tests the wrapped pointer only; the string contents are not read.
237 pub fn is_not_null(&self) -> bool {
238 self.buf.is_not_null()
241 /// Returns whether or not the `CString` owns the buffer.
/// An owned buffer is released with `libc::free` by the `Drop` impl; a
/// non-owned buffer is left alone.
242 pub fn owns_buffer(&self) -> bool {
246 /// Converts the CString into a `&[u8]` without copying.
247 /// Includes the terminating NUL byte.
/// The returned slice borrows from `self` for the lifetime `'a`.
251 /// Fails if the CString is null.
253 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
254 if self.buf.is_null() { fail!("CString is null!"); }
// Build the slice straight from pointer + length. `len()` excludes the
// terminator, so `+ 1` stretches the slice over the NUL byte.
256 mem::transmute(Slice { data: self.buf, len: self.len() + 1 })
260 /// Converts the CString into a `&[u8]` without copying.
261 /// Does not include the terminating NUL byte.
/// The returned slice borrows from `self` for the lifetime `'a`.
265 /// Fails if the CString is null.
267 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
268 if self.buf.is_null() { fail!("CString is null!"); }
// Build the slice straight from pointer + length. Using `len()` as-is
// stops just before the terminating NUL.
270 mem::transmute(Slice { data: self.buf, len: self.len() })
274 /// Converts the CString into a `&str` without copying.
275 /// Returns None if the CString is not UTF-8.
279 /// Fails if the CString is null.
281 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
// Work on the bytes without the terminating NUL. The null-pointer failure
// documented above comes from `as_bytes_no_nul()` itself.
282 let buf = self.as_bytes_no_nul();
286 /// Return a CString iterator.
/// The iterator (`CChars`) yields the string's `libc::c_char` values and
/// is tied to the lifetime `'a` of this borrow.
290 /// Fails if the CString is null.
291 pub fn iter<'a>(&'a self) -> CChars<'a> {
292 if self.buf.is_null() { fail!("CString is null!"); }
// `CChars` only stores a raw pointer, so this marker field is what
// carries the `'a` lifetime on the iterator type.
295 marker: marker::ContravariantLifetime,
299 /// Unwraps the wrapped `*const libc::c_char` from the `CString` wrapper.
301 /// Any ownership of the buffer by the `CString` wrapper is
302 /// forgotten, meaning that the backing allocation of this
303 /// `CString` is not automatically freed if it owns the
304 /// allocation. In this case, a user of `.unwrap()` should ensure
305 /// the allocation is freed, to avoid leaking memory.
307 /// Prefer `.as_ptr()` when just retrieving a pointer to the
308 /// string data, as that does not relinquish ownership.
309 pub unsafe fn unwrap(mut self) -> *const libc::c_char {
// `self` is taken by value and dropped when this call ends. Clearing the
// flag first makes the `Drop` impl skip `libc::free`, so the buffer
// survives the wrapper.
310 self.owns_buffer_ = false;
// Destructor: releases the buffer only when this wrapper owns it.
316 impl Drop for CString {
// Non-owning wrappers (created with `owns_buffer == false`, or passed
// through `unwrap()`) leave the memory untouched.
318 if self.owns_buffer_ {
// The buffer is handed to C's `free`, so an owned pointer must be one
// that `free` accepts.
320 libc::free(self.buf as *mut libc::c_void)
326 impl Collection for CString {
327 /// Return the number of bytes in the CString (not including the NUL terminator).
331 /// Fails if the CString is null.
333 fn len(&self) -> uint {
334 if self.buf.is_null() { fail!("CString is null!"); }
// Cursor starting at the first byte of the string.
// NOTE(review): presumably it is walked forward to the terminating NUL to
// count the bytes. Confirm against the remainder of this function.
335 let mut cur = self.buf;
347 /// A generic trait for converting a value to a CString.
349 /// Copy the receiver into a CString.
353 /// Fails the task if the receiver has an interior null.
354 fn to_c_str(&self) -> CString;
356 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
357 unsafe fn to_c_str_unchecked(&self) -> CString;
359 /// Work with a temporary CString constructed from the receiver.
360 /// The provided `*const libc::c_char` will be freed immediately upon return.
/// The closure must therefore not save or return the pointer.
365 /// extern crate libc;
368 /// let s = "PATH".with_c_str(|path| unsafe {
369 /// libc::getenv(path)
376 /// Fails the task if the receiver has an interior null.
378 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Default implementation: build a checked, owned CString that lives for
// the duration of this call.
379 let c_str = self.to_c_str();
383 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
385 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Default implementation: same as above, but via the unchecked conversion.
386 let c_str = self.to_c_str_unchecked();
391 // FIXME (#12938): Until DST lands, we cannot decompose &str into &
392 // and str, so we cannot usefully take ToCStr arguments by reference
393 // (without forcing an additional & around &str). So we are instead
394 // temporarily adding an instance for ~str and String, so that we can
395 // take ToCStr as owned. When DST lands, the string instances should
396 // be revisited, and arguments bound by ToCStr should be passed by
// String slices convert through their UTF-8 bytes: all four methods
// forward to the `&[u8]` implementation of `ToCStr` via `as_bytes()`.
399 impl<'a> ToCStr for &'a str {
401 fn to_c_str(&self) -> CString {
402 self.as_bytes().to_c_str()
406 unsafe fn to_c_str_unchecked(&self) -> CString {
407 self.as_bytes().to_c_str_unchecked()
// Forwarding `with_c_str` explicitly (instead of relying on the trait
// default) reaches the `&[u8]` override, which goes through the free
// `with_c_str` helper.
411 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
412 self.as_bytes().with_c_str(f)
416 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
417 self.as_bytes().with_c_str_unchecked(f)
// Owned strings behave exactly like `&str`: every method forwards to the
// `&[u8]` implementation of `ToCStr` via `as_bytes()`.
421 impl ToCStr for String {
423 fn to_c_str(&self) -> CString {
424 self.as_bytes().to_c_str()
428 unsafe fn to_c_str_unchecked(&self) -> CString {
429 self.as_bytes().to_c_str_unchecked()
433 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
434 self.as_bytes().with_c_str(f)
438 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
439 self.as_bytes().with_c_str_unchecked(f)
443 // The length of the stack allocated buffer for `vec.with_c_str()`
// The free `with_c_str` helper uses the stack buffer only for inputs with
// `v.len() < BUF_LEN`; longer inputs go through `to_c_str_unchecked()`.
444 static BUF_LEN: uint = 128;
// The base implementation of `ToCStr`; the `&str` and `String` impls
// forward here.
446 impl<'a> ToCStr for &'a [u8] {
447 fn to_c_str(&self) -> CString {
// Copy first, validate second: the unchecked conversion builds the
// CString, then `check_for_null` is run over the copy.
448 let mut cs = unsafe { self.to_c_str_unchecked() };
449 check_for_null(*self, cs.as_mut_ptr());
453 unsafe fn to_c_str_unchecked(&self) -> CString {
454 let self_len = self.len();
// One extra byte for the terminating NUL.
455 let buf = malloc_raw(self_len + 1);
// Copy the bytes verbatim (an interior NUL is copied through unchanged),
// then terminate the buffer.
457 ptr::copy_memory(buf, self.as_ptr(), self_len);
458 *buf.offset(self_len as int) = 0;
// `true`: the resulting CString owns the freshly allocated buffer.
460 CString::new(buf as *const libc::c_char, true)
// Both closure-based methods delegate to the free `with_c_str` helper; the
// boolean is its `checked` argument.
463 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
464 unsafe { with_c_str(*self, true, f) }
467 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
468 with_c_str(*self, false, f)
472 // Unsafe function that handles possibly copying the &[u8] into a stack array.
// `v` is the input, `checked` is true when called from the checked
// `with_c_str` method and false from the unchecked one, and `f` receives
// the pointer to the C string.
473 unsafe fn with_c_str<T>(v: &[u8], checked: bool,
474 f: |*const libc::c_char| -> T) -> T {
// Strictly less than `BUF_LEN`, so at least one byte of the stack buffer
// is left over after the copy. Presumably that byte holds the NUL
// terminator (TODO confirm).
475 let c_str = if v.len() < BUF_LEN {
// Deliberately uninitialized: only the prefix written below is meaningful.
476 let mut buf: [u8, .. BUF_LEN] = mem::uninitialized();
477 slice::bytes::copy_memory(buf, v);
480 let buf = buf.as_mut_ptr();
// NOTE(review): presumably this check runs only when `checked` is set.
482 check_for_null(v, buf as *mut libc::c_char);
// Short input: hand the stack buffer to the closure and return at once.
485 return f(buf as *const libc::c_char)
// Input too long for the stack buffer: fall back to the unchecked
// conversion, which allocates with `malloc_raw`.
489 v.to_c_str_unchecked()
// Visits the first `v.len()` bytes of `buf`, one pointer offset at a time.
// `v` is used only for its length here; the bytes are read through `buf`.
// NOTE(review): the reaction to a NUL byte is not visible in these lines.
// Going by the `ToCStr::to_c_str` docs, presumably the task fails on an
// interior NUL. Confirm.
496 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
497 for i in range(0, v.len()) {
499 let p = buf.offset(i as int);
505 /// External iterator for a CString's bytes.
/// Items are `libc::c_char` values. Created by `CString::iter`.
507 /// Use with the `std::iter` module.
508 pub struct CChars<'a> {
// Current position in the string; `next` advances it by one each step.
509 ptr: *const libc::c_char,
// A raw pointer carries no lifetime, so this marker is what ties the
// iterator to the `'a` borrow of the CString.
510 marker: marker::ContravariantLifetime<'a>,
513 impl<'a> Iterator<libc::c_char> for CChars<'a> {
514 fn next(&mut self) -> Option<libc::c_char> {
// Read the character under the cursor.
// NOTE(review): the end-of-string test is not visible in these lines;
// presumably a NUL here yields `None`. Confirm.
515 let ch = unsafe { *self.ptr };
// Step the cursor to the following character.
519 self.ptr = unsafe { self.ptr.offset(1) };
525 /// Parses a C "multistring", e.g. Windows env values or
526 /// the req->ptr result in a uv_fs_readdir() call.
528 /// Optionally, a `count` can be passed in, limiting the
529 /// parsing to only being done `count`-times.
531 /// The specified closure is invoked with each string that
532 /// is found, and the number of strings found is returned.
/// The `CString` passed to the closure does not own its data; it is a view
/// into `buf`.
533 pub unsafe fn from_c_multistring(buf: *const libc::c_char,
535 f: |&CString|) -> uint {
// The cursor is kept as an integer address so it can be advanced with
// plain arithmetic.
537 let mut curr_ptr: uint = buf as uint;
// Split the optional limit into a flag and a value for the loop condition.
539 let (limited_count, limit) = match count {
540 Some(limit) => (true, limit),
// Continue while the limit (if any) is not yet reached and the cursor is
// not on a NUL byte, i.e. the next string is not empty.
543 while ((limited_count && ctr < limit) || !limited_count)
544 && *(curr_ptr as *const libc::c_char) != 0 as libc::c_char {
// `false`: a non-owning view, so dropping `cstr` frees nothing.
545 let cstr = CString::new(curr_ptr as *const libc::c_char, false);
// Skip this string and its terminating NUL.
547 curr_ptr += cstr.len() + 1;
// Two strings followed by an empty one: the closure should see "zero" and
// "one", and the reported count should be 2.
563 fn test_str_multistring_parsing() {
565 let input = b"zero\0one\0\0";
566 let ptr = input.as_ptr();
567 let expected = ["zero", "one"];
568 let mut it = expected.iter();
569 let result = from_c_multistring(ptr as *const libc::c_char, None, |c| {
570 let cbytes = c.as_bytes_no_nul();
571 assert_eq!(cbytes, it.next().unwrap().as_bytes());
573 assert_eq!(result, 2);
574 assert!(it.next().is_none());
// `&str` conversion: an empty string yields a lone NUL; "hello" is copied
// byte for byte and terminated.
579 fn test_str_to_c_str() {
580 let c_str = "".to_c_str();
582 assert_eq!(*c_str.as_ptr().offset(0), 0);
585 let c_str = "hello".to_c_str();
586 let buf = c_str.as_ptr();
588 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
589 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
590 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
591 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
592 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
593 assert_eq!(*buf.offset(5), 0);
// Byte-slice conversion, including a non-UTF-8 byte (0xFF) that must be
// carried through unchanged.
598 fn test_vec_to_c_str() {
600 let c_str = b.to_c_str();
602 assert_eq!(*c_str.as_ptr().offset(0), 0);
605 let c_str = b"hello".to_c_str();
606 let buf = c_str.as_ptr();
608 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
609 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
610 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
611 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
612 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
613 assert_eq!(*buf.offset(5), 0);
616 let c_str = b"foo\xFF".to_c_str();
617 let buf = c_str.as_ptr();
619 assert_eq!(*buf.offset(0), 'f' as libc::c_char);
620 assert_eq!(*buf.offset(1), 'o' as libc::c_char);
621 assert_eq!(*buf.offset(2), 'o' as libc::c_char);
622 assert_eq!(*buf.offset(3), 0xffu8 as i8);
623 assert_eq!(*buf.offset(4), 0);
// A CString built from a null pointer reports itself as null.
629 let c_str = unsafe { CString::new(ptr::null(), false) };
630 assert!(c_str.is_null());
631 assert!(!c_str.is_not_null());
// After `unwrap()` the caller owns the buffer and frees it by hand.
636 let c_str = "hello".to_c_str();
637 unsafe { libc::free(c_str.unwrap() as *mut libc::c_void) }
// A converted string is non-null and its pointer can be given to C's `strlen`.
642 let c_str = "hello".to_c_str();
643 let len = unsafe { libc::strlen(c_str.as_ptr()) };
644 assert!(!c_str.is_null());
645 assert!(c_str.is_not_null());
// Failure case set up on a null CString.
650 fn test_as_ptr_empty_fail() {
651 let c_str = unsafe { CString::new(ptr::null(), false) };
// Iteration: an empty string yields nothing; "hello" yields its five
// characters and then `None`.
657 let c_str = "".to_c_str();
658 let mut iter = c_str.iter();
659 assert_eq!(iter.next(), None);
661 let c_str = "hello".to_c_str();
662 let mut iter = c_str.iter();
663 assert_eq!(iter.next(), Some('h' as libc::c_char));
664 assert_eq!(iter.next(), Some('e' as libc::c_char));
665 assert_eq!(iter.next(), Some('l' as libc::c_char));
666 assert_eq!(iter.next(), Some('l' as libc::c_char));
667 assert_eq!(iter.next(), Some('o' as libc::c_char));
668 assert_eq!(iter.next(), None);
// The checked conversion must fail the task on an interior NUL.
672 fn test_to_c_str_fail() {
673 assert!(task::try(proc() { "he\x00llo".to_c_str() }).is_err());
// The unchecked conversion copies the interior NUL through and still
// appends a terminator after the last byte.
677 fn test_to_c_str_unchecked() {
679 let c_string = "he\x00llo".to_c_str_unchecked();
680 let buf = c_string.as_ptr();
681 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
682 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
683 assert_eq!(*buf.offset(2), 0);
684 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
685 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
686 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
687 assert_eq!(*buf.offset(6), 0);
// `as_bytes()` includes the terminating NUL.
693 let c_str = "hello".to_c_str();
694 assert_eq!(c_str.as_bytes(), b"hello\0");
695 let c_str = "".to_c_str();
696 assert_eq!(c_str.as_bytes(), b"\0");
697 let c_str = b"foo\xFF".to_c_str();
698 assert_eq!(c_str.as_bytes(), b"foo\xFF\0");
// `as_bytes_no_nul()` stops before the terminating NUL.
702 fn test_as_bytes_no_nul() {
703 let c_str = "hello".to_c_str();
704 assert_eq!(c_str.as_bytes_no_nul(), b"hello");
705 let c_str = "".to_c_str();
707 assert_eq!(c_str.as_bytes_no_nul(), exp);
708 let c_str = b"foo\xFF".to_c_str();
709 assert_eq!(c_str.as_bytes_no_nul(), b"foo\xFF");
// Failure cases set up on a null CString.
714 fn test_as_bytes_fail() {
715 let c_str = unsafe { CString::new(ptr::null(), false) };
721 fn test_as_bytes_no_nul_fail() {
722 let c_str = unsafe { CString::new(ptr::null(), false) };
723 c_str.as_bytes_no_nul();
// `as_str()` gives `Some` for valid UTF-8 (including the empty string) and
// `None` for the 0xFF byte.
728 let c_str = "hello".to_c_str();
729 assert_eq!(c_str.as_str(), Some("hello"));
730 let c_str = "".to_c_str();
731 assert_eq!(c_str.as_str(), Some(""));
732 let c_str = b"foo\xFF".to_c_str();
733 assert_eq!(c_str.as_str(), None);
// Further failure cases set up on a null CString.
738 fn test_as_str_fail() {
739 let c_str = unsafe { CString::new(ptr::null(), false) };
746 let c_str = unsafe { CString::new(ptr::null(), false) };
752 fn test_iter_fail() {
753 let c_str = unsafe { CString::new(ptr::null(), false) };
759 let a = "hello".to_c_str();
// Clones a non-owning CString inside a closure and reads the clone after
// the helper `foo` has returned. The clone must own its own copy of the data.
765 fn test_clone_noleak() {
766 fn foo(f: |c: &CString|) {
767 let s = "test".to_string();
768 let c = s.to_c_str();
769 // give the closure a non-owned CString
770 let mut c_ = unsafe { CString::new(c.as_ptr(), false) };
772 // muck with the buffer for later printing
773 unsafe { *c_.as_mut_ptr() = 'X' as libc::c_char }
776 let mut c_: Option<CString> = None;
778 c_ = Some(c.clone());
780 // force a copy, reading the memory
781 c.as_bytes().to_owned();
783 let c_ = c_.unwrap();
784 // force a copy, reading the memory
785 c_.as_bytes().to_owned();
// Clone and equality on a null CString.
789 fn test_clone_eq_null() {
790 let x = unsafe { CString::new(ptr::null(), false) };
// Shared verification helper for the benchmarks: walks the `s.len()` bytes
// of `s` alongside the C string at `c_str`, pairing them position by position.
803 fn check(s: &str, c_str: *const libc::c_char) {
804 let s_buf = s.as_ptr();
805 for i in range(0, s.len()) {
808 *s_buf.offset(i as int) as libc::c_char,
809 *c_str.offset(i as int));
// Benchmark inputs. `s_short` and `s_medium` are shorter than `BUF_LEN`
// (128) and so take the stack-buffer path of `with_c_str`; `s_long` is
// longer and takes the allocating path.
814 static s_short: &'static str = "Mary";
815 static s_medium: &'static str = "Mary had a little lamb";
816 static s_long: &'static str = "\
817 Mary had a little lamb, Little lamb
818 Mary had a little lamb, Little lamb
819 Mary had a little lamb, Little lamb
820 Mary had a little lamb, Little lamb
821 Mary had a little lamb, Little lamb
822 Mary had a little lamb, Little lamb";
// NOTE(review): despite its name, this helper exercises `to_c_str()` (the
// checked conversion) and is the driver for the `bench_to_c_str_*` benchmarks.
824 fn bench_to_string(b: &mut Bencher, s: &str) {
826 let c_str = s.to_c_str();
827 check(s, c_str.as_ptr());
832 fn bench_to_c_str_short(b: &mut Bencher) {
833 bench_to_string(b, s_short)
837 fn bench_to_c_str_medium(b: &mut Bencher) {
838 bench_to_string(b, s_medium)
842 fn bench_to_c_str_long(b: &mut Bencher) {
843 bench_to_string(b, s_long)
// Driver for the unchecked conversion.
846 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) {
848 let c_str = unsafe { s.to_c_str_unchecked() };
849 check(s, c_str.as_ptr())
854 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
855 bench_to_c_str_unchecked(b, s_short)
859 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
860 bench_to_c_str_unchecked(b, s_medium)
864 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
865 bench_to_c_str_unchecked(b, s_long)
// Driver for the closure-based, checked `with_c_str`.
868 fn bench_with_c_str(b: &mut Bencher, s: &str) {
870 s.with_c_str(|c_str_buf| check(s, c_str_buf))
875 fn bench_with_c_str_short(b: &mut Bencher) {
876 bench_with_c_str(b, s_short)
880 fn bench_with_c_str_medium(b: &mut Bencher) {
881 bench_with_c_str(b, s_medium)
885 fn bench_with_c_str_long(b: &mut Bencher) {
886 bench_with_c_str(b, s_long)
// Driver for the closure-based, unchecked `with_c_str_unchecked`.
889 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) {
892 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
898 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
899 bench_with_c_str_unchecked(b, s_short)
903 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
904 bench_with_c_str_unchecked(b, s_medium)
908 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
909 bench_with_c_str_unchecked(b, s_long)