1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! C-string manipulation and management
//! This module provides the basic methods for creating and manipulating
//! null-terminated strings for use with FFI calls (back to C). Most C APIs require
//! that the string being passed to them is null-terminated, and by default Rust's
//! string types are *not* null terminated.
18 //! The other problem with translating Rust strings to C strings is that Rust
19 //! strings can validly contain a null-byte in the middle of the string (0 is a
20 //! valid Unicode codepoint). This means that not all Rust strings can actually be
21 //! translated to C strings.
23 //! # Creation of a C string
25 //! A C string is managed through the `CString` type defined in this module. It
26 //! "owns" the internal buffer of characters and will automatically deallocate the
27 //! buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
//! and `&[u8]`, but the conversions can fail due to some of the limitations
//! explained above.
31 //! This also means that currently whenever a C string is created, an allocation
32 //! must be performed to place the data elsewhere (the lifetime of the C string is
33 //! not tied to the lifetime of the original string/data buffer). If C strings are
34 //! heavily used in applications, then caching may be advisable to prevent
35 //! unnecessary amounts of allocations.
//! Be careful to remember that the memory is managed by the C allocator API and not
//! by the Rust allocator API.
//! That means that the CString pointers should be freed with the C allocator API
//! if you intend to do that on your own, as the behaviour if you free them with
//! Rust's allocator API is not well defined.
43 //! An example of creating and using a C string would be:
46 //! extern crate libc;
48 //! use std::c_str::ToCStr;
51 //! fn puts(s: *const libc::c_char);
55 //! let my_string = "Hello, world!";
57 //! // Allocate the C string with an explicit local that owns the string. The
58 //! // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
59 //! let my_c_string = my_string.to_c_str();
61 //! puts(my_c_string.as_ptr());
64 //! // Don't save/return the pointer to the C string, the `c_buffer` will be
65 //! // deallocated when this block returns!
66 //! my_string.with_c_str(|c_buffer| {
67 //! unsafe { puts(c_buffer); }
80 use slice::{mod, IntSliceExt};
83 use core::kinds::marker;
85 /// The representation of a C String.
87 /// This structure wraps a `*libc::c_char`, and will automatically free the
88 /// memory it is pointing to when it goes out of scope.
89 #[allow(missing_copy_implementations)]
91 buf: *const libc::c_char,
95 unsafe impl Send for CString { }
96 unsafe impl Sync for CString { }
98 impl Clone for CString {
99 /// Clone this CString into a new, uniquely owned CString. For safety
100 /// reasons, this is always a deep clone with the memory allocated
101 /// with C's allocator API, rather than the usual shallow clone.
102 fn clone(&self) -> CString {
103 let len = self.len() + 1;
104 let buf = unsafe { libc::malloc(len as libc::size_t) } as *mut libc::c_char;
105 if buf.is_null() { ::alloc::oom() }
106 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
107 CString { buf: buf as *const libc::c_char, owns_buffer_: true }
111 impl PartialEq for CString {
112 fn eq(&self, other: &CString) -> bool {
113 // Check if the two strings share the same buffer
114 if self.buf as uint == other.buf as uint {
118 libc::strcmp(self.buf, other.buf) == 0
124 impl PartialOrd for CString {
126 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
127 self.as_bytes().partial_cmp(other.as_bytes())
131 impl Eq for CString {}
133 impl<S: hash::Writer> hash::Hash<S> for CString {
135 fn hash(&self, state: &mut S) {
136 self.as_bytes().hash(state)
141 /// Create a C String from a pointer, with memory managed by C's allocator
142 /// API, so avoid calling it with a pointer to memory managed by Rust's
143 /// allocator API, as the behaviour would not be well defined.
147 /// Panics if `buf` is null
148 pub unsafe fn new(buf: *const libc::c_char, owns_buffer: bool) -> CString {
149 assert!(!buf.is_null());
150 CString { buf: buf, owns_buffer_: owns_buffer }
153 /// Return a pointer to the NUL-terminated string data.
155 /// `.as_ptr` returns an internal pointer into the `CString`, and
156 /// may be invalidated when the `CString` falls out of scope (the
157 /// destructor will run, freeing the allocation if there is
161 /// use std::c_str::ToCStr;
163 /// let foo = "some string";
166 /// let x = foo.to_c_str();
167 /// let p = x.as_ptr();
169 /// // wrong (the CString will be freed, invalidating `p`)
170 /// let p = foo.to_c_str().as_ptr();
176 /// extern crate libc;
178 /// use std::c_str::ToCStr;
181 /// let c_str = "foo bar".to_c_str();
183 /// libc::puts(c_str.as_ptr());
187 pub fn as_ptr(&self) -> *const libc::c_char {
191 /// Return a mutable pointer to the NUL-terminated string data.
193 /// `.as_mut_ptr` returns an internal pointer into the `CString`, and
194 /// may be invalidated when the `CString` falls out of scope (the
195 /// destructor will run, freeing the allocation if there is
199 /// use std::c_str::ToCStr;
201 /// let foo = "some string";
204 /// let mut x = foo.to_c_str();
205 /// let p = x.as_mut_ptr();
207 /// // wrong (the CString will be freed, invalidating `p`)
208 /// let p = foo.to_c_str().as_mut_ptr();
210 pub fn as_mut_ptr(&mut self) -> *mut libc::c_char {
214 /// Returns whether or not the `CString` owns the buffer.
215 pub fn owns_buffer(&self) -> bool {
219 /// Converts the CString into a `&[u8]` without copying.
220 /// Includes the terminating NUL byte.
222 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
224 slice::from_raw_buf(&self.buf, self.len() + 1).as_unsigned()
228 /// Converts the CString into a `&[u8]` without copying.
229 /// Does not include the terminating NUL byte.
231 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
233 slice::from_raw_buf(&self.buf, self.len()).as_unsigned()
237 /// Converts the CString into a `&str` without copying.
238 /// Returns None if the CString is not UTF-8.
240 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
241 let buf = self.as_bytes_no_nul();
242 str::from_utf8(buf).ok()
245 /// Return a CString iterator.
246 pub fn iter<'a>(&'a self) -> CChars<'a> {
249 marker: marker::ContravariantLifetime,
253 /// Unwraps the wrapped `*libc::c_char` from the `CString` wrapper.
255 /// Any ownership of the buffer by the `CString` wrapper is
256 /// forgotten, meaning that the backing allocation of this
257 /// `CString` is not automatically freed if it owns the
258 /// allocation. In this case, a user of `.unwrap()` should ensure
259 /// the allocation is freed, to avoid leaking memory. You should
260 /// use libc's memory allocator in this case.
262 /// Prefer `.as_ptr()` when just retrieving a pointer to the
263 /// string data, as that does not relinquish ownership.
264 pub unsafe fn into_inner(mut self) -> *const libc::c_char {
265 self.owns_buffer_ = false;
269 /// Deprecated, use into_inner() instead
270 #[deprecated = "renamed to into_inner()"]
271 pub unsafe fn unwrap(self) -> *const libc::c_char { self.into_inner() }
273 /// Return the number of bytes in the CString (not including the NUL
276 pub fn len(&self) -> uint {
277 unsafe { libc::strlen(self.buf) as uint }
280 /// Returns if there are no bytes in this string
282 pub fn is_empty(&self) -> bool { self.len() == 0 }
285 impl Drop for CString {
287 if self.owns_buffer_ {
289 libc::free(self.buf as *mut libc::c_void)
295 impl fmt::Show for CString {
296 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
297 String::from_utf8_lossy(self.as_bytes_no_nul()).fmt(f)
301 /// A generic trait for converting a value to a CString.
302 pub trait ToCStr for Sized? {
303 /// Copy the receiver into a CString.
307 /// Panics the task if the receiver has an interior null.
308 fn to_c_str(&self) -> CString;
310 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
311 unsafe fn to_c_str_unchecked(&self) -> CString;
313 /// Work with a temporary CString constructed from the receiver.
314 /// The provided `*libc::c_char` will be freed immediately upon return.
319 /// extern crate libc;
321 /// use std::c_str::ToCStr;
324 /// let s = "PATH".with_c_str(|path| unsafe {
325 /// libc::getenv(path)
332 /// Panics the task if the receiver has an interior null.
334 fn with_c_str<T, F>(&self, f: F) -> T where
335 F: FnOnce(*const libc::c_char) -> T,
337 let c_str = self.to_c_str();
341 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
343 unsafe fn with_c_str_unchecked<T, F>(&self, f: F) -> T where
344 F: FnOnce(*const libc::c_char) -> T,
346 let c_str = self.to_c_str_unchecked();
351 impl ToCStr for str {
353 fn to_c_str(&self) -> CString {
354 self.as_bytes().to_c_str()
358 unsafe fn to_c_str_unchecked(&self) -> CString {
359 self.as_bytes().to_c_str_unchecked()
363 fn with_c_str<T, F>(&self, f: F) -> T where
364 F: FnOnce(*const libc::c_char) -> T,
366 self.as_bytes().with_c_str(f)
370 unsafe fn with_c_str_unchecked<T, F>(&self, f: F) -> T where
371 F: FnOnce(*const libc::c_char) -> T,
373 self.as_bytes().with_c_str_unchecked(f)
377 impl ToCStr for String {
379 fn to_c_str(&self) -> CString {
380 self.as_bytes().to_c_str()
384 unsafe fn to_c_str_unchecked(&self) -> CString {
385 self.as_bytes().to_c_str_unchecked()
389 fn with_c_str<T, F>(&self, f: F) -> T where
390 F: FnOnce(*const libc::c_char) -> T,
392 self.as_bytes().with_c_str(f)
396 unsafe fn with_c_str_unchecked<T, F>(&self, f: F) -> T where
397 F: FnOnce(*const libc::c_char) -> T,
399 self.as_bytes().with_c_str_unchecked(f)
403 // The length of the stack allocated buffer for `vec.with_c_str()`
404 const BUF_LEN: uint = 128;
406 impl ToCStr for [u8] {
407 fn to_c_str(&self) -> CString {
408 let mut cs = unsafe { self.to_c_str_unchecked() };
409 check_for_null(self, cs.as_mut_ptr());
413 unsafe fn to_c_str_unchecked(&self) -> CString {
414 let self_len = self.len();
415 let buf = libc::malloc(self_len as libc::size_t + 1) as *mut u8;
416 if buf.is_null() { ::alloc::oom() }
418 ptr::copy_memory(buf, self.as_ptr(), self_len);
419 *buf.offset(self_len as int) = 0;
421 CString::new(buf as *const libc::c_char, true)
424 fn with_c_str<T, F>(&self, f: F) -> T where
425 F: FnOnce(*const libc::c_char) -> T,
427 unsafe { with_c_str(self, true, f) }
430 unsafe fn with_c_str_unchecked<T, F>(&self, f: F) -> T where
431 F: FnOnce(*const libc::c_char) -> T,
433 with_c_str(self, false, f)
437 impl<'a, Sized? T: ToCStr> ToCStr for &'a T {
439 fn to_c_str(&self) -> CString {
444 unsafe fn to_c_str_unchecked(&self) -> CString {
445 (**self).to_c_str_unchecked()
449 fn with_c_str<T, F>(&self, f: F) -> T where
450 F: FnOnce(*const libc::c_char) -> T,
452 (**self).with_c_str(f)
456 unsafe fn with_c_str_unchecked<T, F>(&self, f: F) -> T where
457 F: FnOnce(*const libc::c_char) -> T,
459 (**self).with_c_str_unchecked(f)
463 // Unsafe function that handles possibly copying the &[u8] into a stack array.
464 unsafe fn with_c_str<T, F>(v: &[u8], checked: bool, f: F) -> T where
465 F: FnOnce(*const libc::c_char) -> T,
467 let c_str = if v.len() < BUF_LEN {
468 let mut buf: [u8; BUF_LEN] = mem::uninitialized();
469 slice::bytes::copy_memory(&mut buf, v);
472 let buf = buf.as_mut_ptr();
474 check_for_null(v, buf as *mut libc::c_char);
477 return f(buf as *const libc::c_char)
481 v.to_c_str_unchecked()
488 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
489 for i in range(0, v.len()) {
491 let p = buf.offset(i as int);
497 /// External iterator for a CString's bytes.
499 /// Use with the `std::iter` module.
500 #[allow(raw_pointer_deriving)]
502 pub struct CChars<'a> {
503 ptr: *const libc::c_char,
504 marker: marker::ContravariantLifetime<'a>,
507 impl<'a> Iterator for CChars<'a> {
508 type Item = libc::c_char;
510 fn next(&mut self) -> Option<libc::c_char> {
511 let ch = unsafe { *self.ptr };
515 self.ptr = unsafe { self.ptr.offset(1) };
521 /// Parses a C "multistring", eg windows env values or
522 /// the req->ptr result in a uv_fs_readdir() call.
524 /// Optionally, a `count` can be passed in, limiting the
525 /// parsing to only being done `count`-times.
527 /// The specified closure is invoked with each string that
528 /// is found, and the number of strings found is returned.
529 pub unsafe fn from_c_multistring<F>(buf: *const libc::c_char,
536 let mut curr_ptr: uint = buf as uint;
538 let (limited_count, limit) = match count {
539 Some(limit) => (true, limit),
542 while ((limited_count && ctr < limit) || !limited_count)
543 && *(curr_ptr as *const libc::c_char) != 0 as libc::c_char {
544 let cstr = CString::new(curr_ptr as *const libc::c_char, false);
546 curr_ptr += cstr.len() + 1;
#[test]
fn test_str_multistring_parsing() {
    unsafe {
        // Two strings followed by the empty string that ends the list.
        let input = b"zero\0one\0\0";
        let ptr = input.as_ptr();
        let expected = ["zero", "one"];
        let mut it = expected.iter();
        let result = from_c_multistring(ptr as *const libc::c_char, None, |c| {
            let cbytes = c.as_bytes_no_nul();
            assert_eq!(cbytes, it.next().unwrap().as_bytes());
        });
        assert_eq!(result, 2);
        assert!(it.next().is_none());
    }
}
#[test]
fn test_str_to_c_str() {
    // An empty string converts to a lone terminator.
    let c_str = "".to_c_str();
    unsafe {
        assert_eq!(*c_str.as_ptr().offset(0), 0);
    }

    let c_str = "hello".to_c_str();
    let buf = c_str.as_ptr();
    unsafe {
        assert_eq!(*buf.offset(0), 'h' as libc::c_char);
        assert_eq!(*buf.offset(1), 'e' as libc::c_char);
        assert_eq!(*buf.offset(2), 'l' as libc::c_char);
        assert_eq!(*buf.offset(3), 'l' as libc::c_char);
        assert_eq!(*buf.offset(4), 'o' as libc::c_char);
        assert_eq!(*buf.offset(5), 0);
    }
}
#[test]
fn test_vec_to_c_str() {
    // An empty byte slice converts to a lone terminator.
    let b: &[u8] = &[];
    let c_str = b.to_c_str();
    unsafe {
        assert_eq!(*c_str.as_ptr().offset(0), 0);
    }

    let c_str = b"hello".to_c_str();
    let buf = c_str.as_ptr();
    unsafe {
        assert_eq!(*buf.offset(0), 'h' as libc::c_char);
        assert_eq!(*buf.offset(1), 'e' as libc::c_char);
        assert_eq!(*buf.offset(2), 'l' as libc::c_char);
        assert_eq!(*buf.offset(3), 'l' as libc::c_char);
        assert_eq!(*buf.offset(4), 'o' as libc::c_char);
        assert_eq!(*buf.offset(5), 0);
    }

    // Bytes that are not valid UTF-8 are copied through unchanged.
    let c_str = b"foo\xFF".to_c_str();
    let buf = c_str.as_ptr();
    unsafe {
        assert_eq!(*buf.offset(0), 'f' as libc::c_char);
        assert_eq!(*buf.offset(1), 'o' as libc::c_char);
        assert_eq!(*buf.offset(2), 'o' as libc::c_char);
        assert_eq!(*buf.offset(3), 0xffu8 as libc::c_char);
        assert_eq!(*buf.offset(4), 0);
    }
}
627 let c_str = "hello".to_c_str();
628 unsafe { libc::free(c_str.into_inner() as *mut libc::c_void) }
633 let c_str = "hello".to_c_str();
634 let len = unsafe { libc::strlen(c_str.as_ptr()) };
640 let c_str = "".to_c_str();
641 let mut iter = c_str.iter();
642 assert_eq!(iter.next(), None);
644 let c_str = "hello".to_c_str();
645 let mut iter = c_str.iter();
646 assert_eq!(iter.next(), Some('h' as libc::c_char));
647 assert_eq!(iter.next(), Some('e' as libc::c_char));
648 assert_eq!(iter.next(), Some('l' as libc::c_char));
649 assert_eq!(iter.next(), Some('l' as libc::c_char));
650 assert_eq!(iter.next(), Some('o' as libc::c_char));
651 assert_eq!(iter.next(), None);
#[test]
fn test_to_c_str_fail() {
    // The conversion runs in its own thread so that its panic on the
    // interior NUL shows up as a join error.
    assert!(Thread::spawn(move|| { "he\x00llo".to_c_str() }).join().is_err());
}
#[test]
fn test_to_c_str_unchecked() {
    unsafe {
        // The unchecked variant copies the interior NUL through verbatim.
        let c_string = "he\x00llo".to_c_str_unchecked();
        let buf = c_string.as_ptr();
        assert_eq!(*buf.offset(0), 'h' as libc::c_char);
        assert_eq!(*buf.offset(1), 'e' as libc::c_char);
        assert_eq!(*buf.offset(2), 0);
        assert_eq!(*buf.offset(3), 'l' as libc::c_char);
        assert_eq!(*buf.offset(4), 'l' as libc::c_char);
        assert_eq!(*buf.offset(5), 'o' as libc::c_char);
        assert_eq!(*buf.offset(6), 0);
    }
}
676 let c_str = "hello".to_c_str();
677 assert_eq!(c_str.as_bytes(), b"hello\0");
678 let c_str = "".to_c_str();
679 assert_eq!(c_str.as_bytes(), b"\0");
680 let c_str = b"foo\xFF".to_c_str();
681 assert_eq!(c_str.as_bytes(), b"foo\xFF\0");
#[test]
fn test_as_bytes_no_nul() {
    let c_str = "hello".to_c_str();
    assert_eq!(c_str.as_bytes_no_nul(), b"hello");
    // The empty string yields an empty slice.
    let c_str = "".to_c_str();
    let exp: &[u8] = &[];
    assert_eq!(c_str.as_bytes_no_nul(), exp);
    let c_str = b"foo\xFF".to_c_str();
    assert_eq!(c_str.as_bytes_no_nul(), b"foo\xFF");
}
697 let c_str = "hello".to_c_str();
698 assert_eq!(c_str.as_str(), Some("hello"));
699 let c_str = "".to_c_str();
700 assert_eq!(c_str.as_str(), Some(""));
701 let c_str = b"foo\xFF".to_c_str();
702 assert_eq!(c_str.as_str(), None);
708 let _c_str = unsafe { CString::new(ptr::null(), false) };
713 let a = "hello".to_c_str();
719 fn test_clone_noleak() {
720 fn foo<F>(f: F) where F: FnOnce(&CString) {
721 let s = "test".to_string();
722 let c = s.to_c_str();
723 // give the closure a non-owned CString
724 let mut c_ = unsafe { CString::new(c.as_ptr(), false) };
726 // muck with the buffer for later printing
727 unsafe { *c_.as_mut_ptr() = 'X' as libc::c_char }
730 let mut c_: Option<CString> = None;
732 c_ = Some(c.clone());
734 // force a copy, reading the memory
735 c.as_bytes().to_vec();
737 let c_ = c_.unwrap();
738 // force a copy, reading the memory
739 c_.as_bytes().to_vec();
748 use self::test::Bencher;
753 fn check(s: &str, c_str: *const libc::c_char) {
754 let s_buf = s.as_ptr();
755 for i in range(0, s.len()) {
758 *s_buf.offset(i as int) as libc::c_char,
759 *c_str.offset(i as int));
764 static S_SHORT: &'static str = "Mary";
765 static S_MEDIUM: &'static str = "Mary had a little lamb";
766 static S_LONG: &'static str = "\
767 Mary had a little lamb, Little lamb
768 Mary had a little lamb, Little lamb
769 Mary had a little lamb, Little lamb
770 Mary had a little lamb, Little lamb
771 Mary had a little lamb, Little lamb
772 Mary had a little lamb, Little lamb";
774 fn bench_to_string(b: &mut Bencher, s: &str) {
776 let c_str = s.to_c_str();
777 check(s, c_str.as_ptr());
782 fn bench_to_c_str_short(b: &mut Bencher) {
783 bench_to_string(b, S_SHORT)
787 fn bench_to_c_str_medium(b: &mut Bencher) {
788 bench_to_string(b, S_MEDIUM)
792 fn bench_to_c_str_long(b: &mut Bencher) {
793 bench_to_string(b, S_LONG)
796 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) {
798 let c_str = unsafe { s.to_c_str_unchecked() };
799 check(s, c_str.as_ptr())
804 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
805 bench_to_c_str_unchecked(b, S_SHORT)
809 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
810 bench_to_c_str_unchecked(b, S_MEDIUM)
814 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
815 bench_to_c_str_unchecked(b, S_LONG)
818 fn bench_with_c_str(b: &mut Bencher, s: &str) {
820 s.with_c_str(|c_str_buf| check(s, c_str_buf))
825 fn bench_with_c_str_short(b: &mut Bencher) {
826 bench_with_c_str(b, S_SHORT)
830 fn bench_with_c_str_medium(b: &mut Bencher) {
831 bench_with_c_str(b, S_MEDIUM)
835 fn bench_with_c_str_long(b: &mut Bencher) {
836 bench_with_c_str(b, S_LONG)
839 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) {
842 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
848 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
849 bench_with_c_str_unchecked(b, S_SHORT)
853 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
854 bench_with_c_str_unchecked(b, S_MEDIUM)
858 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
859 bench_with_c_str_unchecked(b, S_LONG)