1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null-terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid Unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 Be careful to remember that the memory is managed by the C allocator API and not
40 by the Rust allocator API.
41 That means that the CString pointers should be freed with the C allocator API
42 if you intend to do that on your own, as the behaviour if you free them with
43 Rust's allocator API is not well defined.
45 An example of creating and using a C string would be:
51 fn puts(s: *const libc::c_char);
55 let my_string = "Hello, world!";
57 // Allocate the C string with an explicit local that owns the string. The
58 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
59 let my_c_string = my_string.to_c_str();
61 puts(my_c_string.as_ptr());
64 // Don't save/return the pointer to the C string, the `c_buffer` will be
65 // deallocated when this block returns!
66 my_string.with_c_str(|c_buffer| {
67 unsafe { puts(c_buffer); }
74 use collections::string::String;
75 use collections::hash;
77 use core::kinds::{Sized, marker};
79 use core::prelude::{Clone, Drop, Eq, Iterator};
80 use core::prelude::{SlicePrelude, None, Option, Ordering, PartialEq};
81 use core::prelude::{PartialOrd, RawPtr, Some, StrPrelude, range};
88 /// The representation of a C String.
90 /// This structure wraps a `*const libc::c_char`, and will automatically free the
91 /// memory it is pointing to when it goes out of scope, provided it owns that buffer.
93 buf: *const libc::c_char, // start of the NUL-terminated string data
97 impl Clone for CString {
98 /// Clone this CString into a new, uniquely owned CString. For safety
99 /// reasons, this is always a deep clone with the memory allocated
100 /// with C's allocator API, rather than the usual shallow clone. The clone owns its buffer.
101 fn clone(&self) -> CString {
102 let len = self.len() + 1; // `len()` excludes the NUL terminator; +1 so it is copied as well
103 let buf = unsafe { libc::malloc(len as libc::size_t) } as *mut libc::c_char;
104 if buf.is_null() { ::alloc::oom() } // a failed `malloc` is handed to `::alloc::oom()`
105 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
106 CString { buf: buf as *const libc::c_char, owns_buffer_: true } // owned, whatever `self.owns_buffer_` was
110 impl PartialEq for CString {
111 fn eq(&self, other: &CString) -> bool {
112 // Check if the two strings share the same buffer (pointer identity)
113 if self.buf as uint == other.buf as uint {
117 libc::strcmp(self.buf, other.buf) == 0 // contents are equal exactly when `strcmp` returns 0
123 impl PartialOrd for CString {
125 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
126 self.as_bytes().partial_cmp(other.as_bytes()) // compares the byte slices, each including its trailing NUL
130 impl Eq for CString {} // empty impl body: no methods are defined here
132 impl<S: hash::Writer> hash::Hash<S> for CString {
134 fn hash(&self, state: &mut S) {
135 self.as_bytes().hash(state) // hashes the string bytes, trailing NUL included
140 /// Create a C String from a pointer, with memory managed by C's allocator
141 /// API, so avoid calling it with a pointer to memory managed by Rust's
142 /// allocator API, as the behaviour would not be well defined.
146 /// Panics if `buf` is null.
147 pub unsafe fn new(buf: *const libc::c_char, owns_buffer: bool) -> CString {
148 assert!(!buf.is_null());
149 CString { buf: buf, owns_buffer_: owns_buffer } // `owns_buffer` decides whether `Drop` calls `libc::free` on `buf`
152 /// Return a pointer to the NUL-terminated string data.
154 /// `.as_ptr` returns an internal pointer into the `CString`, and
155 /// may be invalidated when the `CString` falls out of scope (the
156 /// destructor will run, freeing the allocation if there is
160 /// let foo = "some string";
163 /// let x = foo.to_c_str();
164 /// let p = x.as_ptr();
166 /// // wrong (the CString will be freed, invalidating `p`)
167 /// let p = foo.to_c_str().as_ptr();
173 /// extern crate libc;
176 /// let c_str = "foo bar".to_c_str();
178 /// libc::puts(c_str.as_ptr());
182 pub fn as_ptr(&self) -> *const libc::c_char {
186 /// Return a mutable pointer to the NUL-terminated string data.
188 /// `.as_mut_ptr` returns an internal pointer into the `CString`, and
189 /// may be invalidated when the `CString` falls out of scope (the
190 /// destructor will run, freeing the allocation if there is
194 /// let foo = "some string";
197 /// let mut x = foo.to_c_str();
198 /// let p = x.as_mut_ptr();
200 /// // wrong (the CString will be freed, invalidating `p`)
201 /// let p = foo.to_c_str().as_mut_ptr();
203 pub fn as_mut_ptr(&mut self) -> *mut libc::c_char {
207 /// Returns whether or not the `CString` owns the buffer.
208 pub fn owns_buffer(&self) -> bool {
212 /// Converts the CString into a `&[u8]` without copying.
213 /// Includes the terminating NUL byte.
215 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
217 mem::transmute(Slice { data: self.buf, len: self.len() + 1 }) // `len()` excludes the NUL, so +1 extends the slice over it
221 /// Converts the CString into a `&[u8]` without copying.
222 /// Does not include the terminating NUL byte.
224 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
226 mem::transmute(Slice { data: self.buf, len: self.len() }) // `len()` already stops before the NUL terminator
230 /// Converts the CString into a `&str` without copying.
231 /// Returns `None` if the CString is not UTF-8.
233 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
234 let buf = self.as_bytes_no_nul(); // the bytes without the terminating NUL
238 /// Return an iterator (`CChars`) over the `libc::c_char` values of this CString.
239 pub fn iter<'a>(&'a self) -> CChars<'a> {
242 marker: marker::ContravariantLifetime,
246 /// Unwraps the wrapped `*const libc::c_char` from the `CString` wrapper.
248 /// Any ownership of the buffer by the `CString` wrapper is
249 /// forgotten, meaning that the backing allocation of this
250 /// `CString` is not automatically freed if it owns the
251 /// allocation. In this case, a user of `.into_inner()` should ensure
252 /// the allocation is freed, to avoid leaking memory. You should
253 /// use libc's memory allocator in this case.
255 /// Prefer `.as_ptr()` when just retrieving a pointer to the
256 /// string data, as that does not relinquish ownership.
257 pub unsafe fn into_inner(mut self) -> *const libc::c_char {
258 self.owns_buffer_ = false; // clear the flag so `Drop` skips `libc::free`
262 /// Deprecated: use `into_inner()` instead; this simply forwards to it.
263 #[deprecated = "renamed to into_inner()"]
264 pub unsafe fn unwrap(self) -> *const libc::c_char { self.into_inner() }
266 /// Return the number of bytes in the CString (not including the NUL
269 pub fn len(&self) -> uint {
270 unsafe { libc::strlen(self.buf) as uint } // recomputed with `strlen` on every call; nothing is cached
273 /// Returns `true` if there are no bytes in this string (the NUL terminator is not counted).
275 pub fn is_empty(&self) -> bool { self.len() == 0 }
278 impl Drop for CString {
280 if self.owns_buffer_ { // buffers this CString does not own are left alone
282 libc::free(self.buf as *mut libc::c_void) // released through the C allocator's `free`
288 impl fmt::Show for CString {
289 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
290 String::from_utf8_lossy(self.as_bytes_no_nul()).fmt(f) // lossy UTF-8 conversion of the bytes before the NUL, then formatted
294 /// A generic trait for converting a value to a CString.
295 pub trait ToCStr for Sized? {
296 /// Copy the receiver into a CString.
300 /// Panics the task if the receiver has an interior null.
301 fn to_c_str(&self) -> CString;
303 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
304 unsafe fn to_c_str_unchecked(&self) -> CString;
306 /// Work with a temporary CString constructed from the receiver.
307 /// The provided `*const libc::c_char` will be freed immediately upon return.
312 /// extern crate libc;
315 /// let s = "PATH".with_c_str(|path| unsafe {
316 /// libc::getenv(path)
323 /// Panics the task if the receiver has an interior null.
325 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
326 let c_str = self.to_c_str(); // default implementation: start from a checked CString
330 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
332 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
333 let c_str = self.to_c_str_unchecked(); // default implementation: start from an unchecked CString
338 impl ToCStr for str { // every method forwards to the `[u8]` impl via `as_bytes()`
340 fn to_c_str(&self) -> CString {
341 self.as_bytes().to_c_str()
345 unsafe fn to_c_str_unchecked(&self) -> CString {
346 self.as_bytes().to_c_str_unchecked()
350 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
351 self.as_bytes().with_c_str(f)
355 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
356 self.as_bytes().with_c_str_unchecked(f)
360 impl ToCStr for String { // every method forwards to the `[u8]` impl via `as_bytes()`
362 fn to_c_str(&self) -> CString {
363 self.as_bytes().to_c_str()
367 unsafe fn to_c_str_unchecked(&self) -> CString {
368 self.as_bytes().to_c_str_unchecked()
372 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
373 self.as_bytes().with_c_str(f)
377 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
378 self.as_bytes().with_c_str_unchecked(f)
382 // The length (in bytes) of the stack allocated buffer for `vec.with_c_str()`; only inputs strictly shorter than this are copied into it.
383 const BUF_LEN: uint = 128;
385 impl ToCStr for [u8] {
386 fn to_c_str(&self) -> CString {
387 let mut cs = unsafe { self.to_c_str_unchecked() }; // copy first...
388 check_for_null(self, cs.as_mut_ptr()); // ...then run `check_for_null` over the copy (the documented interior-null check)
392 unsafe fn to_c_str_unchecked(&self) -> CString {
393 let self_len = self.len();
394 let buf = libc::malloc(self_len as libc::size_t + 1) as *mut u8; // +1 byte for the NUL terminator
395 if buf.is_null() { ::alloc::oom() } // a failed `malloc` is handed to `::alloc::oom()`
397 ptr::copy_memory(buf, self.as_ptr(), self_len);
398 *buf.offset(self_len as int) = 0; // write the terminator right after the copied bytes
400 CString::new(buf as *const libc::c_char, true) // `true`: the CString owns the malloc'd buffer
403 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
404 unsafe { with_c_str(self, true, f) } // `checked = true`
407 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
408 with_c_str(self, false, f) // `checked = false`
412 impl<'a, Sized? T: ToCStr> ToCStr for &'a T { // references forward to the referent's impl through `(**self)`
414 fn to_c_str(&self) -> CString {
419 unsafe fn to_c_str_unchecked(&self) -> CString {
420 (**self).to_c_str_unchecked()
424 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
425 (**self).with_c_str(f)
429 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
430 (**self).with_c_str_unchecked(f)
434 // Unsafe function that handles possibly copying the &[u8] into a stack array (inputs shorter than BUF_LEN) before calling `f`.
435 unsafe fn with_c_str<T>(v: &[u8], checked: bool,
436 f: |*const libc::c_char| -> T) -> T {
437 let c_str = if v.len() < BUF_LEN { // strictly shorter, so at least one byte of `buf` is left after the copy
438 let mut buf: [u8, .. BUF_LEN] = mem::uninitialized(); // stack buffer; contents are uninitialized until written
439 slice::bytes::copy_memory(&mut buf, v);
442 let buf = buf.as_mut_ptr();
444 check_for_null(v, buf as *mut libc::c_char); // NOTE(review): presumably guarded by `checked`; the guard is not visible in this excerpt
447 return f(buf as *const libc::c_char) // short inputs: `f` receives the stack copy and its result is returned directly
451 v.to_c_str_unchecked() // reached only when the stack path above did not return; this CString is malloc-backed
458 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
459 for i in range(0, v.len()) { // one step per byte of `v`
461 let p = buf.offset(i as int); // pointer to the i-th char of the C buffer
467 /// External iterator for a CString's bytes, yielded as `libc::c_char` values.
469 /// Use with the `std::iter` module.
470 pub struct CChars<'a> {
471 ptr: *const libc::c_char, // current read position
472 marker: marker::ContravariantLifetime<'a>, // ties the iterator to the lifetime `'a`
475 impl<'a> Iterator<libc::c_char> for CChars<'a> {
476 fn next(&mut self) -> Option<libc::c_char> {
477 let ch = unsafe { *self.ptr }; // read the char at the current position
481 self.ptr = unsafe { self.ptr.offset(1) }; // advance the position by one char
487 /// Parses a C "multistring", e.g. Windows env values or
488 /// the req->ptr result in a uv_fs_readdir() call.
490 /// Optionally, a `count` can be passed in, limiting the
491 /// parsing to at most `count` strings.
493 /// The specified closure is invoked with each string that
494 /// is found, and the number of strings found is returned.
495 pub unsafe fn from_c_multistring(buf: *const libc::c_char,
497 f: |&CString|) -> uint {
499 let mut curr_ptr: uint = buf as uint; // address of the string currently being examined
501 let (limited_count, limit) = match count {
502 Some(limit) => (true, limit),
505 while ((limited_count && ctr < limit) || !limited_count)
506 && *(curr_ptr as *const libc::c_char) != 0 as libc::c_char { // a NUL where a string would start ends the list
507 let cstr = CString::new(curr_ptr as *const libc::c_char, false); // non-owning: dropping `cstr` does not free the input
509 curr_ptr += cstr.len() + 1; // step over this string and its NUL terminator
525 fn test_str_multistring_parsing() {
527 let input = b"zero\0one\0\0"; // two strings, then an extra NUL where a third would start
528 let ptr = input.as_ptr();
529 let expected = ["zero", "one"];
530 let mut it = expected.iter();
531 let result = from_c_multistring(ptr as *const libc::c_char, None, |c| { // `None`: no limit on the number of strings
532 let cbytes = c.as_bytes_no_nul();
533 assert_eq!(cbytes, it.next().unwrap().as_bytes());
535 assert_eq!(result, 2);
536 assert!(it.next().is_none()); // every expected string was consumed
541 fn test_str_to_c_str() {
542 let c_str = "".to_c_str();
544 assert_eq!(*c_str.as_ptr().offset(0), 0); // the empty string is just the terminator
547 let c_str = "hello".to_c_str();
548 let buf = c_str.as_ptr();
550 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
551 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
552 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
553 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
554 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
555 assert_eq!(*buf.offset(5), 0); // terminator follows the five copied bytes
560 fn test_vec_to_c_str() {
562 let c_str = b.to_c_str();
564 assert_eq!(*c_str.as_ptr().offset(0), 0);
567 let c_str = b"hello".to_c_str();
568 let buf = c_str.as_ptr();
570 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
571 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
572 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
573 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
574 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
575 assert_eq!(*buf.offset(5), 0);
578 let c_str = b"foo\xFF".to_c_str(); // byte input containing the non-ASCII byte 0xFF
579 let buf = c_str.as_ptr();
581 assert_eq!(*buf.offset(0), 'f' as libc::c_char);
582 assert_eq!(*buf.offset(1), 'o' as libc::c_char);
583 assert_eq!(*buf.offset(2), 'o' as libc::c_char);
584 assert_eq!(*buf.offset(3), 0xffu8 as i8); // the 0xFF byte is expected back unchanged
585 assert_eq!(*buf.offset(4), 0);
591 let c_str = "hello".to_c_str();
592 unsafe { libc::free(c_str.unwrap() as *mut libc::c_void) }
597 let c_str = "hello".to_c_str();
598 let len = unsafe { libc::strlen(c_str.as_ptr()) };
604 let c_str = "".to_c_str();
605 let mut iter = c_str.iter();
606 assert_eq!(iter.next(), None);
608 let c_str = "hello".to_c_str();
609 let mut iter = c_str.iter();
610 assert_eq!(iter.next(), Some('h' as libc::c_char));
611 assert_eq!(iter.next(), Some('e' as libc::c_char));
612 assert_eq!(iter.next(), Some('l' as libc::c_char));
613 assert_eq!(iter.next(), Some('l' as libc::c_char));
614 assert_eq!(iter.next(), Some('o' as libc::c_char));
615 assert_eq!(iter.next(), None);
619 fn test_to_c_str_fail() {
620 assert!(task::try(proc() { "he\x00llo".to_c_str() }).is_err()); // the interior NUL must make the task running `to_c_str()` fail
624 fn test_to_c_str_unchecked() {
626 let c_string = "he\x00llo".to_c_str_unchecked(); // the interior NUL is accepted by the unchecked variant
627 let buf = c_string.as_ptr();
628 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
629 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
630 assert_eq!(*buf.offset(2), 0); // the interior NUL is copied through
631 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
632 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
633 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
634 assert_eq!(*buf.offset(6), 0); // the real terminator comes after all six input bytes
640 let c_str = "hello".to_c_str();
641 assert_eq!(c_str.as_bytes(), b"hello\0");
642 let c_str = "".to_c_str();
643 assert_eq!(c_str.as_bytes(), b"\0");
644 let c_str = b"foo\xFF".to_c_str();
645 assert_eq!(c_str.as_bytes(), b"foo\xFF\0");
649 fn test_as_bytes_no_nul() {
650 let c_str = "hello".to_c_str();
651 assert_eq!(c_str.as_bytes_no_nul(), b"hello");
652 let c_str = "".to_c_str();
653 let exp: &[u8] = &[]; // the empty string yields an empty slice
654 assert_eq!(c_str.as_bytes_no_nul(), exp);
655 let c_str = b"foo\xFF".to_c_str();
656 assert_eq!(c_str.as_bytes_no_nul(), b"foo\xFF"); // non-UTF-8 bytes are returned as-is
661 let c_str = "hello".to_c_str();
662 assert_eq!(c_str.as_str(), Some("hello"));
663 let c_str = "".to_c_str();
664 assert_eq!(c_str.as_str(), Some(""));
665 let c_str = b"foo\xFF".to_c_str();
666 assert_eq!(c_str.as_str(), None);
672 let _c_str = unsafe { CString::new(ptr::null(), false) };
677 let a = "hello".to_c_str();
683 fn test_clone_noleak() {
684 fn foo(f: |c: &CString|) {
685 let s = "test".to_string();
686 let c = s.to_c_str();
687 // give the closure a non-owned CString (a second view of `c`'s buffer)
688 let mut c_ = unsafe { CString::new(c.as_ptr(), false) };
690 // muck with the buffer for later printing
691 unsafe { *c_.as_mut_ptr() = 'X' as libc::c_char }
694 let mut c_: Option<CString> = None;
696 c_ = Some(c.clone()); // `clone()` produces an owned copy
698 // force a copy, reading the memory
699 c.as_bytes().to_vec();
701 let c_ = c_.unwrap();
702 // force a copy, reading the memory of the clone
703 c_.as_bytes().to_vec();
714 fn check(s: &str, c_str: *const libc::c_char) {
715 let s_buf = s.as_ptr();
716 for i in range(0, s.len()) { // one step per byte of `s`
719 *s_buf.offset(i as int) as libc::c_char, // i-th byte of `s` (NOTE(review): presumably one operand of an assertion whose opening line is not visible)
720 *c_str.offset(i as int)); // i-th char of the C string
725 static S_SHORT: &'static str = "Mary"; // 4 bytes: shorter than BUF_LEN
726 static S_MEDIUM: &'static str = "Mary had a little lamb"; // 22 bytes: still shorter than BUF_LEN
727 static S_LONG: &'static str = "\
728 Mary had a little lamb, Little lamb
729 Mary had a little lamb, Little lamb
730 Mary had a little lamb, Little lamb
731 Mary had a little lamb, Little lamb
732 Mary had a little lamb, Little lamb
733 Mary had a little lamb, Little lamb"; // six lines: longer than BUF_LEN (128)
735 fn bench_to_string(b: &mut Bencher, s: &str) { // shared body of the `to_c_str()` benchmarks
737 let c_str = s.to_c_str();
738 check(s, c_str.as_ptr()); // pass the result to `check` together with the source string
743 fn bench_to_c_str_short(b: &mut Bencher) {
744 bench_to_string(b, S_SHORT) // `to_c_str()` on the 4-byte input
748 fn bench_to_c_str_medium(b: &mut Bencher) {
749 bench_to_string(b, S_MEDIUM) // `to_c_str()` on the 22-byte input
753 fn bench_to_c_str_long(b: &mut Bencher) {
754 bench_to_string(b, S_LONG) // `to_c_str()` on the multi-line input
757 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) { // shared body of the `to_c_str_unchecked()` benchmarks
759 let c_str = unsafe { s.to_c_str_unchecked() };
760 check(s, c_str.as_ptr()) // pass the result to `check` together with the source string
765 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
766 bench_to_c_str_unchecked(b, S_SHORT) // `to_c_str_unchecked()` on the 4-byte input
770 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
771 bench_to_c_str_unchecked(b, S_MEDIUM) // `to_c_str_unchecked()` on the 22-byte input
775 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
776 bench_to_c_str_unchecked(b, S_LONG) // `to_c_str_unchecked()` on the multi-line input
779 fn bench_with_c_str(b: &mut Bencher, s: &str) { // shared body of the `with_c_str()` benchmarks
781 s.with_c_str(|c_str_buf| check(s, c_str_buf)) // `check` runs inside the closure, on the temporary C string
786 fn bench_with_c_str_short(b: &mut Bencher) {
787 bench_with_c_str(b, S_SHORT) // input shorter than BUF_LEN: the stack-buffer path of `with_c_str`
791 fn bench_with_c_str_medium(b: &mut Bencher) {
792 bench_with_c_str(b, S_MEDIUM) // input shorter than BUF_LEN: the stack-buffer path of `with_c_str`
796 fn bench_with_c_str_long(b: &mut Bencher) {
797 bench_with_c_str(b, S_LONG) // input longer than BUF_LEN: the stack buffer is not used
800 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) { // shared body of the `with_c_str_unchecked()` benchmarks
803 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf)) // `check` runs inside the closure, on the temporary C string
809 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
810 bench_with_c_str_unchecked(b, S_SHORT) // input shorter than BUF_LEN: the stack-buffer path of `with_c_str`
814 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
815 bench_with_c_str_unchecked(b, S_MEDIUM) // input shorter than BUF_LEN: the stack-buffer path of `with_c_str`
819 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
820 bench_with_c_str_unchecked(b, S_LONG) // input longer than BUF_LEN: the stack buffer is not used