1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null-terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid Unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 Be careful to remember that the memory is managed by the C allocator API and
40 not by the Rust allocator API.
41 That means that the CString pointers should be freed with the C allocator API
42 if you intend to do that on your own, as the behaviour if you free them with
43 Rust's allocator API is not well defined.
45 An example of creating and using a C string would be:
51 fn puts(s: *const libc::c_char);
55 let my_string = "Hello, world!";
57 // Allocate the C string with an explicit local that owns the string. The
58 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
59 let my_c_string = my_string.to_c_str();
61 puts(my_c_string.as_ptr());
64 // Don't save/return the pointer to the C string, the `c_buffer` will be
65 // deallocated when this block returns!
66 my_string.with_c_str(|c_buffer| {
67 unsafe { puts(c_buffer); }
74 use collections::string::String;
75 use collections::hash;
77 use core::kinds::{Sized, marker};
79 use core::prelude::{Clone, Drop, Eq, Iterator};
80 use core::prelude::{SlicePrelude, None, Option, Ordering, PartialEq};
81 use core::prelude::{PartialOrd, RawPtr, Some, StrPrelude, range};
88 /// The representation of a C String.
90 /// This structure wraps a `*const libc::c_char`, and will automatically free
91 /// the memory it is pointing to when it goes out of scope, provided that it
/// owns the buffer (a non-owning instance leaves the allocation alone).
// Start of the NUL-terminated character data; `len()` measures it with `strlen`.
93 buf: *const libc::c_char,
97 impl Clone for CString {
98 /// Clone this CString into a new, uniquely owned CString. For safety
99 /// reasons, this is always a deep clone with the memory allocated
100 /// with C's allocator API, rather than the usual shallow clone.
/// The clone owns its buffer even when `self` does not.
101 fn clone(&self) -> CString {
// `len()` excludes the terminator, so add one to copy the NUL as well.
102 let len = self.len() + 1;
103 let buf = unsafe { libc::malloc(len as libc::size_t) } as *mut libc::c_char;
// A null result from `malloc` is handed to `::alloc::oom()`.
104 if buf.is_null() { ::alloc::oom() }
// The destination was just allocated, so it cannot overlap `self.buf`.
105 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
// `owns_buffer_: true` makes the clone responsible for freeing `buf`.
106 CString { buf: buf as *const libc::c_char, owns_buffer_: true }
// Equality compares the buffer addresses first and, failing that, the
// contents via `libc::strcmp`.
110 impl PartialEq for CString {
111 fn eq(&self, other: &CString) -> bool {
112 // Check if the two strings share the same buffer
// NOTE(review): the branch taken on identical addresses is not visible in
// this extract; presumably it short-circuits to `true` - confirm.
113 if self.buf as uint == other.buf as uint {
// Different addresses: equal iff `strcmp` reports no difference up to the NUL.
117 libc::strcmp(self.buf, other.buf) == 0
// Ordering is delegated to the byte slices returned by `as_bytes()`, which
// include the terminating NUL.
123 impl PartialOrd for CString {
125 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
126 self.as_bytes().partial_cmp(other.as_bytes())
// Marker impl with an empty body; the comparison itself lives in `PartialEq`.
130 impl Eq for CString {}
// Hashes the same bytes that `as_bytes()` exposes (terminating NUL included).
132 impl<S: hash::Writer> hash::Hash<S> for CString {
134 fn hash(&self, state: &mut S) {
135 self.as_bytes().hash(state)
140 /// Create a C String from a pointer, with memory managed by C's allocator
141 /// API, so avoid calling it with a pointer to memory managed by Rust's
142 /// allocator API, as the behaviour would not be well defined.
146 /// Panics if `buf` is null
147 pub unsafe fn new(buf: *const libc::c_char, owns_buffer: bool) -> CString {
// Only null-ness is checked here; the pointee itself is never inspected.
148 assert!(!buf.is_null());
// `owns_buffer` is stored as-is; when `true`, dropping the value frees `buf`
// with `libc::free`.
149 CString { buf: buf, owns_buffer_: owns_buffer }
152 /// Return a pointer to the NUL-terminated string data.
154 /// `.as_ptr` returns an internal pointer into the `CString`, and
155 /// may be invalidated when the `CString` falls out of scope (the
156 /// destructor will run, freeing the allocation if there is
160 /// let foo = "some string";
163 /// let x = foo.to_c_str();
164 /// let p = x.as_ptr();
166 /// // wrong (the CString will be freed, invalidating `p`)
167 /// let p = foo.to_c_str().as_ptr();
173 /// extern crate libc;
176 /// let c_str = "foo bar".to_c_str();
178 /// libc::puts(c_str.as_ptr());
// Borrows `self` immutably and yields a `*const` pointer; unlike `unwrap()`,
// this does not relinquish ownership of the buffer.
182 pub fn as_ptr(&self) -> *const libc::c_char {
186 /// Return a mutable pointer to the NUL-terminated string data.
188 /// `.as_mut_ptr` returns an internal pointer into the `CString`, and
189 /// may be invalidated when the `CString` falls out of scope (the
190 /// destructor will run, freeing the allocation if there is
194 /// let foo = "some string";
197 /// let mut x = foo.to_c_str();
198 /// let p = x.as_mut_ptr();
200 /// // wrong (the CString will be freed, invalidating `p`)
201 /// let p = foo.to_c_str().as_mut_ptr();
// Mutable counterpart of `as_ptr`: needs `&mut self` and yields a `*mut` pointer.
203 pub fn as_mut_ptr(&mut self) -> *mut libc::c_char {
207 /// Returns whether or not the `CString` owns the buffer.
/// An owned buffer is released with `libc::free` when the `CString` is dropped.
208 pub fn owns_buffer(&self) -> bool {
212 /// Converts the CString into a `&[u8]` without copying.
213 /// Includes the terminating NUL byte.
215 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
// Reinterpret (pointer, length) as a slice; `len() + 1` extends the view
// over the terminator. The returned lifetime is tied to `&'a self`.
217 mem::transmute(Slice { data: self.buf, len: self.len() + 1 })
221 /// Converts the CString into a `&[u8]` without copying.
222 /// Does not include the terminating NUL byte.
224 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
// Same construction as `as_bytes`, but the length stops short of the NUL.
226 mem::transmute(Slice { data: self.buf, len: self.len() })
230 /// Converts the CString into a `&str` without copying.
231 /// Returns None if the CString is not UTF-8.
233 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
// Work on the bytes without the terminator so the NUL never ends up in the
// resulting `&str`.
// NOTE(review): the UTF-8 validation call is not visible in this extract.
234 let buf = self.as_bytes_no_nul();
238 /// Return a CString iterator.
/// The iterator yields `libc::c_char` values and borrows `self` for `'a`.
239 pub fn iter<'a>(&'a self) -> CChars<'a> {
// Zero-sized marker that carries the `'a` lifetime alongside the raw pointer.
242 marker: marker::ContravariantLifetime,
246 /// Unwraps the wrapped `*const libc::c_char` from the `CString` wrapper.
248 /// Any ownership of the buffer by the `CString` wrapper is
249 /// forgotten, meaning that the backing allocation of this
250 /// `CString` is not automatically freed if it owns the
251 /// allocation. In this case, a user of `.unwrap()` should ensure
252 /// the allocation is freed, to avoid leaking memory. You should
253 /// use libc's memory allocator in this case.
255 /// Prefer `.as_ptr()` when just retrieving a pointer to the
256 /// string data, as that does not relinquish ownership.
257 pub unsafe fn unwrap(mut self) -> *const libc::c_char {
// Clear the flag first so that dropping `self` at the end of this call does
// not free the buffer that is being handed out.
258 self.owns_buffer_ = false;
262 /// Return the number of bytes in the CString (not including the NUL
265 pub fn len(&self) -> uint {
// No length is cached: each call rescans the buffer with `strlen`.
266 unsafe { libc::strlen(self.buf) as uint }
269 /// Returns if there are no bytes in this string
/// (implemented as `len() == 0`, so the terminating NUL is not counted).
271 pub fn is_empty(&self) -> bool { self.len() == 0 }
// Releases the buffer with the C allocator, but only for owning instances.
274 impl Drop for CString {
// Non-owning wrappers (e.g. built with `CString::new(ptr, false)`) skip the free.
276 if self.owns_buffer_ {
// `free` takes `*mut c_void`, hence the cast from `*const c_char`.
278 libc::free(self.buf as *mut libc::c_void)
// Formats the string contents without the terminating NUL.
284 impl fmt::Show for CString {
285 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Lossy conversion: the bytes need not be valid UTF-8 (see `as_str`).
286 String::from_utf8_lossy(self.as_bytes_no_nul()).fmt(f)
290 /// A generic trait for converting a value to a CString.
291 pub trait ToCStr for Sized? {
292 /// Copy the receiver into a CString.
296 /// Panics the task if the receiver has an interior null.
297 fn to_c_str(&self) -> CString;
299 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
300 unsafe fn to_c_str_unchecked(&self) -> CString;
302 /// Work with a temporary CString constructed from the receiver.
303 /// The provided `*const libc::c_char` will be freed immediately upon return.
308 /// extern crate libc;
311 /// let s = "PATH".with_c_str(|path| unsafe {
312 /// libc::getenv(path)
319 /// Panics the task if the receiver has an interior null.
321 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Default implementation: build an owned `CString` as a local, so it is
// dropped when this method returns.
322 let c_str = self.to_c_str();
326 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
328 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Same shape as `with_c_str`, built on the unchecked conversion.
329 let c_str = self.to_c_str_unchecked();
// Every method forwards to the `[u8]` implementation through `as_bytes()`.
334 impl ToCStr for str {
336 fn to_c_str(&self) -> CString {
337 self.as_bytes().to_c_str()
341 unsafe fn to_c_str_unchecked(&self) -> CString {
342 self.as_bytes().to_c_str_unchecked()
// Overrides the trait default with the `[u8]` version of `with_c_str`.
346 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
347 self.as_bytes().with_c_str(f)
351 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
352 self.as_bytes().with_c_str_unchecked(f)
// Mirrors the `str` implementation: each method forwards to the `[u8]`
// implementation through `as_bytes()`.
356 impl ToCStr for String {
358 fn to_c_str(&self) -> CString {
359 self.as_bytes().to_c_str()
363 unsafe fn to_c_str_unchecked(&self) -> CString {
364 self.as_bytes().to_c_str_unchecked()
368 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
369 self.as_bytes().with_c_str(f)
373 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
374 self.as_bytes().with_c_str_unchecked(f)
378 // The length of the stack allocated buffer for `vec.with_c_str()`
// Inputs strictly shorter than this are copied onto the stack by the free
// function `with_c_str`; longer ones go through `to_c_str_unchecked()`.
379 const BUF_LEN: uint = 128;
// The implementation that performs the conversion; the `str`, `String` and
// `&T` impls forward to it.
381 impl ToCStr for [u8] {
382 fn to_c_str(&self) -> CString {
// Copy first, then hand the source bytes and the copy to `check_for_null`.
383 let mut cs = unsafe { self.to_c_str_unchecked() };
384 check_for_null(self, cs.as_mut_ptr());
388 unsafe fn to_c_str_unchecked(&self) -> CString {
389 let self_len = self.len();
// One extra byte for the terminator written below.
// NOTE(review): `self_len + 1` is not checked for overflow.
390 let buf = libc::malloc(self_len as libc::size_t + 1) as *mut u8;
// A null result from `malloc` is handed to `::alloc::oom()`.
391 if buf.is_null() { ::alloc::oom() }
393 ptr::copy_memory(buf, self.as_ptr(), self_len);
// Terminate the copy with a NUL byte.
394 *buf.offset(self_len as int) = 0;
// `true`: the new `CString` owns the malloc'd buffer and will free it.
396 CString::new(buf as *const libc::c_char, true)
399 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
// `true` selects the checked mode of the shared free function.
400 unsafe { with_c_str(self, true, f) }
403 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
// `false` selects the unchecked mode.
404 with_c_str(self, false, f)
// Blanket impl for references: dereference and forward to `T`'s implementation.
408 impl<'a, Sized? T: ToCStr> ToCStr for &'a T {
410 fn to_c_str(&self) -> CString {
415 unsafe fn to_c_str_unchecked(&self) -> CString {
// `self` is `&&T` here, hence the double dereference.
416 (**self).to_c_str_unchecked()
420 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
421 (**self).with_c_str(f)
425 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
426 (**self).with_c_str_unchecked(f)
430 // Unsafe function that handles possibly copying the &[u8] into a stack array.
// `v` is the source bytes, `f` receives the C-string pointer, and its result
// is returned. Inputs of `BUF_LEN` bytes or more take the
// `to_c_str_unchecked()` path instead of the stack array.
// NOTE(review): `checked` presumably gates the `check_for_null` call below;
// the guarding condition is not visible in this extract - confirm.
431 unsafe fn with_c_str<T>(v: &[u8], checked: bool,
432 f: |*const libc::c_char| -> T) -> T {
// Strictly less than: at least one byte of the array stays free after the
// copy (presumably for the NUL terminator - the write is not visible here).
433 let c_str = if v.len() < BUF_LEN {
434 let mut buf: [u8, .. BUF_LEN] = mem::uninitialized();
435 slice::bytes::copy_memory(&mut buf, v);
438 let buf = buf.as_mut_ptr();
440 check_for_null(v, buf as *mut libc::c_char);
// Stack path: call `f` directly on the array and return early.
443 return f(buf as *const libc::c_char)
447 v.to_c_str_unchecked()
// Walks the first `v.len()` positions of `buf`, one per source byte.
// NOTE(review): the per-byte test and the failure action are not visible in
// this extract; callers document a panic on an interior NUL - confirm.
454 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
// The bound is the source length, so the terminator itself is never visited.
455 for i in range(0, v.len()) {
457 let p = buf.offset(i as int);
463 /// External iterator for a CString's bytes.
465 /// Use with the `std::iter` module.
466 pub struct CChars<'a> {
// Cursor into the string data; advanced by one on each `next()` call.
467 ptr: *const libc::c_char,
// Carries the lifetime `'a` (the raw pointer alone has none).
468 marker: marker::ContravariantLifetime<'a>,
// Yields the string's `libc::c_char` values one at a time.
471 impl<'a> Iterator<libc::c_char> for CChars<'a> {
472 fn next(&mut self) -> Option<libc::c_char> {
// Read the value under the cursor.
// NOTE(review): the end-of-string test is not visible in this extract;
// presumably a zero value yields `None` - confirm.
473 let ch = unsafe { *self.ptr };
// Step the cursor to the next character.
477 self.ptr = unsafe { self.ptr.offset(1) };
483 /// Parses a C "multistring", eg windows env values or
484 /// the req->ptr result in a uv_fs_readdir() call.
486 /// Optionally, a `count` can be passed in, limiting the
487 /// parsing to only being done `count`-times.
489 /// The specified closure is invoked with each string that
490 /// is found, and the number of strings found is returned.
/// Without a `count`, parsing stops at the first empty string.
491 pub unsafe fn from_c_multistring(buf: *const libc::c_char,
493 f: |&CString|) -> uint {
// The cursor is kept as an integer address so it can be advanced with `+=`.
495 let mut curr_ptr: uint = buf as uint;
// Split `count` into a flag and a cap on the number of strings visited.
497 let (limited_count, limit) = match count {
498 Some(limit) => (true, limit),
// Continue while under the cap (if any) and the cursor is not on a NUL,
// i.e. not at an empty string.
501 while ((limited_count && ctr < limit) || !limited_count)
502 && *(curr_ptr as *const libc::c_char) != 0 as libc::c_char {
// Non-owning view (`owns_buffer == false`): dropping `cstr` frees nothing.
503 let cstr = CString::new(curr_ptr as *const libc::c_char, false);
// Skip past this string and its terminator.
505 curr_ptr += cstr.len() + 1;
// Feeds a two-entry multistring ("zero", "one") that ends with an empty
// string, then checks each callback argument and the returned count.
521 fn test_str_multistring_parsing() {
523 let input = b"zero\0one\0\0";
524 let ptr = input.as_ptr();
525 let expected = ["zero", "one"];
526 let mut it = expected.iter();
// `None`: no cap on the number of strings parsed.
527 let result = from_c_multistring(ptr as *const libc::c_char, None, |c| {
528 let cbytes = c.as_bytes_no_nul();
529 assert_eq!(cbytes, it.next().unwrap().as_bytes());
531 assert_eq!(result, 2);
// Every expected entry must have been consumed by the callback.
532 assert!(it.next().is_none());
// Checks `&str` -> `CString` byte by byte, including the trailing NUL.
537 fn test_str_to_c_str() {
// Empty input: the buffer holds just the terminator.
538 let c_str = "".to_c_str();
540 assert_eq!(*c_str.as_ptr().offset(0), 0);
543 let c_str = "hello".to_c_str();
544 let buf = c_str.as_ptr();
546 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
547 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
548 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
549 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
550 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
// The terminator follows the last character.
551 assert_eq!(*buf.offset(5), 0);
// Checks `&[u8]` -> `CString` byte by byte, including a non-ASCII byte.
556 fn test_vec_to_c_str() {
// NOTE(review): `b` is defined on a line not visible in this extract; the
// assertion below expects the result to start with a NUL.
558 let c_str = b.to_c_str();
560 assert_eq!(*c_str.as_ptr().offset(0), 0);
563 let c_str = b"hello".to_c_str();
564 let buf = c_str.as_ptr();
566 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
567 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
568 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
569 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
570 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
571 assert_eq!(*buf.offset(5), 0);
// 0xFF is not valid UTF-8 on its own; the conversion must still copy it verbatim.
574 let c_str = b"foo\xFF".to_c_str();
575 let buf = c_str.as_ptr();
577 assert_eq!(*buf.offset(0), 'f' as libc::c_char);
578 assert_eq!(*buf.offset(1), 'o' as libc::c_char);
579 assert_eq!(*buf.offset(2), 'o' as libc::c_char);
// NOTE(review): this cast assumes `libc::c_char` is `i8`; every other assert
// here casts to `libc::c_char`. Consider `0xffu8 as libc::c_char` so the test
// also builds on targets where `c_char` is unsigned.
580 assert_eq!(*buf.offset(3), 0xffu8 as i8);
581 assert_eq!(*buf.offset(4), 0);
587 let c_str = "hello".to_c_str();
588 unsafe { libc::free(c_str.unwrap() as *mut libc::c_void) }
593 let c_str = "hello".to_c_str();
594 let len = unsafe { libc::strlen(c_str.as_ptr()) };
600 let c_str = "".to_c_str();
601 let mut iter = c_str.iter();
602 assert_eq!(iter.next(), None);
604 let c_str = "hello".to_c_str();
605 let mut iter = c_str.iter();
606 assert_eq!(iter.next(), Some('h' as libc::c_char));
607 assert_eq!(iter.next(), Some('e' as libc::c_char));
608 assert_eq!(iter.next(), Some('l' as libc::c_char));
609 assert_eq!(iter.next(), Some('l' as libc::c_char));
610 assert_eq!(iter.next(), Some('o' as libc::c_char));
611 assert_eq!(iter.next(), None);
// The checked conversion must fail the task when the input has an interior NUL.
615 fn test_to_c_str_fail() {
// Run in a separate task so the failure is observed as `Err` rather than
// aborting the test itself.
616 assert!(task::try(proc() { "he\x00llo".to_c_str() }).is_err());
// The unchecked conversion copies an interior NUL verbatim and still appends
// the terminator after the last byte.
620 fn test_to_c_str_unchecked() {
622 let c_string = "he\x00llo".to_c_str_unchecked();
623 let buf = c_string.as_ptr();
624 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
625 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
// The interior NUL from the input.
626 assert_eq!(*buf.offset(2), 0);
627 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
628 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
629 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
// The appended terminator.
630 assert_eq!(*buf.offset(6), 0);
636 let c_str = "hello".to_c_str();
637 assert_eq!(c_str.as_bytes(), b"hello\0");
638 let c_str = "".to_c_str();
639 assert_eq!(c_str.as_bytes(), b"\0");
640 let c_str = b"foo\xFF".to_c_str();
641 assert_eq!(c_str.as_bytes(), b"foo\xFF\0");
// `as_bytes_no_nul()` must return exactly the input bytes, without the terminator.
645 fn test_as_bytes_no_nul() {
646 let c_str = "hello".to_c_str();
647 assert_eq!(c_str.as_bytes_no_nul(), b"hello");
// Empty string: the result is an empty slice.
648 let c_str = "".to_c_str();
649 let exp: &[u8] = &[];
650 assert_eq!(c_str.as_bytes_no_nul(), exp);
// Non-UTF-8 content is passed through unchanged.
651 let c_str = b"foo\xFF".to_c_str();
652 assert_eq!(c_str.as_bytes_no_nul(), b"foo\xFF");
657 let c_str = "hello".to_c_str();
658 assert_eq!(c_str.as_str(), Some("hello"));
659 let c_str = "".to_c_str();
660 assert_eq!(c_str.as_str(), Some(""));
661 let c_str = b"foo\xFF".to_c_str();
662 assert_eq!(c_str.as_str(), None);
668 let _c_str = unsafe { CString::new(ptr::null(), false) };
673 let a = "hello".to_c_str();
// Clones a non-owning `CString` inside a callback and reads both the original
// and the clone afterwards.
// NOTE(review): several lines of this test are not visible in this extract
// (the call to `f` and the outer call to `foo`); comments below cover only
// what is shown.
679 fn test_clone_noleak() {
680 fn foo(f: |c: &CString|) {
681 let s = "test".to_string();
682 let c = s.to_c_str();
683 // give the closure a non-owned CString
// `c_` aliases `c`'s buffer and, with `owns_buffer == false`, will not free it.
684 let mut c_ = unsafe { CString::new(c.as_ptr(), false) };
686 // muck with the buffer for later printing
687 unsafe { *c_.as_mut_ptr() = 'X' as libc::c_char }
690 let mut c_: Option<CString> = None;
// `clone()` always produces an owning deep copy, so it can be kept in `c_`
// independently of the borrowed `c`.
692 c_ = Some(c.clone());
694 // force a copy, reading the memory
695 c.as_bytes().to_vec();
697 let c_ = c_.unwrap();
698 // force a copy, reading the memory
699 c_.as_bytes().to_vec();
// Benchmark helper: walks `s` and `c_str` in lockstep over `s.len()` bytes so
// that the converted string is actually read.
// NOTE(review): the comparison wrapping the two operands below is not visible
// in this extract (presumably `assert_eq!`) - confirm.
710 fn check(s: &str, c_str: *const libc::c_char) {
711 let s_buf = s.as_ptr();
712 for i in range(0, s.len()) {
// Source byte, cast to `c_char` to match the type on the C side.
715 *s_buf.offset(i as int) as libc::c_char,
716 *c_str.offset(i as int));
// Benchmark inputs of increasing size. `S_SHORT` and `S_MEDIUM` are below the
// 128-byte `BUF_LEN` threshold used by `with_c_str`; `S_LONG` (six lines of
// 35 characters each) is above it.
721 static S_SHORT: &'static str = "Mary";
722 static S_MEDIUM: &'static str = "Mary had a little lamb";
723 static S_LONG: &'static str = "\
724 Mary had a little lamb, Little lamb
725 Mary had a little lamb, Little lamb
726 Mary had a little lamb, Little lamb
727 Mary had a little lamb, Little lamb
728 Mary had a little lamb, Little lamb
729 Mary had a little lamb, Little lamb";
// Driver for the `to_c_str()` benchmarks: convert `s`, then read the result
// back through `check`.
// NOTE(review): the `b.iter(...)` wrapper is not visible in this extract.
731 fn bench_to_string(b: &mut Bencher, s: &str) {
733 let c_str = s.to_c_str();
734 check(s, c_str.as_ptr());
// One entry point per input size.
739 fn bench_to_c_str_short(b: &mut Bencher) {
740 bench_to_string(b, S_SHORT)
744 fn bench_to_c_str_medium(b: &mut Bencher) {
745 bench_to_string(b, S_MEDIUM)
749 fn bench_to_c_str_long(b: &mut Bencher) {
750 bench_to_string(b, S_LONG)
// Driver for the `to_c_str_unchecked()` benchmarks: same shape as
// `bench_to_string`, using the unchecked conversion.
753 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) {
755 let c_str = unsafe { s.to_c_str_unchecked() };
756 check(s, c_str.as_ptr())
// One entry point per input size.
761 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
762 bench_to_c_str_unchecked(b, S_SHORT)
766 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
767 bench_to_c_str_unchecked(b, S_MEDIUM)
771 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
772 bench_to_c_str_unchecked(b, S_LONG)
// Driver for the `with_c_str()` benchmarks: the temporary C string is read
// inside the callback via `check`.
775 fn bench_with_c_str(b: &mut Bencher, s: &str) {
777 s.with_c_str(|c_str_buf| check(s, c_str_buf))
// One entry point per input size.
782 fn bench_with_c_str_short(b: &mut Bencher) {
783 bench_with_c_str(b, S_SHORT)
787 fn bench_with_c_str_medium(b: &mut Bencher) {
788 bench_with_c_str(b, S_MEDIUM)
792 fn bench_with_c_str_long(b: &mut Bencher) {
793 bench_with_c_str(b, S_LONG)
// Driver for the `with_c_str_unchecked()` benchmarks: same shape as
// `bench_with_c_str`, using the unchecked variant.
796 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) {
799 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
// One entry point per input size.
805 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
806 bench_with_c_str_unchecked(b, S_SHORT)
810 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
811 bench_with_c_str_unchecked(b, S_MEDIUM)
815 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
816 bench_with_c_str_unchecked(b, S_LONG)