1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid Unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 Be careful to remember that the memory is managed by the C allocator API and not
40 by the Rust allocator API.
41 That means that the CString pointers should be freed with the C allocator API
42 if you intend to do that on your own, as the behaviour if you free them with
43 Rust's allocator API is not well defined.
45 An example of creating and using a C string would be:
51 fn puts(s: *const libc::c_char);
55 let my_string = "Hello, world!";
57 // Allocate the C string with an explicit local that owns the string. The
58 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
59 let my_c_string = my_string.to_c_str();
61 puts(my_c_string.as_ptr());
64 // Don't save/return the pointer to the C string, the `c_buffer` will be
65 // deallocated when this block returns!
66 my_string.with_c_str(|c_buffer| {
67 unsafe { puts(c_buffer); }
74 use collections::string::String;
75 use collections::hash;
77 use core::kinds::{Sized, marker};
79 use core::prelude::{Clone, Drop, Eq, Iterator};
80 use core::prelude::{SlicePrelude, None, Option, Ordering, PartialEq};
81 use core::prelude::{PartialOrd, RawPtr, Some, StrPrelude, range};
88 /// The representation of a C String.
90 /// This structure wraps a `*const libc::c_char`, and will automatically free the
91 /// memory it is pointing to when it goes out of scope (if it owns the buffer).
93 buf: *const libc::c_char,
// Deep-copying `Clone` implementation: the clone gets its own malloc'd buffer.
97 impl Clone for CString {
98 /// Clone this CString into a new, uniquely owned CString. For safety
99 /// reasons, this is always a deep clone with the memory allocated
100 /// with C's allocator API, rather than the usual shallow clone.
101 fn clone(&self) -> CString {
// `len()` excludes the terminator, so add one byte to copy the NUL as well.
102 let len = self.len() + 1;
103 let buf = unsafe { libc::malloc(len as libc::size_t) } as *mut libc::c_char;
// A null return from `malloc` is routed to `::alloc::oom()`.
104 if buf.is_null() { ::alloc::oom() }
// `buf` is a fresh allocation, so it cannot overlap `self.buf`.
105 unsafe { ptr::copy_nonoverlapping_memory(buf, self.buf, len); }
// The clone always owns its buffer, regardless of `self.owns_buffer_`.
106 CString { buf: buf as *const libc::c_char, owns_buffer_: true }
// Equality for CStrings: pointer identity first, then a `strcmp` of the contents.
110 impl PartialEq for CString {
111 fn eq(&self, other: &CString) -> bool {
112 // Check if the two strings share the same buffer
// NOTE(review): the body of this branch is not visible here; presumably it
// short-circuits to `true` when both wrappers point at the same buffer.
113 if self.buf as uint == other.buf as uint {
// Different buffers: equal only if `strcmp` reports identical contents (0).
117 libc::strcmp(self.buf, other.buf) == 0
// Orders CStrings by comparing the byte slices returned by `as_bytes()`.
123 impl PartialOrd for CString {
124 // NOTE(stage0): remove method after a snapshot
// Transitional variant (presumably gated on `stage0`; the attribute is not
// visible here): passes `&other.as_bytes()` to the slice comparison.
127 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
128 self.as_bytes().partial_cmp(&other.as_bytes())
// Non-stage0 variant: same comparison, with `other.as_bytes()` passed directly.
130 #[cfg(not(stage0))] // NOTE(stage0): remove cfg after a snapshot
132 fn partial_cmp(&self, other: &CString) -> Option<Ordering> {
133 self.as_bytes().partial_cmp(other.as_bytes())
// Marker impl with an empty body; equality itself is defined by `PartialEq for CString`.
137 impl Eq for CString {}
// Hashing for CStrings, generic over any `hash::Writer` state `S`.
139 impl<S: hash::Writer> hash::Hash<S> for CString {
141 fn hash(&self, state: &mut S) {
// Hash the slice from `as_bytes()`, i.e. the contents including the terminating NUL.
142 self.as_bytes().hash(state)
147 /// Create a C String from a pointer, with memory managed by C's allocator
148 /// API, so avoid calling it with a pointer to memory managed by Rust's
149 /// allocator API, as the behaviour would not be well defined.
///
/// `owns_buffer` is stored in the wrapper; when it is `true`, dropping the
/// `CString` releases `buf` with `libc::free`.
///
153 /// Fails if `buf` is null
154 pub unsafe fn new(buf: *const libc::c_char, owns_buffer: bool) -> CString {
// Reject null up front so the wrapper never holds a null pointer.
155 assert!(!buf.is_null());
156 CString { buf: buf, owns_buffer_: owns_buffer }
159 /// Return a pointer to the NUL-terminated string data.
161 /// `.as_ptr` returns an internal pointer into the `CString`, and
162 /// may be invalidated when the `CString` falls out of scope (the
163 /// destructor will run, freeing the allocation if there is
167 /// let foo = "some string";
170 /// let x = foo.to_c_str();
171 /// let p = x.as_ptr();
173 /// // wrong (the CString will be freed, invalidating `p`)
174 /// let p = foo.to_c_str().as_ptr();
180 /// extern crate libc;
183 /// let c_str = "foo bar".to_c_str();
185 /// libc::puts(c_str.as_ptr());
// Takes `&self`: the pointer is handed out while the `CString` keeps the buffer.
189 pub fn as_ptr(&self) -> *const libc::c_char {
193 /// Return a mutable pointer to the NUL-terminated string data.
195 /// `.as_mut_ptr` returns an internal pointer into the `CString`, and
196 /// may be invalidated when the `CString` falls out of scope (the
197 /// destructor will run, freeing the allocation if there is
201 /// let foo = "some string";
204 /// let mut x = foo.to_c_str();
205 /// let p = x.as_mut_ptr();
207 /// // wrong (the CString will be freed, invalidating `p`)
208 /// let p = foo.to_c_str().as_mut_ptr();
// Requires `&mut self`, so a mutable pointer is only handed out through a unique borrow.
210 pub fn as_mut_ptr(&mut self) -> *mut libc::c_char {
214 /// Returns whether or not the `CString` owns the buffer.
// The ownership flag is set by `CString::new` and cleared by `unwrap`; `Drop`
// consults it before freeing.
215 pub fn owns_buffer(&self) -> bool {
219 /// Converts the CString into a `&[u8]` without copying.
220 /// Includes the terminating NUL byte.
222 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
// Build the slice header by hand over the existing buffer; `len() + 1`
// extends the view to cover the NUL terminator.
224 mem::transmute(Slice { data: self.buf, len: self.len() + 1 })
228 /// Converts the CString into a `&[u8]` without copying.
229 /// Does not include the terminating NUL byte.
231 pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
// Same hand-built slice header as `as_bytes`, but with length `len()` so the
// view stops just before the NUL terminator.
233 mem::transmute(Slice { data: self.buf, len: self.len() })
237 /// Converts the CString into a `&str` without copying.
238 /// Returns None if the CString is not UTF-8.
240 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
// Work on the bytes without the trailing NUL, so the terminator is never part
// of the resulting `&str`.
241 let buf = self.as_bytes_no_nul();
245 /// Return a CString iterator.
// The returned `CChars<'a>` is tied to the `'a` borrow of `self`.
246 pub fn iter<'a>(&'a self) -> CChars<'a> {
// Marker value for the `marker` field of `CChars`, which is declared with the `'a` lifetime.
249 marker: marker::ContravariantLifetime,
253 /// Unwraps the wrapped `*const libc::c_char` from the `CString` wrapper.
255 /// Any ownership of the buffer by the `CString` wrapper is
256 /// forgotten, meaning that the backing allocation of this
257 /// `CString` is not automatically freed if it owns the
258 /// allocation. In this case, a user of `.unwrap()` should ensure
259 /// the allocation is freed, to avoid leaking memory. You should
260 /// use libc's memory allocator in this case.
262 /// Prefer `.as_ptr()` when just retrieving a pointer to the
263 /// string data, as that does not relinquish ownership.
264 pub unsafe fn unwrap(mut self) -> *const libc::c_char {
// Clear the ownership flag so the destructor, which only frees owned buffers,
// leaves the allocation alone when `self` is dropped.
265 self.owns_buffer_ = false;
269 /// Return the number of bytes in the CString (not including the NUL
272 pub fn len(&self) -> uint {
// The length is not cached: each call runs `libc::strlen` over the buffer.
273 unsafe { libc::strlen(self.buf) as uint }
276 /// Returns whether there are no bytes in this string (i.e. `len()` is zero).
278 pub fn is_empty(&self) -> bool { self.len() == 0 }
// Destructor: releases the wrapped buffer, but only when this wrapper owns it.
281 impl Drop for CString {
// Non-owning wrappers (flag false) skip the free entirely.
283 if self.owns_buffer_ {
// Owned buffers are released with `libc::free`; the cast drops `const` and
// converts to the `c_void` pointer that `free` takes.
285 libc::free(self.buf as *mut libc::c_void)
// Formatting support for CStrings.
291 impl fmt::Show for CString {
292 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Format the contents without the trailing NUL, going through
// `String::from_utf8_lossy` so non-UTF-8 bytes do not prevent formatting.
293 String::from_utf8_lossy(self.as_bytes_no_nul()).fmt(f)
297 /// A generic trait for converting a value to a CString.
298 pub trait ToCStr for Sized? {
299 /// Copy the receiver into a CString.
303 /// Fails the task if the receiver has an interior null.
304 fn to_c_str(&self) -> CString;
306 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
307 unsafe fn to_c_str_unchecked(&self) -> CString;
309 /// Work with a temporary CString constructed from the receiver.
310 /// The provided `*const libc::c_char` will be freed immediately upon return.
315 /// extern crate libc;
318 /// let s = "PATH".with_c_str(|path| unsafe {
319 /// libc::getenv(path)
326 /// Fails the task if the receiver has an interior null.
328 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Default implementation: build a full CString via `to_c_str` and bind it to a local.
329 let c_str = self.to_c_str();
333 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
335 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
// Default implementation: same shape as `with_c_str`, using the unchecked conversion.
336 let c_str = self.to_c_str_unchecked();
// `ToCStr` for string slices: every method forwards to the `[u8]`
// implementation through `as_bytes()`.
341 impl ToCStr for str {
343 fn to_c_str(&self) -> CString {
344 self.as_bytes().to_c_str()
348 unsafe fn to_c_str_unchecked(&self) -> CString {
349 self.as_bytes().to_c_str_unchecked()
353 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
354 self.as_bytes().with_c_str(f)
358 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
359 self.as_bytes().with_c_str_unchecked(f)
// `ToCStr` for owned strings: every method forwards to the `[u8]`
// implementation through `as_bytes()`.
363 impl ToCStr for String {
365 fn to_c_str(&self) -> CString {
366 self.as_bytes().to_c_str()
370 unsafe fn to_c_str_unchecked(&self) -> CString {
371 self.as_bytes().to_c_str_unchecked()
375 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
376 self.as_bytes().with_c_str(f)
380 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
381 self.as_bytes().with_c_str_unchecked(f)
385 // The length of the stack allocated buffer for `vec.with_c_str()`
// The free function `with_c_str` uses its stack array only for inputs strictly
// shorter than this; anything else is converted with `to_c_str_unchecked`.
386 const BUF_LEN: uint = 128;
// `ToCStr` for byte slices: the implementation that the `str` and `String`
// impls forward to.
388 impl ToCStr for [u8] {
389 fn to_c_str(&self) -> CString {
// Copy first with the unchecked conversion, then run `check_for_null` over the copy.
390 let mut cs = unsafe { self.to_c_str_unchecked() };
391 check_for_null(self, cs.as_mut_ptr());
395 unsafe fn to_c_str_unchecked(&self) -> CString {
396 let self_len = self.len();
// One extra byte beyond the data for the NUL terminator.
397 let buf = libc::malloc(self_len as libc::size_t + 1) as *mut u8;
// A null return from `malloc` is routed to `::alloc::oom()`.
398 if buf.is_null() { ::alloc::oom() }
400 ptr::copy_memory(buf, self.as_ptr(), self_len);
// Write the terminator into the extra byte just past the copied data.
401 *buf.offset(self_len as int) = 0;
// `true`: the new CString owns the malloc'd buffer.
403 CString::new(buf as *const libc::c_char, true)
406 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
// `true` selects the checked path of the free function `with_c_str`.
407 unsafe { with_c_str(self, true, f) }
410 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
// `false` selects the unchecked path.
411 with_c_str(self, false, f)
// Blanket impl: a reference to any `ToCStr` type is itself `ToCStr`. The visible
// methods dereference (`**self`) and forward to the underlying implementation.
415 impl<'a, Sized? T: ToCStr> ToCStr for &'a T {
417 fn to_c_str(&self) -> CString {
422 unsafe fn to_c_str_unchecked(&self) -> CString {
423 (**self).to_c_str_unchecked()
427 fn with_c_str<T>(&self, f: |*const libc::c_char| -> T) -> T {
428 (**self).with_c_str(f)
432 unsafe fn with_c_str_unchecked<T>(&self, f: |*const libc::c_char| -> T) -> T {
433 (**self).with_c_str_unchecked(f)
437 // Unsafe function that handles possibly copying the &[u8] into a stack array.
// `checked` is `true` from `[u8]::with_c_str` and `false` from
// `[u8]::with_c_str_unchecked`.
438 unsafe fn with_c_str<T>(v: &[u8], checked: bool,
439 f: |*const libc::c_char| -> T) -> T {
// Strictly less than BUF_LEN -- presumably so one byte stays free for the NUL
// terminator (the line writing it is not visible here).
440 let c_str = if v.len() < BUF_LEN {
// The array starts uninitialized; the copy below fills the first `v.len()` bytes.
441 let mut buf: [u8, .. BUF_LEN] = mem::uninitialized();
442 slice::bytes::copy_memory(buf, v);
445 let buf = buf.as_mut_ptr();
447 check_for_null(v, buf as *mut libc::c_char);
// Short inputs: `f` is called directly on the stack buffer and its result is
// returned immediately.
450 return f(buf as *const libc::c_char)
// Otherwise: convert with `to_c_str_unchecked`, which allocates with `malloc`.
454 v.to_c_str_unchecked()
// Walks the first `v.len()` bytes of `buf`, one index of `v` at a time.
// NOTE(review): the test applied to each byte is not visible here; going by the
// name and the `ToCStr` docs, it presumably fails on an interior NUL -- confirm.
461 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
462 for i in range(0, v.len()) {
// Pointer to the i-th byte of the C buffer.
464 let p = buf.offset(i as int);
470 /// External iterator for a CString's bytes.
472 /// Use with the `std::iter` module.
473 pub struct CChars<'a> {
// Current position in the C string; `next` reads through it and advances it.
474 ptr: *const libc::c_char,
// Marker field carrying the `'a` lifetime, since the raw pointer carries none.
475 marker: marker::ContravariantLifetime<'a>,
// Yields the `c_char`s of the string one at a time.
478 impl<'a> Iterator<libc::c_char> for CChars<'a> {
479 fn next(&mut self) -> Option<libc::c_char> {
// Read the char at the current position.
// NOTE(review): the end-of-string branch is not visible here; the `test_iterator`
// assertions show `None` once the content is exhausted.
480 let ch = unsafe { *self.ptr };
// Advance to the following char for the next call.
484 self.ptr = unsafe { self.ptr.offset(1) };
490 /// Parses a C "multistring", eg windows env values or
491 /// the req->ptr result in a uv_fs_readdir() call.
493 /// Optionally, a `count` can be passed in, limiting the
494 /// parsing to only being done `count`-times.
496 /// The specified closure is invoked with each string that
497 /// is found, and the number of strings found is returned.
498 pub unsafe fn from_c_multistring(buf: *const libc::c_char,
500 f: |&CString|) -> uint {
// The cursor is kept as an integer address so it can be advanced with plain arithmetic.
502 let mut curr_ptr: uint = buf as uint;
// Split the optional `count` into a "limit is active" flag and the limit value.
504 let (limited_count, limit) = match count {
505 Some(limit) => (true, limit),
// Continue while the limit (if any) is not reached and the cursor is not on a
// NUL byte, i.e. the next string is non-empty.
508 while ((limited_count && ctr < limit) || !limited_count)
509 && *(curr_ptr as *const libc::c_char) != 0 as libc::c_char {
// Non-owning wrapper (`false`): dropping `cstr` does not free the caller's buffer.
510 let cstr = CString::new(curr_ptr as *const libc::c_char, false);
// Skip over this string and its NUL terminator to reach the next entry.
512 curr_ptr += cstr.len() + 1;
// Checks that `from_c_multistring` walks a double-NUL-terminated buffer, yields
// each string in order, and returns the number of strings found.
528 fn test_str_multistring_parsing() {
// Two strings, then an extra NUL that ends the multistring.
530 let input = b"zero\0one\0\0";
531 let ptr = input.as_ptr();
532 let expected = ["zero", "one"];
533 let mut it = expected.iter();
534 let result = from_c_multistring(ptr as *const libc::c_char, None, |c| {
535 let cbytes = c.as_bytes_no_nul();
536 assert_eq!(cbytes, it.next().unwrap().as_bytes());
538 assert_eq!(result, 2);
// Every expected string must have been consumed by the callback.
539 assert!(it.next().is_none());
// Checks `str::to_c_str` byte by byte for the empty string and for "hello".
544 fn test_str_to_c_str() {
545 let c_str = "".to_c_str();
// An empty string still yields a buffer whose first byte is the NUL terminator.
547 assert_eq!(*c_str.as_ptr().offset(0), 0);
550 let c_str = "hello".to_c_str();
551 let buf = c_str.as_ptr();
553 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
554 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
555 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
556 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
557 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
// The terminator sits directly after the last content byte.
558 assert_eq!(*buf.offset(5), 0);
// Checks `[u8]::to_c_str` byte by byte, including a slice with a non-UTF-8 byte.
563 fn test_vec_to_c_str() {
// `b` is declared on a line not visible here; the assertion below expects the
// buffer to start with the NUL terminator.
565 let c_str = b.to_c_str();
567 assert_eq!(*c_str.as_ptr().offset(0), 0);
570 let c_str = b"hello".to_c_str();
571 let buf = c_str.as_ptr();
573 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
574 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
575 assert_eq!(*buf.offset(2), 'l' as libc::c_char);
576 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
577 assert_eq!(*buf.offset(4), 'o' as libc::c_char);
578 assert_eq!(*buf.offset(5), 0);
// 0xFF is not valid UTF-8; the conversion must copy it through unchanged.
581 let c_str = b"foo\xFF".to_c_str();
582 let buf = c_str.as_ptr();
584 assert_eq!(*buf.offset(0), 'f' as libc::c_char);
585 assert_eq!(*buf.offset(1), 'o' as libc::c_char);
586 assert_eq!(*buf.offset(2), 'o' as libc::c_char);
// NOTE(review): the `as i8` cast assumes `libc::c_char` is `i8` on the target.
587 assert_eq!(*buf.offset(3), 0xffu8 as i8);
588 assert_eq!(*buf.offset(4), 0);
594 let c_str = "hello".to_c_str();
595 unsafe { libc::free(c_str.unwrap() as *mut libc::c_void) }
600 let c_str = "hello".to_c_str();
601 let len = unsafe { libc::strlen(c_str.as_ptr()) };
607 let c_str = "".to_c_str();
608 let mut iter = c_str.iter();
609 assert_eq!(iter.next(), None);
611 let c_str = "hello".to_c_str();
612 let mut iter = c_str.iter();
613 assert_eq!(iter.next(), Some('h' as libc::c_char));
614 assert_eq!(iter.next(), Some('e' as libc::c_char));
615 assert_eq!(iter.next(), Some('l' as libc::c_char));
616 assert_eq!(iter.next(), Some('l' as libc::c_char));
617 assert_eq!(iter.next(), Some('o' as libc::c_char));
618 assert_eq!(iter.next(), None);
// The checked conversion must fail for a string with an interior NUL; running it
// under `task::try` turns that failure into an `Err` the test can assert on.
622 fn test_to_c_str_fail() {
623 assert!(task::try(proc() { "he\x00llo".to_c_str() }).is_err());
// The unchecked conversion must copy every byte verbatim, including an interior
// NUL, and still append a terminating NUL.
627 fn test_to_c_str_unchecked() {
629 let c_string = "he\x00llo".to_c_str_unchecked();
630 let buf = c_string.as_ptr();
631 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
632 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
// The interior NUL from the input is preserved.
633 assert_eq!(*buf.offset(2), 0);
634 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
635 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
636 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
// The appended terminator follows the six input bytes.
637 assert_eq!(*buf.offset(6), 0);
643 let c_str = "hello".to_c_str();
644 assert_eq!(c_str.as_bytes(), b"hello\0");
645 let c_str = "".to_c_str();
646 assert_eq!(c_str.as_bytes(), b"\0");
647 let c_str = b"foo\xFF".to_c_str();
648 assert_eq!(c_str.as_bytes(), b"foo\xFF\0");
// `as_bytes_no_nul` must return exactly the content bytes, without the terminator.
652 fn test_as_bytes_no_nul() {
653 let c_str = "hello".to_c_str();
654 assert_eq!(c_str.as_bytes_no_nul(), b"hello");
655 let c_str = "".to_c_str();
// `exp` is declared on a line not visible here.
657 assert_eq!(c_str.as_bytes_no_nul(), exp);
// Non-UTF-8 content comes back unchanged.
658 let c_str = b"foo\xFF".to_c_str();
659 assert_eq!(c_str.as_bytes_no_nul(), b"foo\xFF");
664 let c_str = "hello".to_c_str();
665 assert_eq!(c_str.as_str(), Some("hello"));
666 let c_str = "".to_c_str();
667 assert_eq!(c_str.as_str(), Some(""));
668 let c_str = b"foo\xFF".to_c_str();
669 assert_eq!(c_str.as_str(), None);
675 let _c_str = unsafe { CString::new(ptr::null(), false) };
680 let a = "hello".to_c_str();
// Exercises cloning a non-owning CString and reading both the original and the
// clone afterwards. NOTE(review): several lines are not visible here; the
// comments below cover only what is shown.
686 fn test_clone_noleak() {
687 fn foo(f: |c: &CString|) {
688 let s = "test".to_string();
689 let c = s.to_c_str();
690 // give the closure a non-owned CString
691 let mut c_ = unsafe { CString::new(c.as_ptr(), false) };
693 // muck with the buffer for later printing
// `c_` aliases `c`'s buffer, so this write goes through to the buffer owned by `c`.
694 unsafe { *c_.as_mut_ptr() = 'X' as libc::c_char }
697 let mut c_: Option<CString> = None;
// `clone` deep-copies, so the stored value owns its own buffer.
699 c_ = Some(c.clone());
701 // force a copy, reading the memory
702 c.as_bytes().to_vec();
704 let c_ = c_.unwrap();
705 // force a copy, reading the memory
706 c_.as_bytes().to_vec();
// Benchmark helper: walks `s` and `c_str` in lockstep, pairing each byte of `s`
// with the byte at the same offset behind `c_str` (presumably asserting they are
// equal; the assertion macro line is not visible here).
717 fn check(s: &str, c_str: *const libc::c_char) {
718 let s_buf = s.as_ptr();
719 for i in range(0, s.len()) {
722 *s_buf.offset(i as int) as libc::c_char,
723 *c_str.offset(i as int));
// Benchmark inputs of increasing length: a single word, one sentence, and a
// multi-line text.
728 static S_SHORT: &'static str = "Mary";
729 static S_MEDIUM: &'static str = "Mary had a little lamb";
730 static S_LONG: &'static str = "\
731 Mary had a little lamb, Little lamb
732 Mary had a little lamb, Little lamb
733 Mary had a little lamb, Little lamb
734 Mary had a little lamb, Little lamb
735 Mary had a little lamb, Little lamb
736 Mary had a little lamb, Little lamb";
// Shared driver for the `to_c_str` benchmarks: converts `s` and passes the
// resulting pointer to `check`.
738 fn bench_to_string(b: &mut Bencher, s: &str) {
740 let c_str = s.to_c_str();
741 check(s, c_str.as_ptr());
// `to_c_str` benchmark on the short input.
746 fn bench_to_c_str_short(b: &mut Bencher) {
747 bench_to_string(b, S_SHORT)
// `to_c_str` benchmark on the medium input.
751 fn bench_to_c_str_medium(b: &mut Bencher) {
752 bench_to_string(b, S_MEDIUM)
// `to_c_str` benchmark on the long input.
756 fn bench_to_c_str_long(b: &mut Bencher) {
757 bench_to_string(b, S_LONG)
// Shared driver for the `to_c_str_unchecked` benchmarks: converts `s` with the
// unchecked conversion and passes the resulting pointer to `check`.
760 fn bench_to_c_str_unchecked(b: &mut Bencher, s: &str) {
762 let c_str = unsafe { s.to_c_str_unchecked() };
763 check(s, c_str.as_ptr())
// `to_c_str_unchecked` benchmark on the short input.
768 fn bench_to_c_str_unchecked_short(b: &mut Bencher) {
769 bench_to_c_str_unchecked(b, S_SHORT)
// `to_c_str_unchecked` benchmark on the medium input.
773 fn bench_to_c_str_unchecked_medium(b: &mut Bencher) {
774 bench_to_c_str_unchecked(b, S_MEDIUM)
// `to_c_str_unchecked` benchmark on the long input.
778 fn bench_to_c_str_unchecked_long(b: &mut Bencher) {
779 bench_to_c_str_unchecked(b, S_LONG)
// Shared driver for the `with_c_str` benchmarks: runs `check` inside the
// closure, on the pointer `with_c_str` provides.
782 fn bench_with_c_str(b: &mut Bencher, s: &str) {
784 s.with_c_str(|c_str_buf| check(s, c_str_buf))
// `with_c_str` benchmark on the short input.
789 fn bench_with_c_str_short(b: &mut Bencher) {
790 bench_with_c_str(b, S_SHORT)
// `with_c_str` benchmark on the medium input.
794 fn bench_with_c_str_medium(b: &mut Bencher) {
795 bench_with_c_str(b, S_MEDIUM)
// `with_c_str` benchmark on the long input.
799 fn bench_with_c_str_long(b: &mut Bencher) {
800 bench_with_c_str(b, S_LONG)
// Shared driver for the `with_c_str_unchecked` benchmarks: runs `check` inside
// the closure, on the pointer `with_c_str_unchecked` provides.
803 fn bench_with_c_str_unchecked(b: &mut Bencher, s: &str) {
806 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
// `with_c_str_unchecked` benchmark on the short input.
812 fn bench_with_c_str_unchecked_short(b: &mut Bencher) {
813 bench_with_c_str_unchecked(b, S_SHORT)
// `with_c_str_unchecked` benchmark on the medium input.
817 fn bench_with_c_str_unchecked_medium(b: &mut Bencher) {
818 bench_with_c_str_unchecked(b, S_MEDIUM)
// `with_c_str_unchecked` benchmark on the long input.
822 fn bench_with_c_str_unchecked_long(b: &mut Bencher) {
823 bench_with_c_str_unchecked(b, S_LONG)