1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 C-string manipulation and management
15 This module provides the basic methods for creating and manipulating
16 null-terminated strings for use with FFI calls (back to C). Most C APIs require
17 that the string being passed to them is null-terminated, and by default Rust's
18 string types are *not* null-terminated.
20 The other problem with translating Rust strings to C strings is that Rust
21 strings can validly contain a null-byte in the middle of the string (0 is a
22 valid unicode codepoint). This means that not all Rust strings can actually be
23 translated to C strings.
25 # Creation of a C string
27 A C string is managed through the `CString` type defined in this module. It
28 "owns" the internal buffer of characters and will automatically deallocate the
29 buffer when the string is dropped. The `ToCStr` trait is implemented for `&str`
30 and `&[u8]`, but the conversions can fail due to some of the limitations explained above.
33 This also means that currently whenever a C string is created, an allocation
34 must be performed to place the data elsewhere (the lifetime of the C string is
35 not tied to the lifetime of the original string/data buffer). If C strings are
36 heavily used in applications, then caching may be advisable to prevent
37 unnecessary amounts of allocations.
39 An example of creating and using a C string would be:
44 fn puts(s: *libc::c_char);
47 let my_string = "Hello, world!";
49 // Allocate the C string with an explicit local that owns the string. The
50 // `c_buffer` pointer will be deallocated when `my_c_string` goes out of scope.
51 let my_c_string = my_string.to_c_str();
52 my_c_string.with_ref(|c_buffer| {
53 unsafe { puts(c_buffer); }
56 // Don't save off the allocation of the C string, the `c_buffer` will be
57 // deallocated when this block returns!
58 my_string.with_c_str(|c_buffer| {
59 unsafe { puts(c_buffer); }
66 use container::Container;
67 use iter::{Iterator, range};
70 use option::{Option, Some, None};
75 use vec::{CopyableVector, ImmutableVector, MutableVector};
77 use unstable::intrinsics;
79 /// Resolution options for the `null_byte` condition
// A handler for the `null_byte` condition returns one of these values to say
// how an interior null byte should be dealt with.
80 pub enum NullByteResolution {
81 /// Truncate at the null byte
// NOTE(review): the variant that the doc line above describes is not visible
// in this listing.
83 /// Use a replacement byte
// `check_for_null` writes the carried byte over the offending position.
84 ReplaceWith(libc::c_char)
// Condition raised by `check_for_null`, which passes an owned copy of the
// input bytes (`v.to_owned()`). The handler answers with a
// `NullByteResolution`.
88 // This should be &[u8] but there's a lifetime issue (#5370).
89 pub null_byte: (~[u8]) -> NullByteResolution;
92 /// The representation of a C String.
94 /// This structure wraps a `*libc::c_char`, and will automatically free the
95 /// memory it is pointing to when it goes out of scope.
// The pointer may be null; `with_ref`, `with_mut_ref` and `as_bytes` fail on a
// null pointer, while `as_str` returns `None`.
97 priv buf: *libc::c_char,
// When true, the `Drop` impl releases `buf` with `libc::free`; when false the
// buffer is left alone.
98 priv owns_buffer_: bool,
102 /// Create a C String from a pointer.
// The pointer is stored as-is, with no copy and no validation. `owns_buffer`
// decides whether dropping the returned value frees the pointer.
103 pub unsafe fn new(buf: *libc::c_char, owns_buffer: bool) -> CString {
104 CString { buf: buf, owns_buffer_: owns_buffer }
107 /// Unwraps the wrapped `*libc::c_char` from the `CString` wrapper.
108 /// Any ownership of the buffer by the `CString` wrapper is forgotten.
109 pub unsafe fn unwrap(self) -> *libc::c_char {
110 let mut c_str = self;
// Clearing the flag keeps the `Drop` impl from freeing the buffer, so a
// previously owned buffer becomes the caller's to release.
111 c_str.owns_buffer_ = false;
115 /// Calls a closure with a reference to the underlying `*libc::c_char`.
119 /// Fails if the CString is null.
120 pub fn with_ref<T>(&self, f: |*libc::c_char| -> T) -> T {
// Guard: fail on a null pointer before anything else runs.
121 if self.buf.is_null() { fail!("CString is null!"); }
125 /// Calls a closure with a mutable reference to the underlying `*libc::c_char`.
129 /// Fails if the CString is null.
130 pub fn with_mut_ref<T>(&mut self, f: |*mut libc::c_char| -> T) -> T {
131 if self.buf.is_null() { fail!("CString is null!"); }
// The stored `*libc::c_char` is reinterpreted as `*mut libc::c_char` for the
// closure; the buffer itself is not copied.
132 f(unsafe { cast::transmute_mut_unsafe(self.buf) })
135 /// Returns true if the CString is null.
136 pub fn is_null(&self) -> bool {
140 /// Returns true if the CString is not null.
141 pub fn is_not_null(&self) -> bool {
142 self.buf.is_not_null()
145 /// Returns whether or not the `CString` owns the buffer.
146 pub fn owns_buffer(&self) -> bool {
150 /// Converts the CString into a `&[u8]` without copying.
154 /// Fails if the CString is null.
156 pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
157 if self.buf.is_null() { fail!("CString is null!"); }
// The slice is built from a (pointer, length) pair. The length is
// `len() + 1`, so the returned bytes include the trailing NUL.
159 cast::transmute((self.buf, self.len() + 1))
163 /// Converts the CString into a `&str` without copying.
164 /// Returns None if the CString is not UTF-8 or is null.
166 pub fn as_str<'a>(&'a self) -> Option<&'a str> {
// Return early on null because `as_bytes` would fail instead of returning.
167 if self.buf.is_null() { return None; }
168 let buf = self.as_bytes();
169 let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL
170 str::from_utf8_slice_opt(buf)
173 /// Return a CString iterator.
// NOTE(review): no null guard is visible for this method, unlike `with_ref`
// and `as_bytes`. If none exists, iterating a null CString makes
// `CStringIterator::next` read through a null pointer. Confirm against the
// full source.
174 pub fn iter<'a>(&'a self) -> CStringIterator<'a> {
// The `lifetime` field is produced by transmuting the raw pointer into a
// reference typed with `'a` (see the FIXME on the struct field).
177 lifetime: unsafe { cast::transmute(self.buf) },
// Frees the wrapped buffer with `libc::free`, but only when this wrapper owns
// it. Buffers created with `owns_buffer == false`, or released through
// `unwrap`, are left untouched.
182 impl Drop for CString {
184 if self.owns_buffer_ {
186 libc::free(self.buf as *libc::c_void)
// `len()` is the position of the first zero byte counted from `buf`, so the
// terminator itself is not included.
// NOTE(review): no null check is visible here; `as_bytes` checks for null
// before calling `len()`. Confirm that other callers do the same.
192 impl Container for CString {
194 fn len(&self) -> uint {
196 ptr::position(self.buf, |c| *c == 0)
201 /// A generic trait for converting a value to a CString.
// Implementors supply the two `to_c_str*` conversions. The two `with_c_str*`
// helpers have default bodies built on top of them.
203 /// Copy the receiver into a CString.
207 /// Raises the `null_byte` condition if the receiver has an interior null.
208 fn to_c_str(&self) -> CString;
210 /// Unsafe variant of `to_c_str()` that doesn't check for nulls.
211 unsafe fn to_c_str_unchecked(&self) -> CString;
213 /// Work with a temporary CString constructed from the receiver.
214 /// The provided `*libc::c_char` will be freed immediately upon return.
219 /// let s = "PATH".with_c_str(|path| libc::getenv(path))
224 /// Raises the `null_byte` condition if the receiver has an interior null.
226 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
// Default: build an owned CString and lend its pointer to `f`. The temporary
// is dropped once the call finishes.
227 self.to_c_str().with_ref(f)
230 /// Unsafe variant of `with_c_str()` that doesn't check for nulls.
232 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
// Same shape as `with_c_str`, but using the unchecked conversion.
233 self.to_c_str_unchecked().with_ref(f)
// String slices have no conversion logic of their own: every method forwards
// to the `&[u8]` implementation through `as_bytes()`.
237 impl<'self> ToCStr for &'self str {
239 fn to_c_str(&self) -> CString {
240 self.as_bytes().to_c_str()
244 unsafe fn to_c_str_unchecked(&self) -> CString {
245 self.as_bytes().to_c_str_unchecked()
// The `with_c_str*` defaults are overridden so that strings also go through
// the byte-slice versions rather than the trait's default bodies.
249 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
250 self.as_bytes().with_c_str(f)
254 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
255 self.as_bytes().with_c_str_unchecked(f)
259 // The length of the stack allocated buffer for `vec.with_c_str()`
// The free function `with_c_str` uses the fixed-size buffer only when
// `v.len() < BUF_LEN`. The strict comparison leaves at least one spare byte
// after the copied data; inputs of `BUF_LEN` bytes or more take the
// `to_c_str*` path instead.
260 static BUF_LEN: uint = 128;
// The byte-slice implementation does the real work: it copies the bytes into
// a `malloc`ed buffer and appends the terminating zero.
262 impl<'self> ToCStr for &'self [u8] {
263 fn to_c_str(&self) -> CString {
// Copy first, then scan the copy. A `ReplaceWith` answer from the condition
// handler is patched directly into the new buffer.
264 let mut cs = unsafe { self.to_c_str_unchecked() };
265 cs.with_mut_ref(|buf| check_for_null(*self, buf));
269 unsafe fn to_c_str_unchecked(&self) -> CString {
270 self.as_imm_buf(|self_buf, self_len| {
// One extra byte is requested for the terminator.
271 let buf = libc::malloc(self_len as libc::size_t + 1) as *mut u8;
// NOTE(review): the condition guarding this failure is not visible in this
// listing; presumably it tests `buf` for null after `malloc`.
273 fail!("failed to allocate memory!");
276 ptr::copy_memory(buf, self_buf, self_len);
// Write the terminating zero just past the copied bytes.
277 *ptr::mut_offset(buf, self_len as int) = 0;
// `true`: the new CString owns the buffer, so its `Drop` impl frees it.
279 CString::new(buf as *libc::c_char, true)
// Both `with_c_str*` methods delegate to the free function `with_c_str`; the
// boolean selects whether interior nulls are checked.
283 fn with_c_str<T>(&self, f: |*libc::c_char| -> T) -> T {
284 unsafe { with_c_str(*self, true, f) }
287 unsafe fn with_c_str_unchecked<T>(&self, f: |*libc::c_char| -> T) -> T {
288 with_c_str(*self, false, f)
292 // Unsafe function that handles possibly copying the &[u8] into a stack array.
// `checked` selects whether interior nulls are checked. `f` receives a pointer
// either into the local buffer or into a temporary CString.
293 unsafe fn with_c_str<T>(v: &[u8], checked: bool, f: |*libc::c_char| -> T) -> T {
// Short inputs: use a local fixed-size array instead of a CString.
294 if v.len() < BUF_LEN {
// The array starts uninitialised; only the first `v.len()` bytes are filled
// by the copy below.
295 let mut buf: [u8, .. BUF_LEN] = intrinsics::uninit();
296 vec::bytes::copy_memory(buf, v, v.len());
// NOTE(review): the statement that writes the terminating zero and the
// `if checked` test around `check_for_null` are not visible in this listing.
299 buf.as_mut_buf(|buf, _| {
301 check_for_null(v, buf as *mut libc::c_char);
304 f(buf as *libc::c_char)
// Long inputs: fall back to a temporary CString, using the checked or the
// unchecked conversion.
307 v.to_c_str().with_ref(f)
309 v.to_c_str_unchecked().with_ref(f)
// Walks the first `v.len()` bytes of `buf` (the copy of `v`) and raises the
// `null_byte` condition, resolving the byte according to the handler's answer.
// NOTE(review): the test that selects zero bytes and the `Truncate` arm are
// not visible in this listing.
314 fn check_for_null(v: &[u8], buf: *mut libc::c_char) {
315 for i in range(0, v.len()) {
317 let p = buf.offset(i as int);
// The handler receives an owned copy of the whole original input, not just
// the offending byte.
319 match null_byte::cond.raise(v.to_owned()) {
// Overwrite the byte at `p` with the handler's replacement.
321 ReplaceWith(c) => *p = c
328 /// External iterator for a CString's bytes.
330 /// Use with the `std::iter` module.
331 pub struct CStringIterator<'self> {
// Cursor: the next character to be read by `next`.
332 priv ptr: *libc::c_char,
// Set by `CString::iter` from the buffer pointer; it gives the struct its
// `'self` lifetime parameter.
333 priv lifetime: &'self libc::c_char, // FIXME: #5922
// Yields one `libc::c_char` per call by reading through the cursor and then
// stepping it forward by one element.
// NOTE(review): the end-of-string test is not visible in this listing; the
// tests expect `None` once the terminator is reached.
336 impl<'self> Iterator<libc::c_char> for CStringIterator<'self> {
337 fn next(&mut self) -> Option<libc::c_char> {
338 let ch = unsafe { *self.ptr };
342 self.ptr = unsafe { ptr::offset(self.ptr, 1) };
348 /// Parses a C "multistring", e.g. Windows env values or
349 /// the req->ptr result in a uv_fs_readdir() call.
351 /// Optionally, a `count` can be passed in, limiting the
352 /// parsing to only being done `count`-times.
354 /// The specified closure is invoked with each string that
355 /// is found, and the number of strings found is returned.
356 pub unsafe fn from_c_multistring(buf: *libc::c_char,
358 f: |&CString|) -> uint {
// The cursor is kept as an integer address so it can be advanced with plain
// addition.
360 let mut curr_ptr: uint = buf as uint;
// Split the optional limit into an "is limited" flag plus the limit value.
362 let (limited_count, limit) = match count {
363 Some(limit) => (true, limit),
// Stop when the limit is reached (if one was given) or when the cursor sits
// on a zero byte, i.e. an empty string.
366 while ((limited_count && ctr < limit) || !limited_count)
367 && *(curr_ptr as *libc::c_char) != 0 as libc::c_char {
// `false`: the temporary CString does not own the memory, so the caller's
// buffer is not freed when it is dropped.
368 let cstr = CString::new(curr_ptr as *libc::c_char, false);
// Skip past this string and its terminator.
370 curr_ptr += cstr.len() + 1;
381 use option::{Some, None};
// Two strings followed by an empty string: the parser must report exactly
// "zero" and "one".
385 fn test_str_multistring_parsing() {
387 let input = bytes!("zero", "\x00", "one", "\x00", "\x00");
388 let ptr = vec::raw::to_ptr(input);
389 let expected = ["zero", "one"];
390 let mut it = expected.iter();
391 let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
// `as_bytes()` includes the trailing NUL; `slice_to(c.len())` drops it.
392 let cbytes = c.as_bytes().slice_to(c.len());
393 assert_eq!(cbytes, it.next().unwrap().as_bytes());
395 assert_eq!(result, 2);
396 assert!(it.next().is_none());
// `&str` conversion: the empty string yields a lone terminator, and "hello"
// is copied byte for byte and then terminated.
401 fn test_str_to_c_str() {
402 "".to_c_str().with_ref(|buf| {
404 assert_eq!(*ptr::offset(buf, 0), 0);
408 "hello".to_c_str().with_ref(|buf| {
410 assert_eq!(*ptr::offset(buf, 0), 'h' as libc::c_char);
411 assert_eq!(*ptr::offset(buf, 1), 'e' as libc::c_char);
412 assert_eq!(*ptr::offset(buf, 2), 'l' as libc::c_char);
413 assert_eq!(*ptr::offset(buf, 3), 'l' as libc::c_char);
414 assert_eq!(*ptr::offset(buf, 4), 'o' as libc::c_char);
415 assert_eq!(*ptr::offset(buf, 5), 0);
// The same checks for byte slices, plus a slice holding the byte 0xff, which
// must be copied through unchanged.
421 fn test_vec_to_c_str() {
423 b.to_c_str().with_ref(|buf| {
425 assert_eq!(*ptr::offset(buf, 0), 0);
429 let _ = bytes!("hello").to_c_str().with_ref(|buf| {
431 assert_eq!(*ptr::offset(buf, 0), 'h' as libc::c_char);
432 assert_eq!(*ptr::offset(buf, 1), 'e' as libc::c_char);
433 assert_eq!(*ptr::offset(buf, 2), 'l' as libc::c_char);
434 assert_eq!(*ptr::offset(buf, 3), 'l' as libc::c_char);
435 assert_eq!(*ptr::offset(buf, 4), 'o' as libc::c_char);
436 assert_eq!(*ptr::offset(buf, 5), 0);
440 let _ = bytes!("foo", 0xff).to_c_str().with_ref(|buf| {
442 assert_eq!(*ptr::offset(buf, 0), 'f' as libc::c_char);
443 assert_eq!(*ptr::offset(buf, 1), 'o' as libc::c_char);
444 assert_eq!(*ptr::offset(buf, 2), 'o' as libc::c_char);
445 assert_eq!(*ptr::offset(buf, 3), 0xff);
446 assert_eq!(*ptr::offset(buf, 4), 0);
// (test header not visible in this listing) A non-owning CString wrapping a
// null pointer reports itself as null.
453 let c_str = unsafe { CString::new(ptr::null(), false) };
454 assert!(c_str.is_null());
455 assert!(!c_str.is_not_null());
// (test header not visible in this listing) `unwrap` hands the raw buffer
// back, and the test frees it explicitly with `libc::free`.
460 let c_str = "hello".to_c_str();
461 unsafe { libc::free(c_str.unwrap() as *libc::c_void) }
// (test header not visible in this listing) The pointer passed to `with_ref`
// is usable by C functions such as `strlen`.
466 let c_str = "hello".to_c_str();
467 let len = unsafe { c_str.with_ref(|buf| libc::strlen(buf)) };
468 assert!(!c_str.is_null());
469 assert!(c_str.is_not_null());
// `with_ref` on a null CString hits its null guard and fails.
475 fn test_with_ref_empty_fail() {
476 let c_str = unsafe { CString::new(ptr::null(), false) };
477 c_str.with_ref(|_| ());
// (test header not visible in this listing) The iterator yields every
// character before the terminator and then `None`.
482 let c_str = "".to_c_str();
483 let mut iter = c_str.iter();
484 assert_eq!(iter.next(), None);
486 let c_str = "hello".to_c_str();
487 let mut iter = c_str.iter();
488 assert_eq!(iter.next(), Some('h' as libc::c_char));
489 assert_eq!(iter.next(), Some('e' as libc::c_char));
490 assert_eq!(iter.next(), Some('l' as libc::c_char));
491 assert_eq!(iter.next(), Some('l' as libc::c_char));
492 assert_eq!(iter.next(), Some('o' as libc::c_char));
493 assert_eq!(iter.next(), None);
// Interior null handling through the `null_byte` condition.
497 fn test_to_c_str_fail() {
498 use c_str::null_byte::cond;
500 let mut error_happened = false;
// The handler must receive the full original bytes, including the interior
// null.
502 assert_eq!(err, bytes!("he", 0, "llo").to_owned())
503 error_happened = true;
505 }).inside(|| "he\x00llo".to_c_str());
506 assert!(error_happened);
// With `ReplaceWith('?')` the interior null becomes '?' and the string keeps
// its full length.
509 ReplaceWith('?' as libc::c_char)
510 }).inside(|| "he\x00llo".to_c_str()).with_ref(|buf| {
512 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
513 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
514 assert_eq!(*buf.offset(2), '?' as libc::c_char);
515 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
516 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
517 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
518 assert_eq!(*buf.offset(6), 0);
// The unchecked conversion copies the interior null verbatim and still
// appends the terminator.
524 fn test_to_c_str_unchecked() {
526 "he\x00llo".to_c_str_unchecked().with_ref(|buf| {
527 assert_eq!(*buf.offset(0), 'h' as libc::c_char);
528 assert_eq!(*buf.offset(1), 'e' as libc::c_char);
529 assert_eq!(*buf.offset(2), 0);
530 assert_eq!(*buf.offset(3), 'l' as libc::c_char);
531 assert_eq!(*buf.offset(4), 'l' as libc::c_char);
532 assert_eq!(*buf.offset(5), 'o' as libc::c_char);
533 assert_eq!(*buf.offset(6), 0);
// (test header not visible in this listing) `as_bytes` returns the contents
// including the trailing zero byte.
540 let c_str = "hello".to_c_str();
541 assert_eq!(c_str.as_bytes(), bytes!("hello", 0));
542 let c_str = "".to_c_str();
543 assert_eq!(c_str.as_bytes(), bytes!(0));
544 let c_str = bytes!("foo", 0xff).to_c_str();
545 assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
// NOTE(review): the statement exercising the null CString is not visible in
// this listing; by the test's name it is presumably `as_bytes`, which fails on
// a null CString.
550 fn test_as_bytes_fail() {
551 let c_str = unsafe { CString::new(ptr::null(), false) };
// (test header not visible in this listing) `as_str` yields the text without
// the terminator, and `None` for the 0xff input and for a null CString.
557 let c_str = "hello".to_c_str();
558 assert_eq!(c_str.as_str(), Some("hello"));
559 let c_str = "".to_c_str();
560 assert_eq!(c_str.as_str(), Some(""));
561 let c_str = bytes!("foo", 0xff).to_c_str();
562 assert_eq!(c_str.as_str(), None);
563 let c_str = unsafe { CString::new(ptr::null(), false) };
564 assert_eq!(c_str.as_str(), None);
574 use extra::test::BenchHarness;
// Shared helper: walks `s` and `c_str` in lockstep over the `s_len` bytes of
// `s`.
// NOTE(review): the call that consumes the two byte expressions below is not
// visible in this listing; presumably an equality assertion.
577 fn check(s: &str, c_str: *libc::c_char) {
578 s.as_imm_buf(|s_buf, s_len| {
579 for i in range(0, s_len) {
582 *ptr::offset(s_buf, i as int) as libc::c_char,
583 *ptr::offset(c_str, i as int));
// Three input sizes. `s_short` and `s_medium` are shorter than `BUF_LEN`
// (128), so `with_c_str` takes the fixed-buffer path; `s_long` is longer and
// takes the `to_c_str*` path.
589 static s_short: &'static str = "Mary";
590 static s_medium: &'static str = "Mary had a little lamb";
591 static s_long: &'static str = "\
592 Mary had a little lamb, Little lamb
593 Mary had a little lamb, Little lamb
594 Mary had a little lamb, Little lamb
595 Mary had a little lamb, Little lamb
596 Mary had a little lamb, Little lamb
597 Mary had a little lamb, Little lamb";
// Checked conversion `to_c_str`, then verification through `with_ref`.
599 fn bench_to_str(bh: &mut BenchHarness, s: &str) {
601 let c_str = s.to_c_str();
602 c_str.with_ref(|c_str_buf| check(s, c_str_buf))
607 fn bench_to_c_str_short(bh: &mut BenchHarness) {
608 bench_to_str(bh, s_short)
612 fn bench_to_c_str_medium(bh: &mut BenchHarness) {
613 bench_to_str(bh, s_medium)
617 fn bench_to_c_str_long(bh: &mut BenchHarness) {
618 bench_to_str(bh, s_long)
// The same measurement using the unchecked conversion.
621 fn bench_to_c_str_unchecked(bh: &mut BenchHarness, s: &str) {
623 let c_str = unsafe { s.to_c_str_unchecked() };
624 c_str.with_ref(|c_str_buf| check(s, c_str_buf))
629 fn bench_to_c_str_unchecked_short(bh: &mut BenchHarness) {
630 bench_to_c_str_unchecked(bh, s_short)
634 fn bench_to_c_str_unchecked_medium(bh: &mut BenchHarness) {
635 bench_to_c_str_unchecked(bh, s_medium)
639 fn bench_to_c_str_unchecked_long(bh: &mut BenchHarness) {
640 bench_to_c_str_unchecked(bh, s_long)
// `with_c_str`, which uses the fixed-size buffer for inputs shorter than
// `BUF_LEN`.
643 fn bench_with_c_str(bh: &mut BenchHarness, s: &str) {
645 s.with_c_str(|c_str_buf| check(s, c_str_buf))
650 fn bench_with_c_str_short(bh: &mut BenchHarness) {
651 bench_with_c_str(bh, s_short)
655 fn bench_with_c_str_medium(bh: &mut BenchHarness) {
656 bench_with_c_str(bh, s_medium)
660 fn bench_with_c_str_long(bh: &mut BenchHarness) {
661 bench_with_c_str(bh, s_long)
// The unchecked flavour of the `with_c_str` benchmark.
664 fn bench_with_c_str_unchecked(bh: &mut BenchHarness, s: &str) {
667 s.with_c_str_unchecked(|c_str_buf| check(s, c_str_buf))
673 fn bench_with_c_str_unchecked_short(bh: &mut BenchHarness) {
674 bench_with_c_str_unchecked(bh, s_short)
678 fn bench_with_c_str_unchecked_medium(bh: &mut BenchHarness) {
679 bench_with_c_str_unchecked(bh, s_medium)
683 fn bench_with_c_str_unchecked_long(bh: &mut BenchHarness) {
684 bench_with_c_str_unchecked(bh, s_long)