library/std/src/sys/windows/stdio.rs

   1 #![unstable(issue = "none", feature = "windows_stdio")]
   2
   3 use crate::char::decode_utf16;
   4 use crate::cmp;
   5 use crate::io;
   6 use crate::os::windows::io::{FromRawHandle, IntoRawHandle};
   7 use crate::ptr;
   8 use crate::str;
   9 use crate::sys::c;
  10 use crate::sys::cvt;
  11 use crate::sys::handle::Handle;
  12 use core::str::utf8_char_width;
  13
// Don't cache handles but get them fresh for every read/write. This allows us to track changes to
// the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
/// Reader state for the process's standard input.
pub struct Stdin {
    /// High surrogate left over from a previous console read, or 0 when none is pending
    /// (see `read_u16s_fixup_surrogates`).
    surrogate: u16,
    /// UTF-8 bytes that were decoded from console input but did not fit into the caller's
    /// buffer yet; they are handed out first by the next `read`.
    incomplete_utf8: IncompleteUtf8,
}
  20
/// Writer state for the process's standard output.
pub struct Stdout {
    /// Leading bytes of a UTF-8 sequence that a previous console write received only in part;
    /// the free function `write` completes the sequence with bytes from later calls.
    incomplete_utf8: IncompleteUtf8,
}
  24
/// Writer state for the process's standard error.
pub struct Stderr {
    /// Leading bytes of a UTF-8 sequence that a previous console write received only in part;
    /// the free function `write` completes the sequence with bytes from later calls.
    incomplete_utf8: IncompleteUtf8,
}
  28
  29 struct IncompleteUtf8 {
  30     bytes: [u8; 4],
  31     len: u8,
  32 }
  33
  34 impl IncompleteUtf8 {
  35     // Implemented for use in Stdin::read.
  36     fn read(&mut self, buf: &mut [u8]) -> usize {
  37         // Write to buffer until the buffer is full or we run out of bytes.
  38         let to_write = cmp::min(buf.len(), self.len as usize);
  39         buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
  40
  41         // Rotate the remaining bytes if not enough remaining space in buffer.
  42         if usize::from(self.len) > buf.len() {
  43             self.bytes.copy_within(to_write.., 0);
  44             self.len -= to_write as u8;
  45         } else {
  46             self.len = 0;
  47         }
  48
  49         to_write
  50     }
  51 }
  52
// Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
// #13304 for details).
//
// From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the
// process that is 64 KB in size. The maximum size of the buffer will depend on heap usage."
//
// We choose the cap at 8 KiB because libuv does the same, and it seems to be acceptable so far.
//
// The value is a byte count: the UTF-16 scratch buffers in this file hold `MAX_BUFFER_SIZE / 2`
// `u16`s.
const MAX_BUFFER_SIZE: usize = 8192;
  61
// The standard buffer size of BufReader for Stdin should be able to hold 3x more bytes than there
// are `u16`'s in MAX_BUFFER_SIZE. This ensures the read data can always be completely decoded from
// UTF-16 to UTF-8 (one `u16` expands to at most 3 bytes of UTF-8).
pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3;
  66
  67 pub fn get_handle(handle_id: c::DWORD) -> io::Result<c::HANDLE> {
  68     let handle = unsafe { c::GetStdHandle(handle_id) };
  69     if handle == c::INVALID_HANDLE_VALUE {
  70         Err(io::Error::last_os_error())
  71     } else if handle.is_null() {
  72         Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32))
  73     } else {
  74         Ok(handle)
  75     }
  76 }
  77
  78 fn is_console(handle: c::HANDLE) -> bool {
  79     // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported
  80     // mode). This will only detect Windows Console, not other terminals connected to a pipe like
  81     // MSYS. Which is exactly what we need, as only Windows Console needs a conversion to UTF-16.
  82     let mut mode = 0;
  83     unsafe { c::GetConsoleMode(handle, &mut mode) != 0 }
  84 }
  85
  86 fn write(
  87     handle_id: c::DWORD,
  88     data: &[u8],
  89     incomplete_utf8: &mut IncompleteUtf8,
  90 ) -> io::Result<usize> {
  91     if data.is_empty() {
  92         return Ok(0);
  93     }
  94
  95     let handle = get_handle(handle_id)?;
  96     if !is_console(handle) {
  97         unsafe {
  98             let handle = Handle::from_raw_handle(handle);
  99             let ret = handle.write(data);
 100             handle.into_raw_handle(); // Don't close the handle
 101             return ret;
 102         }
 103     }
 104
 105     if incomplete_utf8.len > 0 {
 106         assert!(
 107             incomplete_utf8.len < 4,
 108             "Unexpected number of bytes for incomplete UTF-8 codepoint."
 109         );
 110         if data[0] >> 6 != 0b10 {
 111             // not a continuation byte - reject
 112             incomplete_utf8.len = 0;
 113             return Err(io::Error::new_const(
 114                 io::ErrorKind::InvalidData,
 115                 &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
 116             ));
 117         }
 118         incomplete_utf8.bytes[incomplete_utf8.len as usize] = data[0];
 119         incomplete_utf8.len += 1;
 120         let char_width = utf8_char_width(incomplete_utf8.bytes[0]);
 121         if (incomplete_utf8.len as usize) < char_width {
 122             // more bytes needed
 123             return Ok(1);
 124         }
 125         let s = str::from_utf8(&incomplete_utf8.bytes[0..incomplete_utf8.len as usize]);
 126         incomplete_utf8.len = 0;
 127         match s {
 128             Ok(s) => {
 129                 assert_eq!(char_width, s.len());
 130                 let written = write_valid_utf8_to_console(handle, s)?;
 131                 assert_eq!(written, s.len()); // guaranteed by write_valid_utf8_to_console() for single codepoint writes
 132                 return Ok(1);
 133             }
 134             Err(_) => {
 135                 return Err(io::Error::new_const(
 136                     io::ErrorKind::InvalidData,
 137                     &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
 138                 ));
 139             }
 140         }
 141     }
 142
 143     // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
 144     // which needs to be encoded as UTF-16.
 145     //
 146     // If the data is not valid UTF-8 we write out as many bytes as are valid.
 147     // If the first byte is invalid it is either first byte of a multi-byte sequence but the
 148     // provided byte slice is too short or it is the first byte of an invalid multi-byte sequence.
 149     let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2);
 150     let utf8 = match str::from_utf8(&data[..len]) {
 151         Ok(s) => s,
 152         Err(ref e) if e.valid_up_to() == 0 => {
 153             let first_byte_char_width = utf8_char_width(data[0]);
 154             if first_byte_char_width > 1 && data.len() < first_byte_char_width {
 155                 incomplete_utf8.bytes[0] = data[0];
 156                 incomplete_utf8.len = 1;
 157                 return Ok(1);
 158             } else {
 159                 return Err(io::Error::new_const(
 160                     io::ErrorKind::InvalidData,
 161                     &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
 162                 ));
 163             }
 164         }
 165         Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
 166     };
 167
 168     write_valid_utf8_to_console(handle, utf8)
 169 }
 170
 171 fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize> {
 172     let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2];
 173     let mut len_utf16 = 0;
 174     for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) {
 175         *dest = chr;
 176         len_utf16 += 1;
 177     }
 178     let utf16 = &utf16[..len_utf16];
 179
 180     let mut written = write_u16s(handle, &utf16)?;
 181
 182     // Figure out how many bytes of as UTF-8 were written away as UTF-16.
 183     if written == utf16.len() {
 184         Ok(utf8.len())
 185     } else {
 186         // Make sure we didn't end up writing only half of a surrogate pair (even though the chance
 187         // is tiny). Because it is not possible for user code to re-slice `data` in such a way that
 188         // a missing surrogate can be produced (and also because of the UTF-8 validation above),
 189         // write the missing surrogate out now.
 190         // Buffering it would mean we have to lie about the number of bytes written.
 191         let first_char_remaining = utf16[written];
 192         if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF {
 193             // low surrogate
 194             // We just hope this works, and give up otherwise
 195             let _ = write_u16s(handle, &utf16[written..written + 1]);
 196             written += 1;
 197         }
 198         // Calculate the number of bytes of `utf8` that were actually written.
 199         let mut count = 0;
 200         for ch in utf16[..written].iter() {
 201             count += match ch {
 202                 0x0000..=0x007F => 1,
 203                 0x0080..=0x07FF => 2,
 204                 0xDCEE..=0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other.
 205                 _ => 3,
 206             };
 207         }
 208         debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]);
 209         Ok(count)
 210     }
 211 }
 212
 213 fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
 214     let mut written = 0;
 215     cvt(unsafe {
 216         c::WriteConsoleW(
 217             handle,
 218             data.as_ptr() as c::LPCVOID,
 219             data.len() as u32,
 220             &mut written,
 221             ptr::null_mut(),
 222         )
 223     })?;
 224     Ok(written as usize)
 225 }
 226
 227 impl Stdin {
 228     pub const fn new() -> Stdin {
 229         Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
 230     }
 231 }
 232
 233 impl io::Read for Stdin {
 234     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 235         let handle = get_handle(c::STD_INPUT_HANDLE)?;
 236         if !is_console(handle) {
 237             unsafe {
 238                 let handle = Handle::from_raw_handle(handle);
 239                 let ret = handle.read(buf);
 240                 handle.into_raw_handle(); // Don't close the handle
 241                 return ret;
 242             }
 243         }
 244
 245         // If there are bytes in the incomplete utf-8, start with those.
 246         // (No-op if there is nothing in the buffer.)
 247         let mut bytes_copied = self.incomplete_utf8.read(buf);
 248
 249         if bytes_copied == buf.len() {
 250             return Ok(bytes_copied);
 251         } else if buf.len() - bytes_copied < 4 {
 252             // Not enough space to get a UTF-8 byte. We will use the incomplete UTF8.
 253             let mut utf16_buf = [0u16; 1];
 254             // Read one u16 character.
 255             let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
 256             // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
 257             let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?;
 258
 259             // Read in the bytes from incomplete_utf8 until the buffer is full.
 260             self.incomplete_utf8.len = read_bytes as u8;
 261             // No-op if no bytes.
 262             bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
 263             Ok(bytes_copied)
 264         } else {
 265             let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
 266             // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of a UTF-16. So
 267             // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
 268             // lost.
 269             let amount = cmp::min(buf.len() / 3, utf16_buf.len());
 270             let read =
 271                 read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
 272
 273             match utf16_to_utf8(&utf16_buf[..read], buf) {
 274                 Ok(value) => return Ok(bytes_copied + value),
 275                 Err(e) => return Err(e),
 276             }
 277         }
 278     }
 279 }
 280
 281 // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
 282 // buffer size, and keep it around for the next read hoping to put them together.
 283 // This is a best effort, and might not work if we are not the only reader on Stdin.
 284 fn read_u16s_fixup_surrogates(
 285     handle: c::HANDLE,
 286     buf: &mut [u16],
 287     mut amount: usize,
 288     surrogate: &mut u16,
 289 ) -> io::Result<usize> {
 290     // Insert possibly remaining unpaired surrogate from last read.
 291     let mut start = 0;
 292     if *surrogate != 0 {
 293         buf[0] = *surrogate;
 294         *surrogate = 0;
 295         start = 1;
 296         if amount == 1 {
 297             // Special case: `Stdin::read` guarantees we can always read at least one new `u16`
 298             // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least
 299             // 4 bytes.
 300             amount = 2;
 301         }
 302     }
 303     let mut amount = read_u16s(handle, &mut buf[start..amount])? + start;
 304
 305     if amount > 0 {
 306         let last_char = buf[amount - 1];
 307         if last_char >= 0xD800 && last_char <= 0xDBFF {
 308             // high surrogate
 309             *surrogate = last_char;
 310             amount -= 1;
 311         }
 312     }
 313     Ok(amount)
 314 }
 315
 316 fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
 317     // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the
 318     // traditional DOS method to indicate end of character stream / user input (SUB).
 319     // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole.
 320     const CTRL_Z: u16 = 0x1A;
 321     const CTRL_Z_MASK: c::ULONG = 1 << CTRL_Z;
 322     let mut input_control = c::CONSOLE_READCONSOLE_CONTROL {
 323         nLength: crate::mem::size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as c::ULONG,
 324         nInitialChars: 0,
 325         dwCtrlWakeupMask: CTRL_Z_MASK,
 326         dwControlKeyState: 0,
 327     };
 328
 329     let mut amount = 0;
 330     loop {
 331         cvt(unsafe {
 332             c::SetLastError(0);
 333             c::ReadConsoleW(
 334                 handle,
 335                 buf.as_mut_ptr() as c::LPVOID,
 336                 buf.len() as u32,
 337                 &mut amount,
 338                 &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL,
 339             )
 340         })?;
 341
 342         // ReadConsoleW returns success with ERROR_OPERATION_ABORTED for Ctrl-C or Ctrl-Break.
 343         // Explicitly check for that case here and try again.
 344         if amount == 0 && unsafe { c::GetLastError() } == c::ERROR_OPERATION_ABORTED {
 345             continue;
 346         }
 347         break;
 348     }
 349
 350     if amount > 0 && buf[amount as usize - 1] == CTRL_Z {
 351         amount -= 1;
 352     }
 353     Ok(amount as usize)
 354 }
 355
 356 #[allow(unused)]
 357 fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
 358     let mut written = 0;
 359     for chr in decode_utf16(utf16.iter().cloned()) {
 360         match chr {
 361             Ok(chr) => {
 362                 chr.encode_utf8(&mut utf8[written..]);
 363                 written += chr.len_utf8();
 364             }
 365             Err(_) => {
 366                 // We can't really do any better than forget all data and return an error.
 367                 return Err(io::Error::new_const(
 368                     io::ErrorKind::InvalidData,
 369                     &"Windows stdin in console mode does not support non-UTF-16 input; \
 370                      encountered unpaired surrogate",
 371                 ));
 372             }
 373         }
 374     }
 375     Ok(written)
 376 }
 377
 378 impl IncompleteUtf8 {
 379     pub const fn new() -> IncompleteUtf8 {
 380         IncompleteUtf8 { bytes: [0; 4], len: 0 }
 381     }
 382 }
 383
 384 impl Stdout {
 385     pub const fn new() -> Stdout {
 386         Stdout { incomplete_utf8: IncompleteUtf8::new() }
 387     }
 388 }
 389
 390 impl io::Write for Stdout {
 391     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
 392         write(c::STD_OUTPUT_HANDLE, buf, &mut self.incomplete_utf8)
 393     }
 394
 395     fn flush(&mut self) -> io::Result<()> {
 396         Ok(())
 397     }
 398 }
 399
 400 impl Stderr {
 401     pub const fn new() -> Stderr {
 402         Stderr { incomplete_utf8: IncompleteUtf8::new() }
 403     }
 404 }
 405
 406 impl io::Write for Stderr {
 407     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
 408         write(c::STD_ERROR_HANDLE, buf, &mut self.incomplete_utf8)
 409     }
 410
 411     fn flush(&mut self) -> io::Result<()> {
 412         Ok(())
 413     }
 414 }
 415
 416 pub fn is_ebadf(err: &io::Error) -> bool {
 417     err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32)
 418 }
 419
 420 pub fn panic_output() -> Option<impl io::Write> {
 421     Some(Stderr::new())
 422 }