1 #![unstable(issue = "none", feature = "windows_stdio")]
3 use crate::char::decode_utf16;
6 use crate::os::windows::io::{FromRawHandle, IntoRawHandle};
11 use crate::sys::handle::Handle;
12 use core::str::utf8_char_width;
14 // Don't cache handles but get them fresh for every read/write. This allows us to track changes to
15 // the value over time (such as if a process calls `SetStdHandle` while it's running). See #40490.
18 incomplete_utf8: IncompleteUtf8,
22 incomplete_utf8: IncompleteUtf8,
26 incomplete_utf8: IncompleteUtf8,
29 struct IncompleteUtf8 {
35 // Implemented for use in Stdin::read.
// Copies buffered bytes into `buf`, limited by whichever is smaller: the space in `buf` or the
// number of bytes currently held (`self.len`).
36 fn read(&mut self, buf: &mut [u8]) -> usize {
37 // Write to buffer until the buffer is full or we run out of bytes.
38 let to_write = cmp::min(buf.len(), self.len as usize);
39 buf[..to_write].copy_from_slice(&self.bytes[..to_write]);
41 // Rotate the remaining bytes if not enough remaining space in buffer.
42 if usize::from(self.len) > buf.len() {
// Move the bytes that did not fit to the front of the array and shrink the stored length so
// a later call starts with them.
43 self.bytes.copy_within(to_write.., 0);
44 self.len -= to_write as u8;
53 // Apparently Windows doesn't handle large reads on stdin or writes to stdout/stderr well (see
54 // #13304 for details).
56 // From MSDN (2011): "The storage for this buffer is allocated from a shared heap for the
57 // process that is 64 KB in size. The maximum size of the buffer will depend on heap usage."
59 // We choose the cap at 8 KiB because libuv does the same, and it seems to be acceptable so far.
60 const MAX_BUFFER_SIZE: usize = 8192;
62 // The standard buffer size of BufReader for Stdin should be able to hold 3 times as many bytes as there
63 // are `u16`'s in MAX_BUFFER_SIZE. This ensures the read data can always be completely decoded from
65 pub const STDIN_BUF_SIZE: usize = MAX_BUFFER_SIZE / 2 * 3;
// Looks up the standard handle identified by `handle_id` with `GetStdHandle`.
//
// `INVALID_HANDLE_VALUE` is reported as the last OS error; a null handle is reported as
// `ERROR_INVALID_HANDLE`.
67 pub fn get_handle(handle_id: c::DWORD) -> io::Result<c::HANDLE> {
68 let handle = unsafe { c::GetStdHandle(handle_id) };
69 if handle == c::INVALID_HANDLE_VALUE {
70 Err(io::Error::last_os_error())
71 } else if handle.is_null() {
72 Err(io::Error::from_raw_os_error(c::ERROR_INVALID_HANDLE as i32))
// Returns `true` when `GetConsoleMode` succeeds for `handle`.
78 fn is_console(handle: c::HANDLE) -> bool {
79 // `GetConsoleMode` will return false (0) if this is a pipe (we don't care about the reported
80 // mode). This will only detect Windows Console, not other terminals connected to a pipe like
81 // MSYS, which is exactly what we need, as only Windows Console needs a conversion to UTF-16.
83 unsafe { c::GetConsoleMode(handle, &mut mode) != 0 }
89 incomplete_utf8: &mut IncompleteUtf8,
90 ) -> io::Result<usize> {
95 let handle = get_handle(handle_id)?;
96 if !is_console(handle) {
98 let handle = Handle::from_raw_handle(handle);
99 let ret = handle.write(data);
100 handle.into_raw_handle(); // Don't close the handle
105 if incomplete_utf8.len > 0 {
107 incomplete_utf8.len < 4,
108 "Unexpected number of bytes for incomplete UTF-8 codepoint."
110 if data[0] >> 6 != 0b10 {
111 // not a continuation byte - reject
112 incomplete_utf8.len = 0;
113 return Err(io::Error::new_const(
114 io::ErrorKind::InvalidData,
115 &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
118 incomplete_utf8.bytes[incomplete_utf8.len as usize] = data[0];
119 incomplete_utf8.len += 1;
120 let char_width = utf8_char_width(incomplete_utf8.bytes[0]);
121 if (incomplete_utf8.len as usize) < char_width {
125 let s = str::from_utf8(&incomplete_utf8.bytes[0..incomplete_utf8.len as usize]);
126 incomplete_utf8.len = 0;
129 assert_eq!(char_width, s.len());
130 let written = write_valid_utf8_to_console(handle, s)?;
131 assert_eq!(written, s.len()); // guaranteed by write_valid_utf8_to_console() for single codepoint writes
135 return Err(io::Error::new_const(
136 io::ErrorKind::InvalidData,
137 &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
143 // As the console is meant for presenting text, we assume bytes of `data` are encoded as UTF-8,
144 // which needs to be encoded as UTF-16.
146 // If the data is not valid UTF-8 we write out as many bytes as are valid.
147 // If the first byte is invalid, it is either the first byte of a multi-byte sequence for which
148 // the provided byte slice is too short, or the first byte of an invalid multi-byte sequence.
149 let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2);
150 let utf8 = match str::from_utf8(&data[..len]) {
152 Err(ref e) if e.valid_up_to() == 0 => {
153 let first_byte_char_width = utf8_char_width(data[0]);
154 if first_byte_char_width > 1 && data.len() < first_byte_char_width {
155 incomplete_utf8.bytes[0] = data[0];
156 incomplete_utf8.len = 1;
159 return Err(io::Error::new_const(
160 io::ErrorKind::InvalidData,
161 &"Windows stdio in console mode does not support writing non-UTF-8 byte sequences",
165 Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
168 write_valid_utf8_to_console(handle, utf8)
// Encodes `utf8` as UTF-16 and writes it to the console behind `handle` via `write_u16s`.
//
// The scratch buffer holds `MAX_BUFFER_SIZE / 2` code units; encoding stops once it is full, so
// longer input is only partially handled by one call. When fewer code units are written than were
// encoded, the code below works out how many bytes of `utf8` the written units correspond to.
171 fn write_valid_utf8_to_console(handle: c::HANDLE, utf8: &str) -> io::Result<usize> {
172 let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2];
173 let mut len_utf16 = 0;
174 for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) {
178 let utf16 = &utf16[..len_utf16];
180 let mut written = write_u16s(handle, &utf16)?;
182 // Figure out how many bytes of UTF-8 were written away as UTF-16.
183 if written == utf16.len() {
186 // Make sure we didn't end up writing only half of a surrogate pair (even though the chance
187 // is tiny). Because it is not possible for user code to re-slice `data` in such a way that
188 // a missing surrogate can be produced (and also because of the UTF-8 validation above),
189 // write the missing surrogate out now.
190 // Buffering it would mean we have to lie about the number of bytes written.
191 let first_char_remaining = utf16[written];
// Low (trailing) surrogates occupy the whole range 0xDC00..=0xDFFF; checking a narrower range
// would let a split pair go unnoticed.
192 if first_char_remaining >= 0xDC00 && first_char_remaining <= 0xDFFF {
194 // We just hope this works, and give up otherwise
195 let _ = write_u16s(handle, &utf16[written..written + 1]);
198 // Calculate the number of bytes of `utf8` that were actually written.
200 for ch in utf16[..written].iter() {
202 0x0000..=0x007F => 1,
203 0x0080..=0x07FF => 2,
204 0xDC00..=0xDFFF => 1, // Low surrogate. We already counted 3 bytes for the other.
208 debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]);
// Writes the UTF-16 code units in `data` to `handle`.
// NOTE(review): the underlying call is outside this view — presumably a console write API whose
// result becomes the returned count; confirm against the full function.
213 fn write_u16s(handle: c::HANDLE, data: &[u16]) -> io::Result<usize> {
// The slice is handed over as an untyped pointer.
218 data.as_ptr() as c::LPCVOID,
// Creates a `Stdin` with `surrogate` set to 0 (presumably meaning no surrogate is pending —
// confirm) and a fresh `IncompleteUtf8` buffer.
228 pub const fn new() -> Stdin {
229 Stdin { surrogate: 0, incomplete_utf8: IncompleteUtf8::new() }
// `Read` implementation for `Stdin`.
//
// A non-console handle is read as raw bytes through `Handle`. Console input is read as UTF-16 and
// converted to UTF-8, with leftover bytes of a converted character kept in `self.incomplete_utf8`
// when `buf` has too little room.
233 impl io::Read for Stdin {
234 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
235 let handle = get_handle(c::STD_INPUT_HANDLE)?;
236 if !is_console(handle) {
// Wrap the raw handle only for the duration of this read.
238 let handle = Handle::from_raw_handle(handle);
239 let ret = handle.read(buf);
240 handle.into_raw_handle(); // Don't close the handle
245 // If there are bytes in the incomplete utf-8, start with those.
246 // (No-op if there is nothing in the buffer.)
247 let mut bytes_copied = self.incomplete_utf8.read(buf);
249 if bytes_copied == buf.len() {
250 return Ok(bytes_copied);
251 } else if buf.len() - bytes_copied < 4 {
252 // Fewer than 4 bytes of space remain, which may not fit a whole UTF-8 encoded character.
// Decode into `incomplete_utf8` instead and copy out only what fits.
253 let mut utf16_buf = [0u16; 1];
254 // Read one u16 character.
255 let read = read_u16s_fixup_surrogates(handle, &mut utf16_buf, 1, &mut self.surrogate)?;
256 // Read bytes, using the (now-empty) self.incomplete_utf8 as extra space.
257 let read_bytes = utf16_to_utf8(&utf16_buf[..read], &mut self.incomplete_utf8.bytes)?;
259 // Read in the bytes from incomplete_utf8 until the buffer is full.
260 self.incomplete_utf8.len = read_bytes as u8;
261 // No-op if no bytes.
262 bytes_copied += self.incomplete_utf8.read(&mut buf[bytes_copied..]);
265 let mut utf16_buf = [0u16; MAX_BUFFER_SIZE / 2];
266 // In the worst case, a UTF-8 string can take 3 bytes for every `u16` of UTF-16. So
267 // we can read at most a third of `buf.len()` chars and uphold the guarantee no data gets
269 let amount = cmp::min(buf.len() / 3, utf16_buf.len());
271 read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount, &mut self.surrogate)?;
// Convert what was read and report it together with any bytes copied earlier.
273 match utf16_to_utf8(&utf16_buf[..read], buf) {
274 Ok(value) => return Ok(bytes_copied + value),
275 Err(e) => return Err(e),
// Reads UTF-16 code units through `read_u16s`, carrying a trailing unpaired surrogate over to the
// next call via `surrogate`.
281 // We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
282 // buffer size, and keep it around for the next read hoping to put them together.
283 // This is a best effort, and might not work if we are not the only reader on Stdin.
284 fn read_u16s_fixup_surrogates(
289 ) -> io::Result<usize> {
290 // Insert possibly remaining unpaired surrogate from last read.
297 // Special case: `Stdin::read` guarantees we can always read at least one new `u16`
298 // and combine it with an unpaired surrogate, because the UTF-8 buffer is at least
// Read into the part of `buf` after `start`; the total includes the `start` units already there.
303 let mut amount = read_u16s(handle, &mut buf[start..amount])? + start;
306 let last_char = buf[amount - 1];
// 0xD800..=0xDBFF is the high (leading) surrogate range.
307 if last_char >= 0xD800 && last_char <= 0xDBFF {
// Remember it for the next call.
309 *surrogate = last_char;
// Reads UTF-16 code units from the console into `buf`, with the read configured to also return
// on Ctrl-Z.
316 fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
317 // Configure the `pInputControl` parameter to not only return on `\r\n` but also Ctrl-Z, the
318 // traditional DOS method to indicate end of character stream / user input (SUB).
319 // See #38274 and https://stackoverflow.com/questions/43836040/win-api-readconsole.
320 const CTRL_Z: u16 = 0x1A;
// Bit number 0x1A set: the wakeup mask entry for Ctrl-Z.
321 const CTRL_Z_MASK: c::ULONG = 1 << CTRL_Z;
322 let mut input_control = c::CONSOLE_READCONSOLE_CONTROL {
323 nLength: crate::mem::size_of::<c::CONSOLE_READCONSOLE_CONTROL>() as c::ULONG,
325 dwCtrlWakeupMask: CTRL_Z_MASK,
326 dwControlKeyState: 0,
335 buf.as_mut_ptr() as c::LPVOID,
338 &mut input_control as c::PCONSOLE_READCONSOLE_CONTROL,
342 // ReadConsoleW returns success with ERROR_OPERATION_ABORTED for Ctrl-C or Ctrl-Break.
343 // Explicitly check for that case here and try again.
344 if amount == 0 && unsafe { c::GetLastError() } == c::ERROR_OPERATION_ABORTED {
// The last unit read is Ctrl-Z. NOTE(review): how it is handled is outside this view.
350 if amount > 0 && buf[amount as usize - 1] == CTRL_Z {
// Decodes `utf16` and writes the UTF-8 encoding of each character into `utf8`, advancing
// `written` by the encoded length. An unpaired surrogate produces an `InvalidData` error.
357 fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
359 for chr in decode_utf16(utf16.iter().cloned()) {
// Encode directly into the unused tail of the output buffer.
362 chr.encode_utf8(&mut utf8[written..]);
363 written += chr.len_utf8();
366 // We can't really do any better than forget all data and return an error.
367 return Err(io::Error::new_const(
368 io::ErrorKind::InvalidData,
369 &"Windows stdin in console mode does not support non-UTF-16 input; \
370 encountered unpaired surrogate",
378 impl IncompleteUtf8 {
// Creates an empty buffer: four zeroed bytes and a length of 0.
379 pub const fn new() -> IncompleteUtf8 {
380 IncompleteUtf8 { bytes: [0; 4], len: 0 }
// Creates a `Stdout` with a fresh `IncompleteUtf8` buffer.
385 pub const fn new() -> Stdout {
386 Stdout { incomplete_utf8: IncompleteUtf8::new() }
// `Write` implementation for `Stdout`: forwards to the shared `write` function using
// `STD_OUTPUT_HANDLE` and this stream's own incomplete-UTF-8 state.
390 impl io::Write for Stdout {
391 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
392 write(c::STD_OUTPUT_HANDLE, buf, &mut self.incomplete_utf8)
// NOTE(review): the body of `flush` is outside this view.
395 fn flush(&mut self) -> io::Result<()> {
// Creates a `Stderr` with a fresh `IncompleteUtf8` buffer.
401 pub const fn new() -> Stderr {
402 Stderr { incomplete_utf8: IncompleteUtf8::new() }
// `Write` implementation for `Stderr`: forwards to the shared `write` function using
// `STD_ERROR_HANDLE` and this stream's own incomplete-UTF-8 state.
406 impl io::Write for Stderr {
407 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
408 write(c::STD_ERROR_HANDLE, buf, &mut self.incomplete_utf8)
// NOTE(review): the body of `flush` is outside this view.
411 fn flush(&mut self) -> io::Result<()> {
// Returns `true` when `err` carries the raw OS error code `ERROR_INVALID_HANDLE`.
416 pub fn is_ebadf(err: &io::Error) -> bool {
417 err.raw_os_error() == Some(c::ERROR_INVALID_HANDLE as i32)
420 pub fn panic_output() -> Option<impl io::Write> {