src/tools/miri/src/eval.rs

   1 //! Main evaluator loop and setting up the initial stack frame.
   2
   3 use std::ffi::{OsStr, OsString};
   4 use std::iter;
   5 use std::panic::{self, AssertUnwindSafe};
   6 use std::path::PathBuf;
   7 use std::task::Poll;
   8 use std::thread;
   9
  10 use log::info;
  11
  12 use crate::borrow_tracker::RetagFields;
  13 use rustc_data_structures::fx::FxHashSet;
  14 use rustc_hir::def::Namespace;
  15 use rustc_hir::def_id::DefId;
  16 use rustc_middle::ty::{
  17     self,
  18     layout::{LayoutCx, LayoutOf},
  19     TyCtxt,
  20 };
  21 use rustc_target::spec::abi::Abi;
  22
  23 use rustc_session::config::EntryFnType;
  24
  25 use crate::shims::tls;
  26 use crate::*;
  27
/// When the main thread would exit, we will yield to any other thread that is ready to execute.
/// But we must only do that a finite number of times, or a background thread running `loop {}`
/// will hang the program.
///
/// This is the initial `remaining` budget of `MainThreadState::Yield`.
const MAIN_THREAD_YIELDS_AT_SHUTDOWN: u32 = 256;
  32
/// Selects how alignment is checked (see `MiriConfig::check_alignment`).
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum AlignmentCheck {
    /// Do not check alignment.
    None,
    /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address.
    Symbolic,
    /// Check alignment on the actual physical integer address.
    Int,
}
  42
/// How a rejected isolated op is handled.
///
/// The op is either rejected by aborting the machine (`Abort`), or Miri returns an error
/// for it. In the latter case, the remaining variants determine whether (and how) the
/// user is warned about that error.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum RejectOpWith {
    /// Isolated op is rejected with an abort of the machine.
    Abort,

    /// Return an error for the isolated op.
    /// Do not print a warning about the rejected isolated op.
    NoWarning,

    /// Return an error for the isolated op.
    /// Print a warning about the rejected isolated op, with backtrace.
    Warning,

    /// Return an error for the isolated op.
    /// Print a warning about the rejected isolated op, without backtrace.
    WarningWithoutBacktrace,
}
  59
/// Action taken for an op that requires communication with the host
/// (see `MiriConfig::isolated_op`).
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum IsolatedOp {
    /// Reject an op requiring communication with the host. By
    /// default, Miri rejects the op with an abort. If not, it returns
    /// an error code, and prints a warning about it. Warning levels
    /// are controlled by the `RejectOpWith` enum.
    Reject(RejectOpWith),

    /// Execute an op requiring communication with the host, i.e. disable isolation.
    Allow,
}
  71
/// Which style to use for printing backtraces (see `MiriConfig::backtrace_style`).
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum BacktraceStyle {
    /// Prints a terser backtrace which ideally only contains relevant information.
    Short,
    /// Prints a backtrace with all possible information.
    Full,
    /// Prints only the frame that the error occurs in.
    Off,
}
  81
/// Configuration needed to spawn a Miri instance.
///
/// See the `Default` impl for the value each field takes when not set explicitly.
#[derive(Clone)]
pub struct MiriConfig {
    /// The host environment snapshot to use as basis for what is provided to the interpreted program.
    /// (This is still subject to isolation as well as `forwarded_env_vars`.)
    pub env: Vec<(OsString, OsString)>,
    /// Determines if validity checking is enabled.
    pub validate: bool,
    /// Selects the borrow tracker method to use, if any.
    pub borrow_tracker: Option<BorrowTrackerMethod>,
    /// Controls alignment checking.
    pub check_alignment: AlignmentCheck,
    /// Controls function [ABI](Abi) checking.
    pub check_abi: bool,
    /// Action for an op requiring communication with the host.
    pub isolated_op: IsolatedOp,
    /// Determines if memory leaks should be ignored.
    pub ignore_leaks: bool,
    /// Environment variables that should always be forwarded from the host.
    pub forwarded_env_vars: Vec<String>,
    /// Command-line arguments passed to the interpreted program.
    pub args: Vec<String>,
    /// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`).
    pub seed: Option<u64>,
    /// The stacked borrows pointer ids to report about.
    pub tracked_pointer_tags: FxHashSet<BorTag>,
    /// The stacked borrows call IDs to report about.
    pub tracked_call_ids: FxHashSet<CallId>,
    /// The allocation ids to report about.
    pub tracked_alloc_ids: FxHashSet<AllocId>,
    /// Determines if data race detection should be enabled.
    pub data_race_detector: bool,
    /// Determines if weak memory emulation should be enabled. Requires data race detection to be enabled.
    pub weak_memory_emulation: bool,
    /// Track when an outdated (weak memory) load happens.
    pub track_outdated_loads: bool,
    /// Rate of spurious failures for compare_exchange_weak atomic operations,
    /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
    pub cmpxchg_weak_failure_rate: f64,
    /// If `Some`, enable the `measureme` profiler, writing results to a file
    /// with the specified prefix.
    pub measureme_out: Option<String>,
    /// Panic when unsupported functionality is encountered.
    pub panic_on_unsupported: bool,
    /// Which style to use for printing backtraces.
    pub backtrace_style: BacktraceStyle,
    /// Which provenance to use for int2ptr casts.
    pub provenance_mode: ProvenanceMode,
    /// Whether to ignore any output by the program. This is helpful when debugging Miri
    /// as its messages don't get intermingled with the program messages.
    pub mute_stdout_stderr: bool,
    /// The probability of the active thread being preempted at the end of each basic block.
    pub preemption_rate: f64,
    /// Report the current instruction being executed every N basic blocks.
    pub report_progress: Option<u32>,
    /// Whether Stacked Borrows retagging should recurse into fields of datatypes.
    pub retag_fields: RetagFields,
    /// The location of a shared object file to load when calling external functions.
    /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory
    pub external_so_file: Option<PathBuf>,
    /// Run a garbage collector for BorTags every N basic blocks.
    pub gc_interval: u32,
    /// The number of CPUs to be reported by Miri.
    pub num_cpus: u32,
    /// Requires Miri to emulate pages of a certain size.
    pub page_size: Option<u64>,
}
 149
impl Default for MiriConfig {
    /// Builds the baseline configuration: validation, Stacked Borrows, integer-address
    /// alignment checks, ABI checks, data race detection and weak memory emulation are
    /// all switched on, isolation rejects host ops by aborting, and leaks are reported.
    fn default() -> MiriConfig {
        MiriConfig {
            env: vec![],
            validate: true,
            borrow_tracker: Some(BorrowTrackerMethod::StackedBorrows),
            check_alignment: AlignmentCheck::Int,
            check_abi: true,
            // Isolation is on: ops needing the host abort the machine.
            isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
            ignore_leaks: false,
            forwarded_env_vars: vec![],
            args: vec![],
            seed: None,
            tracked_pointer_tags: FxHashSet::default(),
            tracked_call_ids: FxHashSet::default(),
            tracked_alloc_ids: FxHashSet::default(),
            data_race_detector: true,
            weak_memory_emulation: true,
            track_outdated_loads: false,
            cmpxchg_weak_failure_rate: 0.8, // 80%
            measureme_out: None,
            panic_on_unsupported: false,
            backtrace_style: BacktraceStyle::Short,
            provenance_mode: ProvenanceMode::Default,
            mute_stdout_stderr: false,
            preemption_rate: 0.01, // 1%
            report_progress: None,
            retag_fields: RetagFields::OnlyScalar,
            external_so_file: None,
            // Measured in basic blocks (see the field documentation).
            gc_interval: 10_000,
            num_cpus: 1,
            page_size: None,
        }
    }
}
 185
/// The state of the main thread. Implementation detail of `on_main_stack_empty`.
///
/// The states are visited in declaration order; `Yield` is skipped when the machine's
/// preemption rate is not greater than zero.
#[derive(Default, Debug)]
enum MainThreadState {
    /// Initial state; the first empty-stack notification moves on to `TlsDtors`.
    #[default]
    Running,
    /// TLS destructors are being driven; the payload tracks their progress.
    TlsDtors(tls::TlsDtorsState),
    /// The main thread is yielding to other threads before shutting down.
    Yield {
        /// How many more times the main thread will yield before moving on to `Done`.
        remaining: u32,
    },
    /// The exit code is read and the interpreter loop is stopped.
    Done,
}
 197
 198 impl MainThreadState {
 199     fn on_main_stack_empty<'tcx>(
 200         &mut self,
 201         this: &mut MiriInterpCx<'_, 'tcx>,
 202     ) -> InterpResult<'tcx, Poll<()>> {
 203         use MainThreadState::*;
 204         match self {
 205             Running => {
 206                 *self = TlsDtors(Default::default());
 207             }
 208             TlsDtors(state) =>
 209                 match state.on_stack_empty(this)? {
 210                     Poll::Pending => {} // just keep going
 211                     Poll::Ready(()) => {
 212                         // Give background threads a chance to finish by yielding the main thread a
 213                         // couple of times -- but only if we would also preempt threads randomly.
 214                         if this.machine.preemption_rate > 0.0 {
 215                             // There is a non-zero chance they will yield back to us often enough to
 216                             // make Miri terminate eventually.
 217                             *self = Yield { remaining: MAIN_THREAD_YIELDS_AT_SHUTDOWN };
 218                         } else {
 219                             // The other threads did not get preempted, so no need to yield back to
 220                             // them.
 221                             *self = Done;
 222                         }
 223                     }
 224                 },
 225             Yield { remaining } =>
 226                 match remaining.checked_sub(1) {
 227                     None => *self = Done,
 228                     Some(new_remaining) => {
 229                         *remaining = new_remaining;
 230                         this.yield_active_thread();
 231                     }
 232                 },
 233             Done => {
 234                 // Figure out exit code.
 235                 let ret_place = MPlaceTy::from_aligned_ptr(
 236                     this.machine.main_fn_ret_place.unwrap().ptr,
 237                     this.machine.layouts.isize,
 238                 );
 239                 let exit_code = this.read_machine_isize(&ret_place.into())?;
 240                 // Need to call this ourselves since we are not going to return to the scheduler
 241                 // loop, and we want the main thread TLS to not show up as memory leaks.
 242                 this.terminate_active_thread()?;
 243                 // Stop interpreter loop.
 244                 throw_machine_stop!(TerminationInfo::Exit { code: exit_code, leak_check: true });
 245             }
 246         }
 247         Ok(Poll::Pending)
 248     }
 249 }
 250
/// Returns a freshly created `InterpCx`.
/// Public because this is also used by `priroda`.
///
/// Besides constructing the interpreter, this:
/// * runs `MiriMachine::late_init`, handing it the main thread's empty-stack callback,
/// * aborts with a fatal error if MIR for a libcore sentinel function is not available,
/// * allocates `argc`/`argv` (and the UTF-16 command line) from `config.args`,
/// * allocates the return place of the entry function, and
/// * pushes the call to the `start` lang item (for `EntryFnType::Main`) or to the entry
///   function itself (for `EntryFnType::Start`).
///
/// Any interpreter error raised during this setup is returned to the caller.
pub fn create_ecx<'mir, 'tcx: 'mir>(
    tcx: TyCtxt<'tcx>,
    entry_id: DefId,
    entry_type: EntryFnType,
    config: &MiriConfig,
) -> InterpResult<'tcx, InterpCx<'mir, 'tcx, MiriMachine<'mir, 'tcx>>> {
    let param_env = ty::ParamEnv::reveal_all();
    let layout_cx = LayoutCx { tcx, param_env };
    let mut ecx = InterpCx::new(
        tcx,
        rustc_span::source_map::DUMMY_SP,
        param_env,
        MiriMachine::new(config, layout_cx),
    );

    // Some parts of initialization require a full `InterpCx`.
    MiriMachine::late_init(&mut ecx, config, {
        let mut state = MainThreadState::default();
        // Cannot capture anything GC-relevant here.
        Box::new(move |m| state.on_main_stack_empty(m))
    })?;

    // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
    let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"], Namespace::ValueNS);
    if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
        tcx.sess.fatal(
            "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
            Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
        );
    }

    // Setup first stack frame.
    let entry_instance = ty::Instance::mono(tcx, entry_id);

    // First argument is constructed later, because it's skipped if the entry function uses #[start].

    // Second argument (argc): length of `config.args`.
    let argc = Scalar::from_machine_usize(u64::try_from(config.args.len()).unwrap(), &ecx);
    // Third argument (`argv`): created from `config.args`.
    let argv = {
        // Put each argument in memory, collect pointers.
        let mut argvs = Vec::<Immediate<Provenance>>::new();
        for arg in config.args.iter() {
            // Make space for `0` terminator.
            let size = u64::try_from(arg.len()).unwrap().checked_add(1).unwrap();
            let arg_type = tcx.mk_array(tcx.types.u8, size);
            let arg_place =
                ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
            ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr, size)?;
            ecx.mark_immutable(&arg_place);
            argvs.push(arg_place.to_ref(&ecx));
        }
        // Make an array with all these pointers, in the Miri memory.
        let argvs_layout = ecx.layout_of(
            tcx.mk_array(tcx.mk_imm_ptr(tcx.types.u8), u64::try_from(argvs.len()).unwrap()),
        )?;
        let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
        for (idx, arg) in argvs.into_iter().enumerate() {
            let place = ecx.mplace_field(&argvs_place, idx)?;
            ecx.write_immediate(arg, &place.into())?;
        }
        ecx.mark_immutable(&argvs_place);
        // A pointer to that place is the 3rd argument for main.
        let argv = argvs_place.to_ref(&ecx);
        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
        {
            // NOTE(review): `argc` is a machine-usize scalar written into an `isize`-layout
            // place; presumably fine because both have the same size -- confirm.
            let argc_place =
                ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
            ecx.write_scalar(argc, &argc_place.into())?;
            ecx.mark_immutable(&argc_place);
            ecx.machine.argc = Some(*argc_place);

            let argv_place = ecx.allocate(
                ecx.layout_of(tcx.mk_imm_ptr(tcx.types.unit))?,
                MiriMemoryKind::Machine.into(),
            )?;
            ecx.write_immediate(argv, &argv_place.into())?;
            ecx.mark_immutable(&argv_place);
            ecx.machine.argv = Some(*argv_place);
        }
        // Store command line as UTF-16 for Windows `GetCommandLineW`.
        {
            // Construct a command string with all the arguments.
            let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());

            let cmd_type = tcx.mk_array(tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
            let cmd_place =
                ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
            ecx.machine.cmd_line = Some(*cmd_place);
            // Store the UTF-16 string. We just allocated so we know the bounds are fine.
            for (idx, &c) in cmd_utf16.iter().enumerate() {
                let place = ecx.mplace_field(&cmd_place, idx)?;
                ecx.write_scalar(Scalar::from_u16(c), &place.into())?;
            }
            ecx.mark_immutable(&cmd_place);
        }
        argv
    };

    // Return place (in static memory so that it does not count as leak).
    let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
    ecx.machine.main_fn_ret_place = Some(*ret_place);
    // Call start function.

    match entry_type {
        EntryFnType::Main { .. } => {
            // Resolve the `start` lang item, instantiated with the return type of `main`.
            let start_id = tcx.lang_items().start_fn().unwrap();
            let main_ret_ty = tcx.fn_sig(entry_id).no_bound_vars().unwrap().output();
            let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
            let start_instance = ty::Instance::resolve(
                tcx,
                ty::ParamEnv::reveal_all(),
                start_id,
                tcx.mk_substs(::std::iter::once(ty::subst::GenericArg::from(main_ret_ty))),
            )
            .unwrap()
            .unwrap();

            // First argument: a function pointer to the program's entry function.
            let main_ptr = ecx.create_fn_alloc_ptr(FnVal::Instance(entry_instance));

            // Inlining of `DEFAULT` from
            // https://github.com/rust-lang/rust/blob/master/compiler/rustc_session/src/config/sigpipe.rs.
            // Always using DEFAULT is okay since we don't support signals in Miri anyway.
            let sigpipe = 2;

            ecx.call_function(
                start_instance,
                Abi::Rust,
                &[
                    Scalar::from_pointer(main_ptr, &ecx).into(),
                    argc.into(),
                    argv,
                    Scalar::from_u8(sigpipe).into(),
                ],
                Some(&ret_place.into()),
                StackPopCleanup::Root { cleanup: true },
            )?;
        }
        EntryFnType::Start => {
            // The entry function is called directly, with only `argc` and `argv`.
            ecx.call_function(
                entry_instance,
                Abi::Rust,
                &[argc.into(), argv],
                Some(&ret_place.into()),
                StackPopCleanup::Root { cleanup: true },
            )?;
        }
    }

    Ok(ecx)
}
 404
/// Evaluates the entry function specified by `entry_id`.
/// Returns `Some(return_code)` if program execution completed.
/// Returns `None` if an evaluation error occurred, or if the leak check is active and
/// either some threads have not terminated or memory was leaked.
///
/// # Panics
///
/// Panics if the interpreter cannot be initialized. A panic raised while running the
/// interpreted threads is re-raised after `handle_ice` has been called.
#[allow(clippy::needless_lifetimes)]
pub fn eval_entry<'tcx>(
    tcx: TyCtxt<'tcx>,
    entry_id: DefId,
    entry_type: EntryFnType,
    config: MiriConfig,
) -> Option<i64> {
    // Copy setting before we move `config`.
    let ignore_leaks = config.ignore_leaks;

    let mut ecx = match create_ecx(tcx, entry_id, entry_type, &config) {
        Ok(v) => v,
        Err(err) => {
            err.print_backtrace();
            panic!("Miri initialization error: {}", err.kind())
        }
    };

    // Perform the main execution.
    let res: thread::Result<InterpResult<'_, !>> =
        panic::catch_unwind(AssertUnwindSafe(|| ecx.run_threads()));
    // If the interpreter itself panicked, report it before continuing to unwind.
    let res = res.unwrap_or_else(|panic_payload| {
        ecx.handle_ice();
        panic::resume_unwind(panic_payload)
    });
    let res = match res {
        Err(res) => res,
        // `Ok` can never happen
        Ok(never) => match never {},
    };

    // Machine cleanup. Only do this if all threads have terminated; threads that are still running
    // might cause Stacked Borrows errors (https://github.com/rust-lang/miri/issues/2396).
    if ecx.have_all_terminated() {
        // Even if all threads have terminated, we have to beware of data races since some threads
        // might not have joined the main thread (https://github.com/rust-lang/miri/issues/2020,
        // https://github.com/rust-lang/miri/issues/2508).
        ecx.allow_data_races_all_threads_done();
        EnvVars::cleanup(&mut ecx).expect("error during env var cleanup");
    }

    // Process the result.
    let (return_code, leak_check) = report_error(&ecx, res)?;
    // The leak check runs only if the termination requested it and the user did not disable it.
    if leak_check && !ignore_leaks {
        // Check for thread leaks.
        if !ecx.have_all_terminated() {
            tcx.sess.err("the main thread terminated without waiting for all remaining threads");
            tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
            return None;
        }
        // Check for memory leaks.
        info!("Additonal static roots: {:?}", ecx.machine.static_roots);
        let leaks = ecx.leak_report(&ecx.machine.static_roots);
        if leaks != 0 {
            tcx.sess.err("the evaluated program leaked memory");
            tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
            // Ignore the provided return code - let the reported error
            // determine the return code.
            return None;
        }
    }
    Some(return_code)
}
 471
 472 /// Turns an array of arguments into a Windows command line string.
 473 ///
 474 /// The string will be UTF-16 encoded and NUL terminated.
 475 ///
 476 /// Panics if the zeroth argument contains the `"` character because doublequotes
 477 /// in `argv[0]` cannot be encoded using the standard command line parsing rules.
 478 ///
 479 /// Further reading:
 480 /// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
 481 /// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
 482 fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
 483 where
 484     I: Iterator<Item = T>,
 485     T: AsRef<str>,
 486 {
 487     // Parse argv[0]. Slashes aren't escaped. Literal double quotes are not allowed.
 488     let mut cmd = {
 489         let arg0 = if let Some(arg0) = args.next() {
 490             arg0
 491         } else {
 492             return vec![0];
 493         };
 494         let arg0 = arg0.as_ref();
 495         if arg0.contains('"') {
 496             panic!("argv[0] cannot contain a doublequote (\") character");
 497         } else {
 498             // Always surround argv[0] with quotes.
 499             let mut s = String::new();
 500             s.push('"');
 501             s.push_str(arg0);
 502             s.push('"');
 503             s
 504         }
 505     };
 506
 507     // Build the other arguments.
 508     for arg in args {
 509         let arg = arg.as_ref();
 510         cmd.push(' ');
 511         if arg.is_empty() {
 512             cmd.push_str("\"\"");
 513         } else if !arg.bytes().any(|c| matches!(c, b'"' | b'\t' | b' ')) {
 514             // No quote, tab, or space -- no escaping required.
 515             cmd.push_str(arg);
 516         } else {
 517             // Spaces and tabs are escaped by surrounding them in quotes.
 518             // Quotes are themselves escaped by using backslashes when in a
 519             // quoted block.
 520             // Backslashes only need to be escaped when one or more are directly
 521             // followed by a quote. Otherwise they are taken literally.
 522
 523             cmd.push('"');
 524             let mut chars = arg.chars().peekable();
 525             loop {
 526                 let mut nslashes = 0;
 527                 while let Some(&'\\') = chars.peek() {
 528                     chars.next();
 529                     nslashes += 1;
 530                 }
 531
 532                 match chars.next() {
 533                     Some('"') => {
 534                         cmd.extend(iter::repeat('\\').take(nslashes * 2 + 1));
 535                         cmd.push('"');
 536                     }
 537                     Some(c) => {
 538                         cmd.extend(iter::repeat('\\').take(nslashes));
 539                         cmd.push(c);
 540                     }
 541                     None => {
 542                         cmd.extend(iter::repeat('\\').take(nslashes * 2));
 543                         break;
 544                     }
 545                 }
 546             }
 547             cmd.push('"');
 548         }
 549     }
 550
 551     if cmd.contains('\0') {
 552         panic!("interior null in command line arguments");
 553     }
 554     cmd.encode_utf16().chain(iter::once(0)).collect()
 555 }
 556
 557 #[cfg(test)]
 558 mod tests {
 559     use super::*;
 560     #[test]
 561     #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
 562     fn windows_argv0_panic_on_quote() {
 563         args_to_utf16_command_string(["\""].iter());
 564     }
 565     #[test]
 566     fn windows_argv0_no_escape() {
 567         // Ensure that a trailing backslash in argv[0] is not escaped.
 568         let cmd = String::from_utf16_lossy(&args_to_utf16_command_string(
 569             [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"].iter(),
 570         ));
 571         assert_eq!(cmd.trim_end_matches('\0'), r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#);
 572     }
 573 }