src/tools/miri/src/eval.rs

   1 //! Main evaluator loop and setting up the initial stack frame.
   2
   3 use std::ffi::{OsStr, OsString};
   4 use std::iter;
   5 use std::panic::{self, AssertUnwindSafe};
   6 use std::path::PathBuf;
   7 use std::thread;
   8
   9 use log::info;
  10
  11 use rustc_data_structures::fx::FxHashSet;
  12 use rustc_hir::def_id::DefId;
  13 use rustc_middle::ty::{
  14     self,
  15     layout::{LayoutCx, LayoutOf},
  16     TyCtxt,
  17 };
  18 use rustc_target::spec::abi::Abi;
  19
  20 use rustc_session::config::EntryFnType;
  21
  22 use crate::*;
  23
  24 #[derive(Copy, Clone, Debug, PartialEq)]
  25 pub enum AlignmentCheck {
  26     /// Do not check alignment.
  27     None,
  28     /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address.
  29     Symbolic,
  30     /// Check alignment on the actual physical integer address.
  31     Int,
  32 }
  33
  34 #[derive(Copy, Clone, Debug, PartialEq)]
  35 pub enum RejectOpWith {
  36     /// Isolated op is rejected with an abort of the machine.
  37     Abort,
  38
  39     /// If not Abort, miri returns an error for an isolated op.
  40     /// Following options determine if user should be warned about such error.
  41     /// Do not print warning about rejected isolated op.
  42     NoWarning,
  43
  44     /// Print a warning about rejected isolated op, with backtrace.
  45     Warning,
  46
  47     /// Print a warning about rejected isolated op, without backtrace.
  48     WarningWithoutBacktrace,
  49 }
  50
  51 #[derive(Copy, Clone, Debug, PartialEq)]
  52 pub enum IsolatedOp {
  53     /// Reject an op requiring communication with the host. By
  54     /// default, miri rejects the op with an abort. If not, it returns
  55     /// an error code, and prints a warning about it. Warning levels
  56     /// are controlled by `RejectOpWith` enum.
  57     Reject(RejectOpWith),
  58
  59     /// Execute op requiring communication with the host, i.e. disable isolation.
  60     Allow,
  61 }
  62
  63 #[derive(Copy, Clone, PartialEq, Eq)]
  64 pub enum BacktraceStyle {
  65     /// Prints a terser backtrace which ideally only contains relevant information.
  66     Short,
  67     /// Prints a backtrace with all possible information.
  68     Full,
  69     /// Prints only the frame that the error occurs in.
  70     Off,
  71 }
  72
  73 /// Configuration needed to spawn a Miri instance.
  74 #[derive(Clone)]
  75 pub struct MiriConfig {
  76     /// The host environment snapshot to use as basis for what is provided to the interpreted program.
  77     /// (This is still subject to isolation as well as `forwarded_env_vars`.)
  78     pub env: Vec<(OsString, OsString)>,
  79     /// Determine if validity checking is enabled.
  80     pub validate: bool,
  81     /// Determines if Stacked Borrows is enabled.
  82     pub stacked_borrows: bool,
  83     /// Controls alignment checking.
  84     pub check_alignment: AlignmentCheck,
  85     /// Controls function [ABI](Abi) checking.
  86     pub check_abi: bool,
  87     /// Action for an op requiring communication with the host.
  88     pub isolated_op: IsolatedOp,
  89     /// Determines if memory leaks should be ignored.
  90     pub ignore_leaks: bool,
  91     /// Environment variables that should always be forwarded from the host.
  92     pub forwarded_env_vars: Vec<String>,
  93     /// Command-line arguments passed to the interpreted program.
  94     pub args: Vec<String>,
  95     /// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`).
  96     pub seed: Option<u64>,
  97     /// The stacked borrows pointer ids to report about
  98     pub tracked_pointer_tags: FxHashSet<SbTag>,
  99     /// The stacked borrows call IDs to report about
 100     pub tracked_call_ids: FxHashSet<CallId>,
 101     /// The allocation ids to report about.
 102     pub tracked_alloc_ids: FxHashSet<AllocId>,
 103     /// Determine if data race detection should be enabled
 104     pub data_race_detector: bool,
 105     /// Determine if weak memory emulation should be enabled. Requires data race detection to be enabled
 106     pub weak_memory_emulation: bool,
 107     /// Track when an outdated (weak memory) load happens.
 108     pub track_outdated_loads: bool,
 109     /// Rate of spurious failures for compare_exchange_weak atomic operations,
 110     /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
 111     pub cmpxchg_weak_failure_rate: f64,
 112     /// If `Some`, enable the `measureme` profiler, writing results to a file
 113     /// with the specified prefix.
 114     pub measureme_out: Option<String>,
 115     /// Panic when unsupported functionality is encountered.
 116     pub panic_on_unsupported: bool,
 117     /// Which style to use for printing backtraces.
 118     pub backtrace_style: BacktraceStyle,
 119     /// Which provenance to use for int2ptr casts
 120     pub provenance_mode: ProvenanceMode,
 121     /// Whether to ignore any output by the program. This is helpful when debugging miri
 122     /// as its messages don't get intermingled with the program messages.
 123     pub mute_stdout_stderr: bool,
 124     /// The probability of the active thread being preempted at the end of each basic block.
 125     pub preemption_rate: f64,
 126     /// Report the current instruction being executed every N basic blocks.
 127     pub report_progress: Option<u32>,
 128     /// Whether Stacked Borrows retagging should recurse into fields of datatypes.
 129     pub retag_fields: bool,
 130     /// The location of a shared object file to load when calling external functions
 131     /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory
 132     pub external_so_file: Option<PathBuf>,
 133     /// Run a garbage collector for SbTags every N basic blocks.
 134     pub gc_interval: u32,
 135     /// FIXME: add docs.
 136     pub num_cpus: u32,
 137 }
 138
 139 impl Default for MiriConfig {
 140     fn default() -> MiriConfig {
 141         MiriConfig {
 142             env: vec![],
 143             validate: true,
 144             stacked_borrows: true,
 145             check_alignment: AlignmentCheck::Int,
 146             check_abi: true,
 147             isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
 148             ignore_leaks: false,
 149             forwarded_env_vars: vec![],
 150             args: vec![],
 151             seed: None,
 152             tracked_pointer_tags: FxHashSet::default(),
 153             tracked_call_ids: FxHashSet::default(),
 154             tracked_alloc_ids: FxHashSet::default(),
 155             data_race_detector: true,
 156             weak_memory_emulation: true,
 157             track_outdated_loads: false,
 158             cmpxchg_weak_failure_rate: 0.8, // 80%
 159             measureme_out: None,
 160             panic_on_unsupported: false,
 161             backtrace_style: BacktraceStyle::Short,
 162             provenance_mode: ProvenanceMode::Default,
 163             mute_stdout_stderr: false,
 164             preemption_rate: 0.01, // 1%
 165             report_progress: None,
 166             retag_fields: false,
 167             external_so_file: None,
 168             gc_interval: 10_000,
 169             num_cpus: 1,
 170         }
 171     }
 172 }
 173
 174 /// Returns a freshly created `InterpCx`, along with an `MPlaceTy` representing
 175 /// the location where the return value of the `start` function will be
 176 /// written to.
 177 /// Public because this is also used by `priroda`.
 178 pub fn create_ecx<'mir, 'tcx: 'mir>(
 179     tcx: TyCtxt<'tcx>,
 180     entry_id: DefId,
 181     entry_type: EntryFnType,
 182     config: &MiriConfig,
 183 ) -> InterpResult<'tcx, (InterpCx<'mir, 'tcx, MiriMachine<'mir, 'tcx>>, MPlaceTy<'tcx, Provenance>)>
 184 {
 185     let param_env = ty::ParamEnv::reveal_all();
 186     let layout_cx = LayoutCx { tcx, param_env };
 187     let mut ecx = InterpCx::new(
 188         tcx,
 189         rustc_span::source_map::DUMMY_SP,
 190         param_env,
 191         MiriMachine::new(config, layout_cx),
 192     );
 193
 194     // Some parts of initialization require a full `InterpCx`.
 195     MiriMachine::late_init(&mut ecx, config)?;
 196
 197     // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
 198     let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"]);
 199     if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
 200         tcx.sess.fatal(
 201             "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
 202             Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
 203         );
 204     }
 205
 206     // Setup first stack frame.
 207     let entry_instance = ty::Instance::mono(tcx, entry_id);
 208
 209     // First argument is constructed later, because it's skipped if the entry function uses #[start].
 210
 211     // Second argument (argc): length of `config.args`.
 212     let argc = Scalar::from_machine_usize(u64::try_from(config.args.len()).unwrap(), &ecx);
 213     // Third argument (`argv`): created from `config.args`.
 214     let argv = {
 215         // Put each argument in memory, collect pointers.
 216         let mut argvs = Vec::<Immediate<Provenance>>::new();
 217         for arg in config.args.iter() {
 218             // Make space for `0` terminator.
 219             let size = u64::try_from(arg.len()).unwrap().checked_add(1).unwrap();
 220             let arg_type = tcx.mk_array(tcx.types.u8, size);
 221             let arg_place =
 222                 ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
 223             ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr, size)?;
 224             ecx.mark_immutable(&arg_place);
 225             argvs.push(arg_place.to_ref(&ecx));
 226         }
 227         // Make an array with all these pointers, in the Miri memory.
 228         let argvs_layout = ecx.layout_of(
 229             tcx.mk_array(tcx.mk_imm_ptr(tcx.types.u8), u64::try_from(argvs.len()).unwrap()),
 230         )?;
 231         let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
 232         for (idx, arg) in argvs.into_iter().enumerate() {
 233             let place = ecx.mplace_field(&argvs_place, idx)?;
 234             ecx.write_immediate(arg, &place.into())?;
 235         }
 236         ecx.mark_immutable(&argvs_place);
 237         // A pointer to that place is the 3rd argument for main.
 238         let argv = argvs_place.to_ref(&ecx);
 239         // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
 240         {
 241             let argc_place =
 242                 ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
 243             ecx.write_scalar(argc, &argc_place.into())?;
 244             ecx.mark_immutable(&argc_place);
 245             ecx.machine.argc = Some(*argc_place);
 246
 247             let argv_place = ecx.allocate(
 248                 ecx.layout_of(tcx.mk_imm_ptr(tcx.types.unit))?,
 249                 MiriMemoryKind::Machine.into(),
 250             )?;
 251             ecx.write_immediate(argv, &argv_place.into())?;
 252             ecx.mark_immutable(&argv_place);
 253             ecx.machine.argv = Some(*argv_place);
 254         }
 255         // Store command line as UTF-16 for Windows `GetCommandLineW`.
 256         {
 257             // Construct a command string with all the arguments.
 258             let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
 259
 260             let cmd_type = tcx.mk_array(tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
 261             let cmd_place =
 262                 ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
 263             ecx.machine.cmd_line = Some(*cmd_place);
 264             // Store the UTF-16 string. We just allocated so we know the bounds are fine.
 265             for (idx, &c) in cmd_utf16.iter().enumerate() {
 266                 let place = ecx.mplace_field(&cmd_place, idx)?;
 267                 ecx.write_scalar(Scalar::from_u16(c), &place.into())?;
 268             }
 269             ecx.mark_immutable(&cmd_place);
 270         }
 271         argv
 272     };
 273
 274     // Return place (in static memory so that it does not count as leak).
 275     let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
 276     // Call start function.
 277
 278     match entry_type {
 279         EntryFnType::Main { .. } => {
 280             let start_id = tcx.lang_items().start_fn().unwrap();
 281             let main_ret_ty = tcx.fn_sig(entry_id).output();
 282             let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
 283             let start_instance = ty::Instance::resolve(
 284                 tcx,
 285                 ty::ParamEnv::reveal_all(),
 286                 start_id,
 287                 tcx.mk_substs(::std::iter::once(ty::subst::GenericArg::from(main_ret_ty))),
 288             )
 289             .unwrap()
 290             .unwrap();
 291
 292             let main_ptr = ecx.create_fn_alloc_ptr(FnVal::Instance(entry_instance));
 293
 294             // Inlining of `DEFAULT` from
 295             // https://github.com/rust-lang/rust/blob/master/compiler/rustc_session/src/config/sigpipe.rs.
 296             // Alaways using DEFAULT is okay since we don't support signals in Miri anyway.
 297             let sigpipe = 2;
 298
 299             ecx.call_function(
 300                 start_instance,
 301                 Abi::Rust,
 302                 &[
 303                     Scalar::from_pointer(main_ptr, &ecx).into(),
 304                     argc.into(),
 305                     argv,
 306                     Scalar::from_u8(sigpipe).into(),
 307                 ],
 308                 Some(&ret_place.into()),
 309                 StackPopCleanup::Root { cleanup: true },
 310             )?;
 311         }
 312         EntryFnType::Start => {
 313             ecx.call_function(
 314                 entry_instance,
 315                 Abi::Rust,
 316                 &[argc.into(), argv],
 317                 Some(&ret_place.into()),
 318                 StackPopCleanup::Root { cleanup: true },
 319             )?;
 320         }
 321     }
 322
 323     Ok((ecx, ret_place))
 324 }
 325
 326 /// Evaluates the entry function specified by `entry_id`.
 327 /// Returns `Some(return_code)` if program executed completed.
 328 /// Returns `None` if an evaluation error occurred.
 329 #[allow(clippy::needless_lifetimes)]
 330 pub fn eval_entry<'tcx>(
 331     tcx: TyCtxt<'tcx>,
 332     entry_id: DefId,
 333     entry_type: EntryFnType,
 334     config: MiriConfig,
 335 ) -> Option<i64> {
 336     // Copy setting before we move `config`.
 337     let ignore_leaks = config.ignore_leaks;
 338
 339     let (mut ecx, ret_place) = match create_ecx(tcx, entry_id, entry_type, &config) {
 340         Ok(v) => v,
 341         Err(err) => {
 342             err.print_backtrace();
 343             panic!("Miri initialization error: {}", err.kind())
 344         }
 345     };
 346
 347     // Perform the main execution.
 348     let res: thread::Result<InterpResult<'_, i64>> = panic::catch_unwind(AssertUnwindSafe(|| {
 349         // Main loop.
 350         loop {
 351             match ecx.schedule()? {
 352                 SchedulingAction::ExecuteStep => {
 353                     assert!(ecx.step()?, "a terminated thread was scheduled for execution");
 354                 }
 355                 SchedulingAction::ExecuteTimeoutCallback => {
 356                     ecx.run_timeout_callback()?;
 357                 }
 358                 SchedulingAction::ExecuteDtors => {
 359                     // This will either enable the thread again (so we go back
 360                     // to `ExecuteStep`), or determine that this thread is done
 361                     // for good.
 362                     ecx.schedule_next_tls_dtor_for_active_thread()?;
 363                 }
 364                 SchedulingAction::Stop => {
 365                     break;
 366                 }
 367             }
 368         }
 369         let return_code = ecx.read_scalar(&ret_place.into())?.to_machine_isize(&ecx)?;
 370         Ok(return_code)
 371     }));
 372     let res = res.unwrap_or_else(|panic_payload| {
 373         ecx.handle_ice();
 374         panic::resume_unwind(panic_payload)
 375     });
 376
 377     // Machine cleanup. Only do this if all threads have terminated; threads that are still running
 378     // might cause Stacked Borrows errors (https://github.com/rust-lang/miri/issues/2396).
 379     if ecx.have_all_terminated() {
 380         // Even if all threads have terminated, we have to beware of data races since some threads
 381         // might not have joined the main thread (https://github.com/rust-lang/miri/issues/2020,
 382         // https://github.com/rust-lang/miri/issues/2508).
 383         ecx.allow_data_races_all_threads_done();
 384         EnvVars::cleanup(&mut ecx).expect("error during env var cleanup");
 385     }
 386
 387     // Process the result.
 388     match res {
 389         Ok(return_code) => {
 390             if !ignore_leaks {
 391                 // Check for thread leaks.
 392                 if !ecx.have_all_terminated() {
 393                     tcx.sess.err(
 394                         "the main thread terminated without waiting for all remaining threads",
 395                     );
 396                     tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
 397                     return None;
 398                 }
 399                 // Check for memory leaks.
 400                 info!("Additonal static roots: {:?}", ecx.machine.static_roots);
 401                 let leaks = ecx.leak_report(&ecx.machine.static_roots);
 402                 if leaks != 0 {
 403                     tcx.sess.err("the evaluated program leaked memory");
 404                     tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
 405                     // Ignore the provided return code - let the reported error
 406                     // determine the return code.
 407                     return None;
 408                 }
 409             }
 410             Some(return_code)
 411         }
 412         Err(e) => report_error(&ecx, e),
 413     }
 414 }
 415
 416 /// Turns an array of arguments into a Windows command line string.
 417 ///
 418 /// The string will be UTF-16 encoded and NUL terminated.
 419 ///
 420 /// Panics if the zeroth argument contains the `"` character because doublequotes
 421 /// in `argv[0]` cannot be encoded using the standard command line parsing rules.
 422 ///
 423 /// Further reading:
 424 /// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
 425 /// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
 426 fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
 427 where
 428     I: Iterator<Item = T>,
 429     T: AsRef<str>,
 430 {
 431     // Parse argv[0]. Slashes aren't escaped. Literal double quotes are not allowed.
 432     let mut cmd = {
 433         let arg0 = if let Some(arg0) = args.next() {
 434             arg0
 435         } else {
 436             return vec![0];
 437         };
 438         let arg0 = arg0.as_ref();
 439         if arg0.contains('"') {
 440             panic!("argv[0] cannot contain a doublequote (\") character");
 441         } else {
 442             // Always surround argv[0] with quotes.
 443             let mut s = String::new();
 444             s.push('"');
 445             s.push_str(arg0);
 446             s.push('"');
 447             s
 448         }
 449     };
 450
 451     // Build the other arguments.
 452     for arg in args {
 453         let arg = arg.as_ref();
 454         cmd.push(' ');
 455         if arg.is_empty() {
 456             cmd.push_str("\"\"");
 457         } else if !arg.bytes().any(|c| matches!(c, b'"' | b'\t' | b' ')) {
 458             // No quote, tab, or space -- no escaping required.
 459             cmd.push_str(arg);
 460         } else {
 461             // Spaces and tabs are escaped by surrounding them in quotes.
 462             // Quotes are themselves escaped by using backslashes when in a
 463             // quoted block.
 464             // Backslashes only need to be escaped when one or more are directly
 465             // followed by a quote. Otherwise they are taken literally.
 466
 467             cmd.push('"');
 468             let mut chars = arg.chars().peekable();
 469             loop {
 470                 let mut nslashes = 0;
 471                 while let Some(&'\\') = chars.peek() {
 472                     chars.next();
 473                     nslashes += 1;
 474                 }
 475
 476                 match chars.next() {
 477                     Some('"') => {
 478                         cmd.extend(iter::repeat('\\').take(nslashes * 2 + 1));
 479                         cmd.push('"');
 480                     }
 481                     Some(c) => {
 482                         cmd.extend(iter::repeat('\\').take(nslashes));
 483                         cmd.push(c);
 484                     }
 485                     None => {
 486                         cmd.extend(iter::repeat('\\').take(nslashes * 2));
 487                         break;
 488                     }
 489                 }
 490             }
 491             cmd.push('"');
 492         }
 493     }
 494
 495     if cmd.contains('\0') {
 496         panic!("interior null in command line arguments");
 497     }
 498     cmd.encode_utf16().chain(iter::once(0)).collect()
 499 }
 500
 501 #[cfg(test)]
 502 mod tests {
 503     use super::*;
 504     #[test]
 505     #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
 506     fn windows_argv0_panic_on_quote() {
 507         args_to_utf16_command_string(["\""].iter());
 508     }
 509     #[test]
 510     fn windows_argv0_no_escape() {
 511         // Ensure that a trailing backslash in argv[0] is not escaped.
 512         let cmd = String::from_utf16_lossy(&args_to_utf16_command_string(
 513             [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"].iter(),
 514         ));
 515         assert_eq!(cmd.trim_end_matches('\0'), r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#);
 516     }
 517 }