src/eval.rs

   1 //! Main evaluator loop and setting up the initial stack frame.
   2
   3 use std::ffi::OsStr;
   4 use std::iter;
   5
   6 use log::info;
   7
   8 use rustc_hir::def_id::DefId;
   9 use rustc_middle::ty::{
  10     self,
  11     layout::{LayoutCx, LayoutOf},
  12     TyCtxt,
  13 };
  14 use rustc_target::spec::abi::Abi;
  15
  16 use rustc_session::config::EntryFnType;
  17
  18 use std::collections::HashSet;
  19
  20 use crate::*;
  21
  22 #[derive(Copy, Clone, Debug, PartialEq)]
  23 pub enum AlignmentCheck {
  24     /// Do not check alignment.
  25     None,
  26     /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address.
  27     Symbolic,
  28     /// Check alignment on the actual physical integer address.
  29     Int,
  30 }
  31
  32 #[derive(Copy, Clone, Debug, PartialEq)]
  33 pub enum RejectOpWith {
  34     /// Isolated op is rejected with an abort of the machine.
  35     Abort,
  36
  37     /// If not Abort, miri returns an error for an isolated op.
  38     /// Following options determine if user should be warned about such error.
  39     /// Do not print warning about rejected isolated op.
  40     NoWarning,
  41
  42     /// Print a warning about rejected isolated op, with backtrace.
  43     Warning,
  44
  45     /// Print a warning about rejected isolated op, without backtrace.
  46     WarningWithoutBacktrace,
  47 }
  48
  49 #[derive(Copy, Clone, Debug, PartialEq)]
  50 pub enum IsolatedOp {
  51     /// Reject an op requiring communication with the host. By
  52     /// default, miri rejects the op with an abort. If not, it returns
  53     /// an error code, and prints a warning about it. Warning levels
  54     /// are controlled by `RejectOpWith` enum.
  55     Reject(RejectOpWith),
  56
  57     /// Execute op requiring communication with the host, i.e. disable isolation.
  58     Allow,
  59 }
  60
  61 #[derive(Copy, Clone, PartialEq, Eq)]
  62 pub enum BacktraceStyle {
  63     /// Prints a terser backtrace which ideally only contains relevant information.
  64     Short,
  65     /// Prints a backtrace with all possible information.
  66     Full,
  67     /// Prints only the frame that the error occurs in.
  68     Off,
  69 }
  70
/// Configuration needed to spawn a Miri instance.
///
/// The values used when nothing is overridden are given by the `Default` impl.
#[derive(Clone)]
pub struct MiriConfig {
    /// Determines if validity checking is enabled.
    pub validate: bool,
    /// Determines if Stacked Borrows is enabled.
    pub stacked_borrows: bool,
    /// Controls alignment checking.
    pub check_alignment: AlignmentCheck,
    /// Controls integer and float validity initialization checking.
    pub allow_uninit_numbers: bool,
    /// Controls how we treat ptr2int and int2ptr transmutes.
    pub allow_ptr_int_transmute: bool,
    /// Controls function [ABI](Abi) checking.
    pub check_abi: bool,
    /// Action for an op requiring communication with the host.
    pub isolated_op: IsolatedOp,
    /// Determines if memory leaks should be ignored.
    pub ignore_leaks: bool,
    /// Environment variables that should always be isolated from the host.
    pub excluded_env_vars: Vec<String>,
    /// Environment variables that should always be forwarded from the host.
    pub forwarded_env_vars: Vec<String>,
    /// Command-line arguments passed to the interpreted program.
    pub args: Vec<String>,
    /// The seed to use when non-determinism or randomness are required
    /// (e.g. ptr-to-int cast, `getrandom()`).
    pub seed: Option<u64>,
    /// The Stacked Borrows pointer ids to report about.
    pub tracked_pointer_tags: HashSet<SbTag>,
    /// The Stacked Borrows call IDs to report about.
    pub tracked_call_ids: HashSet<CallId>,
    /// The allocation ids to report about.
    pub tracked_alloc_ids: HashSet<AllocId>,
    /// Determines if data race detection should be enabled.
    pub data_race_detector: bool,
    /// Determines if weak memory emulation should be enabled.
    /// Requires data race detection to be enabled.
    pub weak_memory_emulation: bool,
    /// Rate of spurious failures for compare_exchange_weak atomic operations,
    /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure).
    pub cmpxchg_weak_failure_rate: f64,
    /// If `Some`, enable the `measureme` profiler, writing results to a file
    /// with the specified prefix.
    pub measureme_out: Option<String>,
    /// Panic when unsupported functionality is encountered.
    pub panic_on_unsupported: bool,
    /// Which style to use for printing backtraces.
    pub backtrace_style: BacktraceStyle,
    /// Which provenance to use for int2ptr casts.
    pub provenance_mode: ProvenanceMode,
    /// Whether to ignore any output by the program. This is helpful when debugging Miri
    /// as its messages don't get intermingled with the program messages.
    pub mute_stdout_stderr: bool,
    /// The probability of the active thread being preempted at the end of each basic block.
    pub preemption_rate: f64,
    /// Report the current instruction being executed every N basic blocks.
    pub report_progress: Option<u32>,
    /// Whether Stacked Borrows retagging should recurse into fields of datatypes.
    pub retag_fields: bool,
}
 130
 131 impl Default for MiriConfig {
 132     fn default() -> MiriConfig {
 133         MiriConfig {
 134             validate: true,
 135             stacked_borrows: true,
 136             check_alignment: AlignmentCheck::Int,
 137             allow_uninit_numbers: false,
 138             allow_ptr_int_transmute: false,
 139             check_abi: true,
 140             isolated_op: IsolatedOp::Reject(RejectOpWith::Abort),
 141             ignore_leaks: false,
 142             excluded_env_vars: vec![],
 143             forwarded_env_vars: vec![],
 144             args: vec![],
 145             seed: None,
 146             tracked_pointer_tags: HashSet::default(),
 147             tracked_call_ids: HashSet::default(),
 148             tracked_alloc_ids: HashSet::default(),
 149             data_race_detector: true,
 150             weak_memory_emulation: true,
 151             cmpxchg_weak_failure_rate: 0.8, // 80%
 152             measureme_out: None,
 153             panic_on_unsupported: false,
 154             backtrace_style: BacktraceStyle::Short,
 155             provenance_mode: ProvenanceMode::Default,
 156             mute_stdout_stderr: false,
 157             preemption_rate: 0.01, // 1%
 158             report_progress: None,
 159             retag_fields: false,
 160         }
 161     }
 162 }
 163
 164 /// Returns a freshly created `InterpCx`, along with an `MPlaceTy` representing
 165 /// the location where the return value of the `start` function will be
 166 /// written to.
 167 /// Public because this is also used by `priroda`.
 168 pub fn create_ecx<'mir, 'tcx: 'mir>(
 169     tcx: TyCtxt<'tcx>,
 170     entry_id: DefId,
 171     entry_type: EntryFnType,
 172     config: &MiriConfig,
 173 ) -> InterpResult<'tcx, (InterpCx<'mir, 'tcx, Evaluator<'mir, 'tcx>>, MPlaceTy<'tcx, Tag>)> {
 174     let param_env = ty::ParamEnv::reveal_all();
 175     let layout_cx = LayoutCx { tcx, param_env };
 176     let mut ecx = InterpCx::new(
 177         tcx,
 178         rustc_span::source_map::DUMMY_SP,
 179         param_env,
 180         Evaluator::new(config, layout_cx),
 181     );
 182
 183     // Capture the current interpreter stack state (which should be empty) so that we can emit
 184     // allocation-tracking and tag-tracking diagnostics for allocations which are part of the
 185     // early runtime setup.
 186     let info = ecx.preprocess_diagnostics();
 187
 188     // Some parts of initialization require a full `InterpCx`.
 189     Evaluator::late_init(&mut ecx, config)?;
 190
 191     // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
 192     let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"]);
 193     if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
 194         tcx.sess.fatal(
 195             "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
 196             Use `cargo miri setup` to prepare a sysroot that is suitable for Miri."
 197         );
 198     }
 199
 200     // Setup first stack frame.
 201     let entry_instance = ty::Instance::mono(tcx, entry_id);
 202
 203     // First argument is constructed later, because it's skipped if the entry function uses #[start].
 204
 205     // Second argument (argc): length of `config.args`.
 206     let argc = Scalar::from_machine_usize(u64::try_from(config.args.len()).unwrap(), &ecx);
 207     // Third argument (`argv`): created from `config.args`.
 208     let argv = {
 209         // Put each argument in memory, collect pointers.
 210         let mut argvs = Vec::<Immediate<Tag>>::new();
 211         for arg in config.args.iter() {
 212             // Make space for `0` terminator.
 213             let size = u64::try_from(arg.len()).unwrap().checked_add(1).unwrap();
 214             let arg_type = tcx.mk_array(tcx.types.u8, size);
 215             let arg_place =
 216                 ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?;
 217             ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr, size)?;
 218             ecx.mark_immutable(&arg_place);
 219             argvs.push(arg_place.to_ref(&ecx));
 220         }
 221         // Make an array with all these pointers, in the Miri memory.
 222         let argvs_layout = ecx.layout_of(
 223             tcx.mk_array(tcx.mk_imm_ptr(tcx.types.u8), u64::try_from(argvs.len()).unwrap()),
 224         )?;
 225         let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
 226         for (idx, arg) in argvs.into_iter().enumerate() {
 227             let place = ecx.mplace_field(&argvs_place, idx)?;
 228             ecx.write_immediate(arg, &place.into())?;
 229         }
 230         ecx.mark_immutable(&argvs_place);
 231         // A pointer to that place is the 3rd argument for main.
 232         let argv = argvs_place.to_ref(&ecx);
 233         // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
 234         {
 235             let argc_place =
 236                 ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
 237             ecx.write_scalar(argc, &argc_place.into())?;
 238             ecx.mark_immutable(&argc_place);
 239             ecx.machine.argc = Some(*argc_place);
 240
 241             let argv_place = ecx.allocate(
 242                 ecx.layout_of(tcx.mk_imm_ptr(tcx.types.unit))?,
 243                 MiriMemoryKind::Machine.into(),
 244             )?;
 245             ecx.write_immediate(argv, &argv_place.into())?;
 246             ecx.mark_immutable(&argv_place);
 247             ecx.machine.argv = Some(*argv_place);
 248         }
 249         // Store command line as UTF-16 for Windows `GetCommandLineW`.
 250         {
 251             // Construct a command string with all the aguments.
 252             let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
 253
 254             let cmd_type = tcx.mk_array(tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap());
 255             let cmd_place =
 256                 ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?;
 257             ecx.machine.cmd_line = Some(*cmd_place);
 258             // Store the UTF-16 string. We just allocated so we know the bounds are fine.
 259             for (idx, &c) in cmd_utf16.iter().enumerate() {
 260                 let place = ecx.mplace_field(&cmd_place, idx)?;
 261                 ecx.write_scalar(Scalar::from_u16(c), &place.into())?;
 262             }
 263             ecx.mark_immutable(&cmd_place);
 264         }
 265         argv
 266     };
 267
 268     // Return place (in static memory so that it does not count as leak).
 269     let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
 270     // Call start function.
 271
 272     match entry_type {
 273         EntryFnType::Main => {
 274             let start_id = tcx.lang_items().start_fn().unwrap();
 275             let main_ret_ty = tcx.fn_sig(entry_id).output();
 276             let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
 277             let start_instance = ty::Instance::resolve(
 278                 tcx,
 279                 ty::ParamEnv::reveal_all(),
 280                 start_id,
 281                 tcx.mk_substs(::std::iter::once(ty::subst::GenericArg::from(main_ret_ty))),
 282             )
 283             .unwrap()
 284             .unwrap();
 285
 286             let main_ptr = ecx.create_fn_alloc_ptr(FnVal::Instance(entry_instance));
 287
 288             ecx.call_function(
 289                 start_instance,
 290                 Abi::Rust,
 291                 &[Scalar::from_pointer(main_ptr, &ecx).into(), argc.into(), argv],
 292                 &ret_place.into(),
 293                 StackPopCleanup::Root { cleanup: true },
 294             )?;
 295         }
 296         EntryFnType::Start => {
 297             ecx.call_function(
 298                 entry_instance,
 299                 Abi::Rust,
 300                 &[argc.into(), argv],
 301                 &ret_place.into(),
 302                 StackPopCleanup::Root { cleanup: true },
 303             )?;
 304         }
 305     }
 306
 307     // Emit any diagnostics related to the setup process for the runtime, so that when the
 308     // interpreter loop starts there are no unprocessed diagnostics.
 309     ecx.process_diagnostics(info);
 310
 311     Ok((ecx, ret_place))
 312 }
 313
 314 /// Evaluates the entry function specified by `entry_id`.
 315 /// Returns `Some(return_code)` if program executed completed.
 316 /// Returns `None` if an evaluation error occured.
 317 pub fn eval_entry<'tcx>(
 318     tcx: TyCtxt<'tcx>,
 319     entry_id: DefId,
 320     entry_type: EntryFnType,
 321     config: MiriConfig,
 322 ) -> Option<i64> {
 323     // Copy setting before we move `config`.
 324     let ignore_leaks = config.ignore_leaks;
 325
 326     let (mut ecx, ret_place) = match create_ecx(tcx, entry_id, entry_type, &config) {
 327         Ok(v) => v,
 328         Err(err) => {
 329             err.print_backtrace();
 330             panic!("Miri initialization error: {}", err.kind())
 331         }
 332     };
 333
 334     // Perform the main execution.
 335     let res: InterpResult<'_, i64> = (|| {
 336         // Main loop.
 337         loop {
 338             let info = ecx.preprocess_diagnostics();
 339             match ecx.schedule()? {
 340                 SchedulingAction::ExecuteStep => {
 341                     assert!(ecx.step()?, "a terminated thread was scheduled for execution");
 342                 }
 343                 SchedulingAction::ExecuteTimeoutCallback => {
 344                     assert!(
 345                         ecx.machine.communicate(),
 346                         "scheduler callbacks require disabled isolation, but the code \
 347                         that created the callback did not check it"
 348                     );
 349                     ecx.run_timeout_callback()?;
 350                 }
 351                 SchedulingAction::ExecuteDtors => {
 352                     // This will either enable the thread again (so we go back
 353                     // to `ExecuteStep`), or determine that this thread is done
 354                     // for good.
 355                     ecx.schedule_next_tls_dtor_for_active_thread()?;
 356                 }
 357                 SchedulingAction::Stop => {
 358                     break;
 359                 }
 360             }
 361             ecx.process_diagnostics(info);
 362         }
 363         let return_code = ecx.read_scalar(&ret_place.into())?.to_machine_isize(&ecx)?;
 364         Ok(return_code)
 365     })();
 366
 367     // Machine cleanup.
 368     // Execution of the program has halted so any memory access we do here
 369     // cannot produce a real data race. If we do not do something to disable
 370     // data race detection here, some uncommon combination of errors will
 371     // cause a data race to be detected:
 372     // https://github.com/rust-lang/miri/issues/2020
 373     ecx.allow_data_races_mut(|ecx| EnvVars::cleanup(ecx).unwrap());
 374
 375     // Process the result.
 376     match res {
 377         Ok(return_code) => {
 378             if !ignore_leaks {
 379                 // Check for thread leaks.
 380                 if !ecx.have_all_terminated() {
 381                     tcx.sess.err(
 382                         "the main thread terminated without waiting for all remaining threads",
 383                     );
 384                     tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
 385                     return None;
 386                 }
 387                 // Check for memory leaks.
 388                 info!("Additonal static roots: {:?}", ecx.machine.static_roots);
 389                 let leaks = ecx.leak_report(&ecx.machine.static_roots);
 390                 if leaks != 0 {
 391                     tcx.sess.err("the evaluated program leaked memory");
 392                     tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check");
 393                     // Ignore the provided return code - let the reported error
 394                     // determine the return code.
 395                     return None;
 396                 }
 397             }
 398             Some(return_code)
 399         }
 400         Err(e) => report_error(&ecx, e),
 401     }
 402 }
 403
 404 /// Turns an array of arguments into a Windows command line string.
 405 ///
 406 /// The string will be UTF-16 encoded and NUL terminated.
 407 ///
 408 /// Panics if the zeroth argument contains the `"` character because doublequotes
 409 /// in `argv[0]` cannot be encoded using the standard command line parsing rules.
 410 ///
 411 /// Further reading:
 412 /// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
 413 /// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
 414 fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
 415 where
 416     I: Iterator<Item = T>,
 417     T: AsRef<str>,
 418 {
 419     // Parse argv[0]. Slashes aren't escaped. Literal double quotes are not allowed.
 420     let mut cmd = {
 421         let arg0 = if let Some(arg0) = args.next() {
 422             arg0
 423         } else {
 424             return vec![0];
 425         };
 426         let arg0 = arg0.as_ref();
 427         if arg0.contains('"') {
 428             panic!("argv[0] cannot contain a doublequote (\") character");
 429         } else {
 430             // Always surround argv[0] with quotes.
 431             let mut s = String::new();
 432             s.push('"');
 433             s.push_str(arg0);
 434             s.push('"');
 435             s
 436         }
 437     };
 438
 439     // Build the other arguments.
 440     for arg in args {
 441         let arg = arg.as_ref();
 442         cmd.push(' ');
 443         if arg.is_empty() {
 444             cmd.push_str("\"\"");
 445         } else if !arg.bytes().any(|c| matches!(c, b'"' | b'\t' | b' ')) {
 446             // No quote, tab, or space -- no escaping required.
 447             cmd.push_str(arg);
 448         } else {
 449             // Spaces and tabs are escaped by surrounding them in quotes.
 450             // Quotes are themselves escaped by using backslashes when in a
 451             // quoted block.
 452             // Backslashes only need to be escaped when one or more are directly
 453             // followed by a quote. Otherwise they are taken literally.
 454
 455             cmd.push('"');
 456             let mut chars = arg.chars().peekable();
 457             loop {
 458                 let mut nslashes = 0;
 459                 while let Some(&'\\') = chars.peek() {
 460                     chars.next();
 461                     nslashes += 1;
 462                 }
 463
 464                 match chars.next() {
 465                     Some('"') => {
 466                         cmd.extend(iter::repeat('\\').take(nslashes * 2 + 1));
 467                         cmd.push('"');
 468                     }
 469                     Some(c) => {
 470                         cmd.extend(iter::repeat('\\').take(nslashes));
 471                         cmd.push(c);
 472                     }
 473                     None => {
 474                         cmd.extend(iter::repeat('\\').take(nslashes * 2));
 475                         break;
 476                     }
 477                 }
 478             }
 479             cmd.push('"');
 480         }
 481     }
 482
 483     if cmd.contains('\0') {
 484         panic!("interior null in command line arguments");
 485     }
 486     cmd.encode_utf16().chain(iter::once(0)).collect()
 487 }
 488
 489 #[cfg(test)]
 490 mod tests {
 491     use super::*;
 492     #[test]
 493     #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
 494     fn windows_argv0_panic_on_quote() {
 495         args_to_utf16_command_string(["\""].iter());
 496     }
 497     #[test]
 498     fn windows_argv0_no_escape() {
 499         // Ensure that a trailing backslash in argv[0] is not escaped.
 500         let cmd = String::from_utf16_lossy(&args_to_utf16_command_string(
 501             [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"].iter(),
 502         ));
 503         assert_eq!(cmd.trim_end_matches('\0'), r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#);
 504     }
 505 }