Auto merge of #2162 - RalfJung:rustup, r=RalfJung

[rust.git] / src / shims / intrinsics.rs
diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs

index 39ed5ada0aa9989bf2a36743856cd6617d7ecd4c..1f06971a3e70db0128c06876f4c3dc95f451bbe4 100644 (file)
--- a/src/shims/intrinsics.rs
+++ b/src/shims/intrinsics.rs
@@ -3,9 +3,9 @@
  use log::trace;
  
  use rustc_apfloat::{Float, Round};
-use rustc_middle::ty::layout::{IntegerExt, LayoutOf};
+use rustc_middle::ty::layout::{HasParamEnv, IntegerExt, LayoutOf};
  use rustc_middle::{mir, mir::BinOp, ty, ty::FloatTy};
-use rustc_target::abi::{Align, Integer};
+use rustc_target::abi::{Align, Endian, HasDataLayout, Integer, Size};
  
  use crate::*;
  use helpers::check_arg_count;
@@ -22,18 +22,20 @@ fn call_intrinsic(
          &mut self,
          instance: ty::Instance<'tcx>,
          args: &[OpTy<'tcx, Tag>],
-        ret: Option<(&PlaceTy<'tcx, Tag>, mir::BasicBlock)>,
+        dest: &PlaceTy<'tcx, Tag>,
+        ret: Option<mir::BasicBlock>,
          _unwind: StackPopUnwind,
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        if this.emulate_intrinsic(instance, args, ret)? {
+        if this.emulate_intrinsic(instance, args, dest, ret)? {
              return Ok(());
          }
  
          // All supported intrinsics have a return place.
-        let intrinsic_name = &*this.tcx.item_name(instance.def_id()).as_str();
-        let (dest, ret) = match ret {
+        let intrinsic_name = this.tcx.item_name(instance.def_id());
+        let intrinsic_name = intrinsic_name.as_str();
+        let ret = match ret {
              None => throw_unsup_format!("unimplemented (diverging) intrinsic: {}", intrinsic_name),
              Some(p) => p,
          };
@@ -42,48 +44,71 @@ fn call_intrinsic(
          match intrinsic_name {
              // Miri overwriting CTFE intrinsics.
              "ptr_guaranteed_eq" => {
-                let &[ref left, ref right] = check_arg_count(args)?;
+                let [left, right] = check_arg_count(args)?;
                  let left = this.read_immediate(left)?;
                  let right = this.read_immediate(right)?;
                  this.binop_ignore_overflow(mir::BinOp::Eq, &left, &right, dest)?;
              }
              "ptr_guaranteed_ne" => {
-                let &[ref left, ref right] = check_arg_count(args)?;
+                let [left, right] = check_arg_count(args)?;
                  let left = this.read_immediate(left)?;
                  let right = this.read_immediate(right)?;
                  this.binop_ignore_overflow(mir::BinOp::Ne, &left, &right, dest)?;
              }
+            "const_allocate" => {
+                // For now, for compatibility with the run-time implementation of this, we just return null.
+                // See <https://github.com/rust-lang/rust/issues/93935>.
+                this.write_null(dest)?;
+            }
+            "const_deallocate" => {
+                // complete NOP
+            }
  
              // Raw memory accesses
              "volatile_load" => {
-                let &[ref place] = check_arg_count(args)?;
+                let [place] = check_arg_count(args)?;
                  let place = this.deref_operand(place)?;
                  this.copy_op(&place.into(), dest)?;
              }
              "volatile_store" => {
-                let &[ref place, ref dest] = check_arg_count(args)?;
+                let [place, dest] = check_arg_count(args)?;
                  let place = this.deref_operand(place)?;
                  this.copy_op(dest, &place.into())?;
              }
  
              "write_bytes" | "volatile_set_memory" => {
-                let &[ref ptr, ref val_byte, ref count] = check_arg_count(args)?;
+                let [ptr, val_byte, count] = check_arg_count(args)?;
                  let ty = instance.substs.type_at(0);
                  let ty_layout = this.layout_of(ty)?;
                  let val_byte = this.read_scalar(val_byte)?.to_u8()?;
                  let ptr = this.read_pointer(ptr)?;
                  let count = this.read_scalar(count)?.to_machine_usize(this)?;
+                // `checked_mul` enforces a too small bound (the correct one would probably be machine_isize_max),
+                // but no actual allocation can be big enough for the difference to be noticeable.
                  let byte_count = ty_layout.size.checked_mul(count, this).ok_or_else(|| {
                      err_ub_format!("overflow computing total size of `{}`", intrinsic_name)
                  })?;
-                this.memory
-                    .write_bytes(ptr, iter::repeat(val_byte).take(byte_count.bytes() as usize))?;
+                this.write_bytes_ptr(
+                    ptr,
+                    iter::repeat(val_byte).take(byte_count.bytes() as usize),
+                )?;
              }
  
              // Floating-point operations
+            "fabsf32" => {
+                let [f] = check_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f32()?;
+                // Can be implemented in soft-floats.
+                this.write_scalar(Scalar::from_f32(f.abs()), dest)?;
+            }
+            "fabsf64" => {
+                let [f] = check_arg_count(args)?;
+                let f = this.read_scalar(f)?.to_f64()?;
+                // Can be implemented in soft-floats.
+                this.write_scalar(Scalar::from_f64(f.abs()), dest)?;
+            }
              #[rustfmt::skip]
              | "sinf32"
-            | "fabsf32"
              | "cosf32"
              | "sqrtf32"
              | "expf32"
@@ -96,12 +121,11 @@ fn call_intrinsic(
              | "truncf32"
              | "roundf32"
              => {
-                let &[ref f] = check_arg_count(args)?;
+                let [f] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f32::from_bits(this.read_scalar(f)?.to_u32()?);
                  let f = match intrinsic_name {
                      "sinf32" => f.sin(),
-                    "fabsf32" => f.abs(),
                      "cosf32" => f.cos(),
                      "sqrtf32" => f.sqrt(),
                      "expf32" => f.exp(),
@@ -120,7 +144,6 @@ fn call_intrinsic(
  
              #[rustfmt::skip]
              | "sinf64"
-            | "fabsf64"
              | "cosf64"
              | "sqrtf64"
              | "expf64"
@@ -133,12 +156,11 @@ fn call_intrinsic(
              | "truncf64"
              | "roundf64"
              => {
-                let &[ref f] = check_arg_count(args)?;
+                let [f] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f64::from_bits(this.read_scalar(f)?.to_u64()?);
                  let f = match intrinsic_name {
                      "sinf64" => f.sin(),
-                    "fabsf64" => f.abs(),
                      "cosf64" => f.cos(),
                      "sqrtf64" => f.sqrt(),
                      "expf64" => f.exp(),
@@ -162,7 +184,7 @@ fn call_intrinsic(
              | "fdiv_fast"
              | "frem_fast"
              => {
-                let &[ref a, ref b] = check_arg_count(args)?;
+                let [a, b] = check_arg_count(args)?;
                  let a = this.read_immediate(a)?;
                  let b = this.read_immediate(b)?;
                  let op = match intrinsic_name {
@@ -207,7 +229,7 @@ fn call_intrinsic(
              | "maxnumf32"
              | "copysignf32"
              => {
-                let &[ref a, ref b] = check_arg_count(args)?;
+                let [a, b] = check_arg_count(args)?;
                  let a = this.read_scalar(a)?.to_f32()?;
                  let b = this.read_scalar(b)?.to_f32()?;
                  let res = match intrinsic_name {
@@ -224,7 +246,7 @@ fn call_intrinsic(
              | "maxnumf64"
              | "copysignf64"
              => {
-                let &[ref a, ref b] = check_arg_count(args)?;
+                let [a, b] = check_arg_count(args)?;
                  let a = this.read_scalar(a)?.to_f64()?;
                  let b = this.read_scalar(b)?.to_f64()?;
                  let res = match intrinsic_name {
@@ -237,7 +259,7 @@ fn call_intrinsic(
              }
  
              "powf32" => {
-                let &[ref f, ref f2] = check_arg_count(args)?;
+                let [f, f2] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f32::from_bits(this.read_scalar(f)?.to_u32()?);
                  let f2 = f32::from_bits(this.read_scalar(f2)?.to_u32()?);
@@ -245,7 +267,7 @@ fn call_intrinsic(
              }
  
              "powf64" => {
-                let &[ref f, ref f2] = check_arg_count(args)?;
+                let [f, f2] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f64::from_bits(this.read_scalar(f)?.to_u64()?);
                  let f2 = f64::from_bits(this.read_scalar(f2)?.to_u64()?);
@@ -253,7 +275,7 @@ fn call_intrinsic(
              }
  
              "fmaf32" => {
-                let &[ref a, ref b, ref c] = check_arg_count(args)?;
+                let [a, b, c] = check_arg_count(args)?;
                  let a = this.read_scalar(a)?.to_f32()?;
                  let b = this.read_scalar(b)?.to_f32()?;
                  let c = this.read_scalar(c)?.to_f32()?;
@@ -262,7 +284,7 @@ fn call_intrinsic(
              }
  
              "fmaf64" => {
-                let &[ref a, ref b, ref c] = check_arg_count(args)?;
+                let [a, b, c] = check_arg_count(args)?;
                  let a = this.read_scalar(a)?.to_f64()?;
                  let b = this.read_scalar(b)?.to_f64()?;
                  let c = this.read_scalar(c)?.to_f64()?;
@@ -271,7 +293,7 @@ fn call_intrinsic(
              }
  
              "powif32" => {
-                let &[ref f, ref i] = check_arg_count(args)?;
+                let [f, i] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f32::from_bits(this.read_scalar(f)?.to_u32()?);
                  let i = this.read_scalar(i)?.to_i32()?;
@@ -279,7 +301,7 @@ fn call_intrinsic(
              }
  
              "powif64" => {
-                let &[ref f, ref i] = check_arg_count(args)?;
+                let [f, i] = check_arg_count(args)?;
                  // FIXME: Using host floats.
                  let f = f64::from_bits(this.read_scalar(f)?.to_u64()?);
                  let i = this.read_scalar(i)?.to_i32()?;
@@ -287,7 +309,7 @@ fn call_intrinsic(
              }
  
              "float_to_int_unchecked" => {
-                let &[ref val] = check_arg_count(args)?;
+                let [val] = check_arg_count(args)?;
                  let val = this.read_immediate(val)?;
  
                  let res = match val.layout.ty.kind() {
@@ -307,14 +329,120 @@ fn call_intrinsic(
  
              // SIMD operations
              #[rustfmt::skip]
+            | "simd_neg"
+            | "simd_fabs"
+            | "simd_ceil"
+            | "simd_floor"
+            | "simd_round"
+            | "simd_trunc"
+            | "simd_fsqrt" => {
+                let [op] = check_arg_count(args)?;
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, op_len);
+
+                #[derive(Copy, Clone)]
+                enum HostFloatOp {
+                    Ceil,
+                    Floor,
+                    Round,
+                    Trunc,
+                    Sqrt,
+                }
+                #[derive(Copy, Clone)]
+                enum Op {
+                    MirOp(mir::UnOp),
+                    Abs,
+                    HostOp(HostFloatOp),
+                }
+                let which = match intrinsic_name {
+                    "simd_neg" => Op::MirOp(mir::UnOp::Neg),
+                    "simd_fabs" => Op::Abs,
+                    "simd_ceil" => Op::HostOp(HostFloatOp::Ceil),
+                    "simd_floor" => Op::HostOp(HostFloatOp::Floor),
+                    "simd_round" => Op::HostOp(HostFloatOp::Round),
+                    "simd_trunc" => Op::HostOp(HostFloatOp::Trunc),
+                    "simd_fsqrt" => Op::HostOp(HostFloatOp::Sqrt),
+                    _ => unreachable!(),
+                };
+
+                for i in 0..dest_len {
+                    let op = this.read_immediate(&this.mplace_index(&op, i)?.into())?;
+                    let dest = this.mplace_index(&dest, i)?;
+                    let val = match which {
+                        Op::MirOp(mir_op) => this.unary_op(mir_op, &op)?.to_scalar()?,
+                        Op::Abs => {
+                            // Works for f32 and f64.
+                            let ty::Float(float_ty) = op.layout.ty.kind() else {
+                                bug!("{} operand is not a float", intrinsic_name)
+                            };
+                            let op = op.to_scalar()?;
+                            match float_ty {
+                                FloatTy::F32 => Scalar::from_f32(op.to_f32()?.abs()),
+                                FloatTy::F64 => Scalar::from_f64(op.to_f64()?.abs()),
+                            }
+                        }
+                        Op::HostOp(host_op) => {
+                            let ty::Float(float_ty) = op.layout.ty.kind() else {
+                                bug!("{} operand is not a float", intrinsic_name)
+                            };
+                            // FIXME using host floats
+                            match float_ty {
+                                FloatTy::F32 => {
+                                    let f = f32::from_bits(op.to_scalar()?.to_u32()?);
+                                    let res = match host_op {
+                                        HostFloatOp::Ceil => f.ceil(),
+                                        HostFloatOp::Floor => f.floor(),
+                                        HostFloatOp::Round => f.round(),
+                                        HostFloatOp::Trunc => f.trunc(),
+                                        HostFloatOp::Sqrt => f.sqrt(),
+                                    };
+                                    Scalar::from_u32(res.to_bits())
+                                }
+                                FloatTy::F64 => {
+                                    let f = f64::from_bits(op.to_scalar()?.to_u64()?);
+                                    let res = match host_op {
+                                        HostFloatOp::Ceil => f.ceil(),
+                                        HostFloatOp::Floor => f.floor(),
+                                        HostFloatOp::Round => f.round(),
+                                        HostFloatOp::Trunc => f.trunc(),
+                                        HostFloatOp::Sqrt => f.sqrt(),
+                                    };
+                                    Scalar::from_u64(res.to_bits())
+                                }
+                            }
+
+                        }
+                    };
+                    this.write_scalar(val, &dest.into())?;
+                }
+            }
+            #[rustfmt::skip]
              | "simd_add"
              | "simd_sub"
              | "simd_mul"
              | "simd_div"
              | "simd_rem"
              | "simd_shl"
-            | "simd_shr" => {
-                let &[ref left, ref right] = check_arg_count(args)?;
+            | "simd_shr"
+            | "simd_and"
+            | "simd_or"
+            | "simd_xor"
+            | "simd_eq"
+            | "simd_ne"
+            | "simd_lt"
+            | "simd_le"
+            | "simd_gt"
+            | "simd_ge"
+            | "simd_fmax"
+            | "simd_fmin"
+            | "simd_saturating_add"
+            | "simd_saturating_sub"
+            | "simd_arith_offset" => {
+                use mir::BinOp;
+
+                let [left, right] = check_arg_count(args)?;
                  let (left, left_len) = this.operand_to_simd(left)?;
                  let (right, right_len) = this.operand_to_simd(right)?;
                  let (dest, dest_len) = this.place_to_simd(dest)?;
@@ -322,14 +450,35 @@ fn call_intrinsic(
                  assert_eq!(dest_len, left_len);
                  assert_eq!(dest_len, right_len);
  
-                let op = match intrinsic_name {
-                    "simd_add" => mir::BinOp::Add,
-                    "simd_sub" => mir::BinOp::Sub,
-                    "simd_mul" => mir::BinOp::Mul,
-                    "simd_div" => mir::BinOp::Div,
-                    "simd_rem" => mir::BinOp::Rem,
-                    "simd_shl" => mir::BinOp::Shl,
-                    "simd_shr" => mir::BinOp::Shr,
+                enum Op {
+                    MirOp(BinOp),
+                    SaturatingOp(BinOp),
+                    FMax,
+                    FMin,
+                    WrappingOffset,
+                }
+                let which = match intrinsic_name {
+                    "simd_add" => Op::MirOp(BinOp::Add),
+                    "simd_sub" => Op::MirOp(BinOp::Sub),
+                    "simd_mul" => Op::MirOp(BinOp::Mul),
+                    "simd_div" => Op::MirOp(BinOp::Div),
+                    "simd_rem" => Op::MirOp(BinOp::Rem),
+                    "simd_shl" => Op::MirOp(BinOp::Shl),
+                    "simd_shr" => Op::MirOp(BinOp::Shr),
+                    "simd_and" => Op::MirOp(BinOp::BitAnd),
+                    "simd_or" => Op::MirOp(BinOp::BitOr),
+                    "simd_xor" => Op::MirOp(BinOp::BitXor),
+                    "simd_eq" => Op::MirOp(BinOp::Eq),
+                    "simd_ne" => Op::MirOp(BinOp::Ne),
+                    "simd_lt" => Op::MirOp(BinOp::Lt),
+                    "simd_le" => Op::MirOp(BinOp::Le),
+                    "simd_gt" => Op::MirOp(BinOp::Gt),
+                    "simd_ge" => Op::MirOp(BinOp::Ge),
+                    "simd_fmax" => Op::FMax,
+                    "simd_fmin" => Op::FMin,
+                    "simd_saturating_add" => Op::SaturatingOp(BinOp::Add),
+                    "simd_saturating_sub" => Op::SaturatingOp(BinOp::Sub),
+                    "simd_arith_offset" => Op::WrappingOffset,
                      _ => unreachable!(),
                  };
  
@@ -337,19 +486,382 @@ fn call_intrinsic(
                      let left = this.read_immediate(&this.mplace_index(&left, i)?.into())?;
                      let right = this.read_immediate(&this.mplace_index(&right, i)?.into())?;
                      let dest = this.mplace_index(&dest, i)?;
-                    let (val, overflowed, ty) = this.overflowing_binary_op(op, &left, &right)?;
-                    assert_eq!(ty, dest.layout.ty);
-                    if matches!(op, mir::BinOp::Shl | mir::BinOp::Shr) {
-                        // Shifts have extra UB as SIMD operations that the MIR binop does not have.
-                        // See <https://github.com/rust-lang/rust/issues/91237>.
-                        if overflowed {
-                            let r_val = right.to_scalar()?.to_bits(right.layout.size)?;
-                            throw_ub_format!("overflowing shift by {} in `{}` in SIMD lane {}", r_val, intrinsic_name, i);
+                    let val = match which {
+                        Op::MirOp(mir_op) => {
+                            let (val, overflowed, ty) = this.overflowing_binary_op(mir_op, &left, &right)?;
+                            if matches!(mir_op, BinOp::Shl | BinOp::Shr) {
+                                // Shifts have extra UB as SIMD operations that the MIR binop does not have.
+                                // See <https://github.com/rust-lang/rust/issues/91237>.
+                                if overflowed {
+                                    let r_val = right.to_scalar()?.to_bits(right.layout.size)?;
+                                    throw_ub_format!("overflowing shift by {} in `{}` in SIMD lane {}", r_val, intrinsic_name, i);
+                                }
+                            }
+                            if matches!(mir_op, BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge) {
+                                // Special handling for boolean-returning operations
+                                assert_eq!(ty, this.tcx.types.bool);
+                                let val = val.to_bool().unwrap();
+                                bool_to_simd_element(val, dest.layout.size)
+                            } else {
+                                assert_ne!(ty, this.tcx.types.bool);
+                                assert_eq!(ty, dest.layout.ty);
+                                val
+                            }
                          }
-                    }
+                        Op::SaturatingOp(mir_op) => {
+                            this.saturating_arith(mir_op, &left, &right)?
+                        }
+                        Op::WrappingOffset => {
+                            let ptr = this.scalar_to_ptr(left.to_scalar()?)?;
+                            let offset_count = right.to_scalar()?.to_machine_isize(this)?;
+                            let pointee_ty = left.layout.ty.builtin_deref(true).unwrap().ty;
+
+                            let pointee_size = i64::try_from(this.layout_of(pointee_ty)?.size.bytes()).unwrap();
+                            let offset_bytes = offset_count.wrapping_mul(pointee_size);
+                            let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, this);
+                            Scalar::from_maybe_pointer(offset_ptr, this)
+                        }
+                        Op::FMax => {
+                            fmax_op(&left, &right)?
+                        }
+                        Op::FMin => {
+                            fmin_op(&left, &right)?
+                        }
+                    };
+                    this.write_scalar(val, &dest.into())?;
+                }
+            }
+            "simd_fma" => {
+                let [a, b, c] = check_arg_count(args)?;
+                let (a, a_len) = this.operand_to_simd(a)?;
+                let (b, b_len) = this.operand_to_simd(b)?;
+                let (c, c_len) = this.operand_to_simd(c)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, a_len);
+                assert_eq!(dest_len, b_len);
+                assert_eq!(dest_len, c_len);
+
+                for i in 0..dest_len {
+                    let a = this.read_immediate(&this.mplace_index(&a, i)?.into())?.to_scalar()?;
+                    let b = this.read_immediate(&this.mplace_index(&b, i)?.into())?.to_scalar()?;
+                    let c = this.read_immediate(&this.mplace_index(&c, i)?.into())?.to_scalar()?;
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    // Works for f32 and f64.
+                    let ty::Float(float_ty) = dest.layout.ty.kind() else {
+                        bug!("{} operand is not a float", intrinsic_name)
+                    };
+                    let val = match float_ty {
+                        FloatTy::F32 =>
+                            Scalar::from_f32(a.to_f32()?.mul_add(b.to_f32()?, c.to_f32()?).value),
+                        FloatTy::F64 =>
+                            Scalar::from_f64(a.to_f64()?.mul_add(b.to_f64()?, c.to_f64()?).value),
+                    };
                      this.write_scalar(val, &dest.into())?;
                  }
              }
+            #[rustfmt::skip]
+            | "simd_reduce_and"
+            | "simd_reduce_or"
+            | "simd_reduce_xor"
+            | "simd_reduce_any"
+            | "simd_reduce_all"
+            | "simd_reduce_max"
+            | "simd_reduce_min" => {
+                use mir::BinOp;
+
+                let [op] = check_arg_count(args)?;
+                let (op, op_len) = this.operand_to_simd(op)?;
+
+                let imm_from_bool =
+                    |b| ImmTy::from_scalar(Scalar::from_bool(b), this.machine.layouts.bool);
+
+                enum Op {
+                    MirOp(BinOp),
+                    MirOpBool(BinOp),
+                    Max,
+                    Min,
+                }
+                let which = match intrinsic_name {
+                    "simd_reduce_and" => Op::MirOp(BinOp::BitAnd),
+                    "simd_reduce_or" => Op::MirOp(BinOp::BitOr),
+                    "simd_reduce_xor" => Op::MirOp(BinOp::BitXor),
+                    "simd_reduce_any" => Op::MirOpBool(BinOp::BitOr),
+                    "simd_reduce_all" => Op::MirOpBool(BinOp::BitAnd),
+                    "simd_reduce_max" => Op::Max,
+                    "simd_reduce_min" => Op::Min,
+                    _ => unreachable!(),
+                };
+
+                // Initialize with first lane, then proceed with the rest.
+                let mut res = this.read_immediate(&this.mplace_index(&op, 0)?.into())?;
+                if matches!(which, Op::MirOpBool(_)) {
+                    // Convert to `bool` scalar.
+                    res = imm_from_bool(simd_element_to_bool(res)?);
+                }
+                for i in 1..op_len {
+                    let op = this.read_immediate(&this.mplace_index(&op, i)?.into())?;
+                    res = match which {
+                        Op::MirOp(mir_op) => {
+                            this.binary_op(mir_op, &res, &op)?
+                        }
+                        Op::MirOpBool(mir_op) => {
+                            let op = imm_from_bool(simd_element_to_bool(op)?);
+                            this.binary_op(mir_op, &res, &op)?
+                        }
+                        Op::Max => {
+                            if matches!(res.layout.ty.kind(), ty::Float(_)) {
+                                ImmTy::from_scalar(fmax_op(&res, &op)?, res.layout)
+                            } else {
+                                // Just boring integers, so no NaNs to worry about
+                                if this.binary_op(BinOp::Ge, &res, &op)?.to_scalar()?.to_bool()? {
+                                    res
+                                } else {
+                                    op
+                                }
+                            }
+                        }
+                        Op::Min => {
+                            if matches!(res.layout.ty.kind(), ty::Float(_)) {
+                                ImmTy::from_scalar(fmin_op(&res, &op)?, res.layout)
+                            } else {
+                                // Just boring integers, so no NaNs to worry about
+                                if this.binary_op(BinOp::Le, &res, &op)?.to_scalar()?.to_bool()? {
+                                    res
+                                } else {
+                                    op
+                                }
+                            }
+                        }
+                    };
+                }
+                this.write_immediate(*res, dest)?;
+            }
+            #[rustfmt::skip]
+            | "simd_reduce_add_ordered"
+            | "simd_reduce_mul_ordered" => {
+                use mir::BinOp;
+
+                let [op, init] = check_arg_count(args)?;
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let init = this.read_immediate(init)?;
+
+                let mir_op = match intrinsic_name {
+                    "simd_reduce_add_ordered" => BinOp::Add,
+                    "simd_reduce_mul_ordered" => BinOp::Mul,
+                    _ => unreachable!(),
+                };
+
+                let mut res = init;
+                for i in 0..op_len {
+                    let op = this.read_immediate(&this.mplace_index(&op, i)?.into())?;
+                    res = this.binary_op(mir_op, &res, &op)?;
+                }
+                this.write_immediate(*res, dest)?;
+            }
+            "simd_select" => {
+                let [mask, yes, no] = check_arg_count(args)?;
+                let (mask, mask_len) = this.operand_to_simd(mask)?;
+                let (yes, yes_len) = this.operand_to_simd(yes)?;
+                let (no, no_len) = this.operand_to_simd(no)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, mask_len);
+                assert_eq!(dest_len, yes_len);
+                assert_eq!(dest_len, no_len);
+
+                for i in 0..dest_len {
+                    let mask = this.read_immediate(&this.mplace_index(&mask, i)?.into())?;
+                    let yes = this.read_immediate(&this.mplace_index(&yes, i)?.into())?;
+                    let no = this.read_immediate(&this.mplace_index(&no, i)?.into())?;
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    let val = if simd_element_to_bool(mask)? { yes } else { no };
+                    this.write_immediate(*val, &dest.into())?;
+                }
+            }
+            "simd_select_bitmask" => {
+                let [mask, yes, no] = check_arg_count(args)?;
+                let (yes, yes_len) = this.operand_to_simd(yes)?;
+                let (no, no_len) = this.operand_to_simd(no)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+                let bitmask_len = dest_len.max(8);
+
+                assert!(mask.layout.ty.is_integral());
+                assert!(bitmask_len <= 64);
+                assert_eq!(bitmask_len, mask.layout.size.bits());
+                assert_eq!(dest_len, yes_len);
+                assert_eq!(dest_len, no_len);
+
+                let mask: u64 = this
+                    .read_scalar(mask)?
+                    .check_init()?
+                    .to_bits(mask.layout.size)?
+                    .try_into()
+                    .unwrap();
+                for i in 0..dest_len {
+                    let mask =
+                        mask & (1 << simd_bitmask_index(i, dest_len, this.data_layout().endian));
+                    let yes = this.read_immediate(&this.mplace_index(&yes, i)?.into())?;
+                    let no = this.read_immediate(&this.mplace_index(&no, i)?.into())?;
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    let val = if mask != 0 { yes } else { no };
+                    this.write_immediate(*val, &dest.into())?;
+                }
+                for i in dest_len..bitmask_len {
+                    // If the mask is "padded", ensure that padding is all-zero.
+                    let mask = mask & (1 << i);
+                    if mask != 0 {
+                        throw_ub_format!(
+                            "a SIMD bitmask less than 8 bits long must be filled with 0s for the remaining bits"
+                        );
+                    }
+                }
+            }
+            #[rustfmt::skip]
+            "simd_cast" | "simd_as" => {
+                let [op] = check_arg_count(args)?;
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, op_len);
+
+                let safe_cast = intrinsic_name == "simd_as";
+
+                for i in 0..dest_len {
+                    let op = this.read_immediate(&this.mplace_index(&op, i)?.into())?;
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    let val = match (op.layout.ty.kind(), dest.layout.ty.kind()) {
+                        // Int-to-(int|float): always safe
+                        (ty::Int(_) | ty::Uint(_), ty::Int(_) | ty::Uint(_) | ty::Float(_)) =>
+                            this.misc_cast(&op, dest.layout.ty)?,
+                        // Float-to-float: always safe
+                        (ty::Float(_), ty::Float(_)) =>
+                            this.misc_cast(&op, dest.layout.ty)?,
+                        // Float-to-int in safe mode
+                        (ty::Float(_), ty::Int(_) | ty::Uint(_)) if safe_cast =>
+                            this.misc_cast(&op, dest.layout.ty)?,
+                        // Float-to-int in unchecked mode
+                        (ty::Float(FloatTy::F32), ty::Int(_) | ty::Uint(_)) if !safe_cast =>
+                            this.float_to_int_unchecked(op.to_scalar()?.to_f32()?, dest.layout.ty)?.into(),
+                        (ty::Float(FloatTy::F64), ty::Int(_) | ty::Uint(_)) if !safe_cast =>
+                            this.float_to_int_unchecked(op.to_scalar()?.to_f64()?, dest.layout.ty)?.into(),
+                        _ =>
+                            throw_unsup_format!(
+                                "Unsupported SIMD cast from element type {} to {}",
+                                op.layout.ty,
+                                dest.layout.ty
+                            ),
+                    };
+                    this.write_immediate(val, &dest.into())?;
+                }
+            }
+            "simd_shuffle" => {
+                let [left, right, index] = check_arg_count(args)?;
+                let (left, left_len) = this.operand_to_simd(left)?;
+                let (right, right_len) = this.operand_to_simd(right)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                // `index` is an array, not a SIMD type
+                let ty::Array(_, index_len) = index.layout.ty.kind() else {
+                    bug!("simd_shuffle index argument has non-array type {}", index.layout.ty)
+                };
+                let index_len = index_len.eval_usize(*this.tcx, this.param_env());
+
+                assert_eq!(left_len, right_len);
+                assert_eq!(index_len, dest_len);
+
+                for i in 0..dest_len {
+                    let src_index: u64 = this
+                        .read_immediate(&this.operand_index(index, i)?)?
+                        .to_scalar()?
+                        .to_u32()?
+                        .into();
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    let val = if src_index < left_len {
+                        this.read_immediate(&this.mplace_index(&left, src_index)?.into())?
+                    } else if src_index < left_len.checked_add(right_len).unwrap() {
+                        this.read_immediate(
+                            &this.mplace_index(&right, src_index - left_len)?.into(),
+                        )?
+                    } else {
+                        bug!(
+                            "simd_shuffle index {} is out of bounds for 2 vectors of size {}",
+                            src_index,
+                            left_len
+                        );
+                    };
+                    this.write_immediate(*val, &dest.into())?;
+                }
+            }
+            "simd_gather" => {
+                let [passthru, ptrs, mask] = check_arg_count(args)?;
+                let (passthru, passthru_len) = this.operand_to_simd(passthru)?;
+                let (ptrs, ptrs_len) = this.operand_to_simd(ptrs)?;
+                let (mask, mask_len) = this.operand_to_simd(mask)?;
+                let (dest, dest_len) = this.place_to_simd(dest)?;
+
+                assert_eq!(dest_len, passthru_len);
+                assert_eq!(dest_len, ptrs_len);
+                assert_eq!(dest_len, mask_len);
+
+                for i in 0..dest_len {
+                    let passthru = this.read_immediate(&this.mplace_index(&passthru, i)?.into())?;
+                    let ptr = this.read_immediate(&this.mplace_index(&ptrs, i)?.into())?;
+                    let mask = this.read_immediate(&this.mplace_index(&mask, i)?.into())?;
+                    let dest = this.mplace_index(&dest, i)?;
+
+                    let val = if simd_element_to_bool(mask)? {
+                        let place = this.deref_operand(&ptr.into())?;
+                        this.read_immediate(&place.into())?
+                    } else {
+                        passthru
+                    };
+                    this.write_immediate(*val, &dest.into())?;
+                }
+            }
+            "simd_scatter" => {
+                let [value, ptrs, mask] = check_arg_count(args)?;
+                let (value, value_len) = this.operand_to_simd(value)?;
+                let (ptrs, ptrs_len) = this.operand_to_simd(ptrs)?;
+                let (mask, mask_len) = this.operand_to_simd(mask)?;
+
+                assert_eq!(ptrs_len, value_len);
+                assert_eq!(ptrs_len, mask_len);
+
+                for i in 0..ptrs_len {
+                    let value = this.read_immediate(&this.mplace_index(&value, i)?.into())?;
+                    let ptr = this.read_immediate(&this.mplace_index(&ptrs, i)?.into())?;
+                    let mask = this.read_immediate(&this.mplace_index(&mask, i)?.into())?;
+
+                    if simd_element_to_bool(mask)? {
+                        let place = this.deref_operand(&ptr.into())?;
+                        this.write_immediate(*value, &place.into())?;
+                    }
+                }
+            }
+            "simd_bitmask" => {
+                let [op] = check_arg_count(args)?;
+                let (op, op_len) = this.operand_to_simd(op)?;
+                let bitmask_len = op_len.max(8);
+
+                assert!(dest.layout.ty.is_integral());
+                assert!(bitmask_len <= 64);
+                assert_eq!(bitmask_len, dest.layout.size.bits());
+
+                let mut res = 0u64;
+                for i in 0..op_len {
+                    let op = this.read_immediate(&this.mplace_index(&op, i)?.into())?;
+                    if simd_element_to_bool(op)? {
+                        res |= 1 << simd_bitmask_index(i, op_len, this.data_layout().endian);
+                    }
+                }
+                this.write_int(res, dest)?;
+            }
  
              // Atomic operations
              "atomic_load" => this.atomic_load(args, dest, AtomicReadOp::SeqCst)?,
@@ -552,14 +1064,14 @@ fn call_intrinsic(
  
              // Other
              "exact_div" => {
-                let &[ref num, ref denom] = check_arg_count(args)?;
+                let [num, denom] = check_arg_count(args)?;
                  this.exact_div(&this.read_immediate(num)?, &this.read_immediate(denom)?, dest)?;
              }
  
              "try" => return this.handle_try(args, dest, ret),
  
              "breakpoint" => {
-                let &[] = check_arg_count(args)?;
+                let [] = check_arg_count(args)?;
                  // normally this would raise a SIGTRAP, which aborts if no debugger is connected
                  throw_machine_stop!(TerminationInfo::Abort("Trace/breakpoint trap".to_string()))
              }
@@ -580,7 +1092,7 @@ fn atomic_load(
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        let &[ref place] = check_arg_count(args)?;
+        let [place] = check_arg_count(args)?;
          let place = this.deref_operand(place)?;
  
          // make sure it fits into a scalar; otherwise it cannot be atomic
@@ -590,7 +1102,7 @@ fn atomic_load(
          // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
          // be 8-aligned).
          let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.memory.check_ptr_access_align(
+        this.check_ptr_access_align(
              place.ptr,
              place.layout.size,
              align,
@@ -608,7 +1120,7 @@ fn atomic_store(
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        let &[ref place, ref val] = check_arg_count(args)?;
+        let [place, val] = check_arg_count(args)?;
          let place = this.deref_operand(place)?;
          let val = this.read_scalar(val)?; // make sure it fits into a scalar; otherwise it cannot be atomic
  
@@ -616,7 +1128,7 @@ fn atomic_store(
          // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
          // be 8-aligned).
          let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.memory.check_ptr_access_align(
+        this.check_ptr_access_align(
              place.ptr,
              place.layout.size,
              align,
@@ -633,7 +1145,7 @@ fn compiler_fence(
          args: &[OpTy<'tcx, Tag>],
          atomic: AtomicFenceOp,
      ) -> InterpResult<'tcx> {
-        let &[] = check_arg_count(args)?;
+        let [] = check_arg_count(args)?;
          let _ = atomic;
          //FIXME: compiler fences are currently ignored
          Ok(())
@@ -645,7 +1157,7 @@ fn atomic_fence(
          atomic: AtomicFenceOp,
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
-        let &[] = check_arg_count(args)?;
+        let [] = check_arg_count(args)?;
          this.validate_atomic_fence(atomic)?;
          Ok(())
      }
@@ -659,7 +1171,7 @@ fn atomic_op(
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        let &[ref place, ref rhs] = check_arg_count(args)?;
+        let [place, rhs] = check_arg_count(args)?;
          let place = this.deref_operand(place)?;
  
          if !place.layout.ty.is_integral() {
@@ -671,7 +1183,7 @@ fn atomic_op(
          // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
          // be 8-aligned).
          let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.memory.check_ptr_access_align(
+        this.check_ptr_access_align(
              place.ptr,
              place.layout.size,
              align,
@@ -681,12 +1193,12 @@ fn atomic_op(
          match atomic_op {
              AtomicOp::Min => {
                  let old = this.atomic_min_max_scalar(&place, rhs, true, atomic)?;
-                this.write_immediate(*old, &dest)?; // old value is returned
+                this.write_immediate(*old, dest)?; // old value is returned
                  Ok(())
              }
              AtomicOp::Max => {
                  let old = this.atomic_min_max_scalar(&place, rhs, false, atomic)?;
-                this.write_immediate(*old, &dest)?; // old value is returned
+                this.write_immediate(*old, dest)?; // old value is returned
                  Ok(())
              }
              AtomicOp::MirOp(op, neg) => {
@@ -705,7 +1217,7 @@ fn atomic_exchange(
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        let &[ref place, ref new] = check_arg_count(args)?;
+        let [place, new] = check_arg_count(args)?;
          let place = this.deref_operand(place)?;
          let new = this.read_scalar(new)?;
  
@@ -713,7 +1225,7 @@ fn atomic_exchange(
          // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
          // be 8-aligned).
          let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.memory.check_ptr_access_align(
+        this.check_ptr_access_align(
              place.ptr,
              place.layout.size,
              align,
@@ -735,7 +1247,7 @@ fn atomic_compare_exchange_impl(
      ) -> InterpResult<'tcx> {
          let this = self.eval_context_mut();
  
-        let &[ref place, ref expect_old, ref new] = check_arg_count(args)?;
+        let [place, expect_old, new] = check_arg_count(args)?;
          let place = this.deref_operand(place)?;
          let expect_old = this.read_immediate(expect_old)?; // read as immediate for the sake of `binary_op()`
          let new = this.read_scalar(new)?;
@@ -744,7 +1256,7 @@ fn atomic_compare_exchange_impl(
          // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
          // be 8-aligned).
          let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
-        this.memory.check_ptr_access_align(
+        this.check_ptr_access_align(
              place.ptr,
              place.layout.size,
              align,
@@ -838,3 +1350,58 @@ fn float_to_int_unchecked<F>(
          })
      }
  }
+
+fn fmax_op<'tcx>(
+    left: &ImmTy<'tcx, Tag>,
+    right: &ImmTy<'tcx, Tag>,
+) -> InterpResult<'tcx, Scalar<Tag>> {
+    assert_eq!(left.layout.ty, right.layout.ty);
+    let ty::Float(float_ty) = left.layout.ty.kind() else {
+        bug!("fmax operand is not a float")
+    };
+    let left = left.to_scalar()?;
+    let right = right.to_scalar()?;
+    Ok(match float_ty {
+        FloatTy::F32 => Scalar::from_f32(left.to_f32()?.max(right.to_f32()?)),
+        FloatTy::F64 => Scalar::from_f64(left.to_f64()?.max(right.to_f64()?)),
+    })
+}
+
+fn fmin_op<'tcx>(
+    left: &ImmTy<'tcx, Tag>,
+    right: &ImmTy<'tcx, Tag>,
+) -> InterpResult<'tcx, Scalar<Tag>> {
+    assert_eq!(left.layout.ty, right.layout.ty);
+    let ty::Float(float_ty) = left.layout.ty.kind() else {
+        bug!("fmin operand is not a float")
+    };
+    let left = left.to_scalar()?;
+    let right = right.to_scalar()?;
+    Ok(match float_ty {
+        FloatTy::F32 => Scalar::from_f32(left.to_f32()?.min(right.to_f32()?)),
+        FloatTy::F64 => Scalar::from_f64(left.to_f64()?.min(right.to_f64()?)),
+    })
+}
+
+fn bool_to_simd_element(b: bool, size: Size) -> Scalar<Tag> {
+    // SIMD uses all-1 as pattern for "true"
+    let val = if b { -1 } else { 0 };
+    Scalar::from_int(val, size)
+}
+
+fn simd_element_to_bool<'tcx>(elem: ImmTy<'tcx, Tag>) -> InterpResult<'tcx, bool> {
+    let val = elem.to_scalar()?.to_int(elem.layout.size)?;
+    Ok(match val {
+        0 => false,
+        -1 => true,
+        _ => throw_ub_format!("each element of a SIMD mask must be all-0-bits or all-1-bits"),
+    })
+}
+
+fn simd_bitmask_index(idx: u64, vec_len: u64, endianess: Endian) -> u64 {
+    assert!(idx < vec_len);
+    match endianess {
+        Endian::Little => idx,
+        Endian::Big => vec_len - 1 - idx, // reverse order of bits
+    }
+}