Sync rust-lang/portable-simd@5f49d4c8435a25d804b2f375e949cb25479f5be9

author Jubilee Young <workingjubilee@gmail.com>

Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)

committer Jubilee Young <workingjubilee@gmail.com>

Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)
author Jubilee Young <workingjubilee@gmail.com>
Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)
committer Jubilee Young <workingjubilee@gmail.com>
Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)
diff --cc library/portable-simd/crates/core_simd/examples/spectral_norm.rs

index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..c515dad4deabd455396d7d17ed4a30859ce1252b

new file mode 100644 (file)
--- /dev/null
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@@ -1,0 -1,0 +1,77 @@@
++#![feature(portable_simd)]
++
++use core_simd::simd::*;
++
++fn a(i: usize, j: usize) -> f64 {
++    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
++}
++
++fn mult_av(v: &[f64], out: &mut [f64]) {
++    assert!(v.len() == out.len());
++    assert!(v.len() % 2 == 0);
++
++    for (i, out) in out.iter_mut().enumerate() {
++        let mut sum = f64x2::splat(0.0);
++
++        let mut j = 0;
++        while j < v.len() {
++            let b = f64x2::from_slice(&v[j..]);
++            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
++            sum += b / a;
++            j += 2
++        }
++        *out = sum.horizontal_sum();
++    }
++}
++
++fn mult_atv(v: &[f64], out: &mut [f64]) {
++    assert!(v.len() == out.len());
++    assert!(v.len() % 2 == 0);
++
++    for (i, out) in out.iter_mut().enumerate() {
++        let mut sum = f64x2::splat(0.0);
++
++        let mut j = 0;
++        while j < v.len() {
++            let b = f64x2::from_slice(&v[j..]);
++            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
++            sum += b / a;
++            j += 2
++        }
++        *out = sum.horizontal_sum();
++    }
++}
++
++fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
++    mult_av(v, tmp);
++    mult_atv(tmp, out);
++}
++
++pub fn spectral_norm(n: usize) -> f64 {
++    assert!(n % 2 == 0, "only even lengths are accepted");
++
++    let mut u = vec![1.0; n];
++    let mut v = u.clone();
++    let mut tmp = u.clone();
++
++    for _ in 0..10 {
++        mult_atav(&u, &mut v, &mut tmp);
++        mult_atav(&v, &mut u, &mut tmp);
++    }
++    (dot(&u, &v) / dot(&v, &v)).sqrt()
++}
++
++fn dot(x: &[f64], y: &[f64]) -> f64 {
++    // This is auto-vectorized:
++    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
++}
++
++#[cfg(test)]
++#[test]
++fn test() {
++    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
++}
++
++fn main() {
++    // Empty main to make cargo happy
++}
diff --cc library/portable-simd/crates/core_simd/src/comparisons.rs

index edef5af3687a27f4cd20099bdb6368ed4362d59d,0000000000000000000000000000000000000000..d024cf4ddbe30b04211e4460e2e0d831a1b6746d

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/comparisons.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/comparisons.rs
@@@ -1,56 -1,0 +1,68 @@@
+ +use crate::simd::intrinsics;
+ +use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    T: SimdElement + PartialEq,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Test if each lane is equal to the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
+ +    }
+ +
+ +    /// Test if each lane is not equal to the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    T: SimdElement + PartialOrd,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Test if each lane is less than the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+ +    }
+ +
+ +    /// Test if each lane is greater than the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+ +    }
+ +
+ +    /// Test if each lane is less than or equal to the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+ +    }
+ +
+ +    /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
++        // Safety: `self` is a vector, and the result of the comparison
++        // is always a valid mask.
+ +        unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/intrinsics.rs

index 233657202f7e8def0053c03a12e4775ba98332b1,0000000000000000000000000000000000000000..e150946c705c964b15ca5e718527833bbb4d7b05

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/intrinsics.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@@ -1,97 -1,0 +1,143 @@@
- /// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
- /// is documented alongside each intrinsic.
+ +//! This module contains the LLVM intrinsics bindings that provide the functionality for this
+ +//! crate.
+ +//!
+ +//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
++//!
++//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
++//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
++//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
++//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
++//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
++//!   this means that division by poison or undef is like division by zero, which means it inflicts...
++//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
++//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
++//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
++//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
++//!
++//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
++//!
++//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
+ +
+ +/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-     pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
++/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
++/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
+ +extern "platform-intrinsic" {
+ +    /// add/fadd
+ +    pub(crate) fn simd_add<T>(x: T, y: T) -> T;
+ +
+ +    /// sub/fsub
-     pub(crate) fn simd_div<T>(x: T, y: T) -> T;
++    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
+ +
+ +    /// mul/fmul
+ +    pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
+ +
+ +    /// udiv/sdiv/fdiv
-     pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
++    /// ints and uints: {s,u}div incur UB if division by zero occurs.
++    /// ints: sdiv is UB for int::MIN / -1.
++    /// floats: fdiv is never UB, but may create NaNs or infinities.
++    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
+ +
+ +    /// urem/srem/frem
-     pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
++    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
++    /// ints: srem is UB for int::MIN % -1.
++    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
++    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
+ +
+ +    /// shl
-     /// lshr/ashr
-     pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
++    /// for (u)ints. poison if rhs >= lhs::BITS
++    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
+ +
-     pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
++    /// ints: ashr
++    /// uints: lshr
++    /// poison if rhs >= lhs::BITS
++    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
+ +
+ +    /// and
+ +    pub(crate) fn simd_and<T>(x: T, y: T) -> T;
+ +
+ +    /// or
+ +    pub(crate) fn simd_or<T>(x: T, y: T) -> T;
+ +
+ +    /// xor
+ +    pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
+ +
+ +    /// fptoui/fptosi/uitofp/sitofp
++    /// casting floats to integers is truncating, so it is safe to convert values such as 1.5,
++    /// but the truncated value must fit in the target type or the result is poison.
++    /// use `simd_as` instead for a cast that performs a saturating conversion.
+ +    pub(crate) fn simd_cast<T, U>(x: T) -> U;
+ +    /// follows Rust's `T as U` semantics, including saturating float casts
+ +    /// which amounts to the same as `simd_cast` for many cases
+ +    #[cfg(not(bootstrap))]
+ +    pub(crate) fn simd_as<T, U>(x: T) -> U;
+ +
+ +    /// neg/fneg
++    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
++    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
++    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
+ +    pub(crate) fn simd_neg<T>(x: T) -> T;
+ +
+ +    /// fabs
+ +    pub(crate) fn simd_fabs<T>(x: T) -> T;
+ +
+ +    // minnum/maxnum
+ +    pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
+ +    pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
+ +
++    // these return Simd<int, N> with the same BITS size as the inputs
+ +    pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
+ +    pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
+ +    pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
+ +    pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
+ +    pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
+ +    pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+ +
+ +    // shufflevector
++    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
+ +    pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
+ +
++    /// llvm.masked.gather
++    /// like a loop of pointer reads
++    /// val: vector of values to select if a lane is masked
++    /// ptr: vector of pointers to read from
++    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
++    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
++    /// FIXME: review this if/when we fix up our mask story in general?
+ +    pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
++    /// llvm.masked.scatter
++    /// like gather, but more spicy, as it writes instead of reads
+ +    pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
+ +
+ +    // {s,u}add.sat
+ +    pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
+ +
+ +    // {s,u}sub.sat
-     pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
++    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
+ +
+ +    // reductions
++    // llvm.vector.reduce.{add,fadd}
+ +    pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
++    // llvm.vector.reduce.{mul,fmul}
+ +    pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+ +    #[allow(unused)]
+ +    pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+ +    #[allow(unused)]
+ +    pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+ +    pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+ +    pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+ +    pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+ +    pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+ +    pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+ +
+ +    // truncate integer vector to bitmask
+ +    #[allow(unused)]
+ +    pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
+ +
+ +    // select
-     pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
++    // first argument is a vector of integers, -1 (all bits 1) is "true"
++    // logically equivalent to (yes & m) | (no & (m ^ -1)),
++    // but you can use it on floats.
++    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
+ +    #[allow(unused)]
++    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
+ +}
diff --cc library/portable-simd/crates/core_simd/src/lib.rs

index 960a66400839fee40c374fd22686cacba19d0c45,0000000000000000000000000000000000000000..91ae34c05e095884169824f1f0b0ab461e1d6c7d

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/lib.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@@ -1,21 -1,0 +1,23 @@@
+ +#![cfg_attr(not(feature = "std"), no_std)]
+ +#![feature(
+ +    const_fn_trait_bound,
++    convert_float_to_int,
+ +    decl_macro,
++    intra_doc_pointers,
+ +    platform_intrinsics,
+ +    repr_simd,
+ +    simd_ffi,
+ +    staged_api,
+ +    stdsimd
+ +)]
+ +#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
+ +#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
+ +#![warn(missing_docs)]
+ +#![deny(unsafe_op_in_unsafe_fn)]
+ +#![unstable(feature = "portable_simd", issue = "86656")]
+ +//! Portable SIMD module.
+ +
+ +#[path = "mod.rs"]
+ +mod core_simd;
+ +pub use self::core_simd::simd;
+ +pub use simd::*;
diff --cc library/portable-simd/crates/core_simd/src/masks.rs

index ae1fef53da88e571db159de9614c44163ad069bf,0000000000000000000000000000000000000000..e1cd793045046b1963e3613802290cbd5f8ad1b8

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/masks.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@@ -1,587 -1,0 +1,582 @@@
- use crate::simd::intrinsics;
- use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+ +//! Types and traits associated with masking lanes of vectors.
+ +//! Types representing
+ +#![allow(non_camel_case_types)]
+ +
+ +#[cfg_attr(
+ +    not(all(target_arch = "x86_64", target_feature = "avx512f")),
+ +    path = "masks/full_masks.rs"
+ +)]
+ +#[cfg_attr(
+ +    all(target_arch = "x86_64", target_feature = "avx512f"),
+ +    path = "masks/bitmask.rs"
+ +)]
+ +mod mask_impl;
+ +
-     /// Convert this mask to a bitmask, with one bit set per lane.
-     #[cfg(feature = "generic_const_exprs")]
-     #[inline]
-     #[must_use = "method returns a new array and does not mutate the original value"]
-     pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-         self.0.to_bitmask()
-     }
- 
-     /// Convert a bitmask to a mask.
-     #[cfg(feature = "generic_const_exprs")]
-     #[inline]
-     #[must_use = "method returns a new mask and does not mutate the original value"]
-     pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-         Self(mask_impl::Mask::from_bitmask(bitmask))
-     }
- 
++mod to_bitmask;
++pub use to_bitmask::ToBitMask;
++
++use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
+ +use core::cmp::Ordering;
+ +use core::{fmt, mem};
+ +
+ +mod sealed {
+ +    use super::*;
+ +
+ +    /// Not only does this seal the `MaskElement` trait, but these functions prevent other traits
+ +    /// from bleeding into the parent bounds.
+ +    ///
+ +    /// For example, `eq` could be provided by requiring `MaskElement: PartialEq`, but that would
+ +    /// prevent us from ever removing that bound, or from implementing `MaskElement` on
+ +    /// non-`PartialEq` types in the future.
+ +    pub trait Sealed {
+ +        fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+ +        where
+ +            LaneCount<LANES>: SupportedLaneCount,
+ +            Self: SimdElement;
+ +
+ +        fn eq(self, other: Self) -> bool;
+ +
+ +        const TRUE: Self;
+ +
+ +        const FALSE: Self;
+ +    }
+ +}
+ +use sealed::Sealed;
+ +
+ +/// Marker trait for types that may be used as SIMD mask elements.
++///
++/// # Safety
++/// Type must be a signed integer.
+ +pub unsafe trait MaskElement: SimdElement + Sealed {}
+ +
+ +macro_rules! impl_element {
+ +    { $ty:ty } => {
+ +        impl Sealed for $ty {
+ +            fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+ +            where
+ +                LaneCount<LANES>: SupportedLaneCount,
+ +            {
+ +                (value.lanes_eq(Simd::splat(0)) | value.lanes_eq(Simd::splat(-1))).all()
+ +            }
+ +
+ +            fn eq(self, other: Self) -> bool { self == other }
+ +
+ +            const TRUE: Self = -1;
+ +            const FALSE: Self = 0;
+ +        }
+ +
+ +        unsafe impl MaskElement for $ty {}
+ +    }
+ +}
+ +
+ +impl_element! { i8 }
+ +impl_element! { i16 }
+ +impl_element! { i32 }
+ +impl_element! { i64 }
+ +impl_element! { isize }
+ +
+ +/// A SIMD vector mask for `LANES` elements of width specified by `T`.
+ +///
+ +/// The layout of this type is unspecified.
+ +#[repr(transparent)]
+ +pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount;
+ +
+ +impl<T, const LANES: usize> Copy for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Clone for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn clone(&self) -> Self {
+ +        *self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Construct a mask by setting all lanes to the given value.
+ +    pub fn splat(value: bool) -> Self {
+ +        Self(mask_impl::Mask::splat(value))
+ +    }
+ +
+ +    /// Converts an array of bools to a SIMD mask.
+ +    pub fn from_array(array: [bool; LANES]) -> Self {
+ +        // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ +        //     true:    0b_0000_0001
+ +        //     false:   0b_0000_0000
+ +        // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ +        // This would be hypothetically valid as an "in-place" transmute,
+ +        // but these are "dependently-sized" types, so copy elision it is!
+ +        unsafe {
+ +            let bytes: [u8; LANES] = mem::transmute_copy(&array);
+ +            let bools: Simd<i8, LANES> =
+ +                intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+ +            Mask::from_int_unchecked(intrinsics::simd_cast(bools))
+ +        }
+ +    }
+ +
+ +    /// Converts a SIMD mask to an array of bools.
+ +    pub fn to_array(self) -> [bool; LANES] {
+ +        // This follows mostly the same logic as from_array.
+ +        // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ +        //     true:    0b_0000_0001
+ +        //     false:   0b_0000_0000
+ +        // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ +        // Since our masks are equal to integers where all bits are set,
+ +        // we can simply convert them to i8s, and then bitand them by the
+ +        // bitpattern for Rust's "true" bool.
+ +        // This would be hypothetically valid as an "in-place" transmute,
+ +        // but these are "dependently-sized" types, so copy elision it is!
+ +        unsafe {
+ +            let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+ +            bytes &= Simd::splat(1i8);
+ +            mem::transmute_copy(&bytes)
+ +        }
+ +    }
+ +
+ +    /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ +    /// represents `true`.
+ +    ///
+ +    /// # Safety
+ +    /// All lanes must be either 0 or -1.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
++        // Safety: the caller must confirm this invariant
+ +        unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
+ +    }
+ +
+ +    /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ +    /// represents `true`.
+ +    ///
+ +    /// # Panics
+ +    /// Panics if any lane is not 0 or -1.
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn from_int(value: Simd<T, LANES>) -> Self {
+ +        assert!(T::valid(value), "all values must be either 0 or -1",);
++        // Safety: the validity has been checked
+ +        unsafe { Self::from_int_unchecked(value) }
+ +    }
+ +
+ +    /// Converts the mask to a vector of integers, where 0 represents `false` and -1
+ +    /// represents `true`.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original value"]
+ +    pub fn to_int(self) -> Simd<T, LANES> {
+ +        self.0.to_int()
+ +    }
+ +
+ +    /// Tests the value of the specified lane.
+ +    ///
+ +    /// # Safety
+ +    /// `lane` must be less than `LANES`.
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
++        // Safety: the caller must confirm this invariant
+ +        unsafe { self.0.test_unchecked(lane) }
+ +    }
+ +
+ +    /// Tests the value of the specified lane.
+ +    ///
+ +    /// # Panics
+ +    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn test(&self, lane: usize) -> bool {
+ +        assert!(lane < LANES, "lane index out of range");
++        // Safety: the lane index has been checked
+ +        unsafe { self.test_unchecked(lane) }
+ +    }
+ +
+ +    /// Sets the value of the specified lane.
+ +    ///
+ +    /// # Safety
+ +    /// `lane` must be less than `LANES`.
+ +    #[inline]
+ +    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
++        // Safety: the caller must confirm this invariant
+ +        unsafe {
+ +            self.0.set_unchecked(lane, value);
+ +        }
+ +    }
+ +
+ +    /// Sets the value of the specified lane.
+ +    ///
+ +    /// # Panics
+ +    /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ +    #[inline]
+ +    pub fn set(&mut self, lane: usize, value: bool) {
+ +        assert!(lane < LANES, "lane index out of range");
++        // Safety: the lane index has been checked
+ +        unsafe {
+ +            self.set_unchecked(lane, value);
+ +        }
+ +    }
+ +
+ +    /// Returns true if any lane is set, or false otherwise.
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn any(self) -> bool {
+ +        self.0.any()
+ +    }
+ +
+ +    /// Returns true if all lanes are set, or false otherwise.
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn all(self) -> bool {
+ +        self.0.all()
+ +    }
+ +}
+ +
+ +// vector/array conversion
+ +impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn from(array: [bool; LANES]) -> Self {
+ +        Self::from_array(array)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn from(vector: Mask<T, LANES>) -> Self {
+ +        vector.to_array()
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Default for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a defaulted mask with all lanes set to false (0)"]
+ +    fn default() -> Self {
+ +        Self::splat(false)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+ +where
+ +    T: MaskElement + PartialEq,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    fn eq(&self, other: &Self) -> bool {
+ +        self.0 == other.0
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+ +where
+ +    T: MaskElement + PartialOrd,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a new Ordering and does not mutate the original value"]
+ +    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ +        self.0.partial_cmp(&other.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
+ +where
+ +    T: MaskElement + fmt::Debug,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ +        f.debug_list()
+ +            .entries((0..LANES).map(|lane| self.test(lane)))
+ +            .finish()
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitand(self, rhs: Self) -> Self {
+ +        Self(self.0 & rhs.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitand(self, rhs: bool) -> Self {
+ +        self & Self::splat(rhs)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Mask<T, LANES>;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ +        Mask::splat(self) & rhs
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitor(self, rhs: Self) -> Self {
+ +        Self(self.0 | rhs.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitor(self, rhs: bool) -> Self {
+ +        self | Self::splat(rhs)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Mask<T, LANES>;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ +        Mask::splat(self) | rhs
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitxor(self, rhs: Self) -> Self::Output {
+ +        Self(self.0 ^ rhs.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitxor(self, rhs: bool) -> Self::Output {
+ +        self ^ Self::splat(rhs)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Mask<T, LANES>;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+ +        Mask::splat(self) ^ rhs
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Mask<T, LANES>;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn not(self) -> Self::Output {
+ +        Self(!self.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitand_assign(&mut self, rhs: Self) {
+ +        self.0 = self.0 & rhs.0;
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitand_assign(&mut self, rhs: bool) {
+ +        *self &= Self::splat(rhs);
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitor_assign(&mut self, rhs: Self) {
+ +        self.0 = self.0 | rhs.0;
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitor_assign(&mut self, rhs: bool) {
+ +        *self |= Self::splat(rhs);
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitxor_assign(&mut self, rhs: Self) {
+ +        self.0 = self.0 ^ rhs.0;
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    fn bitxor_assign(&mut self, rhs: bool) {
+ +        *self ^= Self::splat(rhs);
+ +    }
+ +}
+ +
+ +/// Vector of eight 8-bit masks
+ +pub type mask8x8 = Mask<i8, 8>;
+ +
+ +/// Vector of 16 8-bit masks
+ +pub type mask8x16 = Mask<i8, 16>;
+ +
+ +/// Vector of 32 8-bit masks
+ +pub type mask8x32 = Mask<i8, 32>;
+ +
+ +/// Vector of 64 8-bit masks
+ +pub type mask8x64 = Mask<i8, 64>;
+ +
+ +/// Vector of four 16-bit masks
+ +pub type mask16x4 = Mask<i16, 4>;
+ +
+ +/// Vector of eight 16-bit masks
+ +pub type mask16x8 = Mask<i16, 8>;
+ +
+ +/// Vector of 16 16-bit masks
+ +pub type mask16x16 = Mask<i16, 16>;
+ +
+ +/// Vector of 32 16-bit masks
+ +pub type mask16x32 = Mask<i16, 32>;
+ +
+ +/// Vector of two 32-bit masks
+ +pub type mask32x2 = Mask<i32, 2>;
+ +
+ +/// Vector of four 32-bit masks
+ +pub type mask32x4 = Mask<i32, 4>;
+ +
+ +/// Vector of eight 32-bit masks
+ +pub type mask32x8 = Mask<i32, 8>;
+ +
+ +/// Vector of 16 32-bit masks
+ +pub type mask32x16 = Mask<i32, 16>;
+ +
+ +/// Vector of two 64-bit masks
+ +pub type mask64x2 = Mask<i64, 2>;
+ +
+ +/// Vector of four 64-bit masks
+ +pub type mask64x4 = Mask<i64, 4>;
+ +
+ +/// Vector of eight 64-bit masks
+ +pub type mask64x8 = Mask<i64, 8>;
+ +
+ +/// Vector of two pointer-width masks
+ +pub type masksizex2 = Mask<isize, 2>;
+ +
+ +/// Vector of four pointer-width masks
+ +pub type masksizex4 = Mask<isize, 4>;
+ +
+ +/// Vector of eight pointer-width masks
+ +pub type masksizex8 = Mask<isize, 8>;
+ +
+ +macro_rules! impl_from {
+ +    { $from:ty  => $($to:ty),* } => {
+ +        $(
+ +        impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
+ +        where
+ +            LaneCount<LANES>: SupportedLaneCount,
+ +        {
+ +            fn from(value: Mask<$from, LANES>) -> Self {
+ +                Self(value.0.convert())
+ +            }
+ +        }
+ +        )*
+ +    }
+ +}
+ +impl_from! { i8 => i16, i32, i64, isize }
+ +impl_from! { i16 => i32, i64, isize, i8 }
+ +impl_from! { i32 => i64, isize, i8, i16 }
+ +impl_from! { i64 => isize, i8, i16, i32 }
+ +impl_from! { isize => i8, i16, i32, i64 }
diff --cc library/portable-simd/crates/core_simd/src/masks/bitmask.rs

index b4217dc87ba9c209e3e07c57fe7120dc20bcb922,0000000000000000000000000000000000000000..ec4dd357ee98c5b4660e3173053679d30dceecce

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@@ -1,223 -1,0 +1,226 @@@
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+ +#![allow(unused_imports)]
+ +use super::MaskElement;
+ +use crate::simd::intrinsics;
-     #[cfg(feature = "generic_const_exprs")]
++use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+ +use core::marker::PhantomData;
+ +
+ +/// A mask where each lane is represented by a single bit.
+ +#[repr(transparent)]
+ +pub struct Mask<T, const LANES: usize>(
+ +    <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+ +    PhantomData<T>,
+ +)
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount;
+ +
+ +impl<T, const LANES: usize> Copy for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Clone for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn clone(&self) -> Self {
+ +        *self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn eq(&self, other: &Self) -> bool {
+ +        self.0.as_ref() == other.0.as_ref()
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ +        self.0.as_ref().partial_cmp(other.0.as_ref())
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Eq for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Ord for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ +        self.0.as_ref().cmp(other.0.as_ref())
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn splat(value: bool) -> Self {
+ +        let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
+ +        if value {
+ +            mask.as_mut().fill(u8::MAX)
+ +        } else {
+ +            mask.as_mut().fill(u8::MIN)
+ +        }
+ +        if LANES % 8 > 0 {
+ +            *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ +        }
+ +        Self(mask, PhantomData)
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ +        (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
+ +    }
+ +
+ +    #[inline]
+ +    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ +        unsafe {
+ +            self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
+ +        }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original value"]
+ +    pub fn to_int(self) -> Simd<T, LANES> {
+ +        unsafe {
+ +            intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
+ +        }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ +        unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
+ +    }
+ +
-     #[must_use = "method returns a new array and does not mutate the original value"]
-     pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-         // Safety: these are the same type and we are laundering the generic
+ +    #[inline]
-     #[cfg(feature = "generic_const_exprs")]
++    pub fn to_bitmask_integer<U>(self) -> U
++    where
++        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++    {
++        // Safety: these are the same types
+ +        unsafe { core::mem::transmute_copy(&self.0) }
+ +    }
+ +
-     #[must_use = "method returns a new mask and does not mutate the original value"]
-     pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-         // Safety: these are the same type and we are laundering the generic
-         Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+ +    #[inline]
++    pub fn from_bitmask_integer<U>(bitmask: U) -> Self
++    where
++        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++    {
++        // Safety: these are the same types
++        unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn convert<U>(self) -> Mask<U, LANES>
+ +    where
+ +        U: MaskElement,
+ +    {
++        // Safety: bitmask layout does not depend on the element width
+ +        unsafe { core::mem::transmute_copy(&self) }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn any(self) -> bool {
+ +        self != Self::splat(false)
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn all(self) -> bool {
+ +        self == Self::splat(true)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitand(mut self, rhs: Self) -> Self {
+ +        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ +            *l &= r;
+ +        }
+ +        self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitor(mut self, rhs: Self) -> Self {
+ +        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ +            *l |= r;
+ +        }
+ +        self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitxor(mut self, rhs: Self) -> Self::Output {
+ +        for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ +            *l ^= r;
+ +        }
+ +        self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn not(mut self) -> Self::Output {
+ +        for x in self.0.as_mut() {
+ +            *x = !*x;
+ +        }
+ +        if LANES % 8 > 0 {
+ +            *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ +        }
+ +        self
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/masks/full_masks.rs

index e5bb784bb910f4b40da0746f3f7ad7604d25b285,0000000000000000000000000000000000000000..8bbdf637de84defcc23f1e3a8afc07d0eb2538a7

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@@ -1,228 -1,0 +1,246 @@@
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+ +//! Masks that take up full SIMD vector registers.
+ +
+ +use super::MaskElement;
+ +use crate::simd::intrinsics;
-     #[cfg(feature = "generic_const_exprs")]
++use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+ +
+ +#[repr(transparent)]
+ +pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount;
+ +
+ +impl<T, const LANES: usize> Copy for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Clone for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn clone(&self) -> Self {
+ +        *self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+ +where
+ +    T: MaskElement + PartialEq,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn eq(&self, other: &Self) -> bool {
+ +        self.0.eq(&other.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+ +where
+ +    T: MaskElement + PartialOrd,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ +        self.0.partial_cmp(&other.0)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Eq for Mask<T, LANES>
+ +where
+ +    T: MaskElement + Eq,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Ord for Mask<T, LANES>
+ +where
+ +    T: MaskElement + Ord,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ +        self.0.cmp(&other.0)
+ +    }
+ +}
+ +
++// Used for bitmask bit order workaround
++pub(crate) trait ReverseBits {
++    fn reverse_bits(self) -> Self;
++}
++
++macro_rules! impl_reverse_bits {
++    { $($int:ty),* } => {
++        $(
++        impl ReverseBits for $int {
++            fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
++        }
++        )*
++    }
++}
++
++impl_reverse_bits! { u8, u16, u32, u64 }
++
+ +impl<T, const LANES: usize> Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn splat(value: bool) -> Self {
+ +        Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ +        T::eq(self.0[lane], T::TRUE)
+ +    }
+ +
+ +    #[inline]
+ +    pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ +        self.0[lane] = if value { T::TRUE } else { T::FALSE }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original value"]
+ +    pub fn to_int(self) -> Simd<T, LANES> {
+ +        self.0
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ +        Self(value)
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    pub fn convert<U>(self) -> Mask<U, LANES>
+ +    where
+ +        U: MaskElement,
+ +    {
++        // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
+ +        unsafe { Mask(intrinsics::simd_cast(self.0)) }
+ +    }
+ +
-     #[must_use = "method returns a new array and does not mutate the original value"]
-     pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-         unsafe {
-             let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
-                 intrinsics::simd_bitmask(self.0);
- 
-             // There is a bug where LLVM appears to implement this operation with the wrong
-             // bit order.
-             // TODO fix this in a better way
-             if cfg!(target_endian = "big") {
-                 for x in bitmask.as_mut() {
-                     *x = x.reverse_bits();
-                 }
-             }
+ +    #[inline]
-     #[cfg(feature = "generic_const_exprs")]
++    pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
++    where
++        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++    {
++        // Safety: U is required to be the appropriate bitmask type
++        let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+ +
++        // LLVM assumes bit order should match endianness
++        if cfg!(target_endian = "big") {
++            bitmask.reverse_bits()
++        } else {
+ +            bitmask
+ +        }
+ +    }
+ +
-     #[must_use = "method returns a new mask and does not mutate the original value"]
-     pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-         unsafe {
-             // There is a bug where LLVM appears to implement this operation with the wrong
-             // bit order.
-             // TODO fix this in a better way
-             if cfg!(target_endian = "big") {
-                 for x in bitmask.as_mut() {
-                     *x = x.reverse_bits();
-                 }
-             }
+ +    #[inline]
++    pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
++    where
++        super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++    {
++        // LLVM assumes bit order should match endianness
++        let bitmask = if cfg!(target_endian = "big") {
++            bitmask.reverse_bits()
++        } else {
++            bitmask
++        };
+ +
++        // Safety: U is required to be the appropriate bitmask type
++        unsafe {
+ +            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+ +                bitmask,
+ +                Self::splat(true).to_int(),
+ +                Self::splat(false).to_int(),
+ +            ))
+ +        }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn any(self) -> bool {
++        // Safety: use `self` as an integer vector
+ +        unsafe { intrinsics::simd_reduce_any(self.to_int()) }
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use = "method returns a new bool and does not mutate the original value"]
+ +    pub fn all(self) -> bool {
++        // Safety: `self.to_int()` is the mask's underlying integer vector
+ +        unsafe { intrinsics::simd_reduce_all(self.to_int()) }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::convert::From<Mask<T, LANES>> for Simd<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn from(value: Mask<T, LANES>) -> Self {
+ +        value.0
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitand(self, rhs: Self) -> Self {
++        // Safety: `self` is an integer vector
+ +        unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitor(self, rhs: Self) -> Self {
++        // Safety: `self` is an integer vector
+ +        unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn bitxor(self, rhs: Self) -> Self {
++        // Safety: `self` is an integer vector
+ +        unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    type Output = Self;
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original value"]
+ +    fn not(self) -> Self::Output {
+ +        Self::splat(true) ^ self
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs

index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..1c2037764c1e45af1960550df93cc290af05eaee

new file mode 100644 (file)
--- /dev/null
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
@@@ -1,0 -1,0 +1,57 @@@
++use super::{mask_impl, Mask, MaskElement};
++use crate::simd::{LaneCount, SupportedLaneCount};
++
++mod sealed {
++    pub trait Sealed {}
++}
++pub use sealed::Sealed;
++
++impl<T, const LANES: usize> Sealed for Mask<T, LANES>
++where
++    T: MaskElement,
++    LaneCount<LANES>: SupportedLaneCount,
++{
++}
++
++/// Converts masks to and from integer bitmasks.
++///
++/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
++///
++/// # Safety
++/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
++/// the mask.
++pub unsafe trait ToBitMask: Sealed {
++    /// The integer bitmask type.
++    type BitMask;
++
++    /// Converts a mask to a bitmask.
++    fn to_bitmask(self) -> Self::BitMask;
++
++    /// Converts a bitmask to a mask.
++    fn from_bitmask(bitmask: Self::BitMask) -> Self;
++}
++
++macro_rules! impl_integer_intrinsic {
++    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
++        $(
++        unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
++            type BitMask = $int;
++
++            fn to_bitmask(self) -> $int {
++                self.0.to_bitmask_integer()
++            }
++
++            fn from_bitmask(bitmask: $int) -> Self {
++                Self(mask_impl::Mask::from_bitmask_integer(bitmask))
++            }
++        }
++        )*
++    }
++}
++
++impl_integer_intrinsic! {
++    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
++    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
++    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
++    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
++}
diff --cc library/portable-simd/crates/core_simd/src/math.rs

index 7435b6df9186098a4e1ce5c3b4f8b61ae0a8316b,0000000000000000000000000000000000000000..0b4e40983af53e8a54eb11d902850aef0fb53fb1

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/math.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/math.rs
@@@ -1,159 -1,0 +1,163 @@@
+ +use crate::simd::intrinsics::{simd_saturating_add, simd_saturating_sub};
+ +use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+ +
+ +macro_rules! impl_uint_arith {
+ +    ($($ty:ty),+) => {
+ +        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
+ +
+ +            /// Lanewise saturating add.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+ +            /// let x = Simd::from_array([2, 1, 0, MAX]);
+ +            /// let max = Simd::splat(MAX);
+ +            /// let unsat = x + max;
+ +            /// let sat = x.saturating_add(max);
+ +            /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
+ +            /// assert_eq!(sat, max);
+ +            /// ```
+ +            #[inline]
+ +            pub fn saturating_add(self, second: Self) -> Self {
++                // Safety: `self` is a vector
+ +                unsafe { simd_saturating_add(self, second) }
+ +            }
+ +
+ +            /// Lanewise saturating subtract.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+ +            /// let x = Simd::from_array([2, 1, 0, MAX]);
+ +            /// let max = Simd::splat(MAX);
+ +            /// let unsat = x - max;
+ +            /// let sat = x.saturating_sub(max);
+ +            /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
+ +            /// assert_eq!(sat, Simd::splat(0));
+ +            #[inline]
+ +            pub fn saturating_sub(self, second: Self) -> Self {
++                // Safety: `self` is a vector
+ +                unsafe { simd_saturating_sub(self, second) }
+ +            }
+ +        })+
+ +    }
+ +}
+ +
+ +macro_rules! impl_int_arith {
+ +    ($($ty:ty),+) => {
+ +        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
+ +
+ +            /// Lanewise saturating add.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ +            /// let x = Simd::from_array([MIN, 0, 1, MAX]);
+ +            /// let max = Simd::splat(MAX);
+ +            /// let unsat = x + max;
+ +            /// let sat = x.saturating_add(max);
+ +            /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
+ +            /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
+ +            /// ```
+ +            #[inline]
+ +            pub fn saturating_add(self, second: Self) -> Self {
++                // Safety: `self` is a vector
+ +                unsafe { simd_saturating_add(self, second) }
+ +            }
+ +
+ +            /// Lanewise saturating subtract.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ +            /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+ +            /// let max = Simd::splat(MAX);
+ +            /// let unsat = x - max;
+ +            /// let sat = x.saturating_sub(max);
+ +            /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+ +            /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
+ +            #[inline]
+ +            pub fn saturating_sub(self, second: Self) -> Self {
++                // Safety: `self` is a vector
+ +                unsafe { simd_saturating_sub(self, second) }
+ +            }
+ +
+ +            /// Lanewise absolute value, implemented in Rust.
+ +            /// Every lane becomes its absolute value.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ +            /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
+ +            /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
+ +            /// ```
+ +            #[inline]
+ +            pub fn abs(self) -> Self {
+ +                const SHR: $ty = <$ty>::BITS as $ty - 1;
+ +                let m = self >> Simd::splat(SHR);
+ +                (self^m) - m
+ +            }
+ +
+ +            /// Lanewise saturating absolute value, implemented in Rust.
+ +            /// As abs(), except the MIN value becomes MAX instead of itself.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ +            /// let xs = Simd::from_array([MIN, -2, 0, 3]);
+ +            /// let unsat = xs.abs();
+ +            /// let sat = xs.saturating_abs();
+ +            /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+ +            /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
+ +            /// ```
+ +            #[inline]
+ +            pub fn saturating_abs(self) -> Self {
+ +                // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
+ +                const SHR: $ty = <$ty>::BITS as $ty - 1;
+ +                let m = self >> Simd::splat(SHR);
+ +                (self^m).saturating_sub(m)
+ +            }
+ +
+ +            /// Lanewise saturating negation, implemented in Rust.
+ +            /// As neg(), except the MIN value becomes MAX instead of itself.
+ +            ///
+ +            /// # Examples
+ +            /// ```
+ +            /// # #![feature(portable_simd)]
+ +            /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +            /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ +            /// let x = Simd::from_array([MIN, -2, 3, MAX]);
+ +            /// let unsat = -x;
+ +            /// let sat = x.saturating_neg();
+ +            /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+ +            /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
+ +            /// ```
+ +            #[inline]
+ +            pub fn saturating_neg(self) -> Self {
+ +                Self::splat(0).saturating_sub(self)
+ +            }
+ +        })+
+ +    }
+ +}
+ +
+ +impl_uint_arith! { u8, u16, u32, u64, usize }
+ +impl_int_arith! { i8, i16, i32, i64, isize }
diff --cc library/portable-simd/crates/core_simd/src/ops.rs

index b65038933bf33c185305f31fbcad70d1576166a1,0000000000000000000000000000000000000000..1b35b3e717a3259591b84c1d15bd1fdff24a40d8

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/ops.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@@ -1,241 -1,0 +1,250 @@@
- // Division by zero is poison, according to LLVM.
- // So is dividing the MIN value of a signed integer by -1,
- // since that would return MAX + 1.
- // FIXME: Rust allows <SInt>::MIN / -1,
- // so we should probably figure out how to make that safe.
+ +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+ +use core::ops::{Add, Mul};
+ +use core::ops::{BitAnd, BitOr, BitXor};
+ +use core::ops::{Div, Rem, Sub};
+ +use core::ops::{Shl, Shr};
+ +
+ +mod assign;
+ +mod deref;
+ +mod unary;
+ +
+ +impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    I: core::slice::SliceIndex<[T]>, // accepts anything that can index a `[T]`: a single `usize` or a range
+ +{
+ +    type Output = I::Output;
+ +    fn index(&self, index: I) -> &Self::Output {
+ +        &self.as_array()[index] // delegate to indexing on the vector's array of lanes
+ +    }
+ +}
+ +
+ +impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    I: core::slice::SliceIndex<[T]>, // accepts anything that can index a `[T]`: a single `usize` or a range
+ +{
+ +    fn index_mut(&mut self, index: I) -> &mut Self::Output {
+ +        &mut self.as_mut_array()[index] // delegate to mutable indexing on the vector's array of lanes
+ +    }
+ +}
+ +
+ +macro_rules! unsafe_base {
+ +    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => { // trailing tokens (the scalar type passed by for_base_types) are accepted and ignored
+ +        unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) } // bare intrinsic call on (lhs, rhs) with no extra guard
+ +    };
+ +}
+ +
+ +/// SAFETY: This macro must only be used to impl Shl or Shr, and must be passed the matching shift intrinsic.
+ +/// It masks the shift amount with a bitand before calling the shift intrinsic, so that the result
+ +/// is well-defined: LLVM can return a poison value for shl, lshr, or ashr when rhs >= <Int>::BITS.
+ +/// At worst, this may add another instruction and cycle;
+ +/// at best, it may open up more optimization opportunities,
+ +/// or simply be elided entirely, especially for SIMD ISAs which default to this.
+ +///
+ +// FIXME: Consider implementing this in cg_llvm instead?
+ +// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
+ +macro_rules! wrap_bitshift {
+ +    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+ +        unsafe {
+ +            $crate::simd::intrinsics::$simd_call(
+ +                $lhs,
+ +                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)), // keep only the low bits of each shift amount, so it is always < BITS
+ +            )
+ +        }
+ +    };
+ +}
+ +
-             const PANIC_OVERFLOW: &'static str = $overflow:literal;
++/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
++/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
++/// thus guaranteeing that a well-defined Rust value is returned instead.
++///
++/// |                  | LLVM | Rust
++/// | :--------------: | :--- | :----------
++/// | N {/,%} 0        | UB   | panic!()
++/// | <$int>::MIN / -1 | UB   | <$int>::MIN
++/// | <$int>::MIN % -1 | UB   | 0
++///
+ +macro_rules! int_divrem_guard {
+ +    (   $lhs:ident,
+ +        $rhs:ident,
+ +        {   const PANIC_ZERO: &'static str = $zero:literal;
-         } else if <$int>::MIN != 0
-             && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
-                 // type inference can break here, so cut an SInt to size
-                 & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
-         {
-             panic!($overflow);
+ +            $simd_call:ident
+ +        },
+ +        $int:ident ) => {
+ +        if $rhs.lanes_eq(Simd::splat(0)).any() { // a zero divisor in any single lane panics the whole operation
+ +            panic!($zero);
-             unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+ +        } else {
-             const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
++            // Prevent otherwise-UB overflow on the MIN / -1 case by dividing those lanes by 1 instead (MIN / 1 == MIN, MIN % 1 == 0).
++            let rhs = if <$int>::MIN != 0 { // only signed types have a nonzero MIN
++                // This should, at worst, optimize to a few branchless logical ops
++                // Ideally, this entire conditional should evaporate
++                // Fire LLVM and implement those manually if it doesn't get the hint
++                ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
++                // type inference can break here, so cut an SInt to size
++                & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
++                .select(Simd::splat(1), $rhs)
++            } else {
++                // Nice base case to make it easy to const-fold away the other branch.
++                $rhs
++            };
++            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
+ +        }
+ +    };
+ +}
+ +
+ +macro_rules! for_base_types {
+ +    (   T = ($($scalar:ident),*);
+ +        type Lhs = Simd<T, N>;
+ +        type Rhs = Simd<T, N>;
+ +        type Output = $out:ty;
+ +
+ +        impl $op:ident::$call:ident {
+ +            $macro_impl:ident $inner:tt
+ +        }) => {
+ +            $( // one impl of `$op` per scalar type listed in `T`
+ +                impl<const N: usize> $op<Self> for Simd<$scalar, N>
+ +                where
+ +                    $scalar: SimdElement,
+ +                    LaneCount<N>: SupportedLaneCount,
+ +                {
+ +                    type Output = $out;
+ +
+ +                    #[inline]
+ +                    #[must_use = "operator returns a new vector without mutating the inputs"]
+ +                    fn $call(self, rhs: Self) -> Self::Output {
+ +                        $macro_impl!(self, rhs, $inner, $scalar) // e.g. unsafe_base!(self, rhs, { simd_add }, i8)
+ +                    }
+ +                })*
+ +    }
+ +}
+ +
+ +// A "TokenTree muncher": takes a set of scalar types `T = (...);`,
+ +// the `Lhs`/`Rhs`/`Output` type header for the ops it implements, `Op::fn` names,
+ +// and, per op, a macro that expands into an expr, substituting in an intrinsic.
+ +// It passes the first `impl` item to for_base_types, which expands an impl for each type,
+ +// using the expanded expr in the function, and then recurses on the remaining items.
+ +//
+ +// tl;dr impls a set of ops::{Traits} for a set of types
+ +macro_rules! for_base_ops {
+ +    (
+ +        T = $types:tt;
+ +        type Lhs = Simd<T, N>;
+ +        type Rhs = Simd<T, N>;
+ +        type Output = $out:ident;
+ +        impl $op:ident::$call:ident
+ +            $inner:tt
+ +        $($rest:tt)*
+ +    ) => {
+ +        for_base_types! {
+ +            T = $types;
+ +            type Lhs = Simd<T, N>;
+ +            type Rhs = Simd<T, N>;
+ +            type Output = $out;
+ +            impl $op::$call
+ +                $inner
+ +        }
+ +        for_base_ops! {
+ +            T = $types;
+ +            type Lhs = Simd<T, N>;
+ +            type Rhs = Simd<T, N>;
+ +            type Output = $out;
+ +            $($rest)*
+ +        }
+ +    };
+ +    ($($done:tt)*) => { // base case: no `impl` item follows the header, so the first arm no longer matches
+ +        // Done: nothing left to expand.
+ +    }
+ +}
+ +
+ +// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
+ +// For all of these operations, simd_* intrinsics apply wrapping logic.
+ +for_base_ops! {
+ +    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
+ +    type Lhs = Simd<T, N>;
+ +    type Rhs = Simd<T, N>;
+ +    type Output = Self;
+ +
+ +    impl Add::add {
+ +        unsafe_base { simd_add }
+ +    }
+ +
+ +    impl Mul::mul {
+ +        unsafe_base { simd_mul }
+ +    }
+ +
+ +    impl Sub::sub {
+ +        unsafe_base { simd_sub }
+ +    }
+ +
+ +    impl BitAnd::bitand {
+ +        unsafe_base { simd_and }
+ +    }
+ +
+ +    impl BitOr::bitor {
+ +        unsafe_base { simd_or }
+ +    }
+ +
+ +    impl BitXor::bitxor {
+ +        unsafe_base { simd_xor }
+ +    }
+ +
+ +    impl Div::div {
+ +        int_divrem_guard {
+ +            const PANIC_ZERO: &'static str = "attempt to divide by zero";
-             const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
+ +            simd_div
+ +        }
+ +    }
+ +
+ +    impl Rem::rem {
+ +        int_divrem_guard {
+ +            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
+ +            simd_rem
+ +        }
+ +    }
+ +
+ +    // The only question is how to handle shifts >= <Int>::BITS?
+ +    // Our current solution uses wrapping logic.
+ +    impl Shl::shl {
+ +        wrap_bitshift { simd_shl }
+ +    }
+ +
+ +    impl Shr::shr {
+ +        wrap_bitshift {
+ +            // This automatically monomorphizes to lshr or ashr, depending,
+ +            // so it's fine to use it for both UInts and SInts.
+ +            simd_shr
+ +        }
+ +    }
+ +}
+ +
+ +// We don't need any special precautions here:
+ +// Floats always accept arithmetic ops, but may become NaN.
+ +for_base_ops! {
+ +    T = (f32, f64);
+ +    type Lhs = Simd<T, N>;
+ +    type Rhs = Simd<T, N>;
+ +    type Output = Self;
+ +
+ +    impl Add::add {
+ +        unsafe_base { simd_add }
+ +    }
+ +
+ +    impl Mul::mul {
+ +        unsafe_base { simd_mul }
+ +    }
+ +
+ +    impl Sub::sub {
+ +        unsafe_base { simd_sub }
+ +    }
+ +
+ +    impl Div::div {
+ +        unsafe_base { simd_div }
+ +    }
+ +
+ +    impl Rem::rem {
+ +        unsafe_base { simd_rem }
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/reduction.rs

index e79a185816bfbfcd3d9745003b66e50f0f6ad57b,0000000000000000000000000000000000000000..e1cd743e44247d55c4c9f49f6f004a60d981a7cb

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/reduction.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/reduction.rs
@@@ -1,145 -1,0 +1,153 @@@
+ +use crate::simd::intrinsics::{
+ +    simd_reduce_add_ordered, simd_reduce_and, simd_reduce_max, simd_reduce_min,
+ +    simd_reduce_mul_ordered, simd_reduce_or, simd_reduce_xor,
+ +};
+ +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+ +use core::ops::{BitAnd, BitOr, BitXor};
+ +
+ +macro_rules! impl_integer_reductions {
+ +    { $scalar:ty } => {
+ +        impl<const LANES: usize> Simd<$scalar, LANES>
+ +        where
+ +            LaneCount<LANES>: SupportedLaneCount,
+ +        {
+ +            /// Horizontal wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
+ +            #[inline]
+ +            pub fn horizontal_sum(self) -> $scalar {
++                // Safety: `self` is an integer vector
+ +                unsafe { simd_reduce_add_ordered(self, 0) } // NOTE(review): 0 is presumably the starting accumulator (additive identity) - confirm against the intrinsic
+ +            }
+ +
+ +            /// Horizontal wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
+ +            #[inline]
+ +            pub fn horizontal_product(self) -> $scalar {
++                // Safety: `self` is an integer vector
+ +                unsafe { simd_reduce_mul_ordered(self, 1) } // NOTE(review): 1 is presumably the starting accumulator (multiplicative identity) - confirm against the intrinsic
+ +            }
+ +
+ +            /// Horizontal maximum.  Returns the maximum lane in the vector.
+ +            #[inline]
+ +            pub fn horizontal_max(self) -> $scalar {
++                // Safety: `self` is an integer vector
+ +                unsafe { simd_reduce_max(self) }
+ +            }
+ +
+ +            /// Horizontal minimum.  Returns the minimum lane in the vector.
+ +            #[inline]
+ +            pub fn horizontal_min(self) -> $scalar {
++                // Safety: `self` is an integer vector
+ +                unsafe { simd_reduce_min(self) }
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +impl_integer_reductions! { i8 }
+ +impl_integer_reductions! { i16 }
+ +impl_integer_reductions! { i32 }
+ +impl_integer_reductions! { i64 }
+ +impl_integer_reductions! { isize }
+ +impl_integer_reductions! { u8 }
+ +impl_integer_reductions! { u16 }
+ +impl_integer_reductions! { u32 }
+ +impl_integer_reductions! { u64 }
+ +impl_integer_reductions! { usize }
+ +
+ +macro_rules! impl_float_reductions {
+ +    { $scalar:ty } => {
+ +        impl<const LANES: usize> Simd<$scalar, LANES>
+ +        where
+ +            LaneCount<LANES>: SupportedLaneCount,
+ +        {
+ +
+ +            /// Horizontal add.  Returns the sum of the lanes of the vector.
+ +            #[inline]
+ +            pub fn horizontal_sum(self) -> $scalar {
+ +                // LLVM sum is inaccurate on i586 (32-bit x86 without SSE2), so sum the lanes one by one there
+ +                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ +                    self.as_array().iter().sum()
+ +                } else {
++                    // Safety: `self` is a float vector
+ +                    unsafe { simd_reduce_add_ordered(self, 0.) }
+ +                }
+ +            }
+ +
+ +            /// Horizontal multiply.  Returns the product of the lanes of the vector.
+ +            #[inline]
+ +            pub fn horizontal_product(self) -> $scalar {
+ +                // LLVM product is inaccurate on i586 (32-bit x86 without SSE2), so multiply the lanes one by one there
+ +                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ +                    self.as_array().iter().product()
+ +                } else {
++                    // Safety: `self` is a float vector
+ +                    unsafe { simd_reduce_mul_ordered(self, 1.) }
+ +                }
+ +            }
+ +
+ +            /// Horizontal maximum.  Returns the maximum lane in the vector.
+ +            ///
+ +            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ +            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+ +            #[inline]
+ +            pub fn horizontal_max(self) -> $scalar {
++                // Safety: `self` is a float vector
+ +                unsafe { simd_reduce_max(self) }
+ +            }
+ +
+ +            /// Horizontal minimum.  Returns the minimum lane in the vector.
+ +            ///
+ +            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ +            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+ +            #[inline]
+ +            pub fn horizontal_min(self) -> $scalar {
++                // Safety: `self` is a float vector
+ +                unsafe { simd_reduce_min(self) }
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +impl_float_reductions! { f32 }
+ +impl_float_reductions! { f64 }
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    Self: BitAnd<Self, Output = Self>,
+ +    T: SimdElement + BitAnd<T, Output = T>,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Horizontal bitwise "and".  Returns the cumulative bitwise "and" across the lanes of
+ +    /// the vector.
+ +    #[inline]
+ +    pub fn horizontal_and(self) -> T {
+ +        unsafe { simd_reduce_and(self) } // Safety: `self` is a vector whose element type is bounded by `BitAnd` above
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    Self: BitOr<Self, Output = Self>,
+ +    T: SimdElement + BitOr<T, Output = T>,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Horizontal bitwise "or".  Returns the cumulative bitwise "or" across the lanes of
+ +    /// the vector.
+ +    #[inline]
+ +    pub fn horizontal_or(self) -> T {
+ +        unsafe { simd_reduce_or(self) } // Safety: `self` is a vector whose element type is bounded by `BitOr` above
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    Self: BitXor<Self, Output = Self>,
+ +    T: SimdElement + BitXor<T, Output = T>,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Horizontal bitwise "xor".  Returns the cumulative bitwise "xor" across the lanes of
+ +    /// the vector.
+ +    #[inline]
+ +    pub fn horizontal_xor(self) -> T {
+ +        unsafe { simd_reduce_xor(self) } // Safety: `self` is a vector whose element type is bounded by `BitXor` above
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/round.rs

index 06ccab3ec494c6b0799bc43f29416b143318deab,0000000000000000000000000000000000000000..556bc2cc1feee8a114253cdb99d464a5438b3bba

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/round.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/round.rs
@@@ -1,37 -1,0 +1,40 @@@
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+ +use crate::simd::intrinsics;
-         $type:ty, $int_type:ty
++use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
++use core::convert::FloatToInt;
+ +
+ +macro_rules! implement {
+ +    {
-             pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
++        $type:ty
+ +    } => {
+ +        impl<const LANES: usize> Simd<$type, LANES>
+ +        where
+ +            LaneCount<LANES>: SupportedLaneCount,
+ +        {
+ +            /// Rounds toward zero and converts each lane to the requested integer type `I`, assuming that
+ +            /// the value is finite and fits in that type.
+ +            ///
+ +            /// # Safety
+ +            /// The value must:
+ +            ///
+ +            /// * Not be NaN
+ +            /// * Not be infinite
+ +            /// * Be representable in the return type, after truncating off its fractional part
++            ///
++            /// If these requirements are infeasible or costly, consider using the safe function [cast],
++            /// which saturates on conversion.
++            ///
++            /// [cast]: Simd::cast
+ +            #[inline]
- 
-             /// Creates a floating-point vector from an integer vector.  Rounds values that are
-             /// not exactly representable.
-             #[inline]
-             pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
-                 unsafe { intrinsics::simd_cast(value) }
-             }
++            pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
++            where
++                $type: FloatToInt<I>,
++                I: SimdElement,
++            {
+ +                unsafe { intrinsics::simd_cast(self) } // Safety: the caller must uphold the requirements listed under `# Safety`
+ +            }
- implement! { f32, i32 }
- implement! { f64, i64 }
+ +        }
+ +    }
+ +}
+ +
++implement! { f32 }
++implement! { f64 }
diff --cc library/portable-simd/crates/core_simd/src/select.rs

index 8d521057fbd3ed7a9da2bc540af546f7d2cf9ee2,0000000000000000000000000000000000000000..3acf07260e12b058ec3bb1532c24d1d77cfac312

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/select.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@@ -1,57 -1,0 +1,61 @@@
+ +use crate::simd::intrinsics;
+ +use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+ +
+ +impl<T, const LANES: usize> Mask<T, LANES>
+ +where
+ +    T: MaskElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Choose lanes from two vectors.
+ +    ///
+ +    /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ +    /// that lane mask is true, and `false_values` if that lane mask is false.
+ +    ///
++    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ +    /// let a = Simd::from_array([0, 1, 2, 3]);
+ +    /// let b = Simd::from_array([4, 5, 6, 7]);
+ +    /// let mask = Mask::from_array([true, false, false, true]);
+ +    /// let c = mask.select(a, b);
+ +    /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
+ +    /// ```
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn select<U>(
+ +        self,
+ +        true_values: Simd<U, LANES>,
+ +        false_values: Simd<U, LANES>,
+ +    ) -> Simd<U, LANES>
+ +    where
+ +        U: SimdElement<Mask = T>, // the element type must use this mask's element type as its mask type
+ +    {
++        // Safety: The mask has been cast to a vector of integers,
++        // and the operands to select between are vectors of the same type and length.
+ +        unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
+ +    }
+ +
+ +    /// Choose lanes from two masks.
+ +    ///
+ +    /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ +    /// that lane mask is true, and `false_values` if that lane mask is false.
+ +    ///
++    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Mask;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Mask;
+ +    /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+ +    /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+ +    /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+ +    /// let c = mask.select_mask(a, b);
+ +    /// assert_eq!(c.to_array(), [true, false, true, false]);
+ +    /// ```
+ +    #[inline]
+ +    #[must_use = "method returns a new mask and does not mutate the original inputs"]
+ +    pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
+ +        self & true_values | !self & false_values // parses as (self & true_values) | (!self & false_values): `&` binds tighter than `|`
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/swizzle.rs

index bdc489774a54a43e1293a31a754cfb9da72ecc3d,0000000000000000000000000000000000000000..08b2add11667a77e94444459bb37c06effa67e33

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/swizzle.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@@ -1,383 -1,0 +1,385 @@@
+ +use crate::simd::intrinsics;
+ +use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+ +
+ +/// Constructs a new vector by selecting values from the lanes of one or two source vectors.
+ +///
+ +/// When swizzling one vector, the indices of the result vector are indicated by a `const` array
+ +/// of `usize`, like [`Swizzle`].
+ +/// When swizzling two vectors, the indices are indicated by a `const` array of [`Which`], like
+ +/// [`Swizzle2`].
+ +///
+ +/// # Examples
+ +/// ## One source vector
+ +/// ```
+ +/// # #![feature(portable_simd)]
+ +/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle};
+ +/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle};
+ +/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
+ +///
+ +/// // Keeping the same size
+ +/// let r = simd_swizzle!(v, [3, 0, 1, 2]);
+ +/// assert_eq!(r.to_array(), [3., 0., 1., 2.]);
+ +///
+ +/// // Changing the number of lanes
+ +/// let r = simd_swizzle!(v, [3, 1]);
+ +/// assert_eq!(r.to_array(), [3., 1.]);
+ +/// ```
+ +///
+ +/// ## Two source vectors
+ +/// ```
+ +/// # #![feature(portable_simd)]
+ +/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle, Which};
+ +/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle, Which};
+ +/// use Which::*;
+ +/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
+ +/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
+ +///
+ +/// // Keeping the same size
+ +/// let r = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+ +/// assert_eq!(r.to_array(), [0., 1., 6., 7.]);
+ +///
+ +/// // Changing the number of lanes
+ +/// let r = simd_swizzle!(a, b, [First(0), Second(0)]);
+ +/// assert_eq!(r.to_array(), [0., 4.]);
+ +/// ```
+ +#[allow(unused_macros)]
+ +pub macro simd_swizzle {
+ +    (
+ +        $vector:expr, $index:expr $(,)?
+ +    ) => {
+ +        {
+ +            use $crate::simd::Swizzle;
+ +            struct Impl;
+ +            impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
+ +                const INDEX: [usize; {$index.len()}] = $index;
+ +            }
+ +            Impl::swizzle($vector)
+ +        }
+ +    },
+ +    (
+ +        $first:expr, $second:expr, $index:expr $(,)?
+ +    ) => {
+ +        {
+ +            use $crate::simd::{Which, Swizzle2};
+ +            struct Impl;
+ +            impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl {
+ +                const INDEX: [Which; {$index.len()}] = $index;
+ +            }
+ +            Impl::swizzle2($first, $second)
+ +        }
+ +    }
+ +}
+ +
+ +/// An index into one of two vectors.
+ +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+ +pub enum Which {
+ +    /// Selects the lane at this index of the first vector.
+ +    First(usize),
+ +    /// Selects the lane at this index of the second vector.
+ +    Second(usize),
+ +}
+ +
+ +/// Create a vector from the elements of another vector.
+ +pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ +    /// Map from the lanes of the input vector to the output vector.
+ +    const INDEX: [usize; OUTPUT_LANES];
+ +
+ +    /// Create a new vector from the lanes of `vector`.
+ +    ///
+ +    /// Lane `i` of the output is `vector[Self::INDEX[i]]`.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
+ +    where
+ +        T: SimdElement,
+ +        LaneCount<INPUT_LANES>: SupportedLaneCount,
+ +        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ +    {
++        // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32 whose entries were asserted to be below `INPUT_LANES`.
+ +        unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
+ +    }
+ +}
+ +
+ +/// Create a vector from the elements of two other vectors.
+ +pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ +    /// Map from the lanes of the input vectors to the output vector.
+ +    const INDEX: [Which; OUTPUT_LANES];
+ +
+ +    /// Create a new vector from the lanes of `first` and `second`.
+ +    ///
+ +    /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
+ +    /// `Second(j)`.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    fn swizzle2<T>(
+ +        first: Simd<T, INPUT_LANES>,
+ +        second: Simd<T, INPUT_LANES>,
+ +    ) -> Simd<T, OUTPUT_LANES>
+ +    where
+ +        T: SimdElement,
+ +        LaneCount<INPUT_LANES>: SupportedLaneCount,
+ +        LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ +    {
++        // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32 built from lane indices asserted to be below `INPUT_LANES`.
+ +        unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
+ +    }
+ +}
+ +
+ +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+ +/// This trait hides `INDEX_IMPL` from the public API.
+ +trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ +    const INDEX_IMPL: [u32; OUTPUT_LANES];
+ +}
+ +
+ +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
+ +    for T
+ +where
+ +    T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+ +{
+ +    const INDEX_IMPL: [u32; OUTPUT_LANES] = { // evaluated in a const context, so a failed assert is a compile-time error
+ +        let mut output = [0; OUTPUT_LANES];
+ +        let mut i = 0;
+ +        while i < OUTPUT_LANES {
+ +            let index = Self::INDEX[i];
+ +            assert!(index as u32 as usize == index); // the index must survive a round trip through u32
+ +            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+ +            output[i] = index as u32;
+ +            i += 1;
+ +        }
+ +        output
+ +    };
+ +}
+ +
+ +/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+ +/// This trait hides `INDEX_IMPL` from the public API.
+ +trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ +    const INDEX_IMPL: [u32; OUTPUT_LANES];
+ +}
+ +
+ +impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
+ +    for T
+ +where
+ +    T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+ +{
+ +    const INDEX_IMPL: [u32; OUTPUT_LANES] = { // evaluated in a const context, so a failed assert is a compile-time error
+ +        let mut output = [0; OUTPUT_LANES];
+ +        let mut i = 0;
+ +        while i < OUTPUT_LANES {
+ +            let (offset, index) = match Self::INDEX[i] { // `offset` is true when the lane comes from the second vector
+ +                Which::First(index) => (false, index),
+ +                Which::Second(index) => (true, index),
+ +            };
+ +            assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+ +
+ +            // lanes are numbered across the first vector, then the second, so second-vector lanes start at INPUT_LANES
+ +            let index = if offset { index + INPUT_LANES } else { index };
+ +            assert!(index as u32 as usize == index); // the combined index must survive a round trip through u32
+ +            output[i] = index as u32;
+ +            i += 1;
+ +        }
+ +        output
+ +    };
+ +}
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    /// Reverse the order of the lanes in the vector, so that the last lane becomes the first.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn reverse(self) -> Self {
+ +        const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
+ +            let mut index = [0; LANES];
+ +            let mut i = 0;
+ +            while i < LANES {
+ +                index[i] = LANES - i - 1; // output lane i reads the lane mirrored from the end
+ +                i += 1;
+ +            }
+ +            index
+ +        }
+ +
+ +        struct Reverse;
+ +
+ +        impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
+ +            const INDEX: [usize; LANES] = reverse_index::<LANES>();
+ +        }
+ +
+ +        Reverse::swizzle(self)
+ +    }
+ +
+ +    /// Rotates the vector such that the first `OFFSET` elements of the vector move to the end
+ +    /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
+ +    /// the element previously in lane `OFFSET` will become the first element in the vector.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
+ +        const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ +            let offset = OFFSET % LANES; // offsets of LANES or more wrap around
+ +            let mut index = [0; LANES];
+ +            let mut i = 0;
+ +            while i < LANES {
+ +                index[i] = (i + offset) % LANES; // output lane i reads input lane i + offset, wrapping past the end
+ +                i += 1;
+ +            }
+ +            index
+ +        }
+ +
+ +        struct Rotate<const OFFSET: usize>;
+ +
+ +        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ +            const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ +        }
+ +
+ +        Rotate::<OFFSET>::swizzle(self)
+ +    }
+ +
+ +    /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
+ +    /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`,
+ +    /// the element previously at index `LANES - OFFSET` will become the first element in the vector.
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
+ +        const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ +            let offset = LANES - OFFSET % LANES; // rotating right by OFFSET is rotating left by LANES - OFFSET
+ +            let mut index = [0; LANES];
+ +            let mut i = 0;
+ +            while i < LANES {
+ +                index[i] = (i + offset) % LANES; // the `% LANES` also covers offset == LANES (OFFSET a multiple of LANES)
+ +                i += 1;
+ +            }
+ +            index
+ +        }
+ +
+ +        struct Rotate<const OFFSET: usize>;
+ +
+ +        impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ +            const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ +        }
+ +
+ +        Rotate::<OFFSET>::swizzle(self)
+ +    }
+ +
+ +    /// Interleave two vectors.
+ +    ///
+ +    /// Produces two vectors with lanes taken alternately from `self` and `other`.
+ +    ///
+ +    /// The first result contains the first `LANES / 2` lanes from `self` and `other`,
+ +    /// alternating, starting with the first lane of `self`.
+ +    ///
+ +    /// The second result contains the last `LANES / 2` lanes from `self` and `other`,
+ +    /// alternating, starting with the lane `LANES / 2` from the start of `self`.
+ +    ///
+ +    /// ```
+ +    /// #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let a = Simd::from_array([0, 1, 2, 3]);
+ +    /// let b = Simd::from_array([4, 5, 6, 7]);
+ +    /// let (x, y) = a.interleave(b);
+ +    /// assert_eq!(x.to_array(), [0, 4, 1, 5]);
+ +    /// assert_eq!(y.to_array(), [2, 6, 3, 7]);
+ +    /// ```
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn interleave(self, other: Self) -> (Self, Self) {
+ +        const fn lo<const LANES: usize>() -> [Which; LANES] {
+ +            let mut idx = [Which::First(0); LANES];
+ +            let mut i = 0;
+ +            while i < LANES {
+ +                let offset = i / 2; // source lane: 0, 0, 1, 1, ...
+ +                idx[i] = if i % 2 == 0 { // even output lanes come from `self`, odd ones from `other`
+ +                    Which::First(offset)
+ +                } else {
+ +                    Which::Second(offset)
+ +                };
+ +                i += 1;
+ +            }
+ +            idx
+ +        }
+ +        const fn hi<const LANES: usize>() -> [Which; LANES] {
+ +            let mut idx = [Which::First(0); LANES];
+ +            let mut i = 0;
+ +            while i < LANES {
+ +                let offset = (LANES + i) / 2; // same pattern as `lo`, but starting at source lane LANES / 2
+ +                idx[i] = if i % 2 == 0 { // even output lanes come from `self`, odd ones from `other`
+ +                    Which::First(offset)
+ +                } else {
+ +                    Which::Second(offset)
+ +                };
+ +                i += 1;
+ +            }
+ +            idx
+ +        }
+ +
+ +        struct Lo;
+ +        struct Hi;
+ +
+ +        impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
+ +            const INDEX: [Which; LANES] = lo::<LANES>();
+ +        }
+ +
+ +        impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
+ +            const INDEX: [Which; LANES] = hi::<LANES>();
+ +        }
+ +
+ +        (Lo::swizzle2(self, other), Hi::swizzle2(self, other))
+ +    }
+ +
+ +    /// Deinterleave two vectors.
+ +    ///
+ +    /// The first result takes every other lane of `self` and then `other`, starting with
+ +    /// the first lane.
+ +    ///
+ +    /// The second result takes every other lane of `self` and then `other`, starting with
+ +    /// the second lane.
+ +    ///
+ +    /// ```
+ +    /// #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let a = Simd::from_array([0, 4, 1, 5]);
+ +    /// let b = Simd::from_array([2, 6, 3, 7]);
+ +    /// let (x, y) = a.deinterleave(b);
+ +    /// assert_eq!(x.to_array(), [0, 1, 2, 3]);
+ +    /// assert_eq!(y.to_array(), [4, 5, 6, 7]);
+ +    /// ```
+ +    #[inline]
+ +    #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ +    pub fn deinterleave(self, other: Self) -> (Self, Self) {
+ +        const fn even<const LANES: usize>() -> [Which; LANES] {
+ +            let mut idx = [Which::First(0); LANES];
+ +            let mut i = 0;
+ +            while i < LANES / 2 {
+ +                idx[i] = Which::First(2 * i);
+ +                idx[i + LANES / 2] = Which::Second(2 * i);
+ +                i += 1;
+ +            }
+ +            idx
+ +        }
+ +        const fn odd<const LANES: usize>() -> [Which; LANES] {
+ +            let mut idx = [Which::First(0); LANES];
+ +            let mut i = 0;
+ +            while i < LANES / 2 {
+ +                idx[i] = Which::First(2 * i + 1);
+ +                idx[i + LANES / 2] = Which::Second(2 * i + 1);
+ +                i += 1;
+ +            }
+ +            idx
+ +        }
+ +
+ +        struct Even;
+ +        struct Odd;
+ +
+ +        impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
+ +            const INDEX: [Which; LANES] = even::<LANES>();
+ +        }
+ +
+ +        impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
+ +            const INDEX: [Which; LANES] = odd::<LANES>();
+ +        }
+ +
+ +        (Even::swizzle2(self, other), Odd::swizzle2(self, other))
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/to_bytes.rs

index 8d9b3e8ff85ea9f458e05d4f9b7732f327c9300c,0000000000000000000000000000000000000000..b36b1a347b226866431060589d157a5d6bfee197

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/to_bytes.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@@ -1,39 -1,0 +1,41 @@@
+ +macro_rules! impl_to_bytes {
+ +    { $ty:ty, $size:literal } => {
+ +        impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
+ +        where
+ +            crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount,
+ +            crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount,
+ +        {
+ +            /// Return the memory representation of this integer vector as a byte vector in
+ +            /// native byte order.
+ +            pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
++                // Safety: transmuting between vectors is safe
+ +                unsafe { core::mem::transmute_copy(&self) }
+ +            }
+ +
+ +            /// Create a native-endian integer vector from its memory representation as a byte
+ +            /// vector in native endianness.
+ +            pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
++                // Safety: transmuting between vectors is safe
+ +                unsafe { core::mem::transmute_copy(&bytes) }
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +impl_to_bytes! { u8, 1 }
+ +impl_to_bytes! { u16, 2 }
+ +impl_to_bytes! { u32, 4 }
+ +impl_to_bytes! { u64, 8 }
+ +#[cfg(target_pointer_width = "32")]
+ +impl_to_bytes! { usize, 4 }
+ +#[cfg(target_pointer_width = "64")]
+ +impl_to_bytes! { usize, 8 }
+ +
+ +impl_to_bytes! { i8, 1 }
+ +impl_to_bytes! { i16, 2 }
+ +impl_to_bytes! { i32, 4 }
+ +impl_to_bytes! { i64, 8 }
+ +#[cfg(target_pointer_width = "32")]
+ +impl_to_bytes! { isize, 4 }
+ +#[cfg(target_pointer_width = "64")]
+ +impl_to_bytes! { isize, 8 }
diff --cc library/portable-simd/crates/core_simd/src/vector.rs

index b7ef7a56c7319b6404d1464149a7c68e28d3ba83,0000000000000000000000000000000000000000..ff1b2c756ad4025bb208502eb5f0db6bb2f5c0b0

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/vector.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@@ -1,558 -1,0 +1,633 @@@
- /// A SIMD vector of `LANES` elements of type `T`.
+ +mod float;
+ +mod int;
+ +mod uint;
+ +
+ +pub use float::*;
+ +pub use int::*;
+ +pub use uint::*;
+ +
+ +// Vectors of pointers are not for public use at the current time.
+ +pub(crate) mod ptr;
+ +
+ +use crate::simd::intrinsics;
+ +use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
+ +
-         // SAFETY: We have masked-off out-of-bounds lanes.
++/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
++///
++/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
++/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
++/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
++/// the two arrays together and mapping the operator over each lane.
++///
++/// ```rust
++/// # #![feature(array_zip, portable_simd)]
++/// # use core::simd::{Simd};
++/// let a0: [i32; 4] = [-2, 0, 2, 4];
++/// let a1 = [10, 9, 8, 7];
++/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
++/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
++///
++/// // `Simd<T, N>` implements `From<[T; N]>`.
++/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
++/// // Which means arrays implement `Into<Simd<T, N>>`.
++/// assert_eq!(v0 + v1, zm_add.into());
++/// assert_eq!(v0 * v1, zm_mul.into());
++/// ```
++///
++/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
++/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
++/// This means there is no warning on overflows, even in "debug" builds.
++/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
++/// and even "debug builds" are unlikely to tolerate the loss of performance.
++/// You may want to consider using explicitly checked arithmetic if such is required.
++/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
++///
++/// [`Wrapping<T>`]: core::num::Wrapping
++///
++/// # Layout
++/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
++/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
++/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
++/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
++///
++/// # ABI "Features"
++/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
++/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
++/// The need for this may be corrected in the future.
++///
++/// # Safe SIMD with Unsafe Rust
++///
++/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
++/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
++/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
++/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
++/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
++/// [`read_unaligned`] and [`write_unaligned`]. This is because:
++/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
++/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
++/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
++/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
++/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
++///
++/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
++/// and may be just as fast as stricter alternatives.
++/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
++/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
++/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
++/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
++/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
++/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
++///
++/// [`transmute`]: core::mem::transmute
++/// [raw pointers]: pointer
++/// [`read_unaligned`]: pointer::read_unaligned
++/// [`write_unaligned`]: pointer::write_unaligned
++/// [`read`]: pointer::read
++/// [`write`]: pointer::write
++/// [as_simd]: slice::as_simd
+ +#[repr(simd)]
+ +pub struct Simd<T, const LANES: usize>([T; LANES])
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount;
+ +
+ +impl<T, const LANES: usize> Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    /// Number of lanes in this vector.
+ +    pub const LANES: usize = LANES;
+ +
+ +    /// Get the number of lanes in this vector.
+ +    pub const fn lanes(&self) -> usize {
+ +        LANES
+ +    }
+ +
+ +    /// Construct a SIMD vector by setting all lanes to the given value.
+ +    pub const fn splat(value: T) -> Self {
+ +        Self([value; LANES])
+ +    }
+ +
+ +    /// Returns an array reference containing the entire SIMD vector.
+ +    pub const fn as_array(&self) -> &[T; LANES] {
+ +        &self.0
+ +    }
+ +
+ +    /// Returns a mutable array reference containing the entire SIMD vector.
+ +    pub fn as_mut_array(&mut self) -> &mut [T; LANES] {
+ +        &mut self.0
+ +    }
+ +
+ +    /// Converts an array to a SIMD vector.
+ +    pub const fn from_array(array: [T; LANES]) -> Self {
+ +        Self(array)
+ +    }
+ +
+ +    /// Converts a SIMD vector to an array.
+ +    pub const fn to_array(self) -> [T; LANES] {
+ +        self.0
+ +    }
+ +
+ +    /// Converts a slice to a SIMD vector containing `slice[..LANES]`
+ +    /// # Panics
+ +    /// `from_slice` will panic if the slice's `len` is less than the vector's `Simd::LANES`.
+ +    #[must_use]
+ +    pub const fn from_slice(slice: &[T]) -> Self {
+ +        assert!(
+ +            slice.len() >= LANES,
+ +            "slice length must be at least the number of lanes"
+ +        );
+ +        let mut array = [slice[0]; LANES];
+ +        let mut i = 0;
+ +        while i < LANES {
+ +            array[i] = slice[i];
+ +            i += 1;
+ +        }
+ +        Self(array)
+ +    }
+ +
+ +    /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
+ +    /// This follows the semantics of Rust's `as` conversion for casting
+ +    /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
+ +    /// and from floats to integers (truncating, or saturating at the limits) for each lane,
+ +    /// or vice versa.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
+ +    /// let ints = floats.cast::<i32>();
+ +    /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
+ +    ///
+ +    /// // Formally equivalent, but `Simd::cast` can optimize better.
+ +    /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
+ +    ///
+ +    /// // The float conversion does not round-trip.
+ +    /// let floats_again = ints.cast();
+ +    /// assert_ne!(floats, floats_again);
+ +    /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
+ +    /// ```
+ +    #[must_use]
+ +    #[inline]
+ +    #[cfg(not(bootstrap))]
+ +    pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
++        // Safety: The input argument is a vector of a known SIMD type.
+ +        unsafe { intrinsics::simd_as(self) }
+ +    }
+ +
+ +    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ +    /// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ +    /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ +    ///
+ +    /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+ +    /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15]));
+ +    /// ```
+ +    #[must_use]
+ +    #[inline]
+ +    pub fn gather_or(slice: &[T], idxs: Simd<usize, LANES>, or: Self) -> Self {
+ +        Self::gather_select(slice, Mask::splat(true), idxs, or)
+ +    }
+ +
+ +    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ +    /// If an index is out-of-bounds, the lane is set to the default value for the type.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ +    ///
+ +    /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+ +    /// assert_eq!(result, Simd::from_array([0, 13, 10, 15]));
+ +    /// ```
+ +    #[must_use]
+ +    #[inline]
+ +    pub fn gather_or_default(slice: &[T], idxs: Simd<usize, LANES>) -> Self
+ +    where
+ +        T: Default,
+ +    {
+ +        Self::gather_or(slice, idxs, Self::splat(T::default()))
+ +    }
+ +
+ +    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ +    /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ +    /// If an index is disabled or is out-of-bounds, the lane is selected from the `or` vector.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ +    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ +    /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ +    /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ +    ///
+ +    /// let result = Simd::gather_select(&vec, enable, idxs, alt); // Note the lane that is out-of-bounds.
+ +    /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ +    /// ```
+ +    #[must_use]
+ +    #[inline]
+ +    pub fn gather_select(
+ +        slice: &[T],
+ +        enable: Mask<isize, LANES>,
+ +        idxs: Simd<usize, LANES>,
+ +        or: Self,
+ +    ) -> Self {
+ +        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-         // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
++        // Safety: We have masked-off out-of-bounds lanes.
+ +        unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
+ +    }
+ +
+ +    /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ +    /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ +    /// If an index is disabled, the lane is selected from the `or` vector.
+ +    ///
+ +    /// # Safety
+ +    ///
+ +    /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]*
+ +    /// even if the resulting value is not used.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ +    /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ +    /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ +    /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane.
+ +    /// // If this mask was used to gather, it would be unsound. Let's fix that.
+ +    /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+ +    ///
+ +    /// // We have masked the OOB lane, so it's safe to gather now.
+ +    /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) };
+ +    /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ +    /// ```
+ +    /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ +    #[must_use]
+ +    #[inline]
+ +    pub unsafe fn gather_select_unchecked(
+ +        slice: &[T],
+ +        enable: Mask<isize, LANES>,
+ +        idxs: Simd<usize, LANES>,
+ +        or: Self,
+ +    ) -> Self {
+ +        let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
+ +        // Ferris forgive me, I have done pointer arithmetic here.
+ +        let ptrs = base_ptr.wrapping_add(idxs);
-         // SAFETY: We have masked-off out-of-bounds lanes.
++        // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+ +        unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
+ +    }
+ +
+ +    /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
+ +    /// If two lanes in the scattered vector would write to the same index,
+ +    /// only the last lane is guaranteed to actually be written.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::Simd;
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ +    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ +    /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ +    ///
+ +    /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+ +    /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+ +    /// ```
+ +    #[inline]
+ +    pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, LANES>) {
+ +        self.scatter_select(slice, Mask::splat(true), idxs)
+ +    }
+ +
+ +    /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ +    /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ +    /// If an enabled index is out-of-bounds, the lane is not written.
+ +    /// If two enabled lanes in the scattered vector would write to the same index,
+ +    /// only the last lane is guaranteed to actually be written.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ +    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ +    /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ +    /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ +    ///
+ +    /// vals.scatter_select(&mut vec, enable, idxs); // index 0's second write is masked, thus omitted.
+ +    /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ +    /// ```
+ +    #[inline]
+ +    pub fn scatter_select(
+ +        self,
+ +        slice: &mut [T],
+ +        enable: Mask<isize, LANES>,
+ +        idxs: Simd<usize, LANES>,
+ +    ) {
+ +        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
-         // SAFETY: This block works with *mut T derived from &mut 'a [T],
++        // Safety: We have masked-off out-of-bounds lanes.
+ +        unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
+ +    }
+ +
+ +    /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ +    /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ +    /// If two enabled lanes in the scattered vector would write to the same index,
+ +    /// only the last lane is guaranteed to actually be written.
+ +    ///
+ +    /// # Safety
+ +    ///
+ +    /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*,
+ +    /// and may lead to memory corruption.
+ +    ///
+ +    /// # Examples
+ +    /// ```
+ +    /// # #![feature(portable_simd)]
+ +    /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ +    /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ +    /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ +    /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ +    /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ +    /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ +    /// // If this mask was used to scatter, it would be unsound. Let's fix that.
+ +    /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+ +    ///
+ +    /// // We have masked the OOB lane, so it's safe to scatter now.
+ +    /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); }
+ +    /// // index 0's second write is masked, thus was omitted.
+ +    /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ +    /// ```
+ +    /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ +    #[inline]
+ +    pub unsafe fn scatter_select_unchecked(
+ +        self,
+ +        slice: &mut [T],
+ +        enable: Mask<isize, LANES>,
+ +        idxs: Simd<usize, LANES>,
+ +    ) {
- /// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
++        // Safety: This block works with *mut T derived from &mut 'a [T],
+ +        // which means it is delicate in Rust's borrowing model, circa 2021:
+ +        // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+ +        // Even though this block is largely safe methods, it must be exactly this way
+ +        // to prevent invalidating the raw ptrs while they're live.
+ +        // Thus, entering this block requires every value it uses to be ready beforehand:
+ +        // 0. idxs we want to write to, which are used to construct the mask.
+ +        // 1. enable, which depends on an initial &'a [T] and the idxs.
+ +        // 2. actual values to scatter (self).
+ +        // 3. &mut [T] which will become our base ptr.
+ +        unsafe {
+ +            // Now Entering ☢️ *mut T Zone
+ +            let base_ptr = crate::simd::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+ +            // Ferris forgive me, I have done pointer arithmetic here.
+ +            let ptrs = base_ptr.wrapping_add(idxs);
+ +            // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+ +            intrinsics::simd_scatter(self, ptrs, enable.to_int())
+ +            // Cleared ☢️ *mut T Zone
+ +        }
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Copy for Simd<T, LANES>
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Clone for Simd<T, LANES>
+ +where
+ +    T: SimdElement,
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +{
+ +    fn clone(&self) -> Self {
+ +        *self
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Default for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + Default,
+ +{
+ +    #[inline]
+ +    fn default() -> Self {
+ +        Self::splat(T::default())
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialEq for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + PartialEq,
+ +{
+ +    #[inline]
+ +    fn eq(&self, other: &Self) -> bool {
+ +        // TODO use SIMD equality
+ +        self.to_array() == other.to_array()
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> PartialOrd for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + PartialOrd,
+ +{
+ +    #[inline]
+ +    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ +        // TODO use SIMD comparison
+ +        self.to_array().partial_cmp(other.as_ref())
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> Eq for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + Eq,
+ +{
+ +}
+ +
+ +impl<T, const LANES: usize> Ord for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + Ord,
+ +{
+ +    #[inline]
+ +    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ +        // TODO use SIMD comparison
+ +        self.to_array().cmp(other.as_ref())
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> core::hash::Hash for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement + core::hash::Hash,
+ +{
+ +    #[inline]
+ +    fn hash<H>(&self, state: &mut H)
+ +    where
+ +        H: core::hash::Hasher,
+ +    {
+ +        self.as_array().hash(state)
+ +    }
+ +}
+ +
+ +// array references
+ +impl<T, const LANES: usize> AsRef<[T; LANES]> for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    #[inline]
+ +    fn as_ref(&self) -> &[T; LANES] {
+ +        &self.0
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> AsMut<[T; LANES]> for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    #[inline]
+ +    fn as_mut(&mut self) -> &mut [T; LANES] {
+ +        &mut self.0
+ +    }
+ +}
+ +
+ +// slice references
+ +impl<T, const LANES: usize> AsRef<[T]> for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    #[inline]
+ +    fn as_ref(&self) -> &[T] {
+ +        &self.0
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> AsMut<[T]> for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    #[inline]
+ +    fn as_mut(&mut self) -> &mut [T] {
+ +        &mut self.0
+ +    }
+ +}
+ +
+ +// vector/array conversion
+ +impl<T, const LANES: usize> From<[T; LANES]> for Simd<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    fn from(array: [T; LANES]) -> Self {
+ +        Self(array)
+ +    }
+ +}
+ +
+ +impl<T, const LANES: usize> From<Simd<T, LANES>> for [T; LANES]
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: SimdElement,
+ +{
+ +    fn from(vector: Simd<T, LANES>) -> Self {
+ +        vector.to_array()
+ +    }
+ +}
+ +
+ +mod sealed {
+ +    pub trait Sealed {}
+ +}
+ +use sealed::Sealed;
+ +
+ +/// Marker trait for types that may be used as SIMD vector elements.
++///
++/// # Safety
++/// This trait, when implemented, asserts the compiler can monomorphize
+ +/// `#[repr(simd)]` structs with the marked type as an element.
+ +/// Strictly, it is valid to impl if the vector will not be miscompiled.
+ +/// Practically, it is user-unfriendly to impl it if the vector won't compile,
+ +/// even when no soundness guarantees are broken by allowing the user to try.
+ +pub unsafe trait SimdElement: Sealed + Copy {
+ +    /// The mask element type corresponding to this element type.
+ +    type Mask: MaskElement;
+ +}
+ +
+ +impl Sealed for u8 {}
+ +unsafe impl SimdElement for u8 {
+ +    type Mask = i8;
+ +}
+ +
+ +impl Sealed for u16 {}
+ +unsafe impl SimdElement for u16 {
+ +    type Mask = i16;
+ +}
+ +
+ +impl Sealed for u32 {}
+ +unsafe impl SimdElement for u32 {
+ +    type Mask = i32;
+ +}
+ +
+ +impl Sealed for u64 {}
+ +unsafe impl SimdElement for u64 {
+ +    type Mask = i64;
+ +}
+ +
+ +impl Sealed for usize {}
+ +unsafe impl SimdElement for usize {
+ +    type Mask = isize;
+ +}
+ +
+ +impl Sealed for i8 {}
+ +unsafe impl SimdElement for i8 {
+ +    type Mask = i8;
+ +}
+ +
+ +impl Sealed for i16 {}
+ +unsafe impl SimdElement for i16 {
+ +    type Mask = i16;
+ +}
+ +
+ +impl Sealed for i32 {}
+ +unsafe impl SimdElement for i32 {
+ +    type Mask = i32;
+ +}
+ +
+ +impl Sealed for i64 {}
+ +unsafe impl SimdElement for i64 {
+ +    type Mask = i64;
+ +}
+ +
+ +impl Sealed for isize {}
+ +unsafe impl SimdElement for isize {
+ +    type Mask = isize;
+ +}
+ +
+ +impl Sealed for f32 {}
+ +unsafe impl SimdElement for f32 {
+ +    type Mask = i32;
+ +}
+ +
+ +impl Sealed for f64 {}
+ +unsafe impl SimdElement for f64 {
+ +    type Mask = i64;
+ +}
diff --cc library/portable-simd/crates/core_simd/src/vector/ptr.rs

index c668d9a6eaee35785e7eb84b7bc5a01f7d317bc8,0000000000000000000000000000000000000000..417d255c28d63b26db02107d73a650d2566aebf2

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/vector/ptr.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vector/ptr.rs
@@@ -1,55 -1,0 +1,59 @@@
+ +//! Private implementation details of public gather/scatter APIs.
+ +use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+ +use core::mem;
+ +
+ +/// A vector of *const T.
+ +#[derive(Debug, Copy, Clone)]
+ +#[repr(simd)]
+ +pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);
+ +
+ +impl<T, const LANES: usize> SimdConstPtr<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: Sized,
+ +{
+ +    #[inline]
+ +    #[must_use]
+ +    pub fn splat(ptr: *const T) -> Self {
+ +        Self([ptr; LANES])
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use]
+ +    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
++        // Safety: converting pointers to usize and vice-versa is safe
++        // (even if using that pointer is not)
+ +        unsafe {
+ +            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
+ +            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
+ +        }
+ +    }
+ +}
+ +
+ +/// A vector of *mut T. Be very careful around potential aliasing.
+ +#[derive(Debug, Copy, Clone)]
+ +#[repr(simd)]
+ +pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);
+ +
+ +impl<T, const LANES: usize> SimdMutPtr<T, LANES>
+ +where
+ +    LaneCount<LANES>: SupportedLaneCount,
+ +    T: Sized,
+ +{
+ +    #[inline]
+ +    #[must_use]
+ +    pub fn splat(ptr: *mut T) -> Self {
+ +        Self([ptr; LANES])
+ +    }
+ +
+ +    #[inline]
+ +    #[must_use]
+ +    pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
++        // Safety: converting pointers to usize and vice-versa is safe
++        // (even if using that pointer is not)
+ +        unsafe {
+ +            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
+ +            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
+ +        }
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/src/vendor.rs

index e8ce7176b4f219d4ccb2ed85474365a0916759e2,0000000000000000000000000000000000000000..9fb70218c954374b71caa1a34e8c76871d48963f

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/src/vendor.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@@ -1,29 -1,0 +1,31 @@@
+ +/// Provides implementations of `From<$a> for $b` and `From<$b> for $a` that transmute the value.
+ +#[allow(unused)]
+ +macro_rules! from_transmute {
+ +    { unsafe $a:ty => $b:ty } => {
+ +        from_transmute!{ @impl $a => $b }
+ +        from_transmute!{ @impl $b => $a }
+ +    };
+ +    { @impl $from:ty => $to:ty } => {
+ +        impl core::convert::From<$from> for $to {
+ +            #[inline]
+ +            fn from(value: $from) -> $to {
++                // Safety: transmuting between vectors is safe in itself; the caller of this
++                // macro is responsible for checking the invariants
+ +                unsafe { core::mem::transmute(value) }
+ +            }
+ +        }
+ +    };
+ +}
+ +
+ +/// Conversions to x86's SIMD types.
+ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ +mod x86;
+ +
+ +#[cfg(any(target_arch = "wasm32"))]
+ +mod wasm32;
+ +
+ +#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
+ +mod arm;
+ +
+ +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+ +mod powerpc;
diff --cc library/portable-simd/crates/core_simd/tests/masks.rs

index 6a8ecd33a73cfe0e2ea6d1494452233520e287dc,0000000000000000000000000000000000000000..3aec36ca7b7468b89f9a23401c85bbfd7a7b6151

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/tests/masks.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@@ -1,102 -1,0 +1,102 @@@
-             #[cfg(feature = "generic_const_exprs")]
+ +#![feature(portable_simd)]
+ +
+ +#[cfg(target_arch = "wasm32")]
+ +use wasm_bindgen_test::*;
+ +
+ +#[cfg(target_arch = "wasm32")]
+ +wasm_bindgen_test_configure!(run_in_browser);
+ +
+ +macro_rules! test_mask_api {
+ +    { $type:ident } => {
+ +        #[allow(non_snake_case)]
+ +        mod $type {
+ +            #[cfg(target_arch = "wasm32")]
+ +            use wasm_bindgen_test::*;
+ +
+ +            #[test]
+ +            #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ +            fn set_and_test() {
+ +                let values = [true, false, false, true, false, false, true, false];
+ +                let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ +                for (lane, value) in values.iter().copied().enumerate() {
+ +                    mask.set(lane, value);
+ +                }
+ +                for (lane, value) in values.iter().copied().enumerate() {
+ +                    assert_eq!(mask.test(lane), value);
+ +                }
+ +            }
+ +
+ +            #[test]
+ +            #[should_panic]
+ +            fn set_invalid_lane() {
+ +                let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ +                mask.set(8, true);
+ +                let _ = mask;
+ +            }
+ +
+ +            #[test]
+ +            #[should_panic]
+ +            fn test_invalid_lane() {
+ +                let mask = core_simd::Mask::<$type, 8>::splat(false);
+ +                let _ = mask.test(8);
+ +            }
+ +
+ +            #[test]
+ +            fn any() {
+ +                assert!(!core_simd::Mask::<$type, 8>::splat(false).any());
+ +                assert!(core_simd::Mask::<$type, 8>::splat(true).any());
+ +                let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ +                v.set(2, true);
+ +                assert!(v.any());
+ +            }
+ +
+ +            #[test]
+ +            fn all() {
+ +                assert!(!core_simd::Mask::<$type, 8>::splat(false).all());
+ +                assert!(core_simd::Mask::<$type, 8>::splat(true).all());
+ +                let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ +                v.set(2, true);
+ +                assert!(!v.all());
+ +            }
+ +
+ +            #[test]
+ +            fn roundtrip_int_conversion() {
+ +                let values = [true, false, false, true, false, false, true, false];
+ +                let mask = core_simd::Mask::<$type, 8>::from_array(values);
+ +                let int = mask.to_int();
+ +                assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
+ +                assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
+ +            }
+ +
-                 assert_eq!(bitmask, [0b01001001, 0b10000011]);
+ +            #[test]
+ +            fn roundtrip_bitmask_conversion() {
++                use core_simd::ToBitMask;
+ +                let values = [
+ +                    true, false, false, true, false, false, true, false,
+ +                    true, true, false, false, false, false, false, true,
+ +                ];
+ +                let mask = core_simd::Mask::<$type, 16>::from_array(values);
+ +                let bitmask = mask.to_bitmask();
++                assert_eq!(bitmask, 0b1000001101001001);
+ +                assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +mod mask_api {
+ +    test_mask_api! { i8 }
+ +    test_mask_api! { i16 }
+ +    test_mask_api! { i32 }
+ +    test_mask_api! { i64 }
+ +    test_mask_api! { isize }
+ +}
+ +
+ +#[test]
+ +fn convert() {
+ +    let values = [true, false, false, true, false, false, true, false];
+ +    assert_eq!(
+ +        core_simd::Mask::<i8, 8>::from_array(values),
+ +        core_simd::Mask::<i32, 8>::from_array(values).into()
+ +    );
+ +}
diff --cc library/portable-simd/crates/core_simd/tests/ops_macros.rs

index 4fb9de198ee15f5678a18eedbed49fbb463f3336,0000000000000000000000000000000000000000..50f7a4ca170db983187a26dfc251a9aafacb91b0

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/tests/ops_macros.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@@ -1,572 -1,0 +1,572 @@@
-             }
+ +/// Implements a test on a unary operation using proptest.
+ +///
+ +/// Compares the vector operation to the equivalent scalar operation.
+ +#[macro_export]
+ +macro_rules! impl_unary_op_test {
+ +    { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
+ +        test_helpers::test_lanes! {
+ +            fn $fn<const LANES: usize>() {
+ +                test_helpers::test_unary_elementwise(
+ +                    &<core_simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ +                    &$scalar_fn,
+ +                    &|_| true,
+ +                );
+ +            }
+ +        }
+ +    };
+ +    { $scalar:ty, $trait:ident :: $fn:ident } => {
+ +        impl_unary_op_test! { $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn }
+ +    };
+ +}
+ +
+ +/// Implements a test on a binary operation using proptest.
+ +///
+ +/// Compares the vector operation to the equivalent scalar operation.
+ +#[macro_export]
+ +macro_rules! impl_binary_op_test {
+ +    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => {
+ +        mod $fn {
+ +            use super::*;
+ +            use core_simd::Simd;
+ +
+ +            test_helpers::test_lanes! {
+ +                fn normal<const LANES: usize>() {
+ +                    test_helpers::test_binary_elementwise(
+ +                        &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ +                        &$scalar_fn,
+ +                        &|_, _| true,
+ +                    );
+ +                }
+ +
+ +                fn assign<const LANES: usize>() {
+ +                    test_helpers::test_binary_elementwise(
+ +                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ +                        &$scalar_fn,
+ +                        &|_, _| true,
+ +                    );
+ +                }
+ +            }
+ +        }
+ +    };
+ +    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => {
+ +        impl_binary_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn }
+ +    };
+ +}
+ +
+ +/// Implements a test on a binary operation using proptest.
+ +///
+ +/// Like `impl_binary_op_test`, but allows providing a function for rejecting particular inputs
+ +/// (like the `proptest_assume` macro).
+ +///
+ +/// Compares the vector operation to the equivalent scalar operation.
+ +#[macro_export]
+ +macro_rules! impl_binary_checked_op_test {
+ +    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => {
+ +        mod $fn {
+ +            use super::*;
+ +            use core_simd::Simd;
+ +
+ +            test_helpers::test_lanes! {
+ +                fn normal<const LANES: usize>() {
+ +                    test_helpers::test_binary_elementwise(
+ +                        &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ +                        &$scalar_fn,
+ +                        &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ +                    );
+ +                }
+ +
+ +                fn assign<const LANES: usize>() {
+ +                    test_helpers::test_binary_elementwise(
+ +                        &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ +                        &$scalar_fn,
+ +                        &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ +                    )
+ +                }
+ +            }
+ +        }
+ +    };
+ +    { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => {
+ +        impl_binary_checked_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn }
+ +    };
+ +}
+ +
+ +#[macro_export]
+ +macro_rules! impl_common_integer_tests {
+ +    { $vector:ident, $scalar:ident } => {
+ +        test_helpers::test_lanes! {
+ +            fn horizontal_sum<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_sum(),
+ +                        x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_product<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_product(),
+ +                        x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_and<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_and(),
+ +                        x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_or<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_or(),
+ +                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_xor<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_xor(),
+ +                        x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_max<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_max(),
+ +                        x.iter().copied().max().unwrap(),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +
+ +            fn horizontal_min<const LANES: usize>() {
+ +                test_helpers::test_1(&|x| {
+ +                    test_helpers::prop_assert_biteq! (
+ +                        $vector::<LANES>::from_array(x).horizontal_min(),
+ +                        x.iter().copied().min().unwrap(),
+ +                    );
+ +                    Ok(())
+ +                });
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/// Implement tests for signed integers.
+ +#[macro_export]
+ +macro_rules! impl_signed_tests {
+ +    { $scalar:tt } => {
+ +        mod $scalar {
+ +            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ +            type Scalar = $scalar;
+ +
+ +            impl_common_integer_tests! { Vector, Scalar }
+ +
+ +            test_helpers::test_lanes! {
+ +                fn neg<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &<Vector::<LANES> as core::ops::Neg>::neg,
+ +                        &<Scalar as core::ops::Neg>::neg,
+ +                        &|x| !x.contains(&Scalar::MIN),
+ +                    );
+ +                }
+ +
+ +                fn is_positive<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_positive,
+ +                        &Scalar::is_positive,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_negative<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_negative,
+ +                        &Scalar::is_negative,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn signum<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::signum,
+ +                        &Scalar::signum,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
-             test_helpers::test_lanes_panic! {
-                 fn div_min_overflow_panics<const LANES: usize>() {
++                fn div_min_may_overflow<const LANES: usize>() {
++                    let a = Vector::<LANES>::splat(Scalar::MIN);
++                    let b = Vector::<LANES>::splat(-1);
++                    assert_eq!(a / b, a);
++                }
+ +
-                     let _ = a / b;
++                fn rem_min_may_overflow<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(Scalar::MIN);
+ +                    let b = Vector::<LANES>::splat(-1);
-                 fn rem_min_overflow_panic<const LANES: usize>() {
-                     let a = Vector::<LANES>::splat(Scalar::MIN);
-                     let b = Vector::<LANES>::splat(-1);
-                     let _ = a % b;
-                 }
- 
++                    assert_eq!(a % b, Vector::<LANES>::splat(0));
+ +                }
+ +
++            }
++
++            test_helpers::test_lanes_panic! {
+ +                fn div_by_all_zeros_panics<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let b = Vector::<LANES>::splat(0);
+ +                    let _ = a / b;
+ +                }
+ +
+ +                fn div_by_one_zero_panics<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let mut b = Vector::<LANES>::splat(21);
+ +                    b[0] = 0 as _;
+ +                    let _ = a / b;
+ +                }
+ +
+ +                fn rem_zero_panic<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let b = Vector::<LANES>::splat(0);
+ +                    let _ = a % b;
+ +                }
+ +            }
+ +
+ +            test_helpers::test_lanes! {
+ +                fn div_neg_one_no_panic<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let b = Vector::<LANES>::splat(-1);
+ +                    let _ = a / b;
+ +                }
+ +
+ +                fn rem_neg_one_no_panic<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let b = Vector::<LANES>::splat(-1);
+ +                    let _ = a % b;
+ +                }
+ +            }
+ +
+ +            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ +            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ +            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+ +
+ +            // Exclude Div and Rem panicking (zero divisor) and overflowing (MIN / -1) cases
+ +            impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+ +            impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+ +
+ +            impl_unary_op_test!(Scalar, Not::not);
+ +            impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ +            impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ +            impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ +        }
+ +    }
+ +}
+ +
+ +/// Implement tests for unsigned integers.
+ +#[macro_export]
+ +macro_rules! impl_unsigned_tests {
+ +    { $scalar:tt } => {
+ +        mod $scalar {
+ +            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ +            type Scalar = $scalar;
+ +
+ +            impl_common_integer_tests! { Vector, Scalar }
+ +
+ +            test_helpers::test_lanes_panic! {
+ +                fn rem_zero_panic<const LANES: usize>() {
+ +                    let a = Vector::<LANES>::splat(42);
+ +                    let b = Vector::<LANES>::splat(0);
+ +                    let _ = a % b;
+ +                }
+ +            }
+ +
+ +            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ +            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ +            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+ +
+ +            // Exclude Div and Rem panicking cases
+ +            impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0);
+ +            impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0);
+ +
+ +            impl_unary_op_test!(Scalar, Not::not);
+ +            impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ +            impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ +            impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ +        }
+ +    }
+ +}
+ +
+ +/// Implement tests for floating point numbers.
+ +#[macro_export]
+ +macro_rules! impl_float_tests {
+ +    { $scalar:tt, $int_scalar:tt } => {
+ +        mod $scalar {
+ +            type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ +            type Scalar = $scalar;
+ +
+ +            impl_unary_op_test!(Scalar, Neg::neg);
+ +            impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign);
+ +            impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign);
+ +            impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign);
+ +            impl_binary_op_test!(Scalar, Div::div, DivAssign::div_assign);
+ +            impl_binary_op_test!(Scalar, Rem::rem, RemAssign::rem_assign);
+ +
+ +            test_helpers::test_lanes! {
+ +                fn is_sign_positive<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_sign_positive,
+ +                        &Scalar::is_sign_positive,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_sign_negative<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_sign_negative,
+ +                        &Scalar::is_sign_negative,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_finite<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_finite,
+ +                        &Scalar::is_finite,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_infinite<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_infinite,
+ +                        &Scalar::is_infinite,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_nan<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_nan,
+ +                        &Scalar::is_nan,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_normal<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_normal,
+ +                        &Scalar::is_normal,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn is_subnormal<const LANES: usize>() {
+ +                    test_helpers::test_unary_mask_elementwise(
+ +                        &Vector::<LANES>::is_subnormal,
+ +                        &Scalar::is_subnormal,
+ +                        &|_| true,
+ +                    );
+ +                }
+ +
+ +                fn abs<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::abs,
+ +                        &Scalar::abs,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn recip<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::recip,
+ +                        &Scalar::recip,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn to_degrees<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::to_degrees,
+ +                        &Scalar::to_degrees,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn to_radians<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::to_radians,
+ +                        &Scalar::to_radians,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn signum<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::signum,
+ +                        &Scalar::signum,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn copysign<const LANES: usize>() {
+ +                    test_helpers::test_binary_elementwise(
+ +                        &Vector::<LANES>::copysign,
+ +                        &Scalar::copysign,
+ +                        &|_, _| true,
+ +                    )
+ +                }
+ +
+ +                fn min<const LANES: usize>() {
+ +                    // Regular conditions (the values are not zeros of opposite sign)
+ +                    test_helpers::test_binary_elementwise(
+ +                        &Vector::<LANES>::min,
+ +                        &Scalar::min,
+ +                        // Reject the case where both values are zero with different signs
+ +                        &|a, b| {
+ +                            for (a, b) in a.iter().zip(b.iter()) {
+ +                                if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ +                                    return false;
+ +                                }
+ +                            }
+ +                            true
+ +                        }
+ +                    );
+ +
+ +                    // Special case where both values are zero
+ +                    let p_zero = Vector::<LANES>::splat(0.);
+ +                    let n_zero = Vector::<LANES>::splat(-0.);
+ +                    assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
+ +                    assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
+ +                }
+ +
+ +                fn max<const LANES: usize>() {
+ +                    // Regular conditions (the values are not zeros of opposite sign)
+ +                    test_helpers::test_binary_elementwise(
+ +                        &Vector::<LANES>::max,
+ +                        &Scalar::max,
+ +                        // Reject the case where both values are zero with different signs
+ +                        &|a, b| {
+ +                            for (a, b) in a.iter().zip(b.iter()) {
+ +                                if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ +                                    return false;
+ +                                }
+ +                            }
+ +                            true
+ +                        }
+ +                    );
+ +
+ +                    // Special case where both values are zero
+ +                    let p_zero = Vector::<LANES>::splat(0.);
+ +                    let n_zero = Vector::<LANES>::splat(-0.);
+ +                    assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
+ +                    assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
+ +                }
+ +
+ +                fn clamp<const LANES: usize>() {
+ +                    test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+ +                        for (min, max) in min.iter_mut().zip(max.iter_mut()) {
+ +                            if max < min {
+ +                                core::mem::swap(min, max);
+ +                            }
+ +                            if min.is_nan() {
+ +                                *min = Scalar::NEG_INFINITY;
+ +                            }
+ +                            if max.is_nan() {
+ +                                *max = Scalar::INFINITY;
+ +                            }
+ +                        }
+ +
+ +                        let mut result_scalar = [Scalar::default(); LANES];
+ +                        for i in 0..LANES {
+ +                            result_scalar[i] = value[i].clamp(min[i], max[i]);
+ +                        }
+ +                        let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
+ +                        test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+ +                        Ok(())
+ +                    })
+ +                }
+ +
+ +                fn horizontal_sum<const LANES: usize>() {
+ +                    test_helpers::test_1(&|x| {
+ +                        test_helpers::prop_assert_biteq! (
+ +                            Vector::<LANES>::from_array(x).horizontal_sum(),
+ +                            x.iter().sum(),
+ +                        );
+ +                        Ok(())
+ +                    });
+ +                }
+ +
+ +                fn horizontal_product<const LANES: usize>() {
+ +                    test_helpers::test_1(&|x| {
+ +                        test_helpers::prop_assert_biteq! (
+ +                            Vector::<LANES>::from_array(x).horizontal_product(),
+ +                            x.iter().product(),
+ +                        );
+ +                        Ok(())
+ +                    });
+ +                }
+ +
+ +                fn horizontal_max<const LANES: usize>() {
+ +                    test_helpers::test_1(&|x| {
+ +                        let vmax = Vector::<LANES>::from_array(x).horizontal_max();
+ +                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
+ +                        // 0 and -0 are treated the same
+ +                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+ +                            test_helpers::prop_assert_biteq!(vmax, smax);
+ +                        }
+ +                        Ok(())
+ +                    });
+ +                }
+ +
+ +                fn horizontal_min<const LANES: usize>() {
+ +                    test_helpers::test_1(&|x| {
+ +                        let vmax = Vector::<LANES>::from_array(x).horizontal_min();
+ +                        let smax = x.iter().copied().fold(Scalar::NAN, Scalar::min);
+ +                        // 0 and -0 are treated the same
+ +                        if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+ +                            test_helpers::prop_assert_biteq!(vmax, smax);
+ +                        }
+ +                        Ok(())
+ +                    });
+ +                }
+ +            }
+ +
+ +            #[cfg(feature = "std")]
+ +            mod std {
+ +                use std_float::StdFloat;
+ +
+ +                use super::*;
+ +                test_helpers::test_lanes! {
+ +                    fn sqrt<const LANES: usize>() {
+ +                        test_helpers::test_unary_elementwise(
+ +                            &Vector::<LANES>::sqrt,
+ +                            &Scalar::sqrt,
+ +                            &|_| true,
+ +                        )
+ +                    }
+ +
+ +                    fn mul_add<const LANES: usize>() {
+ +                        test_helpers::test_ternary_elementwise(
+ +                            &Vector::<LANES>::mul_add,
+ +                            &Scalar::mul_add,
+ +                            &|_, _, _| true,
+ +                        )
+ +                    }
+ +                }
+ +            }
+ +        }
+ +    }
+ +}
diff --cc library/portable-simd/crates/core_simd/tests/round.rs

index 1a1bc9ebca76a15d0b5b1bb1b486801d20104168,0000000000000000000000000000000000000000..537323292376043cf4794f66d6f982e607c5697d

mode 100644,000000..100644
--- 1/library/portable-simd/crates/core_simd/tests/round.rs
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@@ -1,94 -1,0 +1,86 @@@
-                 fn from_int<const LANES: usize>() {
-                     test_helpers::test_unary_elementwise(
-                         &Vector::<LANES>::round_from_int,
-                         &|x| x as Scalar,
-                         &|_| true,
-                     )
-                 }
- 
+ +#![feature(portable_simd)]
+ +
+ +// Generates a test module named after `$scalar` that exercises the rounding
+ +// operations and the unchecked float-to-int conversion of
+ +// `core_simd::Simd<$scalar, LANES>`, using `$int_scalar` as the integer target.
+ +macro_rules! float_rounding_test {
+ +    { $scalar:tt, $int_scalar:tt } => {
+ +        mod $scalar {
+ +            use std_float::StdFloat;
+ +
+ +            // Local aliases so the test bodies below are independent of the concrete types.
+ +            type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
+ +            type Scalar = $scalar;
+ +            type IntScalar = $int_scalar;
+ +
+ +            // Rounding tests: only compiled with the `std` feature. Each one pairs the
+ +            // vector method with the scalar method of the same name; the always-`true`
+ +            // closure is presumably an input filter that accepts every value
+ +            // (verify in `test_helpers`).
+ +            #[cfg(feature = "std")]
+ +            test_helpers::test_lanes! {
+ +                fn ceil<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::ceil,
+ +                        &Scalar::ceil,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn floor<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::floor,
+ +                        &Scalar::floor,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn round<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::round,
+ +                        &Scalar::round,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn trunc<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::trunc,
+ +                        &Scalar::trunc,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +
+ +                fn fract<const LANES: usize>() {
+ +                    test_helpers::test_unary_elementwise(
+ +                        &Vector::<LANES>::fract,
+ +                        &Scalar::fract,
+ +                        &|_| true,
+ +                    )
+ +                }
+ +            }
+ +
+ +            // Conversion test: not gated on the `std` feature.
+ +            test_helpers::test_lanes! {
-                             let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
+ +                fn to_int_unchecked<const LANES: usize>() {
+ +                    // The maximum integer that can be represented by the equivalently sized float has
+ +                    // all of the mantissa digits set to 1, pushed up to the MSB.
+ +                    // The shift is (bit width - MANTISSA_DIGITS - 1), which leaves the sign bit
+ +                    // clear; e.g. for f32/i32 that is 24 set bits shifted left by 7 = 0x7FFF_FF80.
+ +                    const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
+ +                    const MAX_REPRESENTABLE_VALUE: Scalar =
+ +                        (ALL_MANTISSA_BITS << (core::mem::size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar;
+ +
+ +                    // Inputs come from a strategy built over -MAX..MAX, so the unchecked
+ +                    // conversions below should always see values that fit in `IntScalar`
+ +                    // (presumably every lane is sampled from that range -- verify
+ +                    // `UniformArrayStrategy`). The final `unwrap` panics if the run fails.
+ +                    let mut runner = proptest::test_runner::TestRunner::default();
+ +                    runner.run(
+ +                        &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
+ +                        |x| {
-                                 let mut result = [0; LANES];
++                            let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
+ +                            let result_2 = {
-                                     *o = unsafe { i.to_int_unchecked() };
++                                let mut result: [IntScalar; LANES] = [0; LANES];
+ +                                for (i, o) in x.iter().zip(result.iter_mut()) {
++                                    *o = unsafe { i.to_int_unchecked::<IntScalar>() };
+ +                                }
+ +                                result
+ +                            };
+ +                            // The whole-vector conversion must match the lane-by-lane scalar one.
+ +                            test_helpers::prop_assert_biteq!(result_1, result_2);
+ +                            Ok(())
+ +                        },
+ +                    ).unwrap();
+ +                }
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +// Instantiate the tests for each float type, paired with the signed integer type
+ +// of the same bit width.
+ +float_rounding_test! { f32, i32 }
+ +float_rounding_test! { f64, i64 }
author	Jubilee Young <workingjubilee@gmail.com>
	Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)
committer	Jubilee Young <workingjubilee@gmail.com>
	Mon, 28 Feb 2022 18:17:40 +0000 (10:17 -0800)
		1	2
library/portable-simd/crates/core_simd/examples/spectral_norm.rs	patch \|	\|	\|	blob
library/portable-simd/crates/core_simd/src/comparisons.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/intrinsics.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/lib.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/masks.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/masks/bitmask.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/masks/full_masks.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs	patch \|	\|	\|	blob
library/portable-simd/crates/core_simd/src/math.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/ops.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/reduction.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/round.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/select.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/swizzle.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/to_bytes.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/vector.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/vector/ptr.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/src/vendor.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/tests/masks.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/tests/ops_macros.rs	patch \|	diff1 \|	\|	blob \| history
library/portable-simd/crates/core_simd/tests/round.rs	patch \|	diff1 \|	\|	blob \| history