--- /dev/null
--- /dev/null
++#![feature(portable_simd)]
++
++use core_simd::simd::*;
++
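++// Entry (i, j) of the infinite matrix A from the spectralnorm benchmark:
++// returns the denominator of A[i][j] = 1 / ((i + j)(i + j + 1)/2 + i + 1).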
++fn a(i: usize, j: usize) -> f64 {
++ ((i + j) * (i + j + 1) / 2 + i + 1) as f64
++}
++
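++// Computes out = A * v, two f64 lanes at a time.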
++fn mult_av(v: &[f64], out: &mut [f64]) {
++ assert!(v.len() == out.len());
++ assert!(v.len() % 2 == 0);
++
++ for (i, out) in out.iter_mut().enumerate() {
++ let mut sum = f64x2::splat(0.0);
++
++ let mut j = 0;
++ while j < v.len() {
++ let b = f64x2::from_slice(&v[j..]);
++ let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
++ sum += b / a;
++ j += 2
++ }
++ *out = sum.horizontal_sum();
++ }
++}
++
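++// Computes out = A^T * v (note the transposed index order passed to `a`).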
++fn mult_atv(v: &[f64], out: &mut [f64]) {
++ assert!(v.len() == out.len());
++ assert!(v.len() % 2 == 0);
++
++ for (i, out) in out.iter_mut().enumerate() {
++ let mut sum = f64x2::splat(0.0);
++
++ let mut j = 0;
++ while j < v.len() {
++ let b = f64x2::from_slice(&v[j..]);
++ let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
++ sum += b / a;
++ j += 2
++ }
++ *out = sum.horizontal_sum();
++ }
++}
++
++fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
++ mult_av(v, tmp);
++ mult_atv(tmp, out);
++}
++
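++// Approximates the spectral norm ||A|| = sqrt(lambda_max(A^T * A)) by power
++// iteration: u and v converge towards the dominant eigenvector of A^T * A, and
++// the Rayleigh quotient dot(u, v) / dot(v, v) converges towards lambda_max.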
++pub fn spectral_norm(n: usize) -> f64 {
++ assert!(n % 2 == 0, "only even lengths are accepted");
++
++ let mut u = vec![1.0; n];
++ let mut v = u.clone();
++ let mut tmp = u.clone();
++
++ for _ in 0..10 {
++ mult_atav(&u, &mut v, &mut tmp);
++ mult_atav(&v, &mut u, &mut tmp);
++ }
++ (dot(&u, &v) / dot(&v, &v)).sqrt()
++}
++
++fn dot(x: &[f64], y: &[f64]) -> f64 {
++ // This is auto-vectorized:
++ x.iter().zip(y).map(|(&x, &y)| x * y).sum()
++}
++
++#[cfg(test)]
++#[test]
++fn test() {
++ assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
++}
++
++fn main() {
++ // Empty main to make cargo happy
++}
--- /dev/null
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ T: SimdElement + PartialEq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Test if each lane is equal to the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
+ }
+
+ /// Test if each lane is not equal to the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
+ }
+}
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ T: SimdElement + PartialOrd,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Test if each lane is less than the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+ }
+
+ /// Test if each lane is greater than the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+ }
+
+ /// Test if each lane is less than or equal to the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+ }
+
+ /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
++ // Safety: `self` is a vector, and the result of the comparison
++ // is always a valid mask.
+ unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+ }
+}
--- /dev/null
- /// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
- /// is documented alongside each intrinsic.
+//! This module contains the LLVM intrinsics bindings that provide the functionality for this
+//! crate.
+//!
+//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
++//!
++//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
++//! - poison: "undefined behavior as a value". Specifically, it is like uninit memory (such as padding bytes). It is "safe" to create poison, BUT
++//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. Unlike NaN, which has defined comparisons,
++//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). So it can't be conditioned on, either.
++//! - undef: "a value that is every value". Functionally like poison, insofar as Rust is concerned. Poison may become this. Note:
++//!   this means that division by poison or undef is like division by zero, which means it inflicts...
++//! - "UB": poison and undef cover most of what people call "UB". "UB" means an operation that immediately invalidates the program:
++//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
++//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
++//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
++//!
++//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
++//!
++//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
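++//!
++//! As a scalar illustration of why these notes matter (a sketch, not part of this
++//! module's API): in LLVM, shifting an `i32` left or right by 32 or more yields
++//! poison, and `i32::MIN / -1` or integer division by zero is immediate UB. That is
++//! why the safe wrappers in this crate mask shift amounts and guard division before
++//! calling these intrinsics.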
+
+/// These intrinsics aren't linked directly from LLVM and are mostly undocumented; however, they are
- pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
++/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
++/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
+extern "platform-intrinsic" {
+ /// add/fadd
+ pub(crate) fn simd_add<T>(x: T, y: T) -> T;
+
+ /// sub/fsub
- pub(crate) fn simd_div<T>(x: T, y: T) -> T;
++ pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
+
+ /// mul/fmul
+ pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
+
+ /// udiv/sdiv/fdiv
- pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
++ /// ints and uints: {s,u}div incur UB if division by zero occurs.
++ /// ints: sdiv is UB for int::MIN / -1.
++ /// floats: fdiv is never UB, but may create NaNs or infinities.
++ pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
+
+ /// urem/srem/frem
- pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
++ /// ints and uints: {s,u}rem incur UB if division by zero occurs.
++ /// ints: srem is UB for int::MIN / -1.
++ /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
++ pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
+
+ /// shl
- /// lshr/ashr
- pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
++ /// for (u)ints. poison if rhs >= <int>::BITS
++ pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
+
- pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
++ /// ints: ashr
++ /// uints: lshr
++ /// poison if rhs >= <int>::BITS
++ pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
+
+ /// and
+ pub(crate) fn simd_and<T>(x: T, y: T) -> T;
+
+ /// or
+ pub(crate) fn simd_or<T>(x: T, y: T) -> T;
+
+ /// xor
+ pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
+
+ /// fptoui/fptosi/uitofp/sitofp
++ /// Casting floats to integers truncates, so it is safe to convert values like 1.5,
++ /// but the truncated value must fit in the target type or the result is poison.
++ /// Use `simd_as` instead for a cast that performs a saturating conversion.
+ pub(crate) fn simd_cast<T, U>(x: T) -> U;
+ /// follows Rust's `T as U` semantics, including saturating float casts
+ /// which amounts to the same as `simd_cast` for many cases
+ #[cfg(not(bootstrap))]
+ pub(crate) fn simd_as<T, U>(x: T) -> U;
+
+ /// neg/fneg
++ /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
++ /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
++ /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
+ pub(crate) fn simd_neg<T>(x: T) -> T;
+
+ /// fabs
+ pub(crate) fn simd_fabs<T>(x: T) -> T;
+
+ // minnum/maxnum
+ pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
+
++ // these return Simd<int, N> with the same BITS size as the inputs
+ pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+
+ // shufflevector
++ // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
+ pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
+
++ /// llvm.masked.gather
++ /// like a loop of pointer reads
++ /// val: vector of values to select if a lane is masked
++ /// ptr: vector of pointers to read from
++ /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
++ /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
++ /// FIXME: review this if/when we fix up our mask story in general?
+ pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
++ /// llvm.masked.scatter
++ /// like gather, but more spicy, as it writes instead of reads
+ pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
+
+ // {s,u}add.sat
+ pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
+
+ // {s,u}sub.sat
- pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
++ pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
+
+ // reductions
++ // llvm.vector.reduce.{add,fadd}
+ pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
++ // llvm.vector.reduce.{mul,fmul}
+ pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
+ #[allow(unused)]
+ pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+ #[allow(unused)]
+ pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+ pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+
+ // truncate integer vector to bitmask
+ #[allow(unused)]
+ pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
+
+ // select
- pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
++ // first argument is a vector of integers, -1 (all bits 1) is "true"
++ // logically equivalent to (yes & m) | (no & (m ^ -1)),
++ // but you can use it on floats.
++ pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
+ #[allow(unused)]
++ pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
+}
--- /dev/null
+#![cfg_attr(not(feature = "std"), no_std)]
+#![feature(
+ const_fn_trait_bound,
++ convert_float_to_int,
+ decl_macro,
++ intra_doc_pointers,
+ platform_intrinsics,
+ repr_simd,
+ simd_ffi,
+ staged_api,
+ stdsimd
+)]
+#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
+#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
+#![warn(missing_docs)]
+#![deny(unsafe_op_in_unsafe_fn)]
+#![unstable(feature = "portable_simd", issue = "86656")]
+//! Portable SIMD module.
+
+#[path = "mod.rs"]
+mod core_simd;
+pub use self::core_simd::simd;
+pub use simd::*;
--- /dev/null
- use crate::simd::intrinsics;
- use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+//! Types and traits associated with masking lanes of vectors.
+#![allow(non_camel_case_types)]
+
+#[cfg_attr(
+ not(all(target_arch = "x86_64", target_feature = "avx512f")),
+ path = "masks/full_masks.rs"
+)]
+#[cfg_attr(
+ all(target_arch = "x86_64", target_feature = "avx512f"),
+ path = "masks/bitmask.rs"
+)]
+mod mask_impl;
+
- /// Convert this mask to a bitmask, with one bit set per lane.
- #[cfg(feature = "generic_const_exprs")]
- #[inline]
- #[must_use = "method returns a new array and does not mutate the original value"]
- pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
- self.0.to_bitmask()
- }
-
- /// Convert a bitmask to a mask.
- #[cfg(feature = "generic_const_exprs")]
- #[inline]
- #[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
- Self(mask_impl::Mask::from_bitmask(bitmask))
- }
-
++mod to_bitmask;
++pub use to_bitmask::ToBitMask;
++
++use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::cmp::Ordering;
+use core::{fmt, mem};
+
+mod sealed {
+ use super::*;
+
+ /// Not only does this seal the `MaskElement` trait, but these functions prevent other traits
+ /// from bleeding into the parent bounds.
+ ///
+ /// For example, `eq` could be provided by requiring `MaskElement: PartialEq`, but that would
+ /// prevent us from ever removing that bound, or from implementing `MaskElement` on
+ /// non-`PartialEq` types in the future.
+ pub trait Sealed {
+ fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ Self: SimdElement;
+
+ fn eq(self, other: Self) -> bool;
+
+ const TRUE: Self;
+
+ const FALSE: Self;
+ }
+}
+use sealed::Sealed;
+
+/// Marker trait for types that may be used as SIMD mask elements.
++///
++/// # Safety
++/// Type must be a signed integer.
+pub unsafe trait MaskElement: SimdElement + Sealed {}
+
+macro_rules! impl_element {
+ { $ty:ty } => {
+ impl Sealed for $ty {
+ fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ (value.lanes_eq(Simd::splat(0)) | value.lanes_eq(Simd::splat(-1))).all()
+ }
+
+ fn eq(self, other: Self) -> bool { self == other }
+
+ const TRUE: Self = -1;
+ const FALSE: Self = 0;
+ }
+
+ unsafe impl MaskElement for $ty {}
+ }
+}
+
+impl_element! { i8 }
+impl_element! { i16 }
+impl_element! { i32 }
+impl_element! { i64 }
+impl_element! { isize }
+
+/// A SIMD vector mask for `LANES` elements of width specified by `Element`.
+///
+/// The layout of this type is unspecified.
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Construct a mask by setting all lanes to the given value.
+ pub fn splat(value: bool) -> Self {
+ Self(mask_impl::Mask::splat(value))
+ }
+
+ /// Converts an array of bools to a SIMD mask.
+ pub fn from_array(array: [bool; LANES]) -> Self {
+ // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ // true: 0b_0000_0001
+ // false: 0b_0000_0000
+ // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ // This would be hypothetically valid as an "in-place" transmute,
+ // but these are "dependently-sized" types, so copy elision it is!
+ unsafe {
+ let bytes: [u8; LANES] = mem::transmute_copy(&array);
+ let bools: Simd<i8, LANES> =
+ intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+ Mask::from_int_unchecked(intrinsics::simd_cast(bools))
+ }
+ }
+
+ /// Converts a SIMD mask to an array of bools.
+ pub fn to_array(self) -> [bool; LANES] {
+ // This follows mostly the same logic as from_array.
+ // SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
+ // true: 0b_0000_0001
+ // false: 0b_0000_0000
+ // Thus, an array of bools is also a valid array of bytes: [u8; N]
+ // Since our masks are equal to integers where all bits are set,
+ // we can simply convert them to i8s, and then bitand them by the
+ // bitpattern for Rust's "true" bool.
+ // This would be hypothetically valid as an "in-place" transmute,
+ // but these are "dependently-sized" types, so copy elision it is!
+ unsafe {
+ let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+ bytes &= Simd::splat(1i8);
+ mem::transmute_copy(&bytes)
+ }
+ }
+
+ /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ /// represents `true`.
+ ///
+ /// # Safety
+ /// All lanes must be either 0 or -1.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
++ // Safety: the caller must confirm this invariant
+ unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
+ }
+
+ /// Converts a vector of integers to a mask, where 0 represents `false` and -1
+ /// represents `true`.
+ ///
+ /// # Panics
+ /// Panics if any lane is not 0 or -1.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_int(value: Simd<T, LANES>) -> Self {
+ assert!(T::valid(value), "all values must be either 0 or -1",);
++ // Safety: the validity has been checked
+ unsafe { Self::from_int_unchecked(value) }
+ }
+
+ /// Converts the mask to a vector of integers, where 0 represents `false` and -1
+ /// represents `true`.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ self.0.to_int()
+ }
+
+ /// Tests the value of the specified lane.
+ ///
+ /// # Safety
+ /// `lane` must be less than `LANES`.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
++ // Safety: the caller must confirm this invariant
+ unsafe { self.0.test_unchecked(lane) }
+ }
+
+ /// Tests the value of the specified lane.
+ ///
+ /// # Panics
+ /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn test(&self, lane: usize) -> bool {
+ assert!(lane < LANES, "lane index out of range");
++ // Safety: the lane index has been checked
+ unsafe { self.test_unchecked(lane) }
+ }
+
+ /// Sets the value of the specified lane.
+ ///
+ /// # Safety
+ /// `lane` must be less than `LANES`.
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
++ // Safety: the caller must confirm this invariant
+ unsafe {
+ self.0.set_unchecked(lane, value);
+ }
+ }
+
+ /// Sets the value of the specified lane.
+ ///
+ /// # Panics
+ /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ #[inline]
+ pub fn set(&mut self, lane: usize, value: bool) {
+ assert!(lane < LANES, "lane index out of range");
++ // Safety: the lane index has been checked
+ unsafe {
+ self.set_unchecked(lane, value);
+ }
+ }
+
+ /// Returns true if any lane is set, or false otherwise.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
+ self.0.any()
+ }
+
+ /// Returns true if all lanes are set, or false otherwise.
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn all(self) -> bool {
+ self.0.all()
+ }
+}
+
+// vector/array conversion
+impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(array: [bool; LANES]) -> Self {
+ Self::from_array(array)
+ }
+}
+
+impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(vector: Mask<T, LANES>) -> Self {
+ vector.to_array()
+ }
+}
+
+impl<T, const LANES: usize> Default for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a defaulted mask with all lanes set to false (0)"]
+ fn default() -> Self {
+ Self::splat(false)
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement + PartialEq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement + PartialOrd,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new Ordering and does not mutate the original value"]
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ self.0.partial_cmp(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
+where
+ T: MaskElement + fmt::Debug,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_list()
+ .entries((0..LANES).map(|lane| self.test(lane)))
+ .finish()
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Self) -> Self {
+ Self(self.0 & rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: bool) -> Self {
+ self & Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ Mask::splat(self) & rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Self) -> Self {
+ Self(self.0 | rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: bool) -> Self {
+ self | Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ Mask::splat(self) | rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Self) -> Self::Output {
+ Self(self.0 ^ rhs.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: bool) -> Self::Output {
+ self ^ Self::splat(rhs)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+ Mask::splat(self) ^ rhs
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Mask<T, LANES>;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(self) -> Self::Output {
+ Self(!self.0)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitand_assign(&mut self, rhs: Self) {
+ self.0 = self.0 & rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitand_assign(&mut self, rhs: bool) {
+ *self &= Self::splat(rhs);
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitor_assign(&mut self, rhs: Self) {
+ self.0 = self.0 | rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitor_assign(&mut self, rhs: bool) {
+ *self |= Self::splat(rhs);
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitxor_assign(&mut self, rhs: Self) {
+ self.0 = self.0 ^ rhs.0;
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ fn bitxor_assign(&mut self, rhs: bool) {
+ *self ^= Self::splat(rhs);
+ }
+}
+
+/// Vector of eight 8-bit masks
+pub type mask8x8 = Mask<i8, 8>;
+
+/// Vector of 16 8-bit masks
+pub type mask8x16 = Mask<i8, 16>;
+
+/// Vector of 32 8-bit masks
+pub type mask8x32 = Mask<i8, 32>;
+
+/// Vector of 64 8-bit masks
+pub type mask8x64 = Mask<i8, 64>;
+
+/// Vector of four 16-bit masks
+pub type mask16x4 = Mask<i16, 4>;
+
+/// Vector of eight 16-bit masks
+pub type mask16x8 = Mask<i16, 8>;
+
+/// Vector of 16 16-bit masks
+pub type mask16x16 = Mask<i16, 16>;
+
+/// Vector of 32 16-bit masks
+pub type mask16x32 = Mask<i16, 32>;
+
+/// Vector of two 32-bit masks
+pub type mask32x2 = Mask<i32, 2>;
+
+/// Vector of four 32-bit masks
+pub type mask32x4 = Mask<i32, 4>;
+
+/// Vector of eight 32-bit masks
+pub type mask32x8 = Mask<i32, 8>;
+
+/// Vector of 16 32-bit masks
+pub type mask32x16 = Mask<i32, 16>;
+
+/// Vector of two 64-bit masks
+pub type mask64x2 = Mask<i64, 2>;
+
+/// Vector of four 64-bit masks
+pub type mask64x4 = Mask<i64, 4>;
+
+/// Vector of eight 64-bit masks
+pub type mask64x8 = Mask<i64, 8>;
+
+/// Vector of two pointer-width masks
+pub type masksizex2 = Mask<isize, 2>;
+
+/// Vector of four pointer-width masks
+pub type masksizex4 = Mask<isize, 4>;
+
+/// Vector of eight pointer-width masks
+pub type masksizex8 = Mask<isize, 8>;
+
+macro_rules! impl_from {
+ { $from:ty => $($to:ty),* } => {
+ $(
+ impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ fn from(value: Mask<$from, LANES>) -> Self {
+ Self(value.0.convert())
+ }
+ }
+ )*
+ }
+}
+impl_from! { i8 => i16, i32, i64, isize }
+impl_from! { i16 => i32, i64, isize, i8 }
+impl_from! { i32 => i64, isize, i8, i16 }
+impl_from! { i64 => isize, i8, i16, i32 }
+impl_from! { isize => i8, i16, i32, i64 }
--- /dev/null
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+#![allow(unused_imports)]
+use super::MaskElement;
+use crate::simd::intrinsics;
- #[cfg(feature = "generic_const_exprs")]
++use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use core::marker::PhantomData;
+
+/// A mask where each lane is represented by a single bit.
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+ PhantomData<T>,
+)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn eq(&self, other: &Self) -> bool {
+ self.0.as_ref() == other.0.as_ref()
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ self.0.as_ref().partial_cmp(other.0.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ self.0.as_ref().cmp(other.0.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn splat(value: bool) -> Self {
+ let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
+ if value {
+ mask.as_mut().fill(u8::MAX)
+ } else {
+ mask.as_mut().fill(u8::MIN)
+ }
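++ // Clear the padding bits in the final byte so whole-bitmask comparisons
++ // (as used by `any` and `all` below) stay canonical.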
+ if LANES % 8 > 0 {
+ *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ }
+ Self(mask, PhantomData)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0
+ }
+
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ unsafe {
+ self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8)
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ unsafe {
+ intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
+ }
+
- #[must_use = "method returns a new array and does not mutate the original value"]
- pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
- // Safety: these are the same type and we are laundering the generic
+ #[inline]
- #[cfg(feature = "generic_const_exprs")]
++ pub fn to_bitmask_integer<U>(self) -> U
++ where
++ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++ {
++ // Safety: these are the same types
+ unsafe { core::mem::transmute_copy(&self.0) }
+ }
+
- #[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
- // Safety: these are the same type and we are laundering the generic
- Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+ #[inline]
++ pub fn from_bitmask_integer<U>(bitmask: U) -> Self
++ where
++ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++ {
++ // Safety: these are the same types
++ unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn convert<U>(self) -> Mask<U, LANES>
+ where
+ U: MaskElement,
+ {
++ // Safety: bitmask layout does not depend on the element width
+ unsafe { core::mem::transmute_copy(&self) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
+ self != Self::splat(false)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn all(self) -> bool {
+ self == Self::splat(true)
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(mut self, rhs: Self) -> Self {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l &= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(mut self, rhs: Self) -> Self {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l |= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(mut self, rhs: Self) -> Self::Output {
+ for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) {
+ *l ^= r;
+ }
+ self
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(mut self) -> Self::Output {
+ for x in self.0.as_mut() {
+ *x = !*x;
+ }
+ if LANES % 8 > 0 {
+ *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ }
+ self
+ }
+}
--- /dev/null
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+//! Masks that take up full SIMD vector registers.
+
+use super::MaskElement;
+use crate::simd::intrinsics;
- #[cfg(feature = "generic_const_exprs")]
++use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+
+#[repr(transparent)]
+pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Copy for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+where
+ T: MaskElement + PartialEq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn eq(&self, other: &Self) -> bool {
+ self.0.eq(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+where
+ T: MaskElement + PartialOrd,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ self.0.partial_cmp(&other.0)
+ }
+}
+
+impl<T, const LANES: usize> Eq for Mask<T, LANES>
+where
+ T: MaskElement + Eq,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Ord for Mask<T, LANES>
+where
+ T: MaskElement + Ord,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ self.0.cmp(&other.0)
+ }
+}
+
++// Used for bitmask bit order workaround
++pub(crate) trait ReverseBits {
++ fn reverse_bits(self) -> Self;
++}
++
++macro_rules! impl_reverse_bits {
++ { $($int:ty),* } => {
++ $(
++ impl ReverseBits for $int {
++ fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
++ }
++ )*
++ }
++}
++
++impl_reverse_bits! { u8, u16, u32, u64 }
++
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn splat(value: bool) -> Self {
+ Self(Simd::splat(if value { T::TRUE } else { T::FALSE }))
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ T::eq(self.0[lane], T::TRUE)
+ }
+
+ #[inline]
+ pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ self.0[lane] = if value { T::TRUE } else { T::FALSE }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_int(self) -> Simd<T, LANES> {
+ self.0
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ Self(value)
+ }
+
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn convert<U>(self) -> Mask<U, LANES>
+ where
+ U: MaskElement,
+ {
++ // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
+ unsafe { Mask(intrinsics::simd_cast(self.0)) }
+ }
+
- #[must_use = "method returns a new array and does not mutate the original value"]
- pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
- unsafe {
- let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
- intrinsics::simd_bitmask(self.0);
-
- // There is a bug where LLVM appears to implement this operation with the wrong
- // bit order.
- // TODO fix this in a better way
- if cfg!(target_endian = "big") {
- for x in bitmask.as_mut() {
- *x = x.reverse_bits();
- }
- }
+ #[inline]
- #[cfg(feature = "generic_const_exprs")]
++ pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
++ where
++ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++ {
++ // Safety: U is required to be the appropriate bitmask type
++ let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+
++ // LLVM assumes bit order should match endianness
++ if cfg!(target_endian = "big") {
++ bitmask.reverse_bits()
++ } else {
+ bitmask
+ }
+ }
+
- #[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
- unsafe {
- // There is a bug where LLVM appears to implement this operation with the wrong
- // bit order.
- // TODO fix this in a better way
- if cfg!(target_endian = "big") {
- for x in bitmask.as_mut() {
- *x = x.reverse_bits();
- }
- }
+ #[inline]
++ pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
++ where
++ super::Mask<T, LANES>: ToBitMask<BitMask = U>,
++ {
++ // LLVM assumes bit order should match endianness
++ let bitmask = if cfg!(target_endian = "big") {
++ bitmask.reverse_bits()
++ } else {
++ bitmask
++ };
+
++ // Safety: U is required to be the appropriate bitmask type
++ unsafe {
+ Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+ bitmask,
+ Self::splat(true).to_int(),
+ Self::splat(false).to_int(),
+ ))
+ }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new bool and does not mutate the original value"]
+ pub fn any(self) -> bool {
++ // Safety: use `self` as an integer vector
+ unsafe { intrinsics::simd_reduce_any(self.to_int()) }
+ }
+
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn all(self) -> bool {
++ // Safety: use `self` as an integer vector
+ unsafe { intrinsics::simd_reduce_all(self.to_int()) }
+ }
+}
+
+impl<T, const LANES: usize> core::convert::From<Mask<T, LANES>> for Simd<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn from(value: Mask<T, LANES>) -> Self {
+ value.0
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitand(self, rhs: Self) -> Self {
++ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitor(self, rhs: Self) -> Self {
++ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn bitxor(self, rhs: Self) -> Self {
++ // Safety: `self` is an integer vector
+ unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
+ }
+}
+
+impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ type Output = Self;
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ fn not(self) -> Self::Output {
+ Self::splat(true) ^ self
+ }
+}
--- /dev/null
--- /dev/null
++use super::{mask_impl, Mask, MaskElement};
++use crate::simd::{LaneCount, SupportedLaneCount};
++
++mod sealed {
++ pub trait Sealed {}
++}
++pub use sealed::Sealed;
++
++impl<T, const LANES: usize> Sealed for Mask<T, LANES>
++where
++ T: MaskElement,
++ LaneCount<LANES>: SupportedLaneCount,
++{
++}
++
++/// Converts masks to and from integer bitmasks.
++///
++/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
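++///
++/// For example, a `Mask::<i8, 8>` whose lanes are
++/// `[true, false, false, false, false, false, false, true]` corresponds to the
++/// bitmask `0b1000_0001u8`: lane 0 maps to the least significant bit.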
++///
++/// # Safety
++/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
++/// the mask.
++pub unsafe trait ToBitMask: Sealed {
++ /// The integer bitmask type.
++ type BitMask;
++
++ /// Converts a mask to a bitmask.
++ fn to_bitmask(self) -> Self::BitMask;
++
++ /// Converts a bitmask to a mask.
++ fn from_bitmask(bitmask: Self::BitMask) -> Self;
++}
++
++macro_rules! impl_integer_intrinsic {
++ { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
++ $(
++ unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
++ type BitMask = $int;
++
++ fn to_bitmask(self) -> $int {
++ self.0.to_bitmask_integer()
++ }
++
++ fn from_bitmask(bitmask: $int) -> Self {
++ Self(mask_impl::Mask::from_bitmask_integer(bitmask))
++ }
++ }
++ )*
++ }
++}
++
++impl_integer_intrinsic! {
++ unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
++ unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
++ unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
++ unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
++}
--- /dev/null
+use crate::simd::intrinsics::{simd_saturating_add, simd_saturating_sub};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+
+macro_rules! impl_uint_arith {
+ ($($ty:ty),+) => {
+ $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
+
+ /// Lanewise saturating add.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+ /// let x = Simd::from_array([2, 1, 0, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x + max;
+ /// let sat = x.saturating_add(max);
+ /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
+ /// assert_eq!(sat, max);
+ /// ```
+ #[inline]
+ pub fn saturating_add(self, second: Self) -> Self {
++ // Safety: `self` is a vector
+ unsafe { simd_saturating_add(self, second) }
+ }
+
+ /// Lanewise saturating subtract.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
+ /// let x = Simd::from_array([2, 1, 0, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x - max;
+ /// let sat = x.saturating_sub(max);
+ /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
+ /// assert_eq!(sat, Simd::splat(0));
++ /// ```
+ #[inline]
+ pub fn saturating_sub(self, second: Self) -> Self {
++ // Safety: `self` is a vector
+ unsafe { simd_saturating_sub(self, second) }
+ }
+ })+
+ }
+}
+
+macro_rules! impl_int_arith {
+ ($($ty:ty),+) => {
+ $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
+
+ /// Lanewise saturating add.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ /// let x = Simd::from_array([MIN, 0, 1, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x + max;
+ /// let sat = x.saturating_add(max);
+ /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
+ /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
+ /// ```
+ #[inline]
+ pub fn saturating_add(self, second: Self) -> Self {
++ // Safety: `self` is a vector
+ unsafe { simd_saturating_add(self, second) }
+ }
+
+ /// Lanewise saturating subtract.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+ /// let max = Simd::splat(MAX);
+ /// let unsat = x - max;
+ /// let sat = x.saturating_sub(max);
+ /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+ /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
++ /// ```
+ #[inline]
+ pub fn saturating_sub(self, second: Self) -> Self {
++ // Safety: `self` is a vector
+ unsafe { simd_saturating_sub(self, second) }
+ }
+
+ /// Lanewise absolute value, implemented in Rust.
+ /// Every lane becomes its absolute value.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
+ /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
+ /// ```
+ #[inline]
+ pub fn abs(self) -> Self {
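++ // An arithmetic right shift yields -1 (all ones) for negative lanes and 0
++ // otherwise; `(self ^ m) - m` then negates exactly the negative lanes.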
+ const SHR: $ty = <$ty>::BITS as $ty - 1;
+ let m = self >> Simd::splat(SHR);
+ (self ^ m) - m
+ }
+
+ /// Lanewise saturating absolute value, implemented in Rust.
+ /// As abs(), except the MIN value becomes MAX instead of itself.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ /// let xs = Simd::from_array([MIN, -2, 0, 3]);
+ /// let unsat = xs.abs();
+ /// let sat = xs.saturating_abs();
+ /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+ /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
+ /// ```
+ #[inline]
+ pub fn saturating_abs(self) -> Self {
+ // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
+ const SHR: $ty = <$ty>::BITS as $ty - 1;
+ let m = self >> Simd::splat(SHR);
+ (self ^ m).saturating_sub(m)
+ }
+
+ /// Lanewise saturating negation, implemented in Rust.
+ /// As neg(), except the MIN value becomes MAX instead of itself.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
+ /// let x = Simd::from_array([MIN, -2, 3, MAX]);
+ /// let unsat = -x;
+ /// let sat = x.saturating_neg();
+ /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+ /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
+ /// ```
+ #[inline]
+ pub fn saturating_neg(self) -> Self {
+ Self::splat(0).saturating_sub(self)
+ }
+ })+
+ }
+}
+
+impl_uint_arith! { u8, u16, u32, u64, usize }
+impl_int_arith! { i8, i16, i32, i64, isize }
--- /dev/null
- // Division by zero is poison, according to LLVM.
- // So is dividing the MIN value of a signed integer by -1,
- // since that would return MAX + 1.
- // FIXME: Rust allows <SInt>::MIN / -1,
- // so we should probably figure out how to make that safe.
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::ops::{Add, Mul};
+use core::ops::{BitAnd, BitOr, BitXor};
+use core::ops::{Div, Rem, Sub};
+use core::ops::{Shl, Shr};
+
+mod assign;
+mod deref;
+mod unary;
+
+impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ I: core::slice::SliceIndex<[T]>,
+{
+ type Output = I::Output;
+ fn index(&self, index: I) -> &Self::Output {
+ &self.as_array()[index]
+ }
+}
+
+impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+ I: core::slice::SliceIndex<[T]>,
+{
+ fn index_mut(&mut self, index: I) -> &mut Self::Output {
+ &mut self.as_mut_array()[index]
+ }
+}
+
+macro_rules! unsafe_base {
+ ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
+ unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+ };
+}
+
+/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
+/// It handles performing a bitand in addition to calling the shift operator, so that the result
+/// is well-defined: LLVM can return a poison value for shl, lshr, or ashr when rhs >= <Int>::BITS.
+/// At worst, this will maybe add another instruction and cycle;
+/// at best, it may open up more optimization opportunities,
+/// or simply be elided entirely, especially for SIMD ISAs which default to this.
+///
+// FIXME: Consider implementing this in cg_llvm instead?
+// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
+macro_rules! wrap_bitshift {
+ ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+ unsafe {
+ $crate::simd::intrinsics::$simd_call(
+ $lhs,
+ $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
+ )
+ }
+ };
+}
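++// For instance, with this masking `Simd::<u32, 4>::splat(1) << Simd::splat(33)`
++// behaves like a shift by `33 & 31 == 1` rather than producing poison.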
+
- const PANIC_OVERFLOW: &'static str = $overflow:literal;
++/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
++/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
++/// thus guaranteeing a Rust value returns instead.
++///
++/// | | LLVM | Rust
++/// | :--------------: | :--- | :----------
++/// | N {/,%} 0 | UB | panic!()
++/// | <$int>::MIN / -1 | UB | <$int>::MIN
++/// | <$int>::MIN % -1 | UB | 0
++///
+macro_rules! int_divrem_guard {
+ ( $lhs:ident,
+ $rhs:ident,
+ { const PANIC_ZERO: &'static str = $zero:literal;
- } else if <$int>::MIN != 0
- && ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
- // type inference can break here, so cut an SInt to size
- & $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
- {
- panic!($overflow);
+ $simd_call:ident
+ },
+ $int:ident ) => {
+ if $rhs.lanes_eq(Simd::splat(0)).any() {
+ panic!($zero);
- unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
+ } else {
- const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
++ // Prevent otherwise-UB overflow on the MIN / -1 case.
++ let rhs = if <$int>::MIN != 0 {
++ // This should, at worst, optimize to a few branchless logical ops
++ // Ideally, this entire conditional should evaporate
++ // Fire LLVM and implement those manually if it doesn't get the hint
++ ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
++ // type inference can break here, so cut an SInt to size
++ & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
++ .select(Simd::splat(1), $rhs)
++ } else {
++ // Nice base case to make it easy to const-fold away the other branch.
++ $rhs
++ };
++ unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
+ }
+ };
+}
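++// For instance, with this guard `Simd::<i32, 4>::splat(i32::MIN) / Simd::splat(-1)`
++// evaluates to `Simd::splat(i32::MIN)` (as in the table above) instead of being UB.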
+
+macro_rules! for_base_types {
+ ( T = ($($scalar:ident),*);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out:ty;
+
+ impl $op:ident::$call:ident {
+ $macro_impl:ident $inner:tt
+ }) => {
+ $(
+ impl<const N: usize> $op<Self> for Simd<$scalar, N>
+ where
+ $scalar: SimdElement,
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = $out;
+
+ #[inline]
+ #[must_use = "operator returns a new vector without mutating the inputs"]
+ fn $call(self, rhs: Self) -> Self::Output {
+ $macro_impl!(self, rhs, $inner, $scalar)
+ }
+ })*
+ }
+}
+
+// A "TokenTree muncher": takes a set of scalar types `T = {};`
+// type parameters for the ops it implements, `Op::fn` names,
+// and a macro that expands into an expr, substituting in an intrinsic.
+// It passes that to for_base_types, which expands an impl for the types,
+// using the expanded expr in the function, and recurses with itself.
+//
+// tl;dr impls a set of ops::{Traits} for a set of types
+macro_rules! for_base_ops {
+ (
+ T = $types:tt;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out:ident;
+ impl $op:ident::$call:ident
+ $inner:tt
+ $($rest:tt)*
+ ) => {
+ for_base_types! {
+ T = $types;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out;
+ impl $op::$call
+ $inner
+ }
+ for_base_ops! {
+ T = $types;
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = $out;
+ $($rest)*
+ }
+ };
+ ($($done:tt)*) => {
+ // Done.
+ }
+}
+
+// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
+// For all of these operations, simd_* intrinsics apply wrapping logic.
+for_base_ops! {
+ T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = Self;
+
+ impl Add::add {
+ unsafe_base { simd_add }
+ }
+
+ impl Mul::mul {
+ unsafe_base { simd_mul }
+ }
+
+ impl Sub::sub {
+ unsafe_base { simd_sub }
+ }
+
+ impl BitAnd::bitand {
+ unsafe_base { simd_and }
+ }
+
+ impl BitOr::bitor {
+ unsafe_base { simd_or }
+ }
+
+ impl BitXor::bitxor {
+ unsafe_base { simd_xor }
+ }
+
+ impl Div::div {
+ int_divrem_guard {
+ const PANIC_ZERO: &'static str = "attempt to divide by zero";
- const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
+ simd_div
+ }
+ }
+
+ impl Rem::rem {
+ int_divrem_guard {
+ const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
+ simd_rem
+ }
+ }
+
+ // The only question is how to handle shifts >= <Int>::BITS.
+ // Our current solution uses wrapping logic.
+ impl Shl::shl {
+ wrap_bitshift { simd_shl }
+ }
+
+ impl Shr::shr {
+ wrap_bitshift {
+ // This automatically monomorphizes to lshr or ashr, depending,
+ // so it's fine to use it for both UInts and SInts.
+ simd_shr
+ }
+ }
+}
+
+// We don't need any special precautions here:
+// Floats always accept arithmetic ops, but may become NaN.
+for_base_ops! {
+ T = (f32, f64);
+ type Lhs = Simd<T, N>;
+ type Rhs = Simd<T, N>;
+ type Output = Self;
+
+ impl Add::add {
+ unsafe_base { simd_add }
+ }
+
+ impl Mul::mul {
+ unsafe_base { simd_mul }
+ }
+
+ impl Sub::sub {
+ unsafe_base { simd_sub }
+ }
+
+ impl Div::div {
+ unsafe_base { simd_div }
+ }
+
+ impl Rem::rem {
+ unsafe_base { simd_rem }
+ }
+}
--- /dev/null
+use crate::simd::intrinsics::{
+ simd_reduce_add_ordered, simd_reduce_and, simd_reduce_max, simd_reduce_min,
+ simd_reduce_mul_ordered, simd_reduce_or, simd_reduce_xor,
+};
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use core::ops::{BitAnd, BitOr, BitXor};
+
+macro_rules! impl_integer_reductions {
+ { $scalar:ty } => {
+ impl<const LANES: usize> Simd<$scalar, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ /// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
+ #[inline]
+ pub fn horizontal_sum(self) -> $scalar {
++ // Safety: `self` is an integer vector
+ unsafe { simd_reduce_add_ordered(self, 0) }
+ }
+
+ /// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
+ #[inline]
+ pub fn horizontal_product(self) -> $scalar {
++ // Safety: `self` is an integer vector
+ unsafe { simd_reduce_mul_ordered(self, 1) }
+ }
+
+ /// Horizontal maximum. Returns the maximum lane in the vector.
+ #[inline]
+ pub fn horizontal_max(self) -> $scalar {
++ // Safety: `self` is an integer vector
+ unsafe { simd_reduce_max(self) }
+ }
+
+ /// Horizontal minimum. Returns the minimum lane in the vector.
+ #[inline]
+ pub fn horizontal_min(self) -> $scalar {
++ // Safety: `self` is an integer vector
+ unsafe { simd_reduce_min(self) }
+ }
+ }
+ }
+}
+
+impl_integer_reductions! { i8 }
+impl_integer_reductions! { i16 }
+impl_integer_reductions! { i32 }
+impl_integer_reductions! { i64 }
+impl_integer_reductions! { isize }
+impl_integer_reductions! { u8 }
+impl_integer_reductions! { u16 }
+impl_integer_reductions! { u32 }
+impl_integer_reductions! { u64 }
+impl_integer_reductions! { usize }
+
+macro_rules! impl_float_reductions {
+ { $scalar:ty } => {
+ impl<const LANES: usize> Simd<$scalar, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+
+ /// Horizontal add. Returns the sum of the lanes of the vector.
+ #[inline]
+ pub fn horizontal_sum(self) -> $scalar {
+ // LLVM sum is inaccurate on i586
+ if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ self.as_array().iter().sum()
+ } else {
++ // Safety: `self` is a float vector
+ unsafe { simd_reduce_add_ordered(self, 0.) }
+ }
+ }
+
+ /// Horizontal multiply. Returns the product of the lanes of the vector.
+ #[inline]
+ pub fn horizontal_product(self) -> $scalar {
+ // LLVM product is inaccurate on i586
+ if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+ self.as_array().iter().product()
+ } else {
++ // Safety: `self` is a float vector
+ unsafe { simd_reduce_mul_ordered(self, 1.) }
+ }
+ }
+
+ /// Horizontal maximum. Returns the maximum lane in the vector.
+ ///
+ /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ /// return either. This function will not return `NaN` unless all lanes are `NaN`.
+ #[inline]
+ pub fn horizontal_max(self) -> $scalar {
++ // Safety: `self` is a float vector
+ unsafe { simd_reduce_max(self) }
+ }
+
+ /// Horizontal minimum. Returns the minimum lane in the vector.
+ ///
+ /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+ /// return either. This function will not return `NaN` unless all lanes are `NaN`.
+ #[inline]
+ pub fn horizontal_min(self) -> $scalar {
++ // Safety: `self` is a float vector
+ unsafe { simd_reduce_min(self) }
+ }
+ }
+ }
+}
+
+impl_float_reductions! { f32 }
+impl_float_reductions! { f64 }
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ Self: BitAnd<Self, Output = Self>,
+ T: SimdElement + BitAnd<T, Output = T>,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Horizontal bitwise "and". Returns the cumulative bitwise "and" across the lanes of
+ /// the vector.
+ #[inline]
+ pub fn horizontal_and(self) -> T {
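++ // Safety: `self` is an integer vector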
+ unsafe { simd_reduce_and(self) }
+ }
+}
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ Self: BitOr<Self, Output = Self>,
+ T: SimdElement + BitOr<T, Output = T>,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Horizontal bitwise "or". Returns the cumulative bitwise "or" across the lanes of
+ /// the vector.
+ #[inline]
+ pub fn horizontal_or(self) -> T {
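++ // Safety: `self` is an integer vector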
+ unsafe { simd_reduce_or(self) }
+ }
+}
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ Self: BitXor<Self, Output = Self>,
+ T: SimdElement + BitXor<T, Output = T>,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Horizontal bitwise "xor". Returns the cumulative bitwise "xor" across the lanes of
+ /// the vector.
+ #[inline]
+ pub fn horizontal_xor(self) -> T {
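++ // Safety: `self` is an integer vector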
+ unsafe { simd_reduce_xor(self) }
+ }
+}
--- /dev/null
+use crate::simd::intrinsics;
- use crate::simd::{LaneCount, Simd, SupportedLaneCount};
++use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
++use core::convert::FloatToInt;
+
+macro_rules! implement {
+ {
- $type:ty, $int_type:ty
++ $type:ty
+ } => {
+ impl<const LANES: usize> Simd<$type, LANES>
+ where
+ LaneCount<LANES>: SupportedLaneCount,
+ {
+ /// Rounds toward zero and converts to the same-width integer type, assuming that
+ /// the value is finite and fits in that type.
+ ///
+ /// # Safety
+ /// The value must:
+ ///
+ /// * Not be NaN
+ /// * Not be infinite
+ /// * Be representable in the return type, after truncating off its fractional part
++ ///
++ /// If these requirements are infeasible or costly, consider using the safe function [cast],
++ /// which saturates on conversion.
++ ///
++ /// [cast]: Simd::cast
+ #[inline]
- pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
++ pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
++ where
++ $type: FloatToInt<I>,
++ I: SimdElement,
++ {
+ unsafe { intrinsics::simd_cast(self) }
+ }
-
- /// Creates a floating-point vector from an integer vector. Rounds values that are
- /// not exactly representable.
- #[inline]
- pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
- unsafe { intrinsics::simd_cast(value) }
- }
+ }
+ }
+}
+
- implement! { f32, i32 }
- implement! { f64, i64 }
++implement! { f32 }
++implement! { f64 }
--- /dev/null
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
+
+impl<T, const LANES: usize> Mask<T, LANES>
+where
+ T: MaskElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Choose lanes from two vectors.
+ ///
+ /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ /// that lane mask is true, and `false_values` if that lane mask is false.
+ ///
++ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ /// let a = Simd::from_array([0, 1, 2, 3]);
+ /// let b = Simd::from_array([4, 5, 6, 7]);
+ /// let mask = Mask::from_array([true, false, false, true]);
+ /// let c = mask.select(a, b);
+ /// assert_eq!(c.to_array(), [0, 5, 6, 3]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn select<U>(
+ self,
+ true_values: Simd<U, LANES>,
+ false_values: Simd<U, LANES>,
+ ) -> Simd<U, LANES>
+ where
+ U: SimdElement<Mask = T>,
+ {
++ // Safety: The mask has been cast to a vector of integers,
++ // and the operands to select between are vectors of the same type and length.
+ unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
+ }
+
+ /// Choose lanes from two masks.
+ ///
+ /// For each lane in the mask, choose the corresponding lane from `true_values` if
+ /// that lane mask is true, and `false_values` if that lane mask is false.
+ ///
++ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Mask;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Mask;
+ /// let a = Mask::<i32, 4>::from_array([true, true, false, false]);
+ /// let b = Mask::<i32, 4>::from_array([false, false, true, true]);
+ /// let mask = Mask::<i32, 4>::from_array([true, false, false, true]);
+ /// let c = mask.select_mask(a, b);
+ /// assert_eq!(c.to_array(), [true, false, true, false]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original inputs"]
+ pub fn select_mask(self, true_values: Self, false_values: Self) -> Self {
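++ // A bitwise select: lanes set in `self` take `true_values`; cleared lanes take `false_values`.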
+ self & true_values | !self & false_values
+ }
+}
--- /dev/null
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+
+/// Constructs a new vector by selecting values from the lanes of the source vector or vectors to use.
+///
+/// When swizzling one vector, the indices of the result vector are indicated by a `const` array
+/// of `usize`, like [`Swizzle`].
+/// When swizzling two vectors, the indices are indicated by a `const` array of [`Which`], like
+/// [`Swizzle2`].
+///
+/// # Examples
+/// ## One source vector
+/// ```
+/// # #![feature(portable_simd)]
+/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle};
+/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle};
+/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
+///
+/// // Keeping the same size
+/// let r = simd_swizzle!(v, [3, 0, 1, 2]);
+/// assert_eq!(r.to_array(), [3., 0., 1., 2.]);
+///
+/// // Changing the number of lanes
+/// let r = simd_swizzle!(v, [3, 1]);
+/// assert_eq!(r.to_array(), [3., 1.]);
+/// ```
+///
+/// ## Two source vectors
+/// ```
+/// # #![feature(portable_simd)]
+/// # #[cfg(feature = "std")] use core_simd::{Simd, simd_swizzle, Which};
+/// # #[cfg(not(feature = "std"))] use core::simd::{Simd, simd_swizzle, Which};
+/// use Which::*;
+/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
+/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
+///
+/// // Keeping the same size
+/// let r = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+/// assert_eq!(r.to_array(), [0., 1., 6., 7.]);
+///
+/// // Changing the number of lanes
+/// let r = simd_swizzle!(a, b, [First(0), Second(0)]);
+/// assert_eq!(r.to_array(), [0., 4.]);
+/// ```
+#[allow(unused_macros)]
+pub macro simd_swizzle {
+ (
+ $vector:expr, $index:expr $(,)?
+ ) => {
+ {
+ use $crate::simd::Swizzle;
+ struct Impl;
+ impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
+ const INDEX: [usize; {$index.len()}] = $index;
+ }
+ Impl::swizzle($vector)
+ }
+ },
+ (
+ $first:expr, $second:expr, $index:expr $(,)?
+ ) => {
+ {
+ use $crate::simd::{Which, Swizzle2};
+ struct Impl;
+ impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl {
+ const INDEX: [Which; {$index.len()}] = $index;
+ }
+ Impl::swizzle2($first, $second)
+ }
+ }
+}
+
+/// An index into one of two vectors.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Which {
+ /// Indexes the first vector.
+ First(usize),
+ /// Indexes the second vector.
+ Second(usize),
+}
+
+/// Create a vector from the elements of another vector.
+pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ /// Map from the lanes of the input vector to the output vector.
+ const INDEX: [usize; OUTPUT_LANES];
+
+ /// Create a new vector from the lanes of `vector`.
+ ///
+ /// Lane `i` of the output is `vector[Self::INDEX[i]]`.
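++ ///
++ /// # Examples
++ /// A minimal implementor, rotating a 4-lane vector right by one lane:
++ /// ```
++ /// # #![feature(portable_simd)]
++ /// # #[cfg(feature = "std")] use core_simd::{Simd, Swizzle};
++ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Swizzle};
++ /// struct Rotate;
++ /// impl Swizzle<4, 4> for Rotate {
++ ///     const INDEX: [usize; 4] = [3, 0, 1, 2];
++ /// }
++ /// let v = Simd::from_array([10, 11, 12, 13]);
++ /// assert_eq!(Rotate::swizzle(v).to_array(), [13, 10, 11, 12]);
++ /// ```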
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
+ where
+ T: SimdElement,
+ LaneCount<INPUT_LANES>: SupportedLaneCount,
+ LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ {
++ // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
+ unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
+ }
+}
+
+/// Create a vector from the elements of two other vectors.
+pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ /// Map from the lanes of the input vectors to the output vector
+ const INDEX: [Which; OUTPUT_LANES];
+
+ /// Create a new vector from the lanes of `first` and `second`.
+ ///
+ /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
+ /// `Second(j)`.
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ fn swizzle2<T>(
+ first: Simd<T, INPUT_LANES>,
+ second: Simd<T, INPUT_LANES>,
+ ) -> Simd<T, OUTPUT_LANES>
+ where
+ T: SimdElement,
+ LaneCount<INPUT_LANES>: SupportedLaneCount,
+ LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ {
++ // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
+ unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
+ }
+}
+
+/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+/// This trait hides `INDEX_IMPL` from the public API.
+trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ const INDEX_IMPL: [u32; OUTPUT_LANES];
+}
+
+impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
+ for T
+where
+ T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+{
+ const INDEX_IMPL: [u32; OUTPUT_LANES] = {
+ let mut output = [0; OUTPUT_LANES];
+ let mut i = 0;
+ while i < OUTPUT_LANES {
+ let index = Self::INDEX[i];
+ assert!(index as u32 as usize == index);
+ assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ };
+}
+
+/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
+/// This trait hides `INDEX_IMPL` from the public API.
+trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
+ const INDEX_IMPL: [u32; OUTPUT_LANES];
+}
+
+impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
+ for T
+where
+ T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
+{
+ const INDEX_IMPL: [u32; OUTPUT_LANES] = {
+ let mut output = [0; OUTPUT_LANES];
+ let mut i = 0;
+ while i < OUTPUT_LANES {
+ let (offset, index) = match Self::INDEX[i] {
+ Which::First(index) => (false, index),
+ Which::Second(index) => (true, index),
+ };
+ assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+
+ // lanes are indexed by the first vector, then second vector
+ let index = if offset { index + INPUT_LANES } else { index };
+ assert!(index as u32 as usize == index);
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ };
+}
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ /// Reverse the order of the lanes in the vector.
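++ ///
++ /// # Examples
++ /// ```
++ /// # #![feature(portable_simd)]
++ /// # #[cfg(feature = "std")] use core_simd::Simd;
++ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
++ /// let v = Simd::from_array([0, 1, 2, 3]);
++ /// assert_eq!(v.reverse().to_array(), [3, 2, 1, 0]);
++ /// ```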
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn reverse(self) -> Self {
+ const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = LANES - i - 1;
+ i += 1;
+ }
+ index
+ }
+
+ struct Reverse;
+
+ impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
+ const INDEX: [usize; LANES] = reverse_index::<LANES>();
+ }
+
+ Reverse::swizzle(self)
+ }
+
- /// Rotates the vector such that the first `OFFSET` elements of the slice move to the end
- /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
- /// the element previously in lane `OFFSET` will become the first element in the slice.
++ /// Rotates the vector such that the first `OFFSET` elements of the vector move to the end
++ /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
++ /// the element previously in lane `OFFSET` will become the first element in the vector.
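++ ///
++ /// # Examples
++ /// ```
++ /// # #![feature(portable_simd)]
++ /// # #[cfg(feature = "std")] use core_simd::Simd;
++ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
++ /// let v = Simd::from_array([0, 1, 2, 3]);
++ /// assert_eq!(v.rotate_lanes_left::<1>().to_array(), [1, 2, 3, 0]);
++ /// ```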
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
+ const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ let offset = OFFSET % LANES;
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = (i + offset) % LANES;
+ i += 1;
+ }
+ index
+ }
+
+ struct Rotate<const OFFSET: usize>;
+
+ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ }
+
+ Rotate::<OFFSET>::swizzle(self)
+ }
+
+ /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
+ /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`,
- /// the element previously at index `LANES - OFFSET` will become the first element in the slice.
++ /// the element previously at index `LANES - OFFSET` will become the first element in the vector.
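++ ///
++ /// # Examples
++ /// ```
++ /// # #![feature(portable_simd)]
++ /// # #[cfg(feature = "std")] use core_simd::Simd;
++ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
++ /// let v = Simd::from_array([0, 1, 2, 3]);
++ /// assert_eq!(v.rotate_lanes_right::<1>().to_array(), [3, 0, 1, 2]);
++ /// ```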
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
+ const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
+ let offset = LANES - OFFSET % LANES;
+ let mut index = [0; LANES];
+ let mut i = 0;
+ while i < LANES {
+ index[i] = (i + offset) % LANES;
+ i += 1;
+ }
+ index
+ }
+
+ struct Rotate<const OFFSET: usize>;
+
+ impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
+ const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ }
+
+ Rotate::<OFFSET>::swizzle(self)
+ }
+
+ /// Interleave two vectors.
+ ///
+ /// Produces two vectors with lanes taken alternately from `self` and `other`.
+ ///
+ /// The first result contains the first `LANES / 2` lanes from `self` and `other`,
+ /// alternating, starting with the first lane of `self`.
+ ///
+ /// The second result contains the last `LANES / 2` lanes from `self` and `other`,
+ /// alternating, starting with the lane `LANES / 2` from the start of `self`.
+ ///
+ /// ```
+ /// #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let a = Simd::from_array([0, 1, 2, 3]);
+ /// let b = Simd::from_array([4, 5, 6, 7]);
+ /// let (x, y) = a.interleave(b);
+ /// assert_eq!(x.to_array(), [0, 4, 1, 5]);
+ /// assert_eq!(y.to_array(), [2, 6, 3, 7]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn interleave(self, other: Self) -> (Self, Self) {
+ const fn lo<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES {
+ let offset = i / 2;
+ idx[i] = if i % 2 == 0 {
+ Which::First(offset)
+ } else {
+ Which::Second(offset)
+ };
+ i += 1;
+ }
+ idx
+ }
+ const fn hi<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES {
+ let offset = (LANES + i) / 2;
+ idx[i] = if i % 2 == 0 {
+ Which::First(offset)
+ } else {
+ Which::Second(offset)
+ };
+ i += 1;
+ }
+ idx
+ }
+
+ struct Lo;
+ struct Hi;
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
+ const INDEX: [Which; LANES] = lo::<LANES>();
+ }
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
+ const INDEX: [Which; LANES] = hi::<LANES>();
+ }
+
+ (Lo::swizzle2(self, other), Hi::swizzle2(self, other))
+ }
+
+ /// Deinterleave two vectors.
+ ///
+ /// The first result takes every other lane of `self` and then `other`, starting with
+ /// the first lane.
+ ///
+ /// The second result takes every other lane of `self` and then `other`, starting with
+ /// the second lane.
+ ///
+ /// ```
+ /// #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let a = Simd::from_array([0, 4, 1, 5]);
+ /// let b = Simd::from_array([2, 6, 3, 7]);
+ /// let (x, y) = a.deinterleave(b);
+ /// assert_eq!(x.to_array(), [0, 1, 2, 3]);
+ /// assert_eq!(y.to_array(), [4, 5, 6, 7]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn deinterleave(self, other: Self) -> (Self, Self) {
+ const fn even<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES / 2 {
+ idx[i] = Which::First(2 * i);
+ idx[i + LANES / 2] = Which::Second(2 * i);
+ i += 1;
+ }
+ idx
+ }
+ const fn odd<const LANES: usize>() -> [Which; LANES] {
+ let mut idx = [Which::First(0); LANES];
+ let mut i = 0;
+ while i < LANES / 2 {
+ idx[i] = Which::First(2 * i + 1);
+ idx[i + LANES / 2] = Which::Second(2 * i + 1);
+ i += 1;
+ }
+ idx
+ }
+
+ struct Even;
+ struct Odd;
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
+ const INDEX: [Which; LANES] = even::<LANES>();
+ }
+
+ impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
+ const INDEX: [Which; LANES] = odd::<LANES>();
+ }
+
+ (Even::swizzle2(self, other), Odd::swizzle2(self, other))
+ }
+}
--- /dev/null
+macro_rules! impl_to_bytes {
+ { $ty:ty, $size:literal } => {
+ impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
+ where
+ crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount,
+ crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount,
+ {
- /// Return the memory representation of this integer as a byte array in native byte
- /// order.
++ /// Return the memory representation of this vector as a byte array in native byte
++ /// order.
+ pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
++ // Safety: transmuting between vectors is safe
+ unsafe { core::mem::transmute_copy(&self) }
+ }
+
- /// Create a native endian integer value from its memory representation as a byte array
- /// in native endianness.
++ /// Create a native-endian vector from its memory representation as a byte array
++ /// in native endianness.
+ pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
++ // Safety: transmuting between vectors is safe
+ unsafe { core::mem::transmute_copy(&bytes) }
+ }
+ }
+ }
+}
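++// For example, `impl_to_bytes! { u32, 4 }` lets a `Simd<u32, LANES>` round-trip
++// through a `Simd<u8, 4 * LANES>` holding its native-endian bytes.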
+
+impl_to_bytes! { u8, 1 }
+impl_to_bytes! { u16, 2 }
+impl_to_bytes! { u32, 4 }
+impl_to_bytes! { u64, 8 }
+#[cfg(target_pointer_width = "32")]
+impl_to_bytes! { usize, 4 }
+#[cfg(target_pointer_width = "64")]
+impl_to_bytes! { usize, 8 }
+
+impl_to_bytes! { i8, 1 }
+impl_to_bytes! { i16, 2 }
+impl_to_bytes! { i32, 4 }
+impl_to_bytes! { i64, 8 }
+#[cfg(target_pointer_width = "32")]
+impl_to_bytes! { isize, 4 }
+#[cfg(target_pointer_width = "64")]
+impl_to_bytes! { isize, 8 }
--- /dev/null
+mod float;
+mod int;
+mod uint;
+
+pub use float::*;
+pub use int::*;
+pub use uint::*;
+
+// Vectors of pointers are not for public use at the current time.
+pub(crate) mod ptr;
+
+use crate::simd::intrinsics;
+use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
+
- /// A SIMD vector of `LANES` elements of type `T`.
++/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
++///
++/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
++/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
++/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
++/// the two arrays together and mapping the operator over each lane.
++///
++/// ```rust
++/// # #![feature(array_zip, portable_simd)]
++/// # use core::simd::{Simd};
++/// let a0: [i32; 4] = [-2, 0, 2, 4];
++/// let a1 = [10, 9, 8, 7];
++/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
++/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
++///
++/// // `Simd<T, N>` implements `From<[T; N]>`.
++/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
++/// // Which means arrays implement `Into<Simd<T, N>>`.
++/// assert_eq!(v0 + v1, zm_add.into());
++/// assert_eq!(v0 * v1, zm_mul.into());
++/// ```
++///
++/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` were [`Wrapping<T>`].
++/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
++/// This means there is no warning on overflows, even in "debug" builds.
++/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
++/// and even "debug builds" are unlikely to tolerate the loss of performance.
++/// You may want to consider using explicitly checked arithmetic if such is required.
++/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
++///
++/// [`Wrapping<T>`]: core::num::Wrapping
++///
++/// # Layout
++/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
++/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
++/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
++/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
++///
++/// # ABI "Features"
++/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
++/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
++/// The need for this may be corrected in the future.
++///
++/// # Safe SIMD with Unsafe Rust
++///
++/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
++/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
++/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
++/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
++/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
++/// [`read_unaligned`] and [`write_unaligned`]. This is because:
++/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
++/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
++/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
++/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
++/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
++///
++/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
++/// and may be just as fast as stricter alternatives.
++/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
++/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
++/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
++/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
++/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
++/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
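++///
++/// For example, a minimal sketch of loading a vector from a float slice with an
++/// unaligned read; the pointer must be valid for the 32-byte read, but no vector
++/// alignment is required:
++///
++/// ```rust
++/// # #![feature(portable_simd)]
++/// # use core::simd::Simd;
++/// let data = [1.0f64, 2.0, 3.0, 4.0, 5.0];
++/// // Sound: the read stays within `data`, and `read_unaligned` imposes no alignment demand.
++/// let v = unsafe { data.as_ptr().cast::<Simd<f64, 4>>().read_unaligned() };
++/// assert_eq!(v.to_array(), [1.0, 2.0, 3.0, 4.0]);
++/// ```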
++///
++/// [`transmute`]: core::mem::transmute
++/// [raw pointers]: pointer
++/// [`read_unaligned`]: pointer::read_unaligned
++/// [`write_unaligned`]: pointer::write_unaligned
++/// [`read`]: pointer::read
++/// [`write`]: pointer::write
++/// [as_simd]: slice::as_simd
+#[repr(simd)]
+pub struct Simd<T, const LANES: usize>([T; LANES])
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount;
+
+impl<T, const LANES: usize> Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ /// Number of lanes in this vector.
+ pub const LANES: usize = LANES;
+
+ /// Get the number of lanes in this vector.
+ pub const fn lanes(&self) -> usize {
+ LANES
+ }
+
+ /// Construct a SIMD vector by setting all lanes to the given value.
+ pub const fn splat(value: T) -> Self {
+ Self([value; LANES])
+ }
+
+ /// Returns an array reference containing the entire SIMD vector.
+ pub const fn as_array(&self) -> &[T; LANES] {
+ &self.0
+ }
+
+ /// Returns a mutable array reference containing the entire SIMD vector.
+ pub fn as_mut_array(&mut self) -> &mut [T; LANES] {
+ &mut self.0
+ }
+
+ /// Converts an array to a SIMD vector.
+ pub const fn from_array(array: [T; LANES]) -> Self {
+ Self(array)
+ }
+
+ /// Converts a SIMD vector to an array.
+ pub const fn to_array(self) -> [T; LANES] {
+ self.0
+ }
+
- /// Converts a slice to a SIMD vector containing `slice[..LANES]`
++ /// Converts a slice to a SIMD vector containing `slice[..LANES]`.
++ ///
+ /// # Panics
+ /// `from_slice` will panic if the slice's `len` is less than the vector's `Simd::LANES`.
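++ ///
++ /// # Examples
++ /// ```
++ /// # #![feature(portable_simd)]
++ /// # #[cfg(feature = "std")] use core_simd::Simd;
++ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
++ /// let source = vec![1, 2, 3, 4, 5, 6];
++ /// let v: Simd<i32, 4> = Simd::from_slice(&source);
++ /// assert_eq!(v.to_array(), [1, 2, 3, 4]);
++ /// ```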
+ #[must_use]
+ pub const fn from_slice(slice: &[T]) -> Self {
+ assert!(
+ slice.len() >= LANES,
+ "slice length must be at least the number of lanes"
+ );
+ let mut array = [slice[0]; LANES];
+ let mut i = 0;
+ while i < LANES {
+ array[i] = slice[i];
+ i += 1;
+ }
+ Self(array)
+ }
+
+ /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
+ /// This follows the semantics of Rust's `as` conversion for casting
+ /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
+ /// and from floats to integers (truncating, or saturating at the limits) for each lane,
+ /// or vice versa.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
+ /// let ints = floats.cast::<i32>();
+ /// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
+ ///
+ /// // Formally equivalent, but `Simd::cast` can optimize better.
+ /// assert_eq!(ints, Simd::from_array(floats.to_array().map(|x| x as i32)));
+ ///
+ /// // The float conversion does not round-trip.
+ /// let floats_again = ints.cast();
+ /// assert_ne!(floats, floats_again);
+ /// assert_eq!(floats_again, Simd::from_array([1.0, -4.0, 2147483647.0, 0.0]));
+ /// ```
+ #[must_use]
+ #[inline]
+ #[cfg(not(bootstrap))]
+ pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
++ // Safety: The input argument is a vector of a known SIMD type.
+ unsafe { intrinsics::simd_as(self) }
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ ///
+ /// let result = Simd::gather_or(&vec, idxs, alt); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, 15]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_or(slice: &[T], idxs: Simd<usize, LANES>, or: Self) -> Self {
+ Self::gather_select(slice, Mask::splat(true), idxs, or)
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// If an index is out-of-bounds, the lane is set to the default value for the type.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ ///
+ /// let result = Simd::gather_or_default(&vec, idxs); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([0, 13, 10, 15]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_or_default(slice: &[T], idxs: Simd<usize, LANES>) -> Self
+ where
+ T: Default,
+ {
+ Self::gather_or(slice, idxs, Self::splat(T::default()))
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an index is disabled or is out-of-bounds, the lane is selected from the `or` vector.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ ///
+ /// let result = Simd::gather_select(&vec, enable, idxs, alt); // Note the lane that is out-of-bounds.
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ /// ```
+ #[must_use]
+ #[inline]
+ pub fn gather_select(
+ slice: &[T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ or: Self,
+ ) -> Self {
+ let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
- // SAFETY: We have masked-off out-of-bounds lanes.
++ // Safety: We have masked-off out-of-bounds lanes.
+ unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
+ }
+
+ /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an index is disabled, the lane is selected from the `or` vector.
+ ///
+ /// # Safety
+ ///
+ /// Calling this function with an `enable`d out-of-bounds index is *[undefined behavior]*
+ /// even if the resulting value is not used.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 5]);
+ /// let alt = Simd::from_array([-5, -4, -3, -2]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane.
+ /// // If this mask was used to gather, it would be unsound. Let's fix that.
+ /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+ ///
+ /// // We have masked the OOB lane, so it's safe to gather now.
+ /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) };
+ /// assert_eq!(result, Simd::from_array([-5, 13, 10, -2]));
+ /// ```
+ /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[must_use]
+ #[inline]
+ pub unsafe fn gather_select_unchecked(
+ slice: &[T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ or: Self,
+ ) -> Self {
+ let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
+ // Ferris forgive me, I have done pointer arithmetic here.
+ let ptrs = base_ptr.wrapping_add(idxs);
- // SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
++ // Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
+ unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
+ }
+
+ /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`.
+ /// If two lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::Simd;
+ /// # #[cfg(not(feature = "std"))] use core::simd::Simd;
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ ///
+ /// vals.scatter(&mut vec, idxs); // index 0 receives two writes.
+ /// assert_eq!(vec, vec![124, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ #[inline]
+ pub fn scatter(self, slice: &mut [T], idxs: Simd<usize, LANES>) {
+ self.scatter_select(slice, Mask::splat(true), idxs)
+ }
+
+ /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If an enabled index is out-of-bounds, the lane is not written.
+ /// If two enabled lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ ///
+ /// vals.scatter_select(&mut vec, enable, idxs); // index 0's second write is masked, thus omitted.
+ /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ #[inline]
+ pub fn scatter_select(
+ self,
+ slice: &mut [T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ ) {
+ let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
- // SAFETY: We have masked-off out-of-bounds lanes.
++ // Safety: We have masked-off out-of-bounds lanes.
+ unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
+ }
+
+ /// Writes the values in a SIMD vector to multiple potentially discontiguous indices in `slice`.
+ /// The mask `enable`s all `true` lanes and disables all `false` lanes.
+ /// If two enabled lanes in the scattered vector would write to the same index,
+ /// only the last lane is guaranteed to actually be written.
+ ///
+ /// # Safety
+ ///
+ /// Calling this function with an enabled out-of-bounds index is *[undefined behavior]*,
+ /// and may lead to memory corruption.
+ ///
+ /// # Examples
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
+ /// # #[cfg(not(feature = "std"))] use core::simd::{Simd, Mask};
+ /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
+ /// let idxs = Simd::from_array([9, 3, 0, 0]);
+ /// let vals = Simd::from_array([-27, 82, -41, 124]);
+ /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
+ /// // If this mask was used to scatter, it would be unsound. Let's fix that.
+ /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+ ///
+ /// // We have masked the OOB lane, so it's safe to scatter now.
+ /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); }
+ /// // index 0's second write is masked, thus was omitted.
+ /// assert_eq!(vec, vec![-41, 11, 12, 82, 14, 15, 16, 17, 18]);
+ /// ```
+ /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+ #[inline]
+ pub unsafe fn scatter_select_unchecked(
+ self,
+ slice: &mut [T],
+ enable: Mask<isize, LANES>,
+ idxs: Simd<usize, LANES>,
+ ) {
- // SAFETY: This block works with *mut T derived from &mut 'a [T],
++ // Safety: This block works with *mut T derived from &mut 'a [T],
+ // which means it is delicate in Rust's borrowing model, circa 2021:
+ // &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
+ // Even though this block is largely safe methods, it must be exactly this way
+ // to prevent invalidating the raw ptrs while they're live.
+ // Thus, entering this block requires all values to use being already ready:
+ // 0. idxs we want to write to, which are used to construct the mask.
+ // 1. enable, which depends on an initial &'a [T] and the idxs.
+ // 2. actual values to scatter (self).
+ // 3. &mut [T] which will become our base ptr.
+ unsafe {
+ // Now Entering ☢️ *mut T Zone
+ let base_ptr = crate::simd::ptr::SimdMutPtr::splat(slice.as_mut_ptr());
+ // Ferris forgive me, I have done pointer arithmetic here.
+ let ptrs = base_ptr.wrapping_add(idxs);
+ // The ptrs have been bounds-masked to prevent memory-unsafe writes insha'allah
+ intrinsics::simd_scatter(self, ptrs, enable.to_int())
+ // Cleared ☢️ *mut T Zone
+ }
+ }
+}
+
+impl<T, const LANES: usize> Copy for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+}
+
+impl<T, const LANES: usize> Clone for Simd<T, LANES>
+where
+ T: SimdElement,
+ LaneCount<LANES>: SupportedLaneCount,
+{
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T, const LANES: usize> Default for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Default,
+{
+ #[inline]
+ fn default() -> Self {
+ Self::splat(T::default())
+ }
+}
+
+impl<T, const LANES: usize> PartialEq for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + PartialEq,
+{
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ // TODO use SIMD equality
+ self.to_array() == other.to_array()
+ }
+}
+
+impl<T, const LANES: usize> PartialOrd for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + PartialOrd,
+{
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+ // TODO use SIMD comparison
+ self.to_array().partial_cmp(other.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> Eq for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Eq,
+{
+}
+
+impl<T, const LANES: usize> Ord for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + Ord,
+{
+ #[inline]
+ fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+ // TODO use SIMD comparison
+ self.to_array().cmp(other.as_ref())
+ }
+}
+
+impl<T, const LANES: usize> core::hash::Hash for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement + core::hash::Hash,
+{
+ #[inline]
+ fn hash<H>(&self, state: &mut H)
+ where
+ H: core::hash::Hasher,
+ {
+ self.as_array().hash(state)
+ }
+}
+
+// array references
+impl<T, const LANES: usize> AsRef<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_ref(&self) -> &[T; LANES] {
+ &self.0
+ }
+}
+
+impl<T, const LANES: usize> AsMut<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_mut(&mut self) -> &mut [T; LANES] {
+ &mut self.0
+ }
+}
+
+// slice references
+impl<T, const LANES: usize> AsRef<[T]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_ref(&self) -> &[T] {
+ &self.0
+ }
+}
+
+impl<T, const LANES: usize> AsMut<[T]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ #[inline]
+ fn as_mut(&mut self) -> &mut [T] {
+ &mut self.0
+ }
+}
+
+// vector/array conversion
+impl<T, const LANES: usize> From<[T; LANES]> for Simd<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ fn from(array: [T; LANES]) -> Self {
+ Self(array)
+ }
+}
+
+impl<T, const LANES: usize> From<Simd<T, LANES>> for [T; LANES]
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: SimdElement,
+{
+ fn from(vector: Simd<T, LANES>) -> Self {
+ vector.to_array()
+ }
+}
+
+mod sealed {
+ pub trait Sealed {}
+}
+use sealed::Sealed;
+
+/// Marker trait for types that may be used as SIMD vector elements.
-/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
++///
++/// # Safety
++/// This trait, when implemented, asserts the compiler can monomorphize
+/// `#[repr(simd)]` structs with the marked type as an element.
+/// Strictly, it is valid to impl if the vector will not be miscompiled.
+/// Practically, it is user-unfriendly to impl it if the vector won't compile,
+/// even when no soundness guarantees are broken by allowing the user to try.
+pub unsafe trait SimdElement: Sealed + Copy {
+ /// The mask element type corresponding to this element type.
+ type Mask: MaskElement;
+}
+
+impl Sealed for u8 {}
+unsafe impl SimdElement for u8 {
+ type Mask = i8;
+}
+
+impl Sealed for u16 {}
+unsafe impl SimdElement for u16 {
+ type Mask = i16;
+}
+
+impl Sealed for u32 {}
+unsafe impl SimdElement for u32 {
+ type Mask = i32;
+}
+
+impl Sealed for u64 {}
+unsafe impl SimdElement for u64 {
+ type Mask = i64;
+}
+
+impl Sealed for usize {}
+unsafe impl SimdElement for usize {
+ type Mask = isize;
+}
+
+impl Sealed for i8 {}
+unsafe impl SimdElement for i8 {
+ type Mask = i8;
+}
+
+impl Sealed for i16 {}
+unsafe impl SimdElement for i16 {
+ type Mask = i16;
+}
+
+impl Sealed for i32 {}
+unsafe impl SimdElement for i32 {
+ type Mask = i32;
+}
+
+impl Sealed for i64 {}
+unsafe impl SimdElement for i64 {
+ type Mask = i64;
+}
+
+impl Sealed for isize {}
+unsafe impl SimdElement for isize {
+ type Mask = isize;
+}
+
+impl Sealed for f32 {}
+unsafe impl SimdElement for f32 {
+ type Mask = i32;
+}
+
+impl Sealed for f64 {}
+unsafe impl SimdElement for f64 {
+ type Mask = i64;
+}
--- /dev/null
+//! Private implementation details of public gather/scatter APIs.
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+use core::mem;
+
+/// A vector of *const T.
+#[derive(Debug, Copy, Clone)]
+#[repr(simd)]
+pub(crate) struct SimdConstPtr<T, const LANES: usize>([*const T; LANES]);
+
+impl<T, const LANES: usize> SimdConstPtr<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: Sized,
+{
+ #[inline]
+ #[must_use]
+ pub fn splat(ptr: *const T) -> Self {
+ Self([ptr; LANES])
+ }
+
+ #[inline]
+ #[must_use]
+ pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
++ // Safety: converting pointers to usize and vice-versa is safe
++ // (even if using that pointer is not)
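++ // Each pointer lane is reinterpreted as `usize`, offset by
++ // `addend * size_of::<T>()` bytes, and reinterpreted back,
++ // mirroring `ptr::wrapping_add` on every lane.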
+ unsafe {
+ let x: Simd<usize, LANES> = mem::transmute_copy(&self);
+ mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
+ }
+ }
+}
+
+/// A vector of *mut T. Be very careful around potential aliasing.
+#[derive(Debug, Copy, Clone)]
+#[repr(simd)]
+pub(crate) struct SimdMutPtr<T, const LANES: usize>([*mut T; LANES]);
+
+impl<T, const LANES: usize> SimdMutPtr<T, LANES>
+where
+ LaneCount<LANES>: SupportedLaneCount,
+ T: Sized,
+{
+ #[inline]
+ #[must_use]
+ pub fn splat(ptr: *mut T) -> Self {
+ Self([ptr; LANES])
+ }
+
+ #[inline]
+ #[must_use]
+ pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
++ // Safety: converting pointers to usize and vice-versa is safe
++ // (even if using that pointer is not)
+ unsafe {
+ let x: Simd<usize, LANES> = mem::transmute_copy(&self);
+ mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
+ }
+ }
+}
--- /dev/null
+/// Provides implementations of `From<$a> for $b` and `From<$b> for $a` that transmutes the value.
+#[allow(unused)]
+macro_rules! from_transmute {
+ { unsafe $a:ty => $b:ty } => {
+ from_transmute!{ @impl $a => $b }
+ from_transmute!{ @impl $b => $a }
+ };
+ { @impl $from:ty => $to:ty } => {
+ impl core::convert::From<$from> for $to {
+ #[inline]
+ fn from(value: $from) -> $to {
++ // Safety: transmuting between vectors is safe, but the caller of this macro
++ // checks the invariants
+ unsafe { core::mem::transmute(value) }
+ }
+ }
+ };
+}
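++// For instance, the x86 module can declare `from_transmute! { unsafe u8x16 => __m128i }`
++// to provide `From` conversions in both directions between those two types.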
+
+/// Conversions to x86's SIMD types.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod x86;
+
+#[cfg(any(target_arch = "wasm32"))]
+mod wasm32;
+
+#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
+mod arm;
+
+#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+mod powerpc;
--- /dev/null
+#![feature(portable_simd)]
+
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+#[cfg(target_arch = "wasm32")]
+wasm_bindgen_test_configure!(run_in_browser);
+
+macro_rules! test_mask_api {
+ { $type:ident } => {
+ #[allow(non_snake_case)]
+ mod $type {
+ #[cfg(target_arch = "wasm32")]
+ use wasm_bindgen_test::*;
+
+ #[test]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn set_and_test() {
+ let values = [true, false, false, true, false, false, true, false];
+ let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ for (lane, value) in values.iter().copied().enumerate() {
+ mask.set(lane, value);
+ }
+ for (lane, value) in values.iter().copied().enumerate() {
+ assert_eq!(mask.test(lane), value);
+ }
+ }
+
+ #[test]
+ #[should_panic]
+ fn set_invalid_lane() {
+ let mut mask = core_simd::Mask::<$type, 8>::splat(false);
+ mask.set(8, true);
+ let _ = mask;
+ }
+
+ #[test]
+ #[should_panic]
+ fn test_invalid_lane() {
+ let mask = core_simd::Mask::<$type, 8>::splat(false);
+ let _ = mask.test(8);
+ }
+
+ #[test]
+ fn any() {
+ assert!(!core_simd::Mask::<$type, 8>::splat(false).any());
+ assert!(core_simd::Mask::<$type, 8>::splat(true).any());
+ let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ v.set(2, true);
+ assert!(v.any());
+ }
+
+ #[test]
+ fn all() {
+ assert!(!core_simd::Mask::<$type, 8>::splat(false).all());
+ assert!(core_simd::Mask::<$type, 8>::splat(true).all());
+ let mut v = core_simd::Mask::<$type, 8>::splat(false);
+ v.set(2, true);
+ assert!(!v.all());
+ }
+
+ #[test]
+ fn roundtrip_int_conversion() {
+ let values = [true, false, false, true, false, false, true, false];
+ let mask = core_simd::Mask::<$type, 8>::from_array(values);
+ let int = mask.to_int();
+ assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]);
+ assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
+ }
+
+ #[test]
- #[cfg(feature = "generic_const_exprs")]
+ fn roundtrip_bitmask_conversion() {
++ use core_simd::ToBitMask;
+ let values = [
+ true, false, false, true, false, false, true, false,
+ true, true, false, false, false, false, false, true,
+ ];
+ let mask = core_simd::Mask::<$type, 16>::from_array(values);
+ let bitmask = mask.to_bitmask();
- assert_eq!(bitmask, [0b01001001, 0b10000011]);
++ assert_eq!(bitmask, 0b1000001101001001);
+ assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
+ }
+ }
+ }
+}
+
+mod mask_api {
+ test_mask_api! { i8 }
+ test_mask_api! { i16 }
+ test_mask_api! { i32 }
+ test_mask_api! { i64 }
+ test_mask_api! { isize }
+}
+
+#[test]
+fn convert() {
+ let values = [true, false, false, true, false, false, true, false];
+ assert_eq!(
+ core_simd::Mask::<i8, 8>::from_array(values),
+ core_simd::Mask::<i32, 8>::from_array(values).into()
+ );
+}
--- /dev/null
+/// Implements a test on a unary operation using proptest.
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_unary_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
+ test_helpers::test_lanes! {
+ fn $fn<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &<core_simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|_| true,
+ );
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident } => {
+ impl_unary_op_test! { $scalar, $trait::$fn, <$scalar as core::ops::$trait>::$fn }
+ };
+}
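++// For example, `impl_unary_op_test! { i32, Not::not }` compares
++// `<Simd<i32, LANES> as Not>::not` against the scalar `<i32 as Not>::not`
++// for every tested lane count.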
+
+/// Implements a test on a binary operation using proptest.
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_binary_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr } => {
+ mod $fn {
+ use super::*;
+ use core_simd::Simd;
+
+ test_helpers::test_lanes! {
+ fn normal<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|_, _| true,
+ );
+ }
+
+ fn assign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ &$scalar_fn,
+ &|_, _| true,
+ );
+ }
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident } => {
+ impl_binary_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn }
+ };
+}
+
+/// Implements a test on a binary operation using proptest.
+///
+/// Like `impl_binary_op_test`, but allows providing a function for rejecting particular inputs
+/// (like the `proptest_assume` macro).
+///
+/// Compares the vector operation to the equivalent scalar operation.
+#[macro_export]
+macro_rules! impl_binary_checked_op_test {
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $scalar_fn:expr, $check_fn:expr } => {
+ mod $fn {
+ use super::*;
+ use core_simd::Simd;
+
+ test_helpers::test_lanes! {
+ fn normal<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
+ &$scalar_fn,
+ &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ );
+ }
+
+ fn assign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
+ &$scalar_fn,
+ &|x, y| x.iter().zip(y.iter()).all(|(x, y)| $check_fn(*x, *y)),
+ )
+ }
+ }
+ }
+ };
+ { $scalar:ty, $trait:ident :: $fn:ident, $trait_assign:ident :: $fn_assign:ident, $check_fn:expr } => {
+ impl_binary_checked_op_test! { $scalar, $trait::$fn, $trait_assign::$fn_assign, <$scalar as core::ops::$trait>::$fn, $check_fn }
+ };
+}
+
+#[macro_export]
+macro_rules! impl_common_integer_tests {
+ { $vector:ident, $scalar:ident } => {
+ test_helpers::test_lanes! {
+ fn horizontal_sum<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_sum(),
+ x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_product<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_product(),
+ x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_and<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
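++ // The scalar fold starts from all-ones: `-1i8 as $scalar` sign-extends
++ // to `!0`, the identity element for bitwise AND.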
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_and(),
+ x.iter().copied().fold(-1i8 as $scalar, <$scalar as core::ops::BitAnd>::bitand),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_or<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_or(),
+ x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitOr>::bitor),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_xor<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_xor(),
+ x.iter().copied().fold(0 as $scalar, <$scalar as core::ops::BitXor>::bitxor),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_max<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_max(),
+ x.iter().copied().max().unwrap(),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_min<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ $vector::<LANES>::from_array(x).horizontal_min(),
+ x.iter().copied().min().unwrap(),
+ );
+ Ok(())
+ });
+ }
+ }
+ }
+}
+
+/// Implement tests for signed integers.
+#[macro_export]
+macro_rules! impl_signed_tests {
+ { $scalar:tt } => {
+ mod $scalar {
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_common_integer_tests! { Vector, Scalar }
+
+ test_helpers::test_lanes! {
+ fn neg<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &<Vector::<LANES> as core::ops::Neg>::neg,
+ &<Scalar as core::ops::Neg>::neg,
+ &|x| !x.contains(&Scalar::MIN),
+ );
+ }
+
+ fn is_positive<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_positive,
+ &Scalar::is_positive,
+ &|_| true,
+ );
+ }
+
+ fn is_negative<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_negative,
+ &Scalar::is_negative,
+ &|_| true,
+ );
+ }
+
+ fn signum<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::signum,
+ &Scalar::signum,
+ &|_| true,
+ )
+ }
+
- }
-
- test_helpers::test_lanes_panic! {
- fn div_min_overflow_panics<const LANES: usize>() {
++ fn div_min_may_overflow<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(-1);
- let _ = a / b;
++ assert_eq!(a / b, a);
+ }
+
- fn rem_min_overflow_panic<const LANES: usize>() {
++ fn rem_min_may_overflow<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(Scalar::MIN);
+ let b = Vector::<LANES>::splat(-1);
- let _ = a % b;
++ assert_eq!(a % b, Vector::<LANES>::splat(0));
+ }
+
++ }
++
++ test_helpers::test_lanes_panic! {
+ fn div_by_all_zeros_panics<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a / b;
+ }
+
+ fn div_by_one_zero_panics<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let mut b = Vector::<LANES>::splat(21);
+ b[0] = 0 as _;
+ let _ = a / b;
+ }
+
+ fn rem_zero_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a % b;
+ }
+ }
+
+ test_helpers::test_lanes! {
+ fn div_neg_one_no_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(-1);
+ let _ = a / b;
+ }
+
+ fn rem_neg_one_no_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(-1);
+ let _ = a % b;
+ }
+ }
+
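+ // Integer vector arithmetic wraps on overflow, so the scalar reference uses the wrapping_* operations.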
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+
+ // Exclude the panicking cases: division by zero, and Scalar::MIN / -1 (overflow)
+ impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+ impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |x, y| y != 0 && !(x == Scalar::MIN && y == -1));
+
+ impl_unary_op_test!(Scalar, Not::not);
+ impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ }
+ }
+}
+
+/// Implement tests for unsigned integers.
+#[macro_export]
+macro_rules! impl_unsigned_tests {
+ { $scalar:tt } => {
+ mod $scalar {
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_common_integer_tests! { Vector, Scalar }
+
+ test_helpers::test_lanes_panic! {
+ fn rem_zero_panic<const LANES: usize>() {
+ let a = Vector::<LANES>::splat(42);
+ let b = Vector::<LANES>::splat(0);
+ let _ = a % b;
+ }
+ }
+
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
+
+ // Exclude the panicking case: division by zero
+ impl_binary_checked_op_test!(Scalar, Div::div, DivAssign::div_assign, Scalar::wrapping_div, |_, y| y != 0);
+ impl_binary_checked_op_test!(Scalar, Rem::rem, RemAssign::rem_assign, Scalar::wrapping_rem, |_, y| y != 0);
+
+ impl_unary_op_test!(Scalar, Not::not);
+ impl_binary_op_test!(Scalar, BitAnd::bitand, BitAndAssign::bitand_assign);
+ impl_binary_op_test!(Scalar, BitOr::bitor, BitOrAssign::bitor_assign);
+ impl_binary_op_test!(Scalar, BitXor::bitxor, BitXorAssign::bitxor_assign);
+ }
+ }
+}
+
+/// Implement tests for floating point numbers.
+#[macro_export]
+macro_rules! impl_float_tests {
+ { $scalar:tt, $int_scalar:tt } => {
+ mod $scalar {
+ type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
+ type Scalar = $scalar;
+
+ impl_unary_op_test!(Scalar, Neg::neg);
+ impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign);
+ impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign);
+ impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign);
+ impl_binary_op_test!(Scalar, Div::div, DivAssign::div_assign);
+ impl_binary_op_test!(Scalar, Rem::rem, RemAssign::rem_assign);
+
+ test_helpers::test_lanes! {
+ fn is_sign_positive<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_sign_positive,
+ &Scalar::is_sign_positive,
+ &|_| true,
+ );
+ }
+
+ fn is_sign_negative<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_sign_negative,
+ &Scalar::is_sign_negative,
+ &|_| true,
+ );
+ }
+
+ fn is_finite<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_finite,
+ &Scalar::is_finite,
+ &|_| true,
+ );
+ }
+
+ fn is_infinite<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_infinite,
+ &Scalar::is_infinite,
+ &|_| true,
+ );
+ }
+
+ fn is_nan<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_nan,
+ &Scalar::is_nan,
+ &|_| true,
+ );
+ }
+
+ fn is_normal<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_normal,
+ &Scalar::is_normal,
+ &|_| true,
+ );
+ }
+
+ fn is_subnormal<const LANES: usize>() {
+ test_helpers::test_unary_mask_elementwise(
+ &Vector::<LANES>::is_subnormal,
+ &Scalar::is_subnormal,
+ &|_| true,
+ );
+ }
+
+ fn abs<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::abs,
+ &Scalar::abs,
+ &|_| true,
+ )
+ }
+
+ fn recip<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::recip,
+ &Scalar::recip,
+ &|_| true,
+ )
+ }
+
+ fn to_degrees<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::to_degrees,
+ &Scalar::to_degrees,
+ &|_| true,
+ )
+ }
+
+ fn to_radians<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::to_radians,
+ &Scalar::to_radians,
+ &|_| true,
+ )
+ }
+
+ fn signum<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::signum,
+ &Scalar::signum,
+ &|_| true,
+ )
+ }
+
+ fn copysign<const LANES: usize>() {
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::copysign,
+ &Scalar::copysign,
+ &|_, _| true,
+ )
+ }
+
+ fn min<const LANES: usize>() {
+ // Regular case (excluding lane pairs of zeros with opposite signs)
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::min,
+ &Scalar::min,
+ // Reject the case where both values are zero with different signs
+ &|a, b| {
+ for (a, b) in a.iter().zip(b.iter()) {
+ if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ return false;
+ }
+ }
+ true
+ }
+ );
+
+ // Special case where both values are zero: either sign of zero may be returned, so only check that the result is zero
+ let p_zero = Vector::<LANES>::splat(0.);
+ let n_zero = Vector::<LANES>::splat(-0.);
+ assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
+ assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
+ }
+
+ fn max<const LANES: usize>() {
+ // Regular case (excluding lane pairs of zeros with opposite signs)
+ test_helpers::test_binary_elementwise(
+ &Vector::<LANES>::max,
+ &Scalar::max,
+ // Reject the case where both values are zero with different signs
+ &|a, b| {
+ for (a, b) in a.iter().zip(b.iter()) {
+ if *a == 0. && *b == 0. && a.signum() != b.signum() {
+ return false;
+ }
+ }
+ true
+ }
+ );
+
+ // Special case where both values are zero: either sign of zero may be returned, so only check that the result is zero
+ let p_zero = Vector::<LANES>::splat(0.);
+ let n_zero = Vector::<LANES>::splat(-0.);
+ assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
+ assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
+ }
+
+ fn clamp<const LANES: usize>() {
+ test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
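+ // Scalar::clamp panics when min > max or when either bound is NaN, so sanitize the bounds first.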
+ for (min, max) in min.iter_mut().zip(max.iter_mut()) {
+ if max < min {
+ core::mem::swap(min, max);
+ }
+ if min.is_nan() {
+ *min = Scalar::NEG_INFINITY;
+ }
+ if max.is_nan() {
+ *max = Scalar::INFINITY;
+ }
+ }
+
+ let mut result_scalar = [Scalar::default(); LANES];
+ for i in 0..LANES {
+ result_scalar[i] = value[i].clamp(min[i], max[i]);
+ }
+ let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
+ test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+ Ok(())
+ })
+ }
+
+ fn horizontal_sum<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ Vector::<LANES>::from_array(x).horizontal_sum(),
+ x.iter().sum(),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_product<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ test_helpers::prop_assert_biteq! (
+ Vector::<LANES>::from_array(x).horizontal_product(),
+ x.iter().product(),
+ );
+ Ok(())
+ });
+ }
+
+ fn horizontal_max<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ let vmax = Vector::<LANES>::from_array(x).horizontal_max();
+ let smax = x.iter().copied().fold(Scalar::NAN, Scalar::max);
+ // When both 0. and -0. appear, either zero may be returned, so skip the bitwise check in that case
+ if !(x.contains(&0.) && x.contains(&-0.) && vmax.abs() == 0. && smax.abs() == 0.) {
+ test_helpers::prop_assert_biteq!(vmax, smax);
+ }
+ Ok(())
+ });
+ }
+
+ fn horizontal_min<const LANES: usize>() {
+ test_helpers::test_1(&|x| {
+ let vmin = Vector::<LANES>::from_array(x).horizontal_min();
+ let smin = x.iter().copied().fold(Scalar::NAN, Scalar::min);
+ // When both 0. and -0. appear, either zero may be returned, so skip the bitwise check in that case
+ if !(x.contains(&0.) && x.contains(&-0.) && vmin.abs() == 0. && smin.abs() == 0.) {
+ test_helpers::prop_assert_biteq!(vmin, smin);
+ }
+ Ok(())
+ });
+ }
+ }
+
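+ // sqrt and mul_add may call into the platform math library, so they are provided by the StdFloat extension trait and gated on std.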
+ #[cfg(feature = "std")]
+ mod std {
+ use std_float::StdFloat;
+
+ use super::*;
+ test_helpers::test_lanes! {
+ fn sqrt<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::sqrt,
+ &Scalar::sqrt,
+ &|_| true,
+ )
+ }
+
+ fn mul_add<const LANES: usize>() {
+ test_helpers::test_ternary_elementwise(
+ &Vector::<LANES>::mul_add,
+ &Scalar::mul_add,
+ &|_, _, _| true,
+ )
+ }
+ }
+ }
+ }
+ }
+}
--- /dev/null
+#![feature(portable_simd)]
+
+macro_rules! float_rounding_test {
+ { $scalar:tt, $int_scalar:tt } => {
+ mod $scalar {
+ use std_float::StdFloat;
+
+ type Vector<const LANES: usize> = core_simd::Simd<$scalar, LANES>;
+ type Scalar = $scalar;
+ type IntScalar = $int_scalar;
+
+ #[cfg(feature = "std")]
+ test_helpers::test_lanes! {
+ fn ceil<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::ceil,
+ &Scalar::ceil,
+ &|_| true,
+ )
+ }
+
+ fn floor<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::floor,
+ &Scalar::floor,
+ &|_| true,
+ )
+ }
+
+ fn round<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::round,
+ &Scalar::round,
+ &|_| true,
+ )
+ }
+
+ fn trunc<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::trunc,
+ &Scalar::trunc,
+ &|_| true,
+ )
+ }
+
+ fn fract<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::fract,
+ &Scalar::fract,
+ &|_| true,
+ )
+ }
+ }
+
+ test_helpers::test_lanes! {
- fn from_int<const LANES: usize>() {
- test_helpers::test_unary_elementwise(
- &Vector::<LANES>::round_from_int,
- &|x| x as Scalar,
- &|_| true,
- )
- }
-
+ fn to_int_unchecked<const LANES: usize>() {
+ // The maximum integer that can be represented by the equivalently sized float has
+ // all of the mantissa digits set to 1, pushed up to the MSB.
+ const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
+ const MAX_REPRESENTABLE_VALUE: Scalar =
+ (ALL_MANTISSA_BITS << (core::mem::size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar;
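+ // e.g. for f32/i32: (2^24 - 1) << 7 == 2^31 - 2^7, the largest f32 value that still fits in i32.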
+
+ let mut runner = proptest::test_runner::TestRunner::default();
+ runner.run(
+ &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
+ |x| {
- let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
++ let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
+ let result_2 = {
- let mut result = [0; LANES];
++ let mut result: [IntScalar; LANES] = [0; LANES];
+ for (i, o) in x.iter().zip(result.iter_mut()) {
- *o = unsafe { i.to_int_unchecked() };
++ *o = unsafe { i.to_int_unchecked::<IntScalar>() };
+ }
+ result
+ };
+ test_helpers::prop_assert_biteq!(result_1, result_2);
+ Ok(())
+ },
+ ).unwrap();
+ }
+ }
+ }
+ }
+}
+
+float_rounding_test! { f32, i32 }
+float_rounding_test! { f64, i64 }