1 #![allow(non_camel_case_types)]
3 use crate::simd::intrinsics;
4 use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount};
/// Implements inherent methods for a float vector `Simd<$type, LANES>`.
///
/// * `$type` is the float lane type.
/// * `$bits_ty` is the integer type used as the binary representation of a lane
///   (see `to_bits` / `from_bits`).
/// * `$mask_ty` is the element type of the `Mask` returned by the lane-wise predicates.
macro_rules! impl_float_vector {
    { $type:ty, $bits_ty:ty, $mask_ty:ty } => {
        impl<const LANES: usize> Simd<$type, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
        {
            /// Raw transmutation to an unsigned integer vector type with the
            /// same size and number of lanes.
            #[inline]
            pub fn to_bits(self) -> Simd<$bits_ty, LANES> {
                assert_eq!(
                    core::mem::size_of::<Self>(),
                    core::mem::size_of::<Simd<$bits_ty, LANES>>(),
                );
                // SAFETY: the assertion above guarantees source and destination have the
                // same size, so `transmute_copy` never reads past `self`.
                // NOTE(review): assumes every bit pattern is a valid integer vector — confirm.
                unsafe { core::mem::transmute_copy(&self) }
            }

            /// Raw transmutation from an unsigned integer vector type with the
            /// same size and number of lanes.
            #[inline]
            pub fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
                assert_eq!(
                    core::mem::size_of::<Self>(),
                    core::mem::size_of::<Simd<$bits_ty, LANES>>(),
                );
                // SAFETY: the assertion above guarantees source and destination have the
                // same size, so `transmute_copy` never reads past `bits`.
                // NOTE(review): assumes every bit pattern is a valid float vector — confirm.
                unsafe { core::mem::transmute_copy(&bits) }
            }

            /// Produces a vector where every lane has the absolute value of the
            /// equivalently-indexed lane in `self`.
            #[inline]
            pub fn abs(self) -> Self {
                // SAFETY: NOTE(review): presumably `simd_fabs` only requires a float
                // vector argument, which `Self` is — confirm against the intrinsic.
                unsafe { intrinsics::simd_fabs(self) }
            }

            /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error,
            /// yielding a more accurate result than an unfused multiply-add.
            ///
            /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
            /// architecture has a dedicated `fma` CPU instruction. However, this is not always
            /// true, and will be heavily dependent on designing algorithms with specific target
            /// hardware in mind.
            #[cfg(feature = "std")]
            #[inline]
            pub fn mul_add(self, a: Self, b: Self) -> Self {
                // SAFETY: NOTE(review): presumably `simd_fma` only requires three float
                // vectors of the same type, which holds here — confirm against the intrinsic.
                unsafe { intrinsics::simd_fma(self, a, b) }
            }

            /// Produces a vector where every lane has the square root value
            /// of the equivalently-indexed lane in `self`.
            #[inline]
            #[cfg(feature = "std")]
            pub fn sqrt(self) -> Self {
                // SAFETY: NOTE(review): presumably `simd_fsqrt` only requires a float
                // vector argument, which `Self` is — confirm against the intrinsic.
                unsafe { intrinsics::simd_fsqrt(self) }
            }

            /// Takes the reciprocal (inverse) of each lane, `1/x`.
            #[inline]
            pub fn recip(self) -> Self {
                Self::splat(1.0) / self
            }

            /// Converts each lane from radians to degrees.
            #[inline]
            pub fn to_degrees(self) -> Self {
                // to_degrees uses a special constant for better precision, so extract that constant
                self * Self::splat(<$type>::to_degrees(1.))
            }

            /// Converts each lane from degrees to radians.
            #[inline]
            pub fn to_radians(self) -> Self {
                // Same approach as `to_degrees`: scale by the scalar conversion of `1.0`.
                self * Self::splat(<$type>::to_radians(1.))
            }

            /// Returns true for each lane if it has a positive sign, including
            /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
            #[inline]
            pub fn is_sign_positive(self) -> Mask<$mask_ty, LANES> {
                !self.is_sign_negative()
            }

            /// Returns true for each lane if it has a negative sign, including
            /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
            #[inline]
            pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> {
                // `(!0 >> 1) + 1` has only the most significant bit set, i.e. the sign bit.
                let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
                sign_bits.lanes_gt(Simd::splat(0))
            }

            /// Returns true for each lane if its value is `NaN`.
            #[inline]
            pub fn is_nan(self) -> Mask<$mask_ty, LANES> {
                // `NaN` is the only float value that compares unequal to itself.
                self.lanes_ne(self)
            }

            /// Returns true for each lane if its value is positive infinity or negative infinity.
            #[inline]
            pub fn is_infinite(self) -> Mask<$mask_ty, LANES> {
                self.abs().lanes_eq(Self::splat(<$type>::INFINITY))
            }

            /// Returns true for each lane if its value is neither infinite nor `NaN`.
            #[inline]
            pub fn is_finite(self) -> Mask<$mask_ty, LANES> {
                // Comparisons with `NaN` are false, so `NaN` lanes are excluded as well.
                self.abs().lanes_lt(Self::splat(<$type>::INFINITY))
            }

            /// Returns true for each lane if its value is subnormal.
            #[inline]
            pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> {
                // Non-zero value whose exponent bits (the bits set in `INFINITY`) are all zero.
                self.abs().lanes_ne(Self::splat(0.0))
                    & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits())
                        .lanes_eq(Simd::splat(0))
            }

            /// Returns true for each lane if its value is neither zero, infinite,
            /// subnormal, nor `NaN`.
            #[inline]
            pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
                !(self.abs().lanes_eq(Self::splat(0.0))
                    | self.is_nan()
                    | self.is_subnormal()
                    | self.is_infinite())
            }

            /// Replaces each lane with a number that represents its sign.
            ///
            /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
            /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
            /// * `NAN` if the number is `NAN`
            #[inline]
            pub fn signum(self) -> Self {
                self.is_nan()
                    .select(Self::splat(<$type>::NAN), Self::splat(1.0).copysign(self))
            }

            /// Returns each lane with the magnitude of `self` and the sign of `sign`.
            ///
            /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned.
            #[inline]
            pub fn copysign(self, sign: Self) -> Self {
                // The bit pattern of `-0.0` is used as the sign-bit mask.
                let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
                let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
                Self::from_bits(sign_bit | magnitude)
            }

            /// Returns the minimum of each lane.
            ///
            /// If one of the values is `NAN`, then the other value is returned.
            #[inline]
            pub fn min(self, other: Self) -> Self {
                // TODO consider using an intrinsic
                self.is_nan().select(
                    other,
                    self.lanes_ge(other).select(other, self)
                )
            }

            /// Returns the maximum of each lane.
            ///
            /// If one of the values is `NAN`, then the other value is returned.
            #[inline]
            pub fn max(self, other: Self) -> Self {
                // TODO consider using an intrinsic
                self.is_nan().select(
                    other,
                    self.lanes_le(other).select(other, self)
                )
            }

            /// Restrict each lane to a certain interval unless it is NaN.
            ///
            /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
            /// greater than `max`, and the corresponding lane in `min` if the lane is less
            /// than `min`. Otherwise returns the lane in `self`.
            ///
            /// # Panics
            ///
            /// Panics if any lane in `min` is not less than or equal to the corresponding
            /// lane in `max`.
            #[inline]
            pub fn clamp(self, min: Self, max: Self) -> Self {
                assert!(
                    min.lanes_le(max).all(),
                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
                );
                let mut x = self;
                x = x.lanes_lt(min).select(min, x);
                x = x.lanes_gt(max).select(max, x);
                x
            }
        }
    };
}
// Generate the float vector methods for each float lane type. The arguments are
// the float type, the unsigned integer type used for its bit representation,
// and the signed integer type used as the mask element type.
impl_float_vector! { f32, u32, i32 }
impl_float_vector! { f64, u64, i64 }
/// A SIMD vector with two lanes of type `f32`.
pub type f32x2 = Simd<f32, 2>;
/// A SIMD vector with four lanes of type `f32`.
pub type f32x4 = Simd<f32, 4>;
/// A SIMD vector with eight lanes of type `f32`.
pub type f32x8 = Simd<f32, 8>;
/// A SIMD vector with 16 lanes of type `f32`.
pub type f32x16 = Simd<f32, 16>;
/// A SIMD vector with two lanes of type `f64`.
pub type f64x2 = Simd<f64, 2>;
/// A SIMD vector with four lanes of type `f64`.
pub type f64x4 = Simd<f64, 4>;
/// A SIMD vector with eight lanes of type `f64`.
pub type f64x8 = Simd<f64, 8>;