4 use std::arch::x86_64::*;
/// Vector-of-`f32` type supplied by each implementor.
///
/// The bounds require it to be `Copy` and `Debug`, and to support `+` both
/// with another `Self::Vf32` and with a plain `f32`, each producing a
/// `Self::Vf32`.
9 type Vf32: Copy + Debug + Add<Self::Vf32, Output = Self::Vf32> + Add<f32, Output = Self::Vf32>;
/// Builds a `Self::Vf32` from the scalar `a`.
///
/// NOTE(review): presumably broadcasts `a` into every lane, mirroring the
/// `_mm_set1_ps` intrinsic it is named after — no implementing body is
/// visible here, so confirm against the implementations.
///
/// # Safety
///
/// The caller contract is not stated in the visible source; presumably the
/// CPU features required by the implementor must be available — TODO confirm.
11 unsafe fn set1_ps(a: f32) -> Self::Vf32;
/// Combines the vectors `a` and `b` into a single `Self::Vf32`.
///
/// The implementation visible in this file wraps `_mm_add_ps(a.0, b.0)` in
/// an `F32x4`; presumably every implementor performs the same lane-wise
/// addition — confirm for other implementors.
///
/// # Safety
///
/// The caller contract is not stated in the visible source; presumably the
/// CPU features required by the implementor must be available — TODO confirm.
12 unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32;
/// Newtype wrapper around the `__m128` vector type from `std::arch::x86_64`.
///
/// Wrapping the raw intrinsic type in a local struct lets this file implement
/// operator traits such as `Add` for it. The inner value is public and is
/// reached as `.0`.
15 #[derive(Copy, Debug, Clone)]
16 pub struct F32x4(pub __m128);
18 impl Add<F32x4> for F32x4 {
21 fn add(self, rhs: F32x4) -> F32x4 {
22 F32x4(unsafe { _mm_add_ps(self.0, rhs.0) })
26 impl Add<f32> for F32x4 {
28 fn add(self, rhs: f32) -> F32x4 {
29 F32x4(unsafe { _mm_add_ps(self.0, _mm_set1_ps(rhs)) })
38 unsafe fn set1_ps(a: f32) -> Self::Vf32 {
43 unsafe fn add_ps(a: Self::Vf32, b: Self::Vf32) -> Self::Vf32 {
44 F32x4(_mm_add_ps(a.0, b.0))
48 unsafe fn test<S: Simd>() -> S::Vf32 {
49 let a = S::set1_ps(3.0);
50 let b = S::set1_ps(2.0);
56 println!("{:?}", unsafe { test::<Sse2>() });