2 // Code ported from the `packed_simd` crate
3 // Run this code with `cargo test --example matrix_inversion`
4 #![feature(array_chunks, portable_simd)]
5 use core_simd::simd::*;
8 // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
9 #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
10 pub struct Matrix4x4([[f32; 4]; 4]);
12 #[allow(clippy::too_many_lines)]
13 pub fn scalar_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
21 m[1][1] * m[2][2] * m[3][3] -
22 m[1][1] * m[2][3] * m[3][2] -
23 m[2][1] * m[1][2] * m[3][3] +
24 m[2][1] * m[1][3] * m[3][2] +
25 m[3][1] * m[1][2] * m[2][3] -
26 m[3][1] * m[1][3] * m[2][2],
28 -m[0][1] * m[2][2] * m[3][3] +
29 m[0][1] * m[2][3] * m[3][2] +
30 m[2][1] * m[0][2] * m[3][3] -
31 m[2][1] * m[0][3] * m[3][2] -
32 m[3][1] * m[0][2] * m[2][3] +
33 m[3][1] * m[0][3] * m[2][2],
35 m[0][1] * m[1][2] * m[3][3] -
36 m[0][1] * m[1][3] * m[3][2] -
37 m[1][1] * m[0][2] * m[3][3] +
38 m[1][1] * m[0][3] * m[3][2] +
39 m[3][1] * m[0][2] * m[1][3] -
40 m[3][1] * m[0][3] * m[1][2],
42 -m[0][1] * m[1][2] * m[2][3] +
43 m[0][1] * m[1][3] * m[2][2] +
44 m[1][1] * m[0][2] * m[2][3] -
45 m[1][1] * m[0][3] * m[2][2] -
46 m[2][1] * m[0][2] * m[1][3] +
47 m[2][1] * m[0][3] * m[1][2],
52 -m[1][0] * m[2][2] * m[3][3] +
53 m[1][0] * m[2][3] * m[3][2] +
54 m[2][0] * m[1][2] * m[3][3] -
55 m[2][0] * m[1][3] * m[3][2] -
56 m[3][0] * m[1][2] * m[2][3] +
57 m[3][0] * m[1][3] * m[2][2],
59 m[0][0] * m[2][2] * m[3][3] -
60 m[0][0] * m[2][3] * m[3][2] -
61 m[2][0] * m[0][2] * m[3][3] +
62 m[2][0] * m[0][3] * m[3][2] +
63 m[3][0] * m[0][2] * m[2][3] -
64 m[3][0] * m[0][3] * m[2][2],
66 -m[0][0] * m[1][2] * m[3][3] +
67 m[0][0] * m[1][3] * m[3][2] +
68 m[1][0] * m[0][2] * m[3][3] -
69 m[1][0] * m[0][3] * m[3][2] -
70 m[3][0] * m[0][2] * m[1][3] +
71 m[3][0] * m[0][3] * m[1][2],
73 m[0][0] * m[1][2] * m[2][3] -
74 m[0][0] * m[1][3] * m[2][2] -
75 m[1][0] * m[0][2] * m[2][3] +
76 m[1][0] * m[0][3] * m[2][2] +
77 m[2][0] * m[0][2] * m[1][3] -
78 m[2][0] * m[0][3] * m[1][2],
83 m[1][0] * m[2][1] * m[3][3] -
84 m[1][0] * m[2][3] * m[3][1] -
85 m[2][0] * m[1][1] * m[3][3] +
86 m[2][0] * m[1][3] * m[3][1] +
87 m[3][0] * m[1][1] * m[2][3] -
88 m[3][0] * m[1][3] * m[2][1],
90 -m[0][0] * m[2][1] * m[3][3] +
91 m[0][0] * m[2][3] * m[3][1] +
92 m[2][0] * m[0][1] * m[3][3] -
93 m[2][0] * m[0][3] * m[3][1] -
94 m[3][0] * m[0][1] * m[2][3] +
95 m[3][0] * m[0][3] * m[2][1],
97 m[0][0] * m[1][1] * m[3][3] -
98 m[0][0] * m[1][3] * m[3][1] -
99 m[1][0] * m[0][1] * m[3][3] +
100 m[1][0] * m[0][3] * m[3][1] +
101 m[3][0] * m[0][1] * m[1][3] -
102 m[3][0] * m[0][3] * m[1][1],
104 -m[0][0] * m[1][1] * m[2][3] +
105 m[0][0] * m[1][3] * m[2][1] +
106 m[1][0] * m[0][1] * m[2][3] -
107 m[1][0] * m[0][3] * m[2][1] -
108 m[2][0] * m[0][1] * m[1][3] +
109 m[2][0] * m[0][3] * m[1][1],
114 -m[1][0] * m[2][1] * m[3][2] +
115 m[1][0] * m[2][2] * m[3][1] +
116 m[2][0] * m[1][1] * m[3][2] -
117 m[2][0] * m[1][2] * m[3][1] -
118 m[3][0] * m[1][1] * m[2][2] +
119 m[3][0] * m[1][2] * m[2][1],
121 m[0][0] * m[2][1] * m[3][2] -
122 m[0][0] * m[2][2] * m[3][1] -
123 m[2][0] * m[0][1] * m[3][2] +
124 m[2][0] * m[0][2] * m[3][1] +
125 m[3][0] * m[0][1] * m[2][2] -
126 m[3][0] * m[0][2] * m[2][1],
128 -m[0][0] * m[1][1] * m[3][2] +
129 m[0][0] * m[1][2] * m[3][1] +
130 m[1][0] * m[0][1] * m[3][2] -
131 m[1][0] * m[0][2] * m[3][1] -
132 m[3][0] * m[0][1] * m[1][2] +
133 m[3][0] * m[0][2] * m[1][1],
135 m[0][0] * m[1][1] * m[2][2] -
136 m[0][0] * m[1][2] * m[2][1] -
137 m[1][0] * m[0][1] * m[2][2] +
138 m[1][0] * m[0][2] * m[2][1] +
139 m[2][0] * m[0][1] * m[1][2] -
140 m[2][0] * m[0][2] * m[1][1],
144 let det = m[0][0] * inv[0][0] + m[0][1] * inv[1][0] + m[0][2] * inv[2][0] + m[0][3] * inv[3][0];
149 let det_inv = 1. / det;
151 for row in &mut inv {
152 for elem in row.iter_mut() {
160 pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
162 let m_0 = f32x4::from_array(m[0]);
163 let m_1 = f32x4::from_array(m[1]);
164 let m_2 = f32x4::from_array(m[2]);
165 let m_3 = f32x4::from_array(m[3]);
167 const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
168 const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
169 const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
170 const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
172 let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01);
173 let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01);
175 let row0 = simd_swizzle!(tmp, row1, SHUFFLE02);
176 let row1 = simd_swizzle!(row1, tmp, SHUFFLE13);
178 let tmp = simd_swizzle!(m_0, m_1, SHUFFLE23);
179 let row3 = simd_swizzle!(m_2, m_3, SHUFFLE23);
180 let row2 = simd_swizzle!(tmp, row3, SHUFFLE02);
181 let row3 = simd_swizzle!(row3, tmp, SHUFFLE13);
183 let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>();
184 let minor0 = row1 * tmp;
185 let minor1 = row0 * tmp;
186 let tmp = tmp.rotate_lanes_right::<2>();
187 let minor0 = (row1 * tmp) - minor0;
188 let minor1 = (row0 * tmp) - minor1;
189 let minor1 = minor1.rotate_lanes_right::<2>();
191 let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>();
192 let minor0 = (row3 * tmp) + minor0;
193 let minor3 = row0 * tmp;
194 let tmp = tmp.rotate_lanes_right::<2>();
196 let minor0 = minor0 - row3 * tmp;
197 let minor3 = row0 * tmp - minor3;
198 let minor3 = minor3.rotate_lanes_right::<2>();
200 let tmp = (row3 * row1.rotate_lanes_right::<2>())
202 .rotate_lanes_right::<2>();
203 let row2 = row2.rotate_lanes_right::<2>();
204 let minor0 = row2 * tmp + minor0;
205 let minor2 = row0 * tmp;
206 let tmp = tmp.rotate_lanes_right::<2>();
207 let minor0 = minor0 - row2 * tmp;
208 let minor2 = row0 * tmp - minor2;
209 let minor2 = minor2.rotate_lanes_right::<2>();
211 let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>();
212 let minor2 = minor2 + row3 * tmp;
213 let minor3 = row2 * tmp - minor3;
214 let tmp = tmp.rotate_lanes_right::<2>();
215 let minor2 = row3 * tmp - minor2;
216 let minor3 = minor3 - row2 * tmp;
218 let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>();
219 let minor1 = minor1 - row2 * tmp;
220 let minor2 = row1 * tmp + minor2;
221 let tmp = tmp.rotate_lanes_right::<2>();
222 let minor1 = row2 * tmp + minor1;
223 let minor2 = minor2 - row1 * tmp;
225 let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>();
226 let minor1 = row3 * tmp + minor1;
227 let minor3 = minor3 - row1 * tmp;
228 let tmp = tmp.rotate_lanes_right::<2>();
229 let minor1 = minor1 - row3 * tmp;
230 let minor3 = row1 * tmp + minor3;
232 let det = row0 * minor0;
233 let det = det.rotate_lanes_right::<2>() + det;
234 let det = det.reverse().rotate_lanes_right::<2>() + det;
236 if det.reduce_sum() == 0. {
239 // calculate the reciprocal
240 let tmp = f32x4::splat(1.0) / det;
241 let det = tmp + tmp - det * tmp * tmp;
243 let res0 = minor0 * det;
244 let res1 = minor1 * det;
245 let res2 = minor2 * det;
246 let res3 = minor3 * det;
250 m[0] = res0.to_array();
251 m[1] = res1.to_array();
252 m[2] = res2.to_array();
253 m[3] = res3.to_array();
265 let tests: &[(Matrix4x4, Option<Matrix4x4>)] = &[
285 [16., 15., 14., 13.],
297 [-3., -0.5, 1.5, 1.0],
298 [ 1., 0.25, -0.25, -0.5],
299 [ 3., 0.25, -1.25, -0.5],
300 [-3., 0.0, 1.0, 1.0],
307 for &(input, output) in tests {
308 assert_eq!(scalar_inv4x4(input), output);
309 assert_eq!(simd_inv4x4(input), output);
315 // Empty main to make cargo happy