3 // ignore-debug: the debug assertions get in the way
8 use std::ptr::{read, copy_nonoverlapping, write};
10 type KeccakBuffer = [[u64; 5]; 5];
12 // A basic read+copy+write swap implementation ends up copying one of the values
13 // to stack for large types, which is completely unnecessary as the lack of
14 // overlap means we can just do whatever fits in registers at a time.
16 // CHECK-LABEL: @swap_basic
18 pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
19 // CHECK: alloca [5 x [5 x i64]]
21 // SAFETY: exclusive references are always valid to read/write,
22 // are non-overlapping, and nothing here panics so it's drop-safe.
25 copy_nonoverlapping(y, x, 1);
30 // This test verifies that the library does something smarter, and thus
31 // doesn't need any scratch space on the stack.
33 // CHECK-LABEL: @swap_std
35 pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
37 // CHECK: load <{{[0-9]+}} x i64>
38 // CHECK: store <{{[0-9]+}} x i64>
42 // Verify that types with usize alignment are swapped via vectored usizes,
43 // not falling back to byte-level code.
45 // CHECK-LABEL: @swap_slice
47 pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
49 // CHECK: load <{{[0-9]+}} x i64>
50 // CHECK: store <{{[0-9]+}} x i64>
51 if x.len() == y.len() {
56 // But for a large align-1 type, vectorized byte copying is what we want.
58 type OneKilobyteBuffer = [u8; 1024];
60 // CHECK-LABEL: @swap_1kb_slices
62 pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
64 // CHECK: load <{{[0-9]+}} x i8>
65 // CHECK: store <{{[0-9]+}} x i8>
66 if x.len() == y.len() {
71 // This verifies that the 2×read + 2×write optimizes to just 3 memcpys
72 // for an unusual type like this. It's not clear whether we should do anything
73 // smarter in Rust for these, so for now it's fine to leave these up to the backend.
74 // That's not as bad as it might seem, as for example, LLVM will lower the
75 // memcpys below to VMOVAPS on YMMs if one enables the AVX target feature.
76 // Eventually we'll be able to pass `align_of::<T>` to a const generic and
77 // thus pick a smarter chunk size ourselves without huge code duplication.
// A large (192-byte) payload wrapped in a newtype struct, used to exercise the
// swap codegen for big types.
// NOTE(review): the CHECK lines below expect the memcpys to be
// `align 64 dereferenceable(192)`, so a `#[repr(align(64))]` attribute
// presumably precedes this struct in the full file (not visible in this
// extract) — confirm; a plain `[u8; 192]` newtype would only be align 1.
80 pub struct BigButHighlyAligned([u8; 64 * 3]);
82 // CHECK-LABEL: @swap_big_aligned
84 pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
85 // CHECK-NOT: call void @llvm.memcpy
86 // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
87 // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
88 // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
89 // CHECK-NOT: call void @llvm.memcpy