//! This is a copy of `core::hash::sip` adapted to provide 128-bit hashes.

use std::hash::Hasher;
use std::mem::{self, MaybeUninit};
use std::ptr;

const ELEM_SIZE: usize = mem::size_of::<u64>();
const BUFFER_SIZE_ELEMS: usize = 8;
const BUFFER_SIZE_BYTES: usize = BUFFER_SIZE_ELEMS * ELEM_SIZE;
const BUFFER_SIZE_ELEMS_SPILL: usize = BUFFER_SIZE_ELEMS + 1;
const BUFFER_SIZE_BYTES_SPILL: usize = BUFFER_SIZE_ELEMS_SPILL * ELEM_SIZE;
const BUFFER_SPILL_INDEX: usize = BUFFER_SIZE_ELEMS_SPILL - 1;
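
// In concrete terms (added for clarity): with an `ELEM_SIZE` of 8 bytes, the
// buffer holds 8 elements (64 bytes) plus a one-element spill, 72 bytes in
// total, and the spill element lives at index 8.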

#[derive(Debug, Clone)]
pub struct SipHasher128 {
    // The access pattern during hashing consists of accesses to `nbuf` and
    // `buf` until the buffer is full, followed by accesses to `state` and
    // `processed`, and then repetition of that pattern until hashing is done.
    // This is the basis for the ordering of fields below. However, in practice
    // the cache miss-rate for data access is extremely low regardless of order.
    nbuf: usize, // how many bytes in buf are valid
    buf: [MaybeUninit<u64>; BUFFER_SIZE_ELEMS_SPILL], // unprocessed bytes, read as little-endian u64 elements
    state: State, // hash State
    processed: usize, // how many bytes we've processed
}

#[derive(Debug, Clone, Copy)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}

macro_rules! compress {
    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}

// Copies up to 8 bytes from source to destination. This performs better than
// `ptr::copy_nonoverlapping` on microbenchmarks and may perform better on real
// workloads since all of the copies have fixed sizes and avoid calling memcpy.
#[inline]
unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) {
    const COUNT_MAX: usize = 8;
    debug_assert!(count <= COUNT_MAX);

    if count == COUNT_MAX {
        ptr::copy_nonoverlapping(src, dst, COUNT_MAX);
        return;
    }

    let mut i = 0;
    if i + 3 < count {
        ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4);
        i += 4;
    }

    if i + 1 < count {
        ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2);
        i += 2;
    }

    if i < count {
        *dst.add(i) = *src.add(i);
        i += 1;
    }

    debug_assert_eq!(i, count);
}

// This implementation uses buffering to reduce the hashing cost for inputs
// consisting of many small integers. Buffering simplifies the integration of
// integer input--the integer write function typically just appends to the
// buffer with a statically sized write, updates metadata, and returns.
//
// Buffering also prevents alternating between writes that do and do not trigger
// the hashing process. Only when the entire buffer is full do we transition
// into hashing. This allows us to keep the hash state in registers for longer,
// instead of loading and storing it before and after processing each element.
//
// When a write fills the buffer, a buffer processing function is invoked to
// hash all of the buffered input. The buffer processing functions are marked
// `#[inline(never)]` so that they aren't inlined into the append functions,
// which ensures the more frequently called append functions remain inlineable
// and don't include register pushing/popping that would only be made necessary
// by inclusion of the complex buffer processing path which uses those
// registers.
//
// The buffer includes a "spill"--an extra element at the end--which simplifies
// the integer write buffer processing path. The value that fills the buffer can
// be written with a statically sized write that may spill over into the spill.
// After the buffer is processed, the part of the value that spilled over can be
// written from the spill to the beginning of the buffer with another statically
// sized write. Due to static sizes, this scheme performs better than copying
// the exact number of bytes needed into the end and beginning of the buffer.
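//
// As a worked illustration of the spill (added here, not from the original
// comment): with the 64-byte buffer, suppose 63 bytes are already buffered and
// an 8-byte integer is written. One byte lands in the last element and seven
// bytes land in the spill. The full buffer is hashed, the seven spilled bytes
// are copied back to the start of the buffer with another fixed-size copy, and
// `nbuf` becomes 7.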
//
// The buffer is uninitialized, which improves performance, but may preclude
// efficient implementation of alternative approaches. The improvement is not so
// large that an alternative approach should be disregarded because it cannot be
// efficiently implemented with an uninitialized buffer. On the other hand, an
// uninitialized buffer may become more important should a larger one be used.
//
// # Platform Dependence
//
// The SipHash algorithm operates on byte sequences. It parses the input stream
// as 8-byte little-endian integers. Therefore, given the same byte sequence, it
// produces the same result on big- and little-endian hardware.
//
// However, the Hasher trait has methods which operate on multi-byte integers.
// How they are converted into byte sequences can be endian-dependent (by using
// native byte order) or independent (by consistently using either LE or BE byte
// order). It can also be `isize` and `usize` size-dependent (by using the
// native size), or independent (by converting to a common size), supposing the
// values can be represented in 32 bits.
//
// In order to make `SipHasher128` consistent with `SipHasher` in libstd, we
// choose to do the integer-to-byte-sequence conversion in the platform-
// dependent way. Clients can achieve (nearly) platform-independent hashing by
// widening `isize` and `usize` integers to 64 bits on 32-bit systems and
// byte-swapping integers on big-endian systems before passing them to the
// writing functions. This causes the input byte sequence to look identical on
// big- and little-endian systems (supposing `isize` and `usize` values can be
// represented in 32 bits), which ensures platform-independent results.
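
// The helper below is an illustrative sketch, not part of the original module:
// it shows one way a client could pre-condition a value so that the byte
// stream fed to the hasher is identical on all platforms (assuming the value
// fits in 64 bits). The function name is hypothetical.
#[allow(dead_code)]
fn write_u64_platform_independent(hasher: &mut SipHasher128, x: u64) {
    // `to_le` byte-swaps on big-endian targets and is a no-op on little-endian
    // ones, so the native-endian bytes subsequently written by `write_u64`
    // form the same little-endian sequence everywhere.
    hasher.write_u64(x.to_le());
}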

impl SipHasher128 {
    #[inline]
    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
        let mut hasher = SipHasher128 {
            nbuf: 0,
            buf: MaybeUninit::uninit_array(),
            state: State {
                v0: key0 ^ 0x736f6d6570736575,
                // The XOR with 0xee is only done on the 128-bit version of the
                // algorithm.
                v1: key1 ^ (0x646f72616e646f6d ^ 0xee),
                v2: key0 ^ 0x6c7967656e657261,
                v3: key1 ^ 0x7465646279746573,
            },
            processed: 0,
        };

        unsafe {
            // Initialize spill because we read from it in `short_write_process_buffer`.
            *hasher.buf.get_unchecked_mut(BUFFER_SPILL_INDEX) = MaybeUninit::zeroed();
        }

        hasher
    }

    // A specialized write function for values with size <= 8.
    #[inline]
    fn short_write<T>(&mut self, x: T) {
        let size = mem::size_of::<T>();
        let nbuf = self.nbuf;
        debug_assert!(size <= 8);
        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
        debug_assert!(nbuf + size < BUFFER_SIZE_BYTES_SPILL);

        if nbuf + size < BUFFER_SIZE_BYTES {
            unsafe {
                // The memcpy call is optimized away because the size is known.
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
                ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
            }

            self.nbuf = nbuf + size;

            return;
        }

        unsafe { self.short_write_process_buffer(x) }
    }

    // A specialized write function for values with size <= 8 that should only
    // be called when the write would cause the buffer to fill.
    //
    // SAFETY: the write of `x` into `self.buf` starting at byte offset
    // `self.nbuf` must cause `self.buf` to become fully initialized (and not
    // overflow) if it wasn't already.
    #[inline(never)]
    unsafe fn short_write_process_buffer<T>(&mut self, x: T) {
        let size = mem::size_of::<T>();
        let nbuf = self.nbuf;
        debug_assert!(size <= 8);
        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
        debug_assert!(nbuf + size >= BUFFER_SIZE_BYTES);
        debug_assert!(nbuf + size < BUFFER_SIZE_BYTES_SPILL);

        // Copy first part of input into end of buffer, possibly into spill
        // element. The memcpy call is optimized away because the size is known.
        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
        ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);

        // Process buffer.
        for i in 0..BUFFER_SIZE_ELEMS {
            let elem = self.buf.get_unchecked(i).assume_init().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
        }

        // Copy remaining input into start of buffer by copying size - 1
        // bytes from the spill (at most size - 1 bytes could have overflowed
        // into the spill). The memcpy call is optimized away because the size
        // is known. And the whole copy is optimized away for size == 1.
        let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, size - 1);

        // This function should only be called when the write fills the buffer.
        // Therefore, when size == 1, the new `self.nbuf` must be zero. The size
        // is statically known, so the branch is optimized away.
        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE_BYTES };
        self.processed += BUFFER_SIZE_BYTES;
    }

    // A write function for byte slices.
    #[inline]
    fn slice_write(&mut self, msg: &[u8]) {
        let length = msg.len();
        let nbuf = self.nbuf;
        debug_assert!(nbuf < BUFFER_SIZE_BYTES);

        if nbuf + length < BUFFER_SIZE_BYTES {
            unsafe {
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);

                if length <= 8 {
                    copy_nonoverlapping_small(msg.as_ptr(), dst, length);
                } else {
                    // This memcpy is *not* optimized away.
                    ptr::copy_nonoverlapping(msg.as_ptr(), dst, length);
                }
            }

            self.nbuf = nbuf + length;

            return;
        }

        unsafe { self.slice_write_process_buffer(msg) }
    }

    // A write function for byte slices that should only be called when the
    // write would cause the buffer to fill.
    //
    // SAFETY: `self.buf` must be initialized up to the byte offset `self.nbuf`,
    // and `msg` must contain enough bytes to initialize the rest of the element
    // containing the byte offset `self.nbuf`.
    #[inline(never)]
    unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
        let length = msg.len();
        let nbuf = self.nbuf;
        debug_assert!(nbuf < BUFFER_SIZE_BYTES);
        debug_assert!(nbuf + length >= BUFFER_SIZE_BYTES);

        // Always copy first part of input into current element of buffer.
        // This function should only be called when the write fills the buffer,
        // so we know that there is enough input to fill the current element.
        let valid_in_elem = nbuf % ELEM_SIZE;
        let needed_in_elem = ELEM_SIZE - valid_in_elem;

        let src = msg.as_ptr();
        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
        copy_nonoverlapping_small(src, dst, needed_in_elem);

        // Process buffer.

        // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) /
        // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
        // We know that is true, because the last step ensured we have a full
        // element in the buffer.
        let last = nbuf / ELEM_SIZE + 1;

        for i in 0..last {
            let elem = self.buf.get_unchecked(i).assume_init().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
        }

        // Process the remaining element-sized chunks of input.
        let mut processed = needed_in_elem;
        let input_left = length - processed;
        let elems_left = input_left / ELEM_SIZE;
        let extra_bytes_left = input_left % ELEM_SIZE;

        for _ in 0..elems_left {
            let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
            processed += ELEM_SIZE;
        }

        // Copy remaining input into start of buffer.
        let src = msg.as_ptr().add(processed);
        let dst = self.buf.as_mut_ptr() as *mut u8;
        copy_nonoverlapping_small(src, dst, extra_bytes_left);

        self.nbuf = extra_bytes_left;
        self.processed += nbuf + processed;
    }
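
    // Worked example for `slice_write_process_buffer` (added for illustration,
    // not from the original comments): suppose `nbuf` is 3 and a 70-byte slice
    // is written. Then `valid_in_elem = 3` and `needed_in_elem = 5`, so 5 bytes
    // complete element 0 and `last = 1` element is hashed from the buffer. Of
    // the remaining 65 input bytes, `elems_left = 8` full elements are hashed
    // directly from the slice and `extra_bytes_left = 1` byte is buffered,
    // leaving `nbuf = 1` and adding 72 to `processed`.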

    #[inline]
    pub fn finish128(mut self) -> (u64, u64) {
        debug_assert!(self.nbuf < BUFFER_SIZE_BYTES);

        // Process full elements in buffer.
        let last = self.nbuf / ELEM_SIZE;

        // Since we're consuming self, avoid updating members for a potential
        // performance gain.
        let mut state = self.state;

        for i in 0..last {
            let elem = unsafe { self.buf.get_unchecked(i).assume_init().to_le() };
            state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut state);
            state.v0 ^= elem;
        }

        // Get remaining partial element.
        let elem = if self.nbuf % ELEM_SIZE != 0 {
            unsafe {
                // Ensure element is initialized by writing zero bytes. At most
                // `ELEM_SIZE - 1` are required given the above check. It's safe
                // to write this many because we have the spill and we maintain
                // `self.nbuf` such that this write will start before the spill.
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(self.nbuf);
                ptr::write_bytes(dst, 0, ELEM_SIZE - 1);
                self.buf.get_unchecked(last).assume_init().to_le()
            }
        } else {
            0
        };

        // Finalize the hash.
        let length = self.processed + self.nbuf;
        let b: u64 = ((length as u64 & 0xff) << 56) | elem;

        state.v3 ^= b;
        Sip24Rounds::c_rounds(&mut state);
        state.v0 ^= b;

        state.v2 ^= 0xee;
        Sip24Rounds::d_rounds(&mut state);
        let _0 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        state.v1 ^= 0xdd;
        Sip24Rounds::d_rounds(&mut state);
        let _1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        (_0, _1)
    }
}

impl Hasher for SipHasher128 {
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.short_write(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.short_write(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.short_write(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.short_write(i);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.short_write(i);
    }

    #[inline]
    fn write_i8(&mut self, i: i8) {
        self.short_write(i as u8);
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        self.short_write(i as u16);
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        self.short_write(i as u32);
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        self.short_write(i as u64);
    }

    #[inline]
    fn write_isize(&mut self, i: isize) {
        self.short_write(i as usize);
    }

    #[inline]
    fn write(&mut self, msg: &[u8]) {
        self.slice_write(msg);
    }

    fn finish(&self) -> u64 {
        panic!("SipHasher128 cannot provide valid 64 bit hashes")
    }
}

#[derive(Debug, Clone, Default)]
struct Sip24Rounds;

impl Sip24Rounds {
    #[inline]
    fn c_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
    }

    #[inline]
    fn d_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
        compress!(state);
        compress!(state);
    }
}
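
// Illustrative usage sketch (added; not part of the original file): drive the
// hasher with a few writes and read back the 128-bit result as two u64 words.
// The module and test names are hypothetical, and no expected hash values are
// asserted because none are given in the source.
#[cfg(test)]
mod usage_sketch {
    use super::SipHasher128;
    use std::hash::Hasher;

    #[test]
    fn produces_two_words() {
        let mut hasher = SipHasher128::new_with_keys(1, 2);
        hasher.write_u64(42);
        hasher.write(b"hello");
        let (w0, w1) = hasher.finish128();
        // Together, `w0` and `w1` form the 128-bit hash of the written bytes.
        let _ = (w0, w1);
    }
}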