1 //! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes.
11 #[derive(Debug, Clone)]
12 pub struct SipHasher128 {
15 length: usize, // how many bytes we've processed
16 state: State, // hash State
17 tail: u64, // unprocessed bytes le
18 ntail: usize, // how many bytes in tail are valid
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
// One SipRound (the core ARX permutation of SipHash). The truncated copy was
// missing the three xor-feedback steps (`$v1 ^= $v0`, `$v3 ^= $v2`,
// `$v3 ^= $v0`, `$v1 ^= $v2`) and the closing braces; restored per the
// SipHash reference.
macro_rules! compress {
    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        ptr::copy_nonoverlapping(
            $buf.get_unchecked($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        // Bytes were copied verbatim; `to_le` makes the result an LE value on
        // every platform (no-op on little-endian hardware).
        data.to_le()
    }};
}
72 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
73 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
74 /// sizes and avoid calling `memcpy`, which is good for speed.
76 /// Unsafe because: unchecked indexing at start..start+len
78 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
79 debug_assert!(len < 8);
80 let mut i = 0; // current byte index (from LSB) in the output u64
83 out = load_int_le!(buf, start + i, u32) as u64;
87 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
91 out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
94 debug_assert_eq!(i, len);
100 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
101 let mut state = SipHasher128 {
105 state: State { v0: 0, v1: 0, v2: 0, v3: 0 },
114 fn reset(&mut self) {
116 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
117 self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
118 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
119 self.state.v3 = self.k1 ^ 0x7465646279746573;
122 // This is only done in the 128 bit version:
123 self.state.v1 ^= 0xee;
126 // A specialized write function for values with size <= 8.
128 // The hashing of multi-byte integers depends on endianness. E.g.:
129 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
130 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
132 // This function does the right thing for little-endian hardware. On
133 // big-endian hardware `x` must be byte-swapped first to give the right
134 // behaviour. After any byte-swapping, the input must be zero-extended to
135 // 64-bits. The caller is responsible for the byte-swapping and
138 fn short_write<T>(&mut self, _x: T, x: u64) {
139 let size = mem::size_of::<T>();
142 // The original number must be zero-extended, not sign-extended.
143 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
145 // The number of bytes needed to fill `self.tail`.
146 let needed = 8 - self.ntail;
148 // SipHash parses the input stream as 8-byte little-endian integers.
149 // Inputs are put into `self.tail` until 8 bytes of data have been
150 // collected, and then that word is processed.
152 // For example, imagine that `self.tail` is 0x0000_00EE_DDCC_BBAA,
153 // `self.ntail` is 5 (because 5 bytes have been put into `self.tail`),
154 // and `needed` is therefore 3.
156 // - Scenario 1, `self.write_u8(0xFF)`: we have already zero-extended
157 // the input to 0x0000_0000_0000_00FF. We now left-shift it five
158 // bytes, giving 0x0000_FF00_0000_0000. We then bitwise-OR that value
159 // into `self.tail`, resulting in 0x0000_FFEE_DDCC_BBAA.
160 // (Zero-extension of the original input is critical in this scenario
161 // because we don't want the high two bytes of `self.tail` to be
162 // touched by the bitwise-OR.) `self.tail` is not yet full, so we
163 // return early, after updating `self.ntail` to 6.
165 // - Scenario 2, `self.write_u32(0xIIHH_GGFF)`: we have already
166 // zero-extended the input to 0x0000_0000_IIHH_GGFF. We now
167 // left-shift it five bytes, giving 0xHHGG_FF00_0000_0000. We then
168 // bitwise-OR that value into `self.tail`, resulting in
169 // 0xHHGG_FFEE_DDCC_BBAA. `self.tail` is now full, and we can use it
170 // to update `self.state`. (As mentioned above, this assumes a
171 // little-endian machine; on a big-endian machine we would have
172 // byte-swapped 0xIIHH_GGFF in the caller, giving 0xFFGG_HHII, and we
173 // would then end up bitwise-ORing 0xGGHH_II00_0000_0000 into
176 self.tail |= x << (8 * self.ntail);
182 // `self.tail` is full, process it.
183 self.state.v3 ^= self.tail;
184 Sip24Rounds::c_rounds(&mut self.state);
185 self.state.v0 ^= self.tail;
187 // Continuing scenario 2: we have one byte left over from the input. We
188 // set `self.ntail` to 1 and `self.tail` to `0x0000_0000_IIHH_GGFF >>
189 // 8*3`, which is 0x0000_0000_0000_00II. (Or on a big-endian machine
190 // the prior byte-swapping would leave us with 0x0000_0000_0000_00FF.)
192 // The `if` is needed to avoid shifting by 64 bits, which Rust
194 self.ntail = size - needed;
195 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
199 pub fn finish128(mut self) -> (u64, u64) {
200 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
203 Sip24Rounds::c_rounds(&mut self.state);
206 self.state.v2 ^= 0xee;
207 Sip24Rounds::d_rounds(&mut self.state);
208 let _0 = self.state.v0 ^ self.state.v1 ^ self.state.v2 ^ self.state.v3;
210 self.state.v1 ^= 0xdd;
211 Sip24Rounds::d_rounds(&mut self.state);
212 let _1 = self.state.v0 ^ self.state.v1 ^ self.state.v2 ^ self.state.v3;
217 impl Hasher for SipHasher128 {
219 fn write_u8(&mut self, i: u8) {
220 self.short_write(i, i as u64);
224 fn write_u16(&mut self, i: u16) {
225 self.short_write(i, i.to_le() as u64);
229 fn write_u32(&mut self, i: u32) {
230 self.short_write(i, i.to_le() as u64);
234 fn write_u64(&mut self, i: u64) {
235 self.short_write(i, i.to_le() as u64);
239 fn write_usize(&mut self, i: usize) {
240 self.short_write(i, i.to_le() as u64);
244 fn write_i8(&mut self, i: i8) {
245 self.short_write(i, i as u8 as u64);
249 fn write_i16(&mut self, i: i16) {
250 self.short_write(i, (i as u16).to_le() as u64);
254 fn write_i32(&mut self, i: i32) {
255 self.short_write(i, (i as u32).to_le() as u64);
259 fn write_i64(&mut self, i: i64) {
260 self.short_write(i, (i as u64).to_le() as u64);
264 fn write_isize(&mut self, i: isize) {
265 self.short_write(i, (i as usize).to_le() as u64);
269 fn write(&mut self, msg: &[u8]) {
270 let length = msg.len();
271 self.length += length;
276 needed = 8 - self.ntail;
277 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
279 self.ntail += length;
282 self.state.v3 ^= self.tail;
283 Sip24Rounds::c_rounds(&mut self.state);
284 self.state.v0 ^= self.tail;
289 // Buffered tail is now flushed, process new input.
290 let len = length - needed;
291 let left = len & 0x7;
294 while i < len - left {
295 let mi = unsafe { load_int_le!(msg, i, u64) };
298 Sip24Rounds::c_rounds(&mut self.state);
304 self.tail = unsafe { u8to64_le(msg, i, left) };
308 fn finish(&self) -> u64 {
309 panic!("SipHasher128 cannot provide valid 64 bit hashes")
313 #[derive(Debug, Clone, Default)]
318 fn c_rounds(state: &mut State) {
324 fn d_rounds(state: &mut State) {