1use crate::multiversion;
9use crate::multiversion::Version;
10use std::array;
11use std::sync::LazyLock;
12
13mod bruteforce;
14pub use bruteforce::find_hash_with_appended_count;
15
16#[cfg(test)]
17mod tests;
18
19pub static FASTEST: LazyLock<Version> = multiversion! { fastest(microbenchmark()) };
23
24#[must_use]
36pub fn hash(buf: &[u8]) -> [u32; 4] {
37 scalar::hash(buf)[0]
38}
39
40multiversion! {
41 use {crate::simd::*};
42
43 const ONE_BYTE: usize = U32Vector::LANES;
45 const TWO_BYTES: usize = 2 * U32Vector::LANES;
46 const THREE_BYTES: usize = 3 * U32Vector::LANES;
47 const FOUR_BYTES: usize = 4 * U32Vector::LANES;
48
49 #[must_use]
68 pub fn hash(mut buf: &[u8]) -> [[u32; 4]; U32Vector::LANES] {
69 assert_eq!(buf.len() % U32Vector::LANES, 0);
70 let bytes = buf.len() / U32Vector::LANES;
71
72 let mut state = [
73 U32Vector::splat(0x6745_2301),
74 U32Vector::splat(0xefcd_ab89),
75 U32Vector::splat(0x98ba_dcfe),
76 U32Vector::splat(0x1032_5476),
77 ];
78
79 let mut end_marker_written = false;
80 let mut bit_count_written = false;
81 while !bit_count_written {
82 let mut words = [U32Vector::splat(0); 16];
83
84 let remaining = (buf.len() / FOUR_BYTES).min(16);
85 for (w, chunk) in words.iter_mut().zip(buf.chunks_exact(FOUR_BYTES)) {
86 *w = gather(chunk.try_into().unwrap());
87 }
88 buf = &buf[remaining * FOUR_BYTES..];
89
90 if remaining < 16 {
91 if !end_marker_written {
92 words[remaining] = gather_remaining(buf);
94 buf = &[];
95 end_marker_written = true;
96 }
97
98 if !bit_count_written && remaining <= 13 {
99 let bits = bytes as u64 * 8;
100 words[14] = U32Vector::splat((bits & 0xFFFF_FFFF) as u32);
101 words[15] = U32Vector::splat((bits >> 32) as u32);
102 bit_count_written = true;
103 }
104 }
105
106 state = md5_block(state, &words);
107 }
108
109 let state: [[u32; U32Vector::LANES]; 4] = array::from_fn(|i| state[i].into());
111
112 array::from_fn(|i| {
113 [
114 state[0][i].swap_bytes(),
115 state[1][i].swap_bytes(),
116 state[2][i].swap_bytes(),
117 state[3][i].swap_bytes(),
118 ]
119 })
120 }
121
122 #[inline]
123 fn gather(buf: &[u8; FOUR_BYTES]) -> U32Vector {
124 let mut values = [0u32; U32Vector::LANES];
125 for (i, v) in values.iter_mut().enumerate() {
126 *v = u32::from_le_bytes([
127 buf[i], buf[ONE_BYTE + i], buf[TWO_BYTES + i], buf[THREE_BYTES + i]
128 ]);
129 }
130 values.into()
131 }
132
133 #[inline]
134 fn gather_remaining(buf: &[u8]) -> U32Vector {
135 match buf.len() {
136 THREE_BYTES => {
137 let mut values = [0u32; U32Vector::LANES];
138 for (i, v) in values.iter_mut().enumerate() {
139 *v = u32::from_le_bytes([buf[i], buf[ONE_BYTE + i], buf[TWO_BYTES + i], 0x80]);
140 }
141 values.into()
142 }
143 TWO_BYTES => {
144 let mut values = [0u32; U32Vector::LANES];
145 for (i, v) in values.iter_mut().enumerate() {
146 *v = u32::from_le_bytes([buf[i], buf[ONE_BYTE + i], 0x80, 0]);
147 }
148 values.into()
149 }
150 ONE_BYTE => {
151 let mut values = [0u32; U32Vector::LANES];
152 for (i, v) in values.iter_mut().enumerate() {
153 *v = u32::from_le_bytes([buf[i], 0x80, 0, 0]);
154 }
155 values.into()
156 }
157 0 => U32Vector::splat(0x80),
158 _ => unreachable!("less than 4 bytes left"),
159 }
160 }
161
162 #[expect(clippy::many_single_char_names)]
163 fn md5_block([a0, b0, c0, d0]: [U32Vector; 4], m: &[U32Vector; 16]) -> [U32Vector; 4] {
164 let (mut a, mut b, mut c, mut d) = (a0, b0, c0, d0);
165
166 a = md5_round(md5_f(b, c, d), a, b, m[0], 7, 0xd76a_a478);
167 d = md5_round(md5_f(a, b, c), d, a, m[1], 12, 0xe8c7_b756);
168 c = md5_round(md5_f(d, a, b), c, d, m[2], 17, 0x2420_70db);
169 b = md5_round(md5_f(c, d, a), b, c, m[3], 22, 0xc1bd_ceee);
170 a = md5_round(md5_f(b, c, d), a, b, m[4], 7, 0xf57c_0faf);
171 d = md5_round(md5_f(a, b, c), d, a, m[5], 12, 0x4787_c62a);
172 c = md5_round(md5_f(d, a, b), c, d, m[6], 17, 0xa830_4613);
173 b = md5_round(md5_f(c, d, a), b, c, m[7], 22, 0xfd46_9501);
174 a = md5_round(md5_f(b, c, d), a, b, m[8], 7, 0x6980_98d8);
175 d = md5_round(md5_f(a, b, c), d, a, m[9], 12, 0x8b44_f7af);
176 c = md5_round(md5_f(d, a, b), c, d, m[10], 17, 0xffff_5bb1);
177 b = md5_round(md5_f(c, d, a), b, c, m[11], 22, 0x895c_d7be);
178 a = md5_round(md5_f(b, c, d), a, b, m[12], 7, 0x6b90_1122);
179 d = md5_round(md5_f(a, b, c), d, a, m[13], 12, 0xfd98_7193);
180 c = md5_round(md5_f(d, a, b), c, d, m[14], 17, 0xa679_438e);
181 b = md5_round(md5_f(c, d, a), b, c, m[15], 22, 0x49b4_0821);
182
183 a = md5_round(md5_g(b, c, d), a, b, m[1], 5, 0xf61e_2562);
184 d = md5_round(md5_g(a, b, c), d, a, m[6], 9, 0xc040_b340);
185 c = md5_round(md5_g(d, a, b), c, d, m[11], 14, 0x265e_5a51);
186 b = md5_round(md5_g(c, d, a), b, c, m[0], 20, 0xe9b6_c7aa);
187 a = md5_round(md5_g(b, c, d), a, b, m[5], 5, 0xd62f_105d);
188 d = md5_round(md5_g(a, b, c), d, a, m[10], 9, 0x0244_1453);
189 c = md5_round(md5_g(d, a, b), c, d, m[15], 14, 0xd8a1_e681);
190 b = md5_round(md5_g(c, d, a), b, c, m[4], 20, 0xe7d3_fbc8);
191 a = md5_round(md5_g(b, c, d), a, b, m[9], 5, 0x21e1_cde6);
192 d = md5_round(md5_g(a, b, c), d, a, m[14], 9, 0xc337_07d6);
193 c = md5_round(md5_g(d, a, b), c, d, m[3], 14, 0xf4d5_0d87);
194 b = md5_round(md5_g(c, d, a), b, c, m[8], 20, 0x455a_14ed);
195 a = md5_round(md5_g(b, c, d), a, b, m[13], 5, 0xa9e3_e905);
196 d = md5_round(md5_g(a, b, c), d, a, m[2], 9, 0xfcef_a3f8);
197 c = md5_round(md5_g(d, a, b), c, d, m[7], 14, 0x676f_02d9);
198 b = md5_round(md5_g(c, d, a), b, c, m[12], 20, 0x8d2a_4c8a);
199
200 a = md5_round(md5_h(b, c, d), a, b, m[5], 4, 0xfffa_3942);
201 d = md5_round(md5_h(a, b, c), d, a, m[8], 11, 0x8771_f681);
202 c = md5_round(md5_h(d, a, b), c, d, m[11], 16, 0x6d9d_6122);
203 b = md5_round(md5_h(c, d, a), b, c, m[14], 23, 0xfde5_380c);
204 a = md5_round(md5_h(b, c, d), a, b, m[1], 4, 0xa4be_ea44);
205 d = md5_round(md5_h(a, b, c), d, a, m[4], 11, 0x4bde_cfa9);
206 c = md5_round(md5_h(d, a, b), c, d, m[7], 16, 0xf6bb_4b60);
207 b = md5_round(md5_h(c, d, a), b, c, m[10], 23, 0xbebf_bc70);
208 a = md5_round(md5_h(b, c, d), a, b, m[13], 4, 0x289b_7ec6);
209 d = md5_round(md5_h(a, b, c), d, a, m[0], 11, 0xeaa1_27fa);
210 c = md5_round(md5_h(d, a, b), c, d, m[3], 16, 0xd4ef_3085);
211 b = md5_round(md5_h(c, d, a), b, c, m[6], 23, 0x0488_1d05);
212 a = md5_round(md5_h(b, c, d), a, b, m[9], 4, 0xd9d4_d039);
213 d = md5_round(md5_h(a, b, c), d, a, m[12], 11, 0xe6db_99e5);
214 c = md5_round(md5_h(d, a, b), c, d, m[15], 16, 0x1fa2_7cf8);
215 b = md5_round(md5_h(c, d, a), b, c, m[2], 23, 0xc4ac_5665);
216
217 a = md5_round(md5_i(b, c, d), a, b, m[0], 6, 0xf429_2244);
218 d = md5_round(md5_i(a, b, c), d, a, m[7], 10, 0x432a_ff97);
219 c = md5_round(md5_i(d, a, b), c, d, m[14], 15, 0xab94_23a7);
220 b = md5_round(md5_i(c, d, a), b, c, m[5], 21, 0xfc93_a039);
221 a = md5_round(md5_i(b, c, d), a, b, m[12], 6, 0x655b_59c3);
222 d = md5_round(md5_i(a, b, c), d, a, m[3], 10, 0x8f0c_cc92);
223 c = md5_round(md5_i(d, a, b), c, d, m[10], 15, 0xffef_f47d);
224 b = md5_round(md5_i(c, d, a), b, c, m[1], 21, 0x8584_5dd1);
225 a = md5_round(md5_i(b, c, d), a, b, m[8], 6, 0x6fa8_7e4f);
226 d = md5_round(md5_i(a, b, c), d, a, m[15], 10, 0xfe2c_e6e0);
227 c = md5_round(md5_i(d, a, b), c, d, m[6], 15, 0xa301_4314);
228 b = md5_round(md5_i(c, d, a), b, c, m[13], 21, 0x4e08_11a1);
229 a = md5_round(md5_i(b, c, d), a, b, m[4], 6, 0xf753_7e82);
230 d = md5_round(md5_i(a, b, c), d, a, m[11], 10, 0xbd3a_f235);
231 c = md5_round(md5_i(d, a, b), c, d, m[2], 15, 0x2ad7_d2bb);
232 b = md5_round(md5_i(c, d, a), b, c, m[9], 21, 0xeb86_d391);
233
234 [a0 + a, b0 + b, c0 + c, d0 + d]
235 }
236
237 #[inline]
238 #[expect(clippy::many_single_char_names)]
239 fn md5_round(f: U32Vector, a: U32Vector, b: U32Vector, m: U32Vector, s: u32, k: u32) -> U32Vector {
240 (f + a + m + U32Vector::splat(k)).rotate_left(s) + b
241 }
242
243 #[inline]
244 fn md5_f(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
245 (b & c) | (d & !b)
246 }
247
248 #[inline]
249 fn md5_g(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
250 (d & b) | (c & !d)
251 }
252
253 #[inline]
254 fn md5_h(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
255 b ^ c ^ d
256 }
257
258 #[inline]
259 fn md5_i(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
260 c ^ (b | !d)
261 }
262
263 pub(super) fn microbenchmark() {
264 let bench_string = BENCH_STRING.as_flattened();
265 for chunk in bench_string.chunks(32 * U32Vector::LANES) {
266 for len in 1..=32 {
267 std::hint::black_box(hash(&chunk[..len * U32Vector::LANES]));
268 }
269 }
270 }
271}
272
273const BENCH_STRING: [[u8; 32]; 128] = {
274 let mut out = [*b"abcdefghijklmnopqrstuvwxyz012345"; 128];
275 let mut i = 0;
276
277 #[expect(clippy::cast_possible_truncation)]
278 while i < out.len() {
279 out[i][i % 32] = i as u8;
280 i += 1;
281 }
282
283 out
284};
285
286#[inline]
305#[must_use]
306pub fn to_hex([a, b, c, d]: [u32; 4]) -> [u8; 32] {
307 let mut result = [0u8; 32];
308 result[0..8].copy_from_slice(&u32_to_hex(a));
309 result[8..16].copy_from_slice(&u32_to_hex(b));
310 result[16..24].copy_from_slice(&u32_to_hex(c));
311 result[24..32].copy_from_slice(&u32_to_hex(d));
312 result
313}
314
315#[inline]
316fn u32_to_hex(n: u32) -> [u8; 8] {
317 const SPLAT: u64 = 0x0101_0101_0101_0101;
318
319 let mut n = u64::from(n);
320 n = ((n & 0x0000_0000_FFFF_0000) << 16) | (n & 0x0000_0000_0000_FFFF);
323 n = ((n & 0x0000_FF00_0000_FF00) << 8) | (n & 0x0000_00FF_0000_00FF);
326 n = ((n & 0x00F0_00F0_00F0_00F0) << 4) | (n & 0x000F_000F_000F_000F);
329 let letter_positions = (n + ((128 - 10) * SPLAT)) & (128 * SPLAT);
332 let letter_mask = letter_positions - (letter_positions >> 7);
335 let hex = (n + u64::from(b'0') * SPLAT) + (letter_mask & (u64::from(b'a' - b'0' - 10) * SPLAT));
338 hex.to_be_bytes()
341}