utils/md5/
mod.rs

1//! Implementation of the MD5 hash function.
2//!
3//! **WARNING: Don't use MD5 for anything remotely security-sensitive!**
4//! This implementation is meant to be used for Advent of Code puzzles only.
5//!
6//! The vectorized versions hash multiple inputs of the same length at once, which provides a
7//! significant performance increase for the brute force puzzle solutions.
8use crate::multiversion;
9use crate::multiversion::Version;
10use std::array;
11use std::sync::LazyLock;
12
13mod bruteforce;
14pub use bruteforce::find_hash_with_appended_count;
15
16#[cfg(test)]
17mod tests;
18
19/// Fastest supported implementation, for dynamic dispatch.
20///
21/// Determined using a small microbenchmark at runtime.
22pub static FASTEST: LazyLock<Version> = multiversion! { fastest(microbenchmark()) };
23
24/// Returns the MD5 hash of the input slice.
25///
26/// Wrapper around the [`scalar`] implementation.
27///
28/// # Examples
29///
30/// ```
31/// # use utils::md5::hash;
32/// assert_eq!(hash(b"").as_slice(), &[0xd41d8cd9, 0x8f00b204, 0xe9800998, 0xecf8427e]);
33/// assert_eq!(hash(b"Hello World").as_slice(), &[0xb10a8db1, 0x64e07541, 0x05b7a99b, 0xe72e3fe5]);
34/// ```
35#[must_use]
36pub fn hash(buf: &[u8]) -> [u32; 4] {
37    scalar::hash(buf)[0]
38}
39
40multiversion! {
41    use {crate::simd::*};
42
43    // The length of 1/2/3/4 bytes for each lane
44    const ONE_BYTE: usize = U32Vector::LANES;
45    const TWO_BYTES: usize = 2 * U32Vector::LANES;
46    const THREE_BYTES: usize = 3 * U32Vector::LANES;
47    const FOUR_BYTES: usize = 4 * U32Vector::LANES;
48
49    /// [`multiversion!`] MD5 hash implementation.
50    ///
51    /// The bytes for each lane must be interweaved, and each lane must be the same length.
52    ///
53    /// # Examples
54    ///
55    /// For [`array128`](crate::simd::array128) with four lanes:
56    /// ```
57    /// # use utils::md5::{self, array128};
58    /// assert_eq!(
59    ///     array128::hash(b"hwafeobglrchlldiodej"),
60    ///     [
61    ///         md5::hash(b"hello"),
62    ///         md5::hash(b"world"),
63    ///         md5::hash(b"abcde"),
64    ///         md5::hash(b"fghij"),
65    ///     ],
66    /// );
67    #[must_use]
68    pub fn hash(mut buf: &[u8]) -> [[u32; 4]; U32Vector::LANES] {
69        assert_eq!(buf.len() % U32Vector::LANES, 0);
70        let bytes = buf.len() / U32Vector::LANES;
71
72        let mut state = [
73            U32Vector::splat(0x6745_2301),
74            U32Vector::splat(0xefcd_ab89),
75            U32Vector::splat(0x98ba_dcfe),
76            U32Vector::splat(0x1032_5476),
77        ];
78
79        let mut end_marker_written = false;
80        let mut bit_count_written = false;
81        while !bit_count_written {
82            let mut words = [U32Vector::splat(0); 16];
83
84            let remaining = (buf.len() / FOUR_BYTES).min(16);
85            for (w, chunk) in words.iter_mut().zip(buf.chunks_exact(FOUR_BYTES)) {
86                *w = gather(chunk.try_into().unwrap());
87            }
88            buf = &buf[remaining * FOUR_BYTES..];
89
90            if remaining < 16 {
91                if !end_marker_written {
92                    // 0x80 end marker after final byte
93                    words[remaining] = gather_remaining(buf);
94                    buf = &[];
95                    end_marker_written = true;
96                }
97
98                if !bit_count_written && remaining <= 13 {
99                    let bits = bytes as u64 * 8;
100                    words[14] = U32Vector::splat((bits & 0xFFFF_FFFF) as u32);
101                    words[15] = U32Vector::splat((bits >> 32) as u32);
102                    bit_count_written = true;
103                }
104            }
105
106            state = md5_block(state, &words);
107        }
108
109        // `state.map(|x| x.into());` doesn't always get vectorised
110        let state: [[u32; U32Vector::LANES]; 4] = array::from_fn(|i| state[i].into());
111
112        array::from_fn(|i| {
113            [
114                state[0][i].swap_bytes(),
115                state[1][i].swap_bytes(),
116                state[2][i].swap_bytes(),
117                state[3][i].swap_bytes(),
118            ]
119        })
120    }
121
122    #[inline]
123    fn gather(buf: &[u8; FOUR_BYTES]) -> U32Vector {
124        let mut values = [0u32; U32Vector::LANES];
125        for (i, v) in values.iter_mut().enumerate() {
126            *v = u32::from_le_bytes([
127                buf[i], buf[ONE_BYTE + i], buf[TWO_BYTES + i], buf[THREE_BYTES + i]
128            ]);
129        }
130        values.into()
131    }
132
133    #[inline]
134    fn gather_remaining(buf: &[u8]) -> U32Vector {
135        match buf.len() {
136            THREE_BYTES => {
137                let mut values = [0u32; U32Vector::LANES];
138                for (i, v) in values.iter_mut().enumerate() {
139                    *v = u32::from_le_bytes([buf[i], buf[ONE_BYTE + i], buf[TWO_BYTES + i], 0x80]);
140                }
141                values.into()
142            }
143            TWO_BYTES => {
144                let mut values = [0u32; U32Vector::LANES];
145                for (i, v) in values.iter_mut().enumerate() {
146                    *v = u32::from_le_bytes([buf[i], buf[ONE_BYTE + i], 0x80, 0]);
147                }
148                values.into()
149            }
150            ONE_BYTE => {
151                let mut values = [0u32; U32Vector::LANES];
152                for (i, v) in values.iter_mut().enumerate() {
153                    *v = u32::from_le_bytes([buf[i], 0x80, 0, 0]);
154                }
155                values.into()
156            }
157            0 => U32Vector::splat(0x80),
158            _ => unreachable!("less than 4 bytes left"),
159        }
160    }
161
162    #[expect(clippy::many_single_char_names)]
163    fn md5_block([a0, b0, c0, d0]: [U32Vector; 4], m: &[U32Vector; 16]) -> [U32Vector; 4] {
164        let (mut a, mut b, mut c, mut d) = (a0, b0, c0, d0);
165
166        a = md5_round(md5_f(b, c, d), a, b, m[0], 7, 0xd76a_a478);
167        d = md5_round(md5_f(a, b, c), d, a, m[1], 12, 0xe8c7_b756);
168        c = md5_round(md5_f(d, a, b), c, d, m[2], 17, 0x2420_70db);
169        b = md5_round(md5_f(c, d, a), b, c, m[3], 22, 0xc1bd_ceee);
170        a = md5_round(md5_f(b, c, d), a, b, m[4], 7, 0xf57c_0faf);
171        d = md5_round(md5_f(a, b, c), d, a, m[5], 12, 0x4787_c62a);
172        c = md5_round(md5_f(d, a, b), c, d, m[6], 17, 0xa830_4613);
173        b = md5_round(md5_f(c, d, a), b, c, m[7], 22, 0xfd46_9501);
174        a = md5_round(md5_f(b, c, d), a, b, m[8], 7, 0x6980_98d8);
175        d = md5_round(md5_f(a, b, c), d, a, m[9], 12, 0x8b44_f7af);
176        c = md5_round(md5_f(d, a, b), c, d, m[10], 17, 0xffff_5bb1);
177        b = md5_round(md5_f(c, d, a), b, c, m[11], 22, 0x895c_d7be);
178        a = md5_round(md5_f(b, c, d), a, b, m[12], 7, 0x6b90_1122);
179        d = md5_round(md5_f(a, b, c), d, a, m[13], 12, 0xfd98_7193);
180        c = md5_round(md5_f(d, a, b), c, d, m[14], 17, 0xa679_438e);
181        b = md5_round(md5_f(c, d, a), b, c, m[15], 22, 0x49b4_0821);
182
183        a = md5_round(md5_g(b, c, d), a, b, m[1], 5, 0xf61e_2562);
184        d = md5_round(md5_g(a, b, c), d, a, m[6], 9, 0xc040_b340);
185        c = md5_round(md5_g(d, a, b), c, d, m[11], 14, 0x265e_5a51);
186        b = md5_round(md5_g(c, d, a), b, c, m[0], 20, 0xe9b6_c7aa);
187        a = md5_round(md5_g(b, c, d), a, b, m[5], 5, 0xd62f_105d);
188        d = md5_round(md5_g(a, b, c), d, a, m[10], 9, 0x0244_1453);
189        c = md5_round(md5_g(d, a, b), c, d, m[15], 14, 0xd8a1_e681);
190        b = md5_round(md5_g(c, d, a), b, c, m[4], 20, 0xe7d3_fbc8);
191        a = md5_round(md5_g(b, c, d), a, b, m[9], 5, 0x21e1_cde6);
192        d = md5_round(md5_g(a, b, c), d, a, m[14], 9, 0xc337_07d6);
193        c = md5_round(md5_g(d, a, b), c, d, m[3], 14, 0xf4d5_0d87);
194        b = md5_round(md5_g(c, d, a), b, c, m[8], 20, 0x455a_14ed);
195        a = md5_round(md5_g(b, c, d), a, b, m[13], 5, 0xa9e3_e905);
196        d = md5_round(md5_g(a, b, c), d, a, m[2], 9, 0xfcef_a3f8);
197        c = md5_round(md5_g(d, a, b), c, d, m[7], 14, 0x676f_02d9);
198        b = md5_round(md5_g(c, d, a), b, c, m[12], 20, 0x8d2a_4c8a);
199
200        a = md5_round(md5_h(b, c, d), a, b, m[5], 4, 0xfffa_3942);
201        d = md5_round(md5_h(a, b, c), d, a, m[8], 11, 0x8771_f681);
202        c = md5_round(md5_h(d, a, b), c, d, m[11], 16, 0x6d9d_6122);
203        b = md5_round(md5_h(c, d, a), b, c, m[14], 23, 0xfde5_380c);
204        a = md5_round(md5_h(b, c, d), a, b, m[1], 4, 0xa4be_ea44);
205        d = md5_round(md5_h(a, b, c), d, a, m[4], 11, 0x4bde_cfa9);
206        c = md5_round(md5_h(d, a, b), c, d, m[7], 16, 0xf6bb_4b60);
207        b = md5_round(md5_h(c, d, a), b, c, m[10], 23, 0xbebf_bc70);
208        a = md5_round(md5_h(b, c, d), a, b, m[13], 4, 0x289b_7ec6);
209        d = md5_round(md5_h(a, b, c), d, a, m[0], 11, 0xeaa1_27fa);
210        c = md5_round(md5_h(d, a, b), c, d, m[3], 16, 0xd4ef_3085);
211        b = md5_round(md5_h(c, d, a), b, c, m[6], 23, 0x0488_1d05);
212        a = md5_round(md5_h(b, c, d), a, b, m[9], 4, 0xd9d4_d039);
213        d = md5_round(md5_h(a, b, c), d, a, m[12], 11, 0xe6db_99e5);
214        c = md5_round(md5_h(d, a, b), c, d, m[15], 16, 0x1fa2_7cf8);
215        b = md5_round(md5_h(c, d, a), b, c, m[2], 23, 0xc4ac_5665);
216
217        a = md5_round(md5_i(b, c, d), a, b, m[0], 6, 0xf429_2244);
218        d = md5_round(md5_i(a, b, c), d, a, m[7], 10, 0x432a_ff97);
219        c = md5_round(md5_i(d, a, b), c, d, m[14], 15, 0xab94_23a7);
220        b = md5_round(md5_i(c, d, a), b, c, m[5], 21, 0xfc93_a039);
221        a = md5_round(md5_i(b, c, d), a, b, m[12], 6, 0x655b_59c3);
222        d = md5_round(md5_i(a, b, c), d, a, m[3], 10, 0x8f0c_cc92);
223        c = md5_round(md5_i(d, a, b), c, d, m[10], 15, 0xffef_f47d);
224        b = md5_round(md5_i(c, d, a), b, c, m[1], 21, 0x8584_5dd1);
225        a = md5_round(md5_i(b, c, d), a, b, m[8], 6, 0x6fa8_7e4f);
226        d = md5_round(md5_i(a, b, c), d, a, m[15], 10, 0xfe2c_e6e0);
227        c = md5_round(md5_i(d, a, b), c, d, m[6], 15, 0xa301_4314);
228        b = md5_round(md5_i(c, d, a), b, c, m[13], 21, 0x4e08_11a1);
229        a = md5_round(md5_i(b, c, d), a, b, m[4], 6, 0xf753_7e82);
230        d = md5_round(md5_i(a, b, c), d, a, m[11], 10, 0xbd3a_f235);
231        c = md5_round(md5_i(d, a, b), c, d, m[2], 15, 0x2ad7_d2bb);
232        b = md5_round(md5_i(c, d, a), b, c, m[9], 21, 0xeb86_d391);
233
234        [a0 + a, b0 + b, c0 + c, d0 + d]
235    }
236
237    #[inline]
238    #[expect(clippy::many_single_char_names)]
239    fn md5_round(f: U32Vector, a: U32Vector, b: U32Vector, m: U32Vector, s: u32, k: u32) -> U32Vector {
240        (f + a + m + U32Vector::splat(k)).rotate_left(s) + b
241    }
242
243    #[inline]
244    fn md5_f(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
245        (b & c) | (d & !b)
246    }
247
248    #[inline]
249    fn md5_g(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
250        (d & b) | (c & !d)
251    }
252
253    #[inline]
254    fn md5_h(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
255        b ^ c ^ d
256    }
257
258    #[inline]
259    fn md5_i(b: U32Vector, c: U32Vector, d: U32Vector) -> U32Vector {
260        c ^ (b | !d)
261    }
262
263    pub(super) fn microbenchmark() {
264        let bench_string = BENCH_STRING.as_flattened();
265        for chunk in bench_string.chunks(32 * U32Vector::LANES) {
266            for len in 1..=32 {
267                std::hint::black_box(hash(&chunk[..len * U32Vector::LANES]));
268            }
269        }
270    }
271}
272
273const BENCH_STRING: [[u8; 32]; 128] = {
274    let mut out = [*b"abcdefghijklmnopqrstuvwxyz012345"; 128];
275    let mut i = 0;
276
277    #[expect(clippy::cast_possible_truncation)]
278    while i < out.len() {
279        out[i][i % 32] = i as u8;
280        i += 1;
281    }
282
283    out
284};
285
286/// Convert an MD5 hash to ASCII hex.
287///
288/// Implemented using bitwise operations on each [`u32`] to spread each nibble into separate bytes,
289/// followed by adding either '0' or 'a' to each byte using a bitmask.
290///
291/// When using the vectorized AVX2 MD5 implementation, this makes
292/// [2016 day 14](../../year2016/struct.Day14.html) roughly 4x faster compared to using a naive
293/// [`format!`] implementation `format!("{a:08x}{b:08x}{c:08x}{d:08x}")`.
294///
295/// # Examples
296///
297/// ```
298/// # use utils::md5::to_hex;
299/// assert_eq!(
300///     to_hex([0xd41d8cd9, 0x8f00b204, 0xe9800998, 0xecf8427e]),
301///     *b"d41d8cd98f00b204e9800998ecf8427e",
302/// );
303/// ```
304#[inline]
305#[must_use]
306pub fn to_hex([a, b, c, d]: [u32; 4]) -> [u8; 32] {
307    let mut result = [0u8; 32];
308    result[0..8].copy_from_slice(&u32_to_hex(a));
309    result[8..16].copy_from_slice(&u32_to_hex(b));
310    result[16..24].copy_from_slice(&u32_to_hex(c));
311    result[24..32].copy_from_slice(&u32_to_hex(d));
312    result
313}
314
315#[inline]
316fn u32_to_hex(n: u32) -> [u8; 8] {
317    const SPLAT: u64 = 0x0101_0101_0101_0101;
318
319    let mut n = u64::from(n);
320    // n = 0x0000_0000_1234_ABCD
321
322    n = ((n & 0x0000_0000_FFFF_0000) << 16) | (n & 0x0000_0000_0000_FFFF);
323    // n = 0x0000_1234_0000_ABCD
324
325    n = ((n & 0x0000_FF00_0000_FF00) << 8) | (n & 0x0000_00FF_0000_00FF);
326    // n = 0x0012_0034_00AB_00CD
327
328    n = ((n & 0x00F0_00F0_00F0_00F0) << 4) | (n & 0x000F_000F_000F_000F);
329    // n = 0x0102_0304_0A0B_0C0D
330
331    let letter_positions = (n + ((128 - 10) * SPLAT)) & (128 * SPLAT);
332    // letter_positions = 0x0000_0000_8080_8080
333
334    let letter_mask = letter_positions - (letter_positions >> 7);
335    // letter_mask = 0x0000_0000_7F7F_7F7F
336
337    let hex = (n + u64::from(b'0') * SPLAT) + (letter_mask & (u64::from(b'a' - b'0' - 10) * SPLAT));
338    // hex = 0x3132_3334_6162_6364
339
340    hex.to_be_bytes()
341}