year2018/
day14.rs

1use utils::prelude::*;
2
3/// Searching for a pattern in an infinite recipe list.
4///
5/// This implementation is based on
6/// [/u/askalski's post "Breaking the 1 billion recipes per second barrier"](https://www.reddit.com/r/adventofcode/comments/a6wpwa/2018_day_14_breaking_the_1_billion_recipes_per/)
7/// and accompanying [gist](https://gist.github.com/Voltara/5069980afbf6cf0762fcbb09948e5649),
8/// adapted to safe Rust without intrinsics or explicit SIMD while achieving similar performance.
9///
10/// The key optimization is that all the elves converge on the same subset of recipes after the
11/// first 23 recipes, allowing them to be processed in bulk using SIMD.
12#[derive(Clone, Debug)]
13pub struct Day14 {
14    part1: [u8; 10],
15    part2: usize,
16}
17
18impl Day14 {
19    pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
20        let pattern = parser::digit()
21            .repeat_n(parser::noop())
22            .parse_complete(input)?;
23        let index = input.parse().unwrap();
24
25        let mut searcher = Searcher::new(pattern);
26        let part2 = searcher.run();
27
28        // Use a string for part 1 to preserve leading zeros
29        let mut part1 = [0u8; 10];
30        part1.copy_from_slice(&searcher.recipes[index..index + 10]);
31        part1.iter_mut().for_each(|x| *x += b'0');
32
33        Ok(Self { part1, part2 })
34    }
35
36    #[must_use]
37    pub fn part1(&self) -> &str {
38        str::from_utf8(&self.part1).unwrap()
39    }
40
41    #[must_use]
42    pub fn part2(&self) -> usize {
43        self.part2
44    }
45}
46
47struct Searcher {
48    pattern: [u8; 6],
49    recipes: Vec<u8>,
50    recipes_len: usize,
51    packed: Vec<u8>,
52    packed_len: usize,
53    packed_next_idx: usize,
54    packed_last_idx: usize,
55    elves: [usize; 2],
56    initial: [i32; 2],
57    searched: usize,
58}
59
60// Pre-calculated sequences of recipes processed before index 23 where they all converge
61const INITIAL_SEQUENCES: [u32; 9] = [
62    0xfff94113, 0xffff0657, 0xfff94111, 0xfff94110, 0xffff9411, 0xffff9410, 0xfffff941, 0xffff1812,
63    0xffffff94,
64];
65
66// The first 23 recipes
67const RECIPES: [u8; 23] = [
68    3, 7, 1, 0, 1, 0, 1, 2, 4, 5, 1, 5, 8, 9, 1, 6, 7, 7, 9, 2, 5, 1, 0,
69];
70
71const RECIPES_LEN: usize = 22_000_000;
72const PACKED_LEN: usize = RECIPES_LEN / 4;
73const SEARCH_INTERVAL: usize = 16384;
74
75impl Searcher {
76    fn new(pattern: [u8; 6]) -> Self {
77        // Prefill the recipes array with ones so 2-digit recipes can be written with a single
78        // write by skipping over the leading 1.
79        let mut recipes = vec![1u8; RECIPES_LEN];
80        recipes[..RECIPES.len()].copy_from_slice(&RECIPES);
81
82        Self {
83            pattern,
84            recipes,
85            recipes_len: RECIPES.len(),
86            packed: vec![0u8; PACKED_LEN],
87            packed_len: 0,
88            packed_next_idx: RECIPES.len(),
89            packed_last_idx: RECIPES.len(),
90            elves: [0, 0],
91            initial: [INITIAL_SEQUENCES[0] as i32, INITIAL_SEQUENCES[8] as i32],
92            searched: 0,
93        }
94    }
95
96    #[inline]
97    fn append_recipe(&mut self, mut recipe: u8) {
98        // Tens digit
99        if recipe >= 10 {
100            recipe -= 10;
101            if self.recipes_len == self.packed_next_idx {
102                self.packed[self.packed_len] = 1;
103                self.packed_len += 1;
104                self.packed_last_idx = self.packed_next_idx;
105                self.packed_next_idx += 2;
106            }
107            // No write to self.recipes as the array is prefilled with 1
108            self.recipes_len += 1;
109        }
110
111        // Ones digit
112        if self.recipes_len == self.packed_next_idx {
113            self.packed[self.packed_len] = recipe;
114            self.packed_len += 1;
115            self.packed_last_idx = self.packed_next_idx;
116            self.packed_next_idx += recipe as usize + 1;
117        }
118        self.recipes[self.recipes_len] = recipe;
119        self.recipes_len += 1;
120
121        // Handle wrapping around to the start
122        for i in 0..2 {
123            if self.elves[i] == self.packed_len {
124                let initial_index = self.packed_last_idx
125                    + (self.packed[self.packed_len - 1] as usize + 1)
126                    - self.recipes_len;
127                self.initial[i] = INITIAL_SEQUENCES[initial_index] as i32;
128                self.elves[i] = 0;
129            }
130        }
131    }
132
133    #[inline]
134    fn run(&mut self) -> usize {
135        loop {
136            // At least one elf is in the first 23 recipes, before the pattern coverages
137            while self.initial[0] != -1 || self.initial[1] != -1 {
138                let mut recipe = 0;
139                for i in 0..2 {
140                    if self.initial[i] != -1 {
141                        recipe += (self.initial[i] & 0xF) as u8;
142                        self.initial[i] >>= 4;
143                    } else {
144                        recipe += self.packed[self.elves[i]];
145                        self.elves[i] += 1;
146                    }
147                }
148                self.append_recipe(recipe);
149            }
150
151            // Both elves are after recipe 23, so recipes can be processed in bulk
152            loop {
153                let mut iterations = (self.packed_len - self.elves[0].max(self.elves[1])) / 32;
154                if iterations == 0 {
155                    break;
156                }
157
158                while iterations > 0 {
159                    if iterations > 16 {
160                        self.bulk_mix::<512>();
161                        iterations -= 16;
162                    } else {
163                        self.bulk_mix::<32>();
164                        iterations -= 1;
165                    }
166
167                    // Periodically search for the pattern
168                    if self.searched + SEARCH_INTERVAL < self.recipes_len
169                        && let Some(index) = self.search()
170                    {
171                        return index;
172                    }
173                }
174
175                // Pack the new recipes
176                while self.packed_next_idx < self.recipes_len {
177                    self.packed_last_idx = self.packed_next_idx;
178                    self.packed[self.packed_len] = self.recipes[self.packed_next_idx];
179                    self.packed_len += 1;
180                    self.packed_next_idx += self.recipes[self.packed_next_idx] as usize + 1;
181                }
182            }
183
184            // Handle the remaining recipes before wrapping around to the start
185            while self.initial[0] == -1 && self.initial[1] == -1 {
186                let sum = self.packed[self.elves[0]] + self.packed[self.elves[1]];
187                self.elves[0] += 1;
188                self.elves[1] += 1;
189                self.append_recipe(sum);
190            }
191        }
192    }
193
194    #[inline]
195    fn bulk_mix<const N: usize>(&mut self) {
196        const { assert!(N.is_multiple_of(32)) }
197
198        let v0 = &self.packed[self.elves[0]..self.elves[0] + N];
199        let v1 = &self.packed[self.elves[1]..self.elves[1] + N];
200        self.elves[0] += N;
201        self.elves[1] += N;
202
203        // Calculate the ones digit and carry for each recipe sum.
204        // This loop should be vectorized by the compiler.
205        let mut digits = [0u8; N];
206        let mut carry = [0u8; N];
207        for i in 0..N {
208            let s = v0[i] + v1[i];
209            let c = u8::from(s >= 10);
210            carry[i] = c;
211            digits[i] = s - (c * 10);
212        }
213
214        // Process the carry and digits sequentially in chunks of 8. This allows calculating the
215        // indexes for all 8 items in a few u64 operations, avoiding dependencies between each
216        // recipe.
217        for (digits, carry) in digits.chunks_exact(8).zip(carry.chunks_exact(8)) {
218            let slice = &mut self.recipes[self.recipes_len..self.recipes_len + 256];
219
220            let mut indexes = u64::from_le_bytes(carry.try_into().unwrap());
221            // Each recipe after the first is offset by 1
222            indexes += 0x0101_0101_0101_0100;
223            // Bytewise prefix sum
224            indexes += indexes << 8;
225            indexes += indexes << 16;
226            indexes += indexes << 32;
227
228            for (i, &d) in digits.iter().enumerate() {
229                // Use a u8 index into a 256 long slice to remove bounds checks here, reducing
230                // the number of bounds checks to 1 per 8 recipes.
231                let idx = (indexes >> (i * 8)) as u8;
232                slice[idx as usize] = d;
233            }
234
235            self.recipes_len += (indexes >> 56) as usize + 1;
236        }
237    }
238
239    #[inline]
240    fn search(&mut self) -> Option<usize> {
241        // Search for the pattern in sliding 64-byte windows
242        while self.searched + 64 + 5 < self.recipes_len {
243            // Find matches of the first two bytes across the window.
244            // This loop should be vectorized by the compiler.
245            let mut candidates = [0u8; 64];
246            let mut any_matches = false;
247            for (i, (&a, &b)) in self.recipes[self.searched..self.searched + 64]
248                .iter()
249                .zip(&self.recipes[self.searched + 1..self.searched + 65])
250                .enumerate()
251            {
252                let matches = (a == self.pattern[0]) & (b == self.pattern[1]);
253                // wrapping_neg produces 0xFF for matches and 0x00 for non-matches, which matches
254                // x86 SIMD comparison function semantics, slightly improving codegen.
255                candidates[i] = u8::from(matches).wrapping_neg();
256                any_matches |= matches;
257            }
258
259            // If no matches were found, skip to the next window.
260            // Do this before calculating the mask which can be expensive.
261            if !any_matches {
262                self.searched += 64;
263                continue;
264            }
265
266            let mut mask = candidates
267                .iter()
268                .enumerate()
269                .fold(0u64, |acc, (i, &x)| acc | u64::from(x != 0) << i);
270
271            // For each bit in the mask, check if the full pattern matches.
272            while mask != 0 {
273                let m = mask.trailing_zeros() as usize;
274                mask &= !(1 << m);
275
276                let index = self.searched + m;
277                if self.recipes[index..index + 6] == self.pattern {
278                    return Some(index);
279                }
280            }
281
282            self.searched += 64;
283        }
284
285        None
286    }
287}
288
289examples!(Day14 -> (&'static str, usize) [
290    // Custom examples
291    {input: "100100", part1: "8239938101", part2: 377203},
292    {input: "123456", part1: "1371276618", part2: 450697},
293    {input: "924933", part1: "0012267210", part2: 16462928},
294    {input: "054274", part1: "6112136872", part2: 21000203},
295]);