// utils/parser/macros.rs

/// Macro to define a parser which consumes a single byte and maps it using a lookup table.
///
/// This macro is a wrapper around [`parser::byte_lut`](crate::parser::byte_lut) to allow defining
/// the lookup table using a match-like syntax. Each expression must be const and evaluate to a
/// value of the same copy type.
///
/// Both the lookup table and the error message are built at compile time. Compilation fails if:
/// - fewer than two byte literals are provided,
/// - the same byte literal is listed more than once, or
/// - a literal makes the generated error message invalid UTF-8 (such as a non-ASCII byte).
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// let parser = parser::byte_map!(
///     b'#' => true,
///     b'.' | b'S' => false,
/// );
/// assert_eq!(parser.parse(b"#.S##"), Ok((true, &b".S##"[..])));
/// assert_eq!(parser.parse(b".S##"), Ok((false, &b"S##"[..])));
/// assert_eq!(parser.parse(b"S##"), Ok((false, &b"##"[..])));
///
/// let (err, remaining) = parser.parse(b"abc").unwrap_err();
/// assert_eq!(err.to_string(), "expected one of '#', '.', 'S'");
/// assert_eq!(remaining, &b"abc"[..]);
/// ```
#[macro_export]
macro_rules! parser_byte_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        // `let _: u8 = $l` ensures $l is used in the repetition and also ensures all the literals
        // are byte literals
        const COUNT: usize = 0usize $($(+ {let _: u8 = $l; 1usize})+)+;
        // "expected one of " (16 bytes), then `'x'` (3 bytes) per literal, joined by ", " (2 bytes).
        // COUNT is always at least 1 here because the matcher requires one literal.
        const LEN: usize = 16 + 3 * COUNT + 2 * (COUNT - 1);
        const {
            assert!(COUNT >= 2, "at least two literals must be provided");
        }

        // Once concat_bytes! is stabilized this error message can be created in the macro similar
        // to parser_literal_map!
        const ERROR: [u8; LEN] = {
            let prefix = b"expected one of ";
            let literals = [$($($l),+),+];
            let mut message = [0u8; LEN];

            // `at` is the next unwritten position in `message`.
            let mut at = 0;
            while at < prefix.len() {
                message[at] = prefix[at];
                at += 1;
            }

            let mut i = 0;
            while i < COUNT {
                // Separator goes before every literal except the first.
                if i > 0 {
                    message[at] = b',';
                    message[at + 1] = b' ';
                    at += 2;
                }
                message[at] = b'\'';
                message[at + 1] = literals[i];
                message[at + 2] = b'\'';
                at += 3;
                i += 1;
            }

            // Guards the LEN formula above: the buffer must be filled exactly.
            assert!(at == LEN, "error message length mismatch");
            message
        };

        $crate::parser::byte_lut(&const {
            // Don't use a const item for the lut to avoid naming the value type
            let mut lut = [None; 256];
            $($(
                assert!(lut[$l as usize].is_none(), "duplicate literal");
                lut[$l as usize] = Some($e);
            )+)+
            lut
        }, const {
            match str::from_utf8(&ERROR) {
                Ok(v) => v,
                Err(_) => panic!("one or more of the provided literals is invalid unicode"),
            }
        })
    }};
}
74
/// Macro to define a parser for one or more string literals, mapping the results.
///
/// This is a replacement for
/// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::B)))`
/// which produces more optimized assembly and is easier to read and write.
///
/// The string patterns are tried in the order provided and the first match wins, so when one
/// literal is a prefix of another (for example `"n"` and `"north"`) the longer literal must be
/// listed first.
///
/// If no literal matches, the input is returned unconsumed alongside an error naming the expected
/// literal(s).
///
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
/// literals separated by commas, over 2x faster.
///
/// See also [`parser::parsable_enum!`](crate::parser::parsable_enum), which provides a macro to
/// define an enum and literal parser together.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// #[derive(Debug, PartialEq)]
/// enum Example {
///     A,
///     B,
///     C,
/// }
///
/// let parser = parser::literal_map!(
///     "A" | "a" => Example::A,
///     "B" => Example::B,
///     "C" => Example::C,
/// );
/// assert_eq!(parser.parse(b"A"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"a"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"B"), Ok((Example::B, &b""[..])));
/// assert_eq!(parser.parse(b"C"), Ok((Example::C, &b""[..])));
/// assert!(parser.parse(b"D").is_err());
/// ```
#[macro_export]
macro_rules! parser_literal_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        // Identity function whose bound pins the closure's signature, so the closure is inferred
        // as higher-ranked over the input lifetime.
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // One prefix test per literal, in the order written by the caller.
            $($(
                if let Some(rest) = input.strip_prefix($l.as_bytes()) {
                    return Ok((($e), rest));
                }
            )+)+

            Err(($crate::parser_literal_map!(@error $($($l)+)+), input))
        })
    }};
    // Two or more literals: list every option in a message assembled at compile time.
    (@error $first:literal $($l:literal)+) => {
        $crate::parser::ParseError::Custom(concat!("expected one of '", $first, "'", $(", '", $l, "'",)+))
    };
    // Exactly one literal: report it directly.
    (@error $first:literal) => {
        $crate::parser::ParseError::ExpectedLiteral($first)
    };
}
134
/// Macro to define an enum that implements [`Parseable`](crate::parser::Parseable).
///
/// Each variant is written as one or more string literals followed by `=>` and the variant name,
/// optionally followed by `= <discriminant>`. Attributes may be placed on the enum and on each
/// variant. The generated enum is always declared `pub`.
///
/// The parser is implemented using [`parser::literal_map!`](crate::parser::literal_map), so
/// literals are tried in the order written.
///
/// The enum also gets a private associated constant `ALL` containing every variant in
/// declaration order.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, Parseable, self};
/// parser::parsable_enum! {
///     #[derive(Debug, PartialEq, Default)]
///     enum Direction {
///         #[default]
///         "north" | "n" => North,
///         "south" | "s" => South,
///         "east" | "e" => East,
///         "west" | "w" => West,
///     }
/// }
///
/// assert_eq!(Direction::PARSER.parse(b"north"), Ok((Direction::North, &b""[..])));
/// assert_eq!(Direction::PARSER.parse(b"s"), Ok((Direction::South, &b""[..])));
/// assert!(Direction::PARSER.parse(b"a").is_err());
/// ```
#[macro_export]
macro_rules! parser_parsable_enum {
    (
        $(#[$enum_meta:meta])*
        enum $name:ident {$(
            $(#[$meta:meta])*
            $($l:literal)|+ => $variant:ident $(= $value:expr)?,
        )+}
    ) => {
        $(#[$enum_meta])*
        pub enum $name {$(
            $(#[$meta])*
            $variant $(= $value)?,
        )+}

        impl $name {
            /// Every variant, in declaration order.
            // Not every caller of the macro uses this list, so don't warn when it is unused.
            #[allow(dead_code)]
            const ALL: &'static [Self] = &[$(
                Self::$variant,
            )+];
        }

        impl $crate::parser::Parseable for $name {
            // A plain function pointer lets the non-capturing closure produced by
            // `parser_literal_map!` be stored in an associated constant.
            type Parser = for<'a> fn(&'a [u8]) -> $crate::parser::ParseResult<'a, Self>;
            const PARSER: Self::Parser = $crate::parser_literal_map!($(
                $($l)|+ => Self::$variant,
            )+);
        }
    };
}
186
/// Macro to define a custom parser using a `match` inspired parse tree syntax.
///
/// Each rule is made up of a comma-separated list of chained parsers enclosed in parentheses on
/// the left-hand side. Parsers can be prefixed with an identifier followed by `@` to store the
/// result of that parser in the supplied variable, similar to normal match patterns.
///
/// After the list of parsers, there is an arrow determining the functionality of the rule when the
/// parsers match:
/// - **Expression (`=>`)**: The expression on the right-hand side is evaluated and returned.
/// - **Fallible (`=?>`)**: Similar to Expression, but the right-hand side must evaluate to a
///   [`Result`]. If the expression evaluates to [`Ok`], the value contained inside is returned.
///   Otherwise, the string contained inside the [`Err`] is handled as a custom
///   [`ParseError`](super::ParseError), and parsing will continue with the following rule.
/// - **Subtree (`=>>`)**: The right-hand side is a nested set of rules enclosed in braces.
///
/// Rules are tried in the order written. If none of the rules match successfully, the error from
/// the rule which parsed furthest into the input is returned; when several rules get equally far,
/// the error from the earliest one is kept.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// #
/// #[derive(Debug, PartialEq)]
/// enum Register {
///     A, B, C
/// }
///
/// #[derive(Debug, PartialEq)]
/// enum Instruction {
///     Add(Register, Register),
///     AddConstant(Register, i32),
///     Copy(Register, Register),
///     Noop,
/// }
///
/// let register = parser::literal_map!(
///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
/// );
///
/// let instruction = parser::parse_tree!(
///     ("add ", r @ register, ", ") =>> {
///         (r2 @ register) => Instruction::Add(r, r2),
///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
///     },
///     ("copy ", r @ register, ", ", r2 @ register) =?> {
///         if r == r2 {
///             Err("cannot copy register to itself")
///         } else {
///             Ok(Instruction::Copy(r, r2))
///         }
///     },
///     ("noop") => Instruction::Noop,
/// );
///
/// assert_eq!(
///     instruction.parse_complete("add A, B").unwrap(),
///     Instruction::Add(Register::A, Register::B)
/// );
/// assert_eq!(
///     instruction.parse_complete("add C, 100").unwrap(),
///     Instruction::AddConstant(Register::C, 100)
/// );
/// assert_eq!(
///     instruction.parse_complete("copy A, B").unwrap(),
///     Instruction::Copy(Register::A, Register::B)
/// );
/// assert!(instruction
///     .parse_complete("copy A, A")
///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
/// ```
#[macro_export]
macro_rules! parser_parse_tree {
    // Internal rules take three identifiers: the current input, and the two variables tracking
    // the furthest error seen so far and how many bytes were left when it occurred.

    // No parsers left, `=>` rule: evaluate the expression and return it with the remaining input.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr $rhs:expr) => {
        return Ok(($rhs, $input));
    };
    // No parsers left, `=?>` rule: return on `Ok`, otherwise record the message as a custom error
    // if it is further into the input than the current furthest error, then fall through.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr_res $rhs:expr) => {
        match $rhs {
            Ok(v) => return Ok((v, $input)),
            Err(e) => {
                if $input.len() < $furthest_remaining {
                    $furthest_err = $crate::parser::ParseError::Custom(e);
                    $furthest_remaining = $input.len();
                }
            }
        };
    };
    // No parsers left, `=>>` rule: expand the nested rules against the remaining input.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @subtree $($rhs:tt)+) => {
        $crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($rhs)+);
    };

    // Next parser is `name @ parser`: bind its output to `name`. The input identifier is rebound
    // inside the `Ok` arm only, so later rules start again from the original input.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$n:ident @ $lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok(($n, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                if remaining.len() < $furthest_remaining {
                    $furthest_err = err;
                    $furthest_remaining = remaining.len();
                }
            }
        };
    };
    // Next parser has no binding: its output is discarded.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok((_, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                if remaining.len() < $furthest_remaining {
                    $furthest_err = err;
                    $furthest_remaining = remaining.len();
                }
            }
        };
    };

    // Rule list, head is `(parsers) => expr`.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) => $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    // Rule list, head is `(parsers) =?> expr`.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =?> $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr_res $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    // Rule list, head is `(parsers) =>> { nested rules }`.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =>> {$($rhs:tt)+} $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @subtree $($rhs)+);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    // Rule list is exhausted (optionally after a trailing comma).
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident $(,)?) => {};

    // Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
    (($($first:tt)+) $($tail:tt)+) => {{
        // Identity function whose bound pins the closure's signature, so the closure is inferred
        // as higher-ranked over the input lifetime.
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // Placeholder error: `usize::MAX` remaining means the first failing parser always
            // replaces it.
            let mut furthest_err = $crate::parser::ParseError::Custom("unreachable");
            let mut furthest_remaining = usize::MAX;

            $crate::parser_parse_tree!(@toplevel input furthest_err furthest_remaining ($($first)+) $($tail)+);

            // Rebuild the remaining slice from its length. Saturate so a sub-parser reporting
            // more remaining bytes than the input holds yields the whole input, not a panic.
            Err((furthest_err, &input[input.len().saturating_sub(furthest_remaining)..]))
        })
    }};
}