// utils/parser/macros.rs

/// Macro to define a parser which consumes a single byte and maps it using a lookup table.
///
/// This macro is a wrapper around [`parser::byte_lut`](crate::parser::byte_lut) to allow defining
/// the lookup table using a match-like syntax. Each expression must be const and evaluate to a
/// value of the same `Copy` type.
///
/// Every literal must be an ASCII byte (below 128) and may appear at most once. Both conditions
/// are asserted while the lookup table and the expected-byte set are built at compile time, so a
/// violation is reported as a compilation error rather than a runtime panic.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// let parser = parser::byte_map!(
///     b'#' => true,
///     b'.' | b'S' => false,
/// );
/// assert_eq!(parser.parse(b"#.S##"), Ok((true, &b".S##"[..])));
/// assert_eq!(parser.parse(b".S##"), Ok((false, &b"S##"[..])));
/// assert_eq!(parser.parse(b"S##"), Ok((false, &b"##"[..])));
///
/// let (err, remaining) = parser.parse(b"abc").unwrap_err();
/// assert_eq!(err.to_string(), "expected one of '#', '.', 'S'");
/// assert_eq!(remaining, &b"abc"[..]);
/// ```
#[macro_export]
macro_rules! parser_byte_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        $crate::parser::byte_lut(&const {
            // Don't use a const item for the lut to avoid naming the value type
            let mut lut = [None; 256];
            $($(
                assert!(lut[$l as usize].is_none(), "duplicate literal");
                lut[$l as usize] = Some($e);
            )+)+
            lut
        }, {
            // Fold the literals into a 128-bit set inside a const block so that a non-ASCII
            // literal fails compilation instead of panicking when the parser is constructed.
            let set = const {
                let mut set = 0u128;
                $($(
                    let v: u8 = $l;
                    // Also guards the shift below, which would overflow for v >= 128.
                    assert!(v < 128, "invalid ASCII");
                    set |= 1u128 << v;
                )+)+
                set
            };
            $crate::parser::ParseError::ExpectedOneOf($crate::ascii::AsciiSet::new(set))
        })
    }};
}
46
/// Macro to define a parser for one or more string literals, mapping the results.
///
/// This is a replacement for
/// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::B)))`
/// which produces more optimized assembly and is easier to read and write.
///
/// The string patterns are tried in the order provided and the first literal that is a prefix of
/// the input wins. When one literal is a prefix of another (for example `"n"` and `"north"`), the
/// longer literal must therefore be listed first, otherwise it can never match.
///
/// If no literal matches, the error is `ParseError::ExpectedLiteral` when a single literal was
/// supplied, or a `ParseError::Custom` message listing every literal otherwise. The input is
/// returned unconsumed alongside the error.
///
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
/// literals separated by commas, over 2x faster.
///
/// See also [`parser::parsable_enum!`](crate::parser::parsable_enum), which provides a macro to
/// define an enum and literal parser together.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// #[derive(Debug, PartialEq)]
/// enum Example {
///     A,
///     B,
///     C,
/// }
///
/// let parser = parser::literal_map!(
///     "A" | "a" => Example::A,
///     "B" => Example::B,
///     "C" => Example::C,
/// );
/// assert_eq!(parser.parse(b"A"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"a"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"B"), Ok((Example::B, &b""[..])));
/// assert_eq!(parser.parse(b"C"), Ok((Example::C, &b""[..])));
/// assert!(parser.parse(b"D").is_err());
/// ```
#[macro_export]
macro_rules! parser_literal_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        // Identity function whose bound pins the closure signature to
        // `Fn(&[u8]) -> ParseResult<'_, O>`, tying the result lifetime to the input slice.
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // One prefix comparison per literal, in declaration order; the first hit returns.
            $($(
                if input.len() >= const { $l.len() } && const { $l.as_bytes() } == &input[..const { $l.len() }] {
                    return Ok((($e), &input[const { $l.len() }..]));
                }
            )+)+

            Err(($crate::parser_literal_map!(@error $($($l)+)+), input))
        })
    }};
    // Internal: two or more literals produce a single message listing all of them.
    (@error $first:literal $($l:literal)+) => {
        $crate::parser::ParseError::Custom(concat!("expected one of '", $first, "'", $(", '", $l, "'",)+))
    };
    // Internal: a lone literal is reported as that literal directly.
    (@error $first:literal) => {
        $crate::parser::ParseError::ExpectedLiteral($first)
    };
}
106
/// Macro to define an enumerable enum that implements [`Parseable`](crate::parser::Parseable).
///
/// The parser is implemented using [`parser::literal_map!`](crate::parser::literal_map) and
/// [`enumerable_enum!`](crate::enumerable_enum!).
///
/// Each variant is written as one or more string literals separated by `|`, followed by `=>`,
/// the variant name and an optional `= discriminant`. Variants are separated by commas; the
/// comma after the last variant is optional. Literals are forwarded to the literal parser in the
/// order written.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, Parseable, self};
/// parser::parsable_enum! {
///     #[derive(Debug, PartialEq, Default)]
///     enum Direction {
///         #[default]
///         "north" | "n" => North,
///         "south" | "s" => South,
///         "east" | "e" => East,
///         "west" | "w" => West,
///     }
/// }
///
/// assert_eq!(Direction::PARSER.parse(b"north"), Ok((Direction::North, &b""[..])));
/// assert_eq!(Direction::PARSER.parse(b"s"), Ok((Direction::South, &b""[..])));
/// assert!(Direction::PARSER.parse(b"a").is_err());
///
/// assert_eq!(Direction::COUNT, 4);
/// ```
///
/// With discriminant helpers (requires an explicit `#[repr(...)]` attribute first):
/// ```
/// # use utils::parser::{Parser, Parseable, self};
/// parser::parsable_enum! {
///     #[repr(u8)]
///     #[derive(Debug, PartialEq)]
///     enum Operation {
///         "add" => Add,
///         "mul" => Mul,
///         "div" => Div,
///         "mod" => Mod,
///         "eql" => Eql,
///     }
/// }
///
/// assert_eq!(Operation::PARSER.parse(b"add5"), Ok((Operation::Add, &b"5"[..])));
/// assert_eq!(Operation::PARSER.parse(b"eql"), Ok((Operation::Eql, &b""[..])));
///
/// assert_eq!(Operation::COUNT, 5);
/// assert_eq!(Operation::checked_from_discriminant(2), Some(Operation::Div));
/// ```
#[macro_export]
macro_rules! parser_parsable_enum {
    (
        $(#[$($enum_meta:tt)+])*
        enum $name:ident {
            $(
                $(#[$meta:meta])*
                $($l:literal)|+ => $variant:ident $(= $value:expr)?
            ),+ $(,)?
        }
    ) => {
        // Use tt for enum_meta to avoid the attributes being captured as opaque fragments, which
        // is required for the correct enumerable_enum arm to be selected for repr enums.
        $crate::enumerable_enum! {
            $(#[$($enum_meta)+])*
            enum $name {$(
                $(#[$meta])*
                $variant $(= $value)?,
            )+}
        }

        impl $crate::parser::Parseable for $name {
            // A plain function pointer, so the literal parser can be stored in the const below.
            type Parser = for<'a> fn(&'a [u8]) -> $crate::parser::ParseResult<'a, Self>;
            const PARSER: Self::Parser = $crate::parser_literal_map!($(
                $($l)|+ => Self::$variant,
            )+);
        }
    };
}
181
/// Macro to define a custom parser using a `match` inspired parse tree syntax.
///
/// Each rule is made up of a list of chained parsers enclosed in brackets on the left-hand side.
/// Parsers can be prefixed with an identifier followed by `@` to store the result of that parser in
/// the supplied variable, similar to normal match patterns.
///
/// After the list of parsers, there is an arrow determining the functionality of the rule when the
/// parsers match:
/// - **Expression (`=>`)**: The expression on the right-hand side is evaluated and returned.
/// - **Fallible (`=?>`)**: Similar to Expression, but the right-hand side evaluates to a result.
///   If the expression evaluates to [`Ok`], the value contained inside is returned. Otherwise, the
///   string contained inside the [`Err`] is handled as a custom [`ParseError`](super::ParseError),
///   and parsing will continue with the following rule.
/// - **Subtree (`=>>`)**: The right-hand side is a nested set of rules enclosed in braces.
///
/// Rules are tried in the order written. If none of the rules match successfully, the error from
/// the rule which parsed furthest into the input is returned, together with the input remaining at
/// that point. When several failures reach equally far, the earliest one is kept.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// #
/// #[derive(Debug, PartialEq)]
/// enum Register {
///     A, B, C
/// }
///
/// #[derive(Debug, PartialEq)]
/// enum Instruction {
///     Add(Register, Register),
///     AddConstant(Register, i32),
///     Copy(Register, Register),
///     Noop,
/// }
///
/// let register = parser::literal_map!(
///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
/// );
///
/// let instruction = parser::parse_tree!(
///     ("add ", r @ register, ", ") =>> {
///         (r2 @ register) => Instruction::Add(r, r2),
///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
///     },
///     ("copy ", r @ register, ", ", r2 @ register) =?> {
///         if r == r2 {
///             Err("cannot copy register to itself")
///         } else {
///             Ok(Instruction::Copy(r, r2))
///         }
///     },
///     ("noop") => Instruction::Noop,
/// );
///
/// assert_eq!(
///     instruction.parse_complete("add A, B").unwrap(),
///     Instruction::Add(Register::A, Register::B)
/// );
/// assert_eq!(
///     instruction.parse_complete("add C, 100").unwrap(),
///     Instruction::AddConstant(Register::C, 100)
/// );
/// assert_eq!(
///     instruction.parse_complete("copy A, B").unwrap(),
///     Instruction::Copy(Register::A, Register::B)
/// );
/// assert!(instruction
///     .parse_complete("copy A, A")
///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
/// ```
#[macro_export]
macro_rules! parser_parse_tree {
    // Internal: remember `$err` only if it was produced with strictly fewer bytes remaining than
    // any failure recorded so far, so ties keep the earliest failure.
    (@record $furthest_err:ident $furthest_remaining:ident $err:expr, $remaining_len:expr) => {
        if $remaining_len < $furthest_remaining {
            $furthest_err = $err;
            $furthest_remaining = $remaining_len;
        }
    };

    // Internal: every parser of the rule matched (the parser list is empty), so run the
    // right-hand side. `@expr` returns the value as-is.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr $rhs:expr) => {
        return Ok(($rhs, $input));
    };
    // `@expr_res` returns on `Ok` and records a custom error at the current position on `Err`,
    // after which control falls through to the following rule.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr_res $rhs:expr) => {
        match $rhs {
            Ok(v) => return Ok((v, $input)),
            Err(e) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    $crate::parser::ParseError::Custom(e), $input.len()
                );
            }
        };
    };
    // `@subtree` continues with the nested rules from the current position.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @subtree $($rhs:tt)+) => {
        $crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($rhs)+);
    };

    // Internal: run the next parser of the rule, binding its output to `$n`. `$input` is
    // shadowed inside the success arm only, so later rules restart from the outer position.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$n:ident @ $lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok(($n, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    err, remaining.len()
                );
            }
        };
    };
    // Same as above for a parser whose output is discarded.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok((_, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    err, remaining.len()
                );
            }
        };
    };

    // Internal: peel one rule off the comma separated list, expand it, then expand the rest.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) => $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =?> $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr_res $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =>> {$($rhs:tt)+} $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @subtree $($rhs)+);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    // End of the rule list (optionally after a trailing comma): nothing left to expand.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident $(,)?) => {};

    // Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
    (($($first:tt)+) $($tail:tt)+) => {{
        // Identity function whose bound pins the closure signature to
        // `Fn(&[u8]) -> ParseResult<'_, O>`, tying the result lifetime to the input slice.
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // Placeholder values; each failing path above records its error before falling
            // through, and any slice length is below usize::MAX, so the first failure always
            // replaces them.
            let mut furthest_err = $crate::parser::ParseError::Custom("unreachable");
            let mut furthest_remaining = usize::MAX;

            $crate::parser_parse_tree!(@toplevel input furthest_err furthest_remaining ($($first)+) $($tail)+);

            // Rebuild the remaining slice at the furthest failure from its recorded length.
            Err((furthest_err, &input[input.len() - furthest_remaining..]))
        })
    }};
}