utils/parser/
macros.rs

/// Helper to create a [`parser::byte_lut`](super::byte_lut) parser using `match`-like syntax.
///
/// Each arm maps one or more byte literals, separated by `|`, to an expression.
/// Each expression must be const and must evaluate to a value of the same copy type.
///
/// Both the 256-entry lookup table and the set of expected bytes used for the error are built
/// inside `const` blocks. A byte literal that is listed twice, or one outside the ASCII range
/// (`>= 128`), is therefore rejected when those constants are evaluated during compilation,
/// rather than panicking when the parser is constructed.
///
/// # Examples
/// ```
/// # use utils::parser::{Leaf, self};
/// let parser = parser::byte_map!(
///     b'#' => true,
///     b'.' | b'S' => false,
/// );
/// assert_eq!(parser.parse(b"#.S##"), Ok((true, &b".S##"[..])));
/// assert_eq!(parser.parse(b".S##"), Ok((false, &b"S##"[..])));
/// assert_eq!(parser.parse(b"S##"), Ok((false, &b"##"[..])));
///
/// let (err, remaining) = parser.parse(b"abc").unwrap_err();
/// assert_eq!(err.to_string(), "expected one of '#', '.', 'S'");
/// assert_eq!(remaining, &b"abc"[..]);
/// ```
#[macro_export]
macro_rules! parser_byte_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        $crate::parser::byte_lut(&const {
            // Don't use a const item for the lut to avoid naming the value type
            let mut lut = [None; 256];
            $($(
                assert!(lut[$l as usize].is_none(), "duplicate literal");
                lut[$l as usize] = Some($e);
            )+)+
            lut
        }, $crate::parser::ParseError::ExpectedOneOf($crate::ascii::AsciiSet::new(const {
            // One bit per ASCII byte. Computed in a const block so that the range check below
            // (and the shift it guards) is evaluated at compile time, like the lut above.
            let mut set = 0u128;
            $($(
                let v: u8 = $l;
                assert!(v < 128, "invalid ASCII");
                set |= 1u128 << v;
            )+)+
            set
        })))
    }};
}
44
/// Helper to create a [`Leaf`](super::Leaf) parser matching string literals using `match`-like
/// syntax.
///
/// This is a replacement for
/// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::B)))`
/// which produces more optimized assembly and is easier to read and write.
///
/// The string patterns are tried in the order provided and the first one that is a prefix of the
/// input wins. When one literal is a prefix of another (for example `"n"` and `"north"`), the
/// longer literal must be listed first or it can never match.
///
/// If no literal matches, the error is
/// [`ParseError::ExpectedLiteral`](crate::parser::ParseError) when a single literal was supplied,
/// and otherwise a [`ParseError::Custom`](crate::parser::ParseError) message listing every
/// literal.
///
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
/// literals separated by commas, over 2x faster.
///
/// See also [`parser::parsable_enum!`](crate::parser::parsable_enum), which provides a macro to
/// define an enum and literal parser together.
///
/// # Examples
/// ```
/// # use utils::parser::{Leaf, self};
/// #[derive(Debug, PartialEq)]
/// enum Example {
///     A,
///     B,
///     C,
/// }
///
/// let parser = parser::literal_map!(
///     "A" | "a" => Example::A,
///     "B" => Example::B,
///     "C" => Example::C,
/// );
/// assert_eq!(parser.parse(b"A"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"a"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"B"), Ok((Example::B, &b""[..])));
/// assert_eq!(parser.parse(b"C"), Ok((Example::C, &b""[..])));
/// assert!(parser.parse(b"D").is_err());
/// ```
#[macro_export]
macro_rules! parser_literal_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        $crate::parser::from_leaf_fn(|input| {
            // One length-checked prefix comparison per literal, in declaration order. The
            // repetition operators mirror the matcher (`+` at both levels).
            $($(
                if input.len() >= const { $l.len() } && const { $l.as_bytes() } == &input[..const { $l.len() }] {
                    return Ok((($e), &input[const { $l.len() }..]));
                }
            )+)+

            Err(($crate::parser_literal_map!(@error $($($l)+)+), input))
        })
    }};
    // Internal: build the error for two or more literals as a single static message.
    (@error $first:literal $($l:literal)+) => {
        $crate::parser::ParseError::Custom(concat!("expected one of '", $first, "'", $(", '", $l, "'",)+))
    };
    // Internal: a single literal uses the dedicated error variant.
    (@error $first:literal) => {
        $crate::parser::ParseError::ExpectedLiteral($first)
    };
}
103
/// Defines a fieldless, unit-only enum together with its
/// [`Parseable`](crate::parser::Parseable) implementation.
///
/// Every variant is written as one or more string literals (separated by `|`), an arrow, and the
/// variant name, optionally followed by `= discriminant`. The enum itself is declared through
/// [`enumerable_enum!`](crate::enumerable_enum!) and the parser is generated with
/// [`parser::literal_map!`](crate::parser::literal_map), with variants tried in declaration order.
///
/// # Examples
/// ```
/// # use utils::parser::{Leaf, Parseable, self};
/// parser::parsable_enum! {
///     #[derive(Debug, PartialEq, Default)]
///     enum Direction {
///         #[default]
///         "north" | "n" => North,
///         "south" | "s" => South,
///         "east" | "e" => East,
///         "west" | "w" => West,
///     }
/// }
///
/// assert_eq!(Direction::PARSER.parse(b"north"), Ok((Direction::North, &b""[..])));
/// assert_eq!(Direction::PARSER.parse(b"s"), Ok((Direction::South, &b""[..])));
/// assert!(Direction::PARSER.parse(b"a").is_err());
///
/// assert_eq!(Direction::COUNT, 4);
/// ```
///
/// With discriminant helpers (requires an explicit `#[repr(...)]` attribute first):
/// ```
/// # use utils::parser::{Leaf, Parseable, self};
/// parser::parsable_enum! {
///     #[repr(u8)]
///     #[derive(Debug, PartialEq)]
///     enum Operation {
///         "add" => Add,
///         "mul" => Mul,
///         "div" => Div,
///         "mod" => Mod,
///         "eql" => Eql,
///     }
/// }
///
/// assert_eq!(Operation::PARSER.parse(b"add5"), Ok((Operation::Add, &b"5"[..])));
/// assert_eq!(Operation::PARSER.parse(b"eql"), Ok((Operation::Eql, &b""[..])));
///
/// assert_eq!(Operation::COUNT, 5);
/// assert_eq!(Operation::checked_from_discriminant(2), Some(Operation::Div));
/// ```
#[macro_export]
macro_rules! parser_parsable_enum {
    (
        $(#[$($outer_attr:tt)+])*
        enum $name:ident {$(
            $(#[$variant_attr:meta])*
            $($lit:literal)|+ => $variant:ident $(= $discriminant:expr)?,
        )+}
    ) => {
        // The enum-level attributes are captured as raw token trees rather than `meta`
        // fragments: an opaque fragment would stop enumerable_enum from selecting its repr-enum
        // arm when a `#[repr(...)]` attribute is present.
        $crate::enumerable_enum! {
            $(#[$($outer_attr)+])*
            enum $name {$(
                $(#[$variant_attr])*
                $variant $(= $discriminant)?,
            )+}
        }

        impl $crate::parser::Parseable for $name {
            type Parser = for<'a> fn(&'a [u8]) -> $crate::parser::LeafResult<'a, Self>;
            const PARSER: Self::Parser = $crate::parser_literal_map!($(
                $($lit)|+ => Self::$variant,
            )+);
        }
    };
}
178
/// Helper to define a custom [`Parser`](super::Parser) using a `match` inspired parse tree syntax.
///
/// A rule starts with a parenthesised, comma-separated chain of parsers. Any parser in the chain
/// may be written as `name @ parser` to bind its output to `name`, in the same spirit as `@`
/// bindings in ordinary match patterns.
///
/// The arrow following the chain selects what happens once every parser in it has matched:
/// - **Expression (`=>`)**: The right-hand expression is evaluated and returned.
/// - **Fallible (`=?>`)**: The right-hand expression evaluates to a result. On [`Ok`] the contained
///   value is returned. On [`Err`] the contained string is reported as a custom
///   [`ParseError`](super::ParseError) and parsing carries on with the next rule.
/// - **Subtree (`=>>`)**: The right-hand side is a brace-enclosed nested set of rules.
///
/// The top level and every `=>>` subtree each form their own commit scope. Once a parser commits,
/// no further branches in the current scope are attempted.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// #
/// #[derive(Debug, PartialEq)]
/// enum Register {
///     A, B, C
/// }
///
/// #[derive(Debug, PartialEq)]
/// enum Instruction {
///     Add(Register, Register),
///     AddConstant(Register, i32),
///     Copy(Register, Register),
///     Noop,
/// }
///
/// let register = parser::literal_map!(
///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
/// );
///
/// let instruction = parser::parse_tree!(
///     ("add ", r @ register, ", ") =>> {
///         (r2 @ register) => Instruction::Add(r, r2),
///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
///     },
///     ("copy ", r @ register, ", ", r2 @ register) =?> {
///         if r == r2 {
///             Err("cannot copy register to itself")
///         } else {
///             Ok(Instruction::Copy(r, r2))
///         }
///     },
///     ("noop") => Instruction::Noop,
/// );
///
/// assert_eq!(
///     instruction.parse_complete("add A, B").unwrap(),
///     Instruction::Add(Register::A, Register::B)
/// );
/// assert_eq!(
///     instruction.parse_complete("add C, 100").unwrap(),
///     Instruction::AddConstant(Register::C, 100)
/// );
/// assert_eq!(
///     instruction.parse_complete("copy A, B").unwrap(),
///     Instruction::Copy(Register::A, Register::B)
/// );
/// assert!(instruction
///     .parse_complete("copy A, A")
///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
/// ```
#[macro_export]
macro_rules! parser_parse_tree {
    // Parser chain exhausted, `=>` rule: produce the value with the remaining input.
    (@rule $rest:ident $ctx:ident $committed:ident $last_err:ident
        [$(,)?] @expr $body:expr
    ) => {
        return Ok(($body, $rest));
    };
    // Parser chain exhausted, `=?>` rule: an `Err` is recorded as a custom error and only ends
    // parsing when the current scope has committed.
    (@rule $rest:ident $ctx:ident $committed:ident $last_err:ident
        [$(,)?] @expr_res $body:expr
    ) => {
        match $body {
            Ok(value) => return Ok((value, $rest)),
            Err(message) => {
                $last_err = $ctx.error($crate::parser::ParseError::Custom(message), $rest);
                if $committed {
                    return Err($last_err);
                }
            }
        };
    };
    // Parser chain exhausted, `=>>` rule: run the nested rules in a fresh commit scope.
    (@rule $rest:ident $ctx:ident $committed:ident $last_err:ident
        [$(,)?] @subtree $($rules:tt)+
    ) => {
        // Take for example
        //  ("add ".commit(), r @ register, ", ") =>> {
        //      (r2 @ register) => Instruction::Add(r, r2),
        //      (v @ parser::i32()) => Instruction::AddConstant(r, v),
        //  },
        // The nested alternatives get a shadowed commit flag, so each of them is attempted as
        // usual. If all of them fail, the enclosing scope's flag decides whether to stop here.
        {
            let mut $committed = false;
            $crate::parser_parse_tree!(@toplevel $rest $ctx $committed $last_err $($rules)+);
        }
        if $committed {
            return Err($last_err);
        }
    };

    // Next parser in the chain, with its output bound to a name.
    (@rule $rest:ident $ctx:ident $committed:ident $last_err:ident
        [$bind:ident @ $parser:expr $(,$($more:tt)*)?] $($action:tt)+
    ) => {
        match $crate::parser::Parser::parse_ctx(
            &($parser), $rest, $ctx, &mut $committed, false,
        ) {
            Ok(($bind, $rest)) => {
                $crate::parser_parse_tree!(@rule $rest $ctx $committed $last_err
                    [$($($more)*)?] $($action)+
                );
            }
            Err(failure) if $committed => return Err(failure),
            Err(failure) => $last_err = failure,
        };
    };
    // Next parser in the chain, output discarded.
    (@rule $rest:ident $ctx:ident $committed:ident $last_err:ident
        [$parser:expr $(,$($more:tt)*)?] $($action:tt)+
    ) => {
        match $crate::parser::Parser::parse_ctx(
            &($parser), $rest, $ctx, &mut $committed, false,
        ) {
            Ok((_, $rest)) => {
                $crate::parser_parse_tree!(@rule $rest $ctx $committed $last_err
                    [$($($more)*)?] $($action)+
                );
            }
            Err(failure) if $committed => return Err(failure),
            Err(failure) => $last_err = failure,
        };
    };

    // Rule list: peel off the first rule, dispatch on its arrow, then recurse on the remainder.
    (@toplevel $rest:ident $ctx:ident $committed:ident $last_err:ident
        ($($parsers:tt)+) => $body:expr $(, $($more:tt)*)?
    ) => {
        $crate::parser_parse_tree!(
            @rule $rest $ctx $committed $last_err [$($parsers)+] @expr $body
        );
        $($crate::parser_parse_tree!(@toplevel $rest $ctx $committed $last_err $($more)*);)?
    };
    (@toplevel $rest:ident $ctx:ident $committed:ident $last_err:ident
        ($($parsers:tt)+) =?> $body:expr $(, $($more:tt)*)?
    ) => {
        $crate::parser_parse_tree!(
            @rule $rest $ctx $committed $last_err [$($parsers)+] @expr_res $body
        );
        $($crate::parser_parse_tree!(@toplevel $rest $ctx $committed $last_err $($more)*);)?
    };
    (@toplevel $rest:ident $ctx:ident $committed:ident $last_err:ident
        ($($parsers:tt)+) =>> {$($rules:tt)+} $(, $($more:tt)*)?
    ) => {
        $crate::parser_parse_tree!(
            @rule $rest $ctx $committed $last_err [$($parsers)+] @subtree $($rules)+
        );
        $($crate::parser_parse_tree!(@toplevel $rest $ctx $committed $last_err $($more)*);)?
    };
    // End of the rule list (optionally a lone trailing comma): nothing left to expand.
    (@toplevel $rest:ident $ctx:ident $committed:ident $last_err:ident $(,)?) => {};

    // Public entry point. Requiring a leading parenthesised group keeps this arm from ever
    // matching the internal `@rule` / `@toplevel` invocations above.
    (($($first:tt)+) $($tail:tt)+) => {{
        $crate::parser::from_parser_fn(|remaining, ctx, _, _| {
            let mut committed = false;
            let mut last_err;

            $crate::parser_parse_tree!(
                @toplevel remaining ctx committed last_err ($($first)+) $($tail)+
            );

            Err(last_err)
        })
    }};
}