utils/parser/
macros.rs

/// Macro to define a parser for one or more string literals, mapping the results.
///
/// This is a replacement for
/// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::B)))`
/// which produces more optimized assembly and is easier to read and write.
///
/// Literals are tried in the order provided and the first one that is a prefix of the input wins.
/// When one literal is a prefix of another, the longer literal must therefore be listed first,
/// otherwise it can never match.
///
/// On failure the error is [`ParseError::ExpectedLiteral`](crate::parser::ParseError) when only a
/// single literal was supplied, and a [`ParseError::Custom`](crate::parser::ParseError) message
/// listing every literal otherwise. The whole input is returned alongside the error.
///
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
/// literals separated by commas, over 2x faster.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
///
/// #[derive(Debug, PartialEq)]
/// enum Example {
///     A,
///     B,
///     C,
/// }
///
/// let parser = parser::literal_map!(
///     "A" | "a" => Example::A,
///     "B" => Example::B,
///     "C" => Example::C,
/// );
/// assert_eq!(parser.parse(b"A"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"a"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"B"), Ok((Example::B, &b""[..])));
/// assert_eq!(parser.parse(b"C"), Ok((Example::C, &b""[..])));
/// assert!(parser.parse(b"D").is_err());
/// ```
#[macro_export]
macro_rules! parser_literal_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        // Identity function whose bound pins the closure's signature, so the closure is inferred
        // as taking `&[u8]` and returning a `ParseResult` borrowing from that same input.
        fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // One prefix test per literal, in declaration order. The literal's bytes are forced
            // into a constant so only the comparison against the input happens at runtime.
            $($(
                if let Some(rest) = input.strip_prefix(const { $l.as_bytes() }) {
                    return Ok((($e), rest));
                }
            )+)+

            // No literal matched: report all of them and hand back the untouched input.
            Err(($crate::parser_literal_map!(@error $($($l)+)+), input))
        })
    }};
    // Internal: error for two or more literals, built into a single message at compile time.
    (@error $first:literal $($l:literal)+) => {
        $crate::parser::ParseError::Custom(concat!("expected one of '", $first, "'", $(", '", $l, "'",)+))
    };
    // Internal: error for exactly one literal.
    (@error $first:literal) => {
        $crate::parser::ParseError::ExpectedLiteral($first)
    };
}
58
/// Macro to define a custom parser using a `match` inspired parse tree syntax.
///
/// Each rule starts with a comma-separated list of chained parsers enclosed in parentheses on the
/// left-hand side. Parsers can be prefixed with an identifier followed by `@` to store the result
/// of that parser in the supplied variable, similar to normal match patterns.
///
/// After the list of parsers, there is an arrow determining the functionality of the rule when the
/// parsers match:
/// - **Expression (`=>`)**: The expression on the right-hand side is evaluated and returned.
/// - **Fallible (`=?>`)**: Similar to Expression, but the right-hand side evaluates to a result. If
///   the expression evaluates to [`Ok`], the value contained inside is returned. Otherwise, the
///   string contained inside the [`Err`] is handled as a custom [`ParseError`](super::ParseError),
///   and parsing will continue with the following rule.
/// - **Subtree (`=>>`)**: The right-hand side is a nested set of rules enclosed in braces.
///
/// Rules are tried in the order they are written. If none of the rules match successfully, the
/// error from the rule which parsed furthest into the input is returned; when several failures
/// got equally far, the earliest one is kept.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// #
/// #[derive(Debug, PartialEq)]
/// enum Register {
///     A, B, C
/// }
///
/// #[derive(Debug, PartialEq)]
/// enum Instruction {
///     Add(Register, Register),
///     AddConstant(Register, i32),
///     Copy(Register, Register),
///     Noop,
/// }
///
/// let register = parser::literal_map!(
///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
/// );
///
/// let instruction = parser::parse_tree!(
///     ("add ", r @ register, ", ") =>> {
///         (r2 @ register) => Instruction::Add(r, r2),
///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
///     },
///     ("copy ", r @ register, ", ", r2 @ register) =?> {
///         if r == r2 {
///             Err("cannot copy register to itself")
///         } else {
///             Ok(Instruction::Copy(r, r2))
///         }
///     },
///     ("noop") => Instruction::Noop,
/// );
///
/// assert_eq!(
///     instruction.parse_complete("add A, B").unwrap(),
///     Instruction::Add(Register::A, Register::B)
/// );
/// assert_eq!(
///     instruction.parse_complete("add C, 100").unwrap(),
///     Instruction::AddConstant(Register::C, 100)
/// );
/// assert_eq!(
///     instruction.parse_complete("copy A, B").unwrap(),
///     Instruction::Copy(Register::A, Register::B)
/// );
/// assert!(instruction
///     .parse_complete("copy A, A")
///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
/// ```
#[macro_export]
macro_rules! parser_parse_tree {
    // Internal: remember `$err` as the error to report if it happened with strictly less input
    // remaining than every failure recorded so far. Because the comparison is strict, the
    // earliest failure wins when two rules got equally far.
    (@record $furthest_err:ident $furthest_remaining:ident ($err:expr) ($remaining_len:expr)) => {
        if $remaining_len < $furthest_remaining {
            $furthest_err = $err;
            $furthest_remaining = $remaining_len;
        }
    };

    // Internal `@rule` terminals: every parser of the rule has matched and `$input` now names the
    // input remaining after them.
    //
    // Expression rule: the right-hand side is the parser's result.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr $rhs:expr) => {
        return Ok(($rhs, $input));
    };
    // Fallible rule: `Ok` is returned, `Err` is recorded as a custom error and control falls
    // through to whichever rule comes next.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @expr_res $rhs:expr) => {
        match $rhs {
            Ok(v) => return Ok((v, $input)),
            Err(e) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    ($crate::parser::ParseError::Custom(e)) ($input.len())
                );
            }
        };
    };
    // Subtree rule: the nested rules are tried against the remaining input.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident [$(,)?] @subtree $($rhs:tt)+) => {
        $crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($rhs)+);
    };

    // Internal `@rule` steps: run the first parser in the list and, on success, recurse on the
    // rest of the list with `$input` shadowed by the remaining input for the nested expansion
    // only. On failure the error is recorded and control falls through to the next rule, which
    // still sees the unshadowed input.
    //
    // Binding form (`name @ parser`) — must come before the plain form, which would otherwise
    // try to parse `name @ parser` as a single expression.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$n:ident @ $lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok(($n, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    (err) (remaining.len())
                );
            }
        };
    };
    // Plain form: the parser's output is discarded.
    (@rule $input:ident $furthest_err:ident $furthest_remaining:ident
        [$lhs:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($lhs), $input) {
            Ok((_, $input)) => {
                $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, remaining)) => {
                $crate::parser_parse_tree!(@record $furthest_err $furthest_remaining
                    (err) (remaining.len())
                );
            }
        };
    };

    // Internal `@toplevel`: peel one `(parsers) <arrow> rhs` rule off the front of a rule list,
    // expand it, then continue with the remaining rules.
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) => $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =?> $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @expr_res $rhs);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident
        ($($lhs:tt)+) =>> {$($rhs:tt)+} $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $input $furthest_err $furthest_remaining [$($lhs)+] @subtree $($rhs)+);
        $($crate::parser_parse_tree!(@toplevel $input $furthest_err $furthest_remaining $($tail)*);)?
    };
    // End of a rule list (optionally after a trailing comma).
    (@toplevel $input:ident $furthest_err:ident $furthest_remaining:ident $(,)?) => {};

    // Public entry point.
    // Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
    (($($first:tt)+) $($tail:tt)+) => {{
        // Identity function whose bound pins the closure's signature, so the closure is inferred
        // as taking `&[u8]` and returning a `ParseResult` borrowing from that same input.
        fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            // Placeholder values: every rule either returns or records a failure, and any real
            // remaining length is below `usize::MAX`, so the first failure always replaces them.
            let mut furthest_err = $crate::parser::ParseError::Custom("unreachable");
            let mut furthest_remaining = usize::MAX;

            $crate::parser_parse_tree!(@toplevel input furthest_err furthest_remaining ($($first)+) $($tail)+);

            // Rebuild the slice at which the furthest failure happened from its remaining length.
            Err((furthest_err, &input[input.len() - furthest_remaining..]))
        })
    }};
}