utils/parser/macros.rs
/// Macro to define a parser which consumes a single byte and maps it using a lookup table.
///
/// This macro is a wrapper around [`parser::byte_lut`](crate::parser::byte_lut) to allow defining
/// the lookup table using a match-like syntax. Each expression must be const and evaluate to a
/// value of the same copy type.
///
/// The following are checked at compile time:
/// - every pattern is a `u8` literal,
/// - at least two literals are provided in total,
/// - no literal is listed more than once,
/// - the generated error message, which embeds each literal byte as-is, is valid UTF-8.
///
/// The macro's helper constants are kept out of scope of the mapped expressions, so an
/// expression may refer to any constant visible at the call site, including ones named `COUNT`,
/// `LEN` or `ERROR`.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// let parser = parser::byte_map!(
///     b'#' => true,
///     b'.' | b'S' => false,
/// );
/// assert_eq!(parser.parse(b"#.S##"), Ok((true, &b".S##"[..])));
/// assert_eq!(parser.parse(b".S##"), Ok((false, &b"S##"[..])));
/// assert_eq!(parser.parse(b"S##"), Ok((false, &b"##"[..])));
///
/// let (err, remaining) = parser.parse(b"abc").unwrap_err();
/// assert_eq!(err.to_string(), "expected one of '#', '.', 'S'");
/// assert_eq!(remaining, &b"abc"[..]);
/// ```
#[macro_export]
macro_rules! parser_byte_map {
    (
        $($($l:literal)|+ => $e:expr),+$(,)?
    ) => {{
        $crate::parser::byte_lut(&const {
            // Don't use a const item for the lut to avoid naming the value type
            let mut lut = [None; 256];
            $($(
                assert!(lut[$l as usize].is_none(), "duplicate literal");
                lut[$l as usize] = Some($e);
            )+)+
            lut
        }, const {
            // Item names are not hygienic in `macro_rules!`, so these helpers are declared inside
            // this block, which contains no caller-supplied expressions. Declaring them next to
            // the lookup table would shadow any `COUNT`, `LEN` or `ERROR` used by `$e`.

            // `let _: u8 = $l` ensures $l is used in the repetition and also ensures all the
            // literals are byte literals
            const COUNT: usize = 0usize $($(+ {let _: u8 = $l; 1usize})+)+;
            // 16 byte prefix, then 5 bytes per literal (`'x', `), minus the 2 byte separator which
            // is omitted after the final literal
            const LEN: usize = 14 + 5 * COUNT;

            // Once concat_bytes! is stabilized this error message can be created in the macro
            // similar to parser_literal_map!
            const ERROR: [u8; LEN] = {
                let mut result = [0u8; LEN];
                let (prefix, vals) = result.split_at_mut(16);
                prefix.copy_from_slice(b"expected one of ");

                let mut i = 0;
                let literals = [$($($l),+),+];
                while i < COUNT {
                    vals[i * 5] = b'\'';
                    vals[i * 5 + 1] = literals[i];
                    vals[i * 5 + 2] = b'\'';
                    if i + 1 < COUNT {
                        vals[i * 5 + 3] = b',';
                        vals[i * 5 + 4] = b' ';
                    }
                    i += 1;
                }

                result
            };

            assert!(COUNT >= 2, "at least two literals must be provided");

            match str::from_utf8(&ERROR) {
                Ok(v) => v,
                Err(_) => panic!("one or more of the provided literals is invalid unicode"),
            }
        })
    }};
}
74
/// Macro to define a parser for one or more string literals, mapping the results.
///
/// This is a replacement for
/// [`parser::one_of`](crate::parser::one_of())`(("a".map(|_| Enum::A), "b".map(|_| Enum::B)))`
/// which produces more optimized assembly and is easier to read and write.
///
/// The literals are tried in the order provided and the first one which is a prefix of the input
/// wins. When one literal is a prefix of another, list the longer literal first, otherwise the
/// longer literal can never match.
///
/// If no literal matches, the error lists every literal, or names the single expected literal
/// when only one was provided.
///
/// Using this makes [2017 day 11](../../year2017/struct.Day11.html), which parses a sequence of
/// literals separated by commas, over 2x faster.
///
/// See also [`parser::parsable_enum!`](crate::parser::parsable_enum), which provides a macro to
/// define an enum and literal parser together.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, self};
/// #[derive(Debug, PartialEq)]
/// enum Example {
///     A,
///     B,
///     C,
/// }
///
/// let parser = parser::literal_map!(
///     "A" | "a" => Example::A,
///     "B" => Example::B,
///     "C" => Example::C,
/// );
/// assert_eq!(parser.parse(b"A"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"a"), Ok((Example::A, &b""[..])));
/// assert_eq!(parser.parse(b"B"), Ok((Example::B, &b""[..])));
/// assert_eq!(parser.parse(b"C"), Ok((Example::C, &b""[..])));
/// assert!(parser.parse(b"D").is_err());
/// ```
#[macro_export]
macro_rules! parser_literal_map {
    (
        $($($lit:literal)|+ => $value:expr),+$(,)?
    ) => {{
        // Pins the closure's signature so it is inferred as a parser over byte slices
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|input| {
            $($(
                if input.len() >= const { $lit.len() } {
                    // The length check above means this split cannot panic
                    let (head, rest) = input.split_at(const { $lit.len() });
                    if head == const { $lit.as_bytes() } {
                        return Ok((($value), rest));
                    }
                }
            )+)+

            Err(($crate::parser_literal_map!(@error $($($lit)+)+), input))
        })
    }};
    // Two or more literals: list all of them in a custom error message
    (@error $head:literal $($rest:literal)+) => {
        $crate::parser::ParseError::Custom(concat!("expected one of '", $head, "'", $(", '", $rest, "'",)+))
    };
    // Exactly one literal: report it as the expected literal
    (@error $only:literal) => {
        $crate::parser::ParseError::ExpectedLiteral($only)
    };
}
134
/// Macro to define an enum that implements [`Parseable`](crate::parser::Parseable).
///
/// Each variant is declared as one or more string literals (separated by `|`), followed by `=>`
/// and the variant name, optionally with an explicit discriminant. Attributes may be placed on
/// the enum and on individual variants.
///
/// The generated enum is always `pub`. Alongside the enum, the macro emits a private `ALL`
/// associated constant containing every variant in declaration order.
///
/// The parser is implemented using [`parser::literal_map!`](crate::parser::literal_map), with the
/// literals tried in the order they are written.
///
/// # Examples
/// ```
/// # use utils::parser::{Parser, Parseable, self};
/// parser::parsable_enum! {
///     #[derive(Debug, PartialEq, Default)]
///     enum Direction {
///         #[default]
///         "north" | "n" => North,
///         "south" | "s" => South,
///         "east" | "e" => East,
///         "west" | "w" => West,
///     }
/// }
///
/// assert_eq!(Direction::PARSER.parse(b"north"), Ok((Direction::North, &b""[..])));
/// assert_eq!(Direction::PARSER.parse(b"s"), Ok((Direction::South, &b""[..])));
/// assert!(Direction::PARSER.parse(b"a").is_err());
/// ```
#[macro_export]
macro_rules! parser_parsable_enum {
    (
        $(#[$outer:meta])*
        enum $name:ident {$(
            $(#[$inner:meta])*
            $($lit:literal)|+ => $variant:ident $(= $discriminant:expr)?,
        )+}
    ) => {
        $(#[$outer])*
        pub enum $name {$(
            $(#[$inner])*
            $variant $(= $discriminant)?,
        )+}

        impl $crate::parser::Parseable for $name {
            type Parser = for<'a> fn(&'a [u8]) -> $crate::parser::ParseResult<'a, Self>;
            // The closure produced by `literal_map!` captures nothing, so it coerces to a fn pointer
            const PARSER: Self::Parser = $crate::parser_literal_map!($(
                $($lit)|+ => Self::$variant,
            )+);
        }

        impl $name {
            const ALL: &'static [Self] = &[$(
                Self::$variant,
            )+];
        }
    };
}
186
/// Macro to define a custom parser using a `match` inspired parse tree syntax.
///
/// Each rule starts with a parenthesised, comma-separated list of parsers which are applied one
/// after another. A parser can be prefixed with an identifier followed by `@` to bind its output
/// to that name, similar to normal match patterns. Outputs of parsers without a binding are
/// discarded.
///
/// After the list of parsers, an arrow selects what happens once all of them have matched:
/// - **Expression (`=>`)**: The expression on the right-hand side is evaluated and returned.
/// - **Fallible (`=?>`)**: Similar to Expression, but the right-hand side evaluates to a result.
///   If it is [`Ok`], the value contained inside is returned. Otherwise, the string contained
///   inside the [`Err`] is handled as a custom [`ParseError`](super::ParseError), and parsing
///   continues with the following rule.
/// - **Subtree (`=>>`)**: The right-hand side is a nested set of rules enclosed in braces, which
///   are applied to the input remaining after the parsers on the left-hand side.
///
/// Rules are tried in order. If none of the rules match successfully, the error from the rule
/// which parsed furthest into the input is returned. When several errors are equally far into
/// the input, the one encountered first is kept.
///
/// # Examples
/// ```
/// # use utils::parser::{self, Parser};
/// #
/// #[derive(Debug, PartialEq)]
/// enum Register {
///     A, B, C
/// }
///
/// #[derive(Debug, PartialEq)]
/// enum Instruction {
///     Add(Register, Register),
///     AddConstant(Register, i32),
///     Copy(Register, Register),
///     Noop,
/// }
///
/// let register = parser::literal_map!(
///     "A" => Register::A, "B" => Register::B, "C" => Register::C,
/// );
///
/// let instruction = parser::parse_tree!(
///     ("add ", r @ register, ", ") =>> {
///         (r2 @ register) => Instruction::Add(r, r2),
///         (v @ parser::i32()) => Instruction::AddConstant(r, v),
///     },
///     ("copy ", r @ register, ", ", r2 @ register) =?> {
///         if r == r2 {
///             Err("cannot copy register to itself")
///         } else {
///             Ok(Instruction::Copy(r, r2))
///         }
///     },
///     ("noop") => Instruction::Noop,
/// );
///
/// assert_eq!(
///     instruction.parse_complete("add A, B").unwrap(),
///     Instruction::Add(Register::A, Register::B)
/// );
/// assert_eq!(
///     instruction.parse_complete("add C, 100").unwrap(),
///     Instruction::AddConstant(Register::C, 100)
/// );
/// assert_eq!(
///     instruction.parse_complete("copy A, B").unwrap(),
///     Instruction::Copy(Register::A, Register::B)
/// );
/// assert!(instruction
///     .parse_complete("copy A, A")
///     .is_err_and(|err| err.to_string().contains("cannot copy register to itself")));
/// ```
#[macro_export]
macro_rules! parser_parse_tree {
    // --- @rule: every parser of the rule has matched, run the right-hand side ---

    // Expression rule: return the value with the remaining input
    (@rule $bytes:ident $best_err:ident $min_left:ident [$(,)?] @expr $rhs:expr) => {
        return Ok(($rhs, $bytes));
    };
    // Fallible rule: return on `Ok`, otherwise record the message if it is the furthest error
    (@rule $bytes:ident $best_err:ident $min_left:ident [$(,)?] @expr_res $rhs:expr) => {
        match $rhs {
            Ok(value) => return Ok((value, $bytes)),
            Err(message) if $bytes.len() < $min_left => {
                $best_err = $crate::parser::ParseError::Custom(message);
                $min_left = $bytes.len();
            }
            Err(_) => {}
        };
    };
    // Subtree rule: process the nested rules against the remaining input
    (@rule $bytes:ident $best_err:ident $min_left:ident [$(,)?] @subtree $($rhs:tt)+) => {
        $crate::parser_parse_tree!(@toplevel $bytes $best_err $min_left $($rhs)+);
    };

    // --- @rule: apply the next parser of the rule, then recurse on the rest ---

    // Parser whose output is bound to `$name`
    (@rule $bytes:ident $best_err:ident $min_left:ident
        [$name:ident @ $parser:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($parser), $bytes) {
            // `$bytes` is shadowed by the remaining input for the rest of this rule only
            Ok(($name, $bytes)) => {
                $crate::parser_parse_tree!(@rule $bytes $best_err $min_left
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, left)) if left.len() < $min_left => {
                $best_err = err;
                $min_left = left.len();
            }
            Err(_) => {}
        };
    };
    // Parser whose output is discarded
    (@rule $bytes:ident $best_err:ident $min_left:ident
        [$parser:expr $(,$($tail:tt)*)?] $($rhs:tt)+
    ) => {
        match $crate::parser::Parser::parse(&($parser), $bytes) {
            Ok((_, $bytes)) => {
                $crate::parser_parse_tree!(@rule $bytes $best_err $min_left
                    [$($($tail)*)?] $($rhs)+
                );
            },
            Err((err, left)) if left.len() < $min_left => {
                $best_err = err;
                $min_left = left.len();
            }
            Err(_) => {}
        };
    };

    // --- @toplevel: peel one rule off the list, expand it, then continue with the rest ---

    (@toplevel $bytes:ident $best_err:ident $min_left:ident
        ($($lhs:tt)+) => $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $bytes $best_err $min_left [$($lhs)+] @expr $rhs);
        $($crate::parser_parse_tree!(@toplevel $bytes $best_err $min_left $($tail)*);)?
    };
    (@toplevel $bytes:ident $best_err:ident $min_left:ident
        ($($lhs:tt)+) =?> $rhs:expr $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $bytes $best_err $min_left [$($lhs)+] @expr_res $rhs);
        $($crate::parser_parse_tree!(@toplevel $bytes $best_err $min_left $($tail)*);)?
    };
    (@toplevel $bytes:ident $best_err:ident $min_left:ident
        ($($lhs:tt)+) =>> {$($rhs:tt)+} $(, $($tail:tt)*)?
    ) => {
        $crate::parser_parse_tree!(@rule $bytes $best_err $min_left [$($lhs)+] @subtree $($rhs)+);
        $($crate::parser_parse_tree!(@toplevel $bytes $best_err $min_left $($tail)*);)?
    };
    // No rules left
    (@toplevel $bytes:ident $best_err:ident $min_left:ident $(,)?) => {};

    // Ensures this branch only matches inputs starting with (, giving each rule set a unique prefix
    (($($first:tt)+) $($tail:tt)+) => {{
        // Pins the closure's signature so it is inferred as a parser over byte slices
        const fn coerce_to_parser<F: Fn(&[u8]) -> $crate::parser::ParseResult<'_, O>, O>(f: F) -> F { f }

        coerce_to_parser(|bytes| {
            // Fewest bytes left unparsed by any failure so far, and the error seen at that point
            let mut min_left = usize::MAX;
            let mut best_err = $crate::parser::ParseError::Custom("unreachable");

            $crate::parser_parse_tree!(@toplevel bytes best_err min_left ($($first)+) $($tail)+);

            let consumed = bytes.len() - min_left;
            Err((best_err, &bytes[consumed..]))
        })
    }};
}