// utils/parser/base.rs

1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3    Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4    WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
11/// [`Result`] type returned by [`Parser::parse`].
12pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17pub trait Parser: Sized {
18    /// Type of the value produced by [`parse`](Self::parse) when successful.
19    ///
20    /// Generic over the input `'i` lifetime.
21    type Output<'i>;
22
23    /// Type of the chained parser returned by [`then`](Self::then).
24    ///
25    /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
26    /// nesting tuples inside each other.
27    type Then<T: Parser>: Then<Self, T>;
28
29    /// Parse the given sequence of bytes.
30    ///
31    /// Returns a tuple of the successfully parsed [`Output`](Self::Output) value and the
32    /// remaining bytes, or a tuple containing a [`ParseError`] and the location of the error.
33    ///
34    /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
35    /// (in [`parse_all`](Self::parse_all)) will panic.
36    ///
37    /// # Examples
38    /// ```
39    /// # use utils::parser::{self, Parser};
40    /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
41    /// assert!(parser::u32().parse(b"abc1234").is_err());
42    /// ```
43    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>>;
44
45    // Provided methods
46
47    /// Sequence another parser after this one.
48    ///
49    /// # Examples
50    /// ```
51    /// # use utils::parser::{self, Parser};
52    /// assert_eq!(
53    ///     parser::i32()
54    ///         .then(parser::i32())
55    ///         .parse(b"123-123"),
56    ///     Ok(((123, -123), &b""[..]))
57    /// );
58    /// ```
59    fn then<T: Parser>(self, next: T) -> Self::Then<T> {
60        Then::then(self, next)
61    }
62
63    /// Attempt to parse using this parser, followed by provided parser.
64    ///
65    /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
66    /// from the parser which parsed further into the input is returned (preferring the first error
67    /// if both errored at the same position).
68    ///
69    /// See also [`parser::one_of`](super::one_of()).
70    ///
71    /// # Examples
72    /// ```
73    /// # use utils::parser::{self, ParseError, Parser};
74    /// let parser = parser::u8()
75    ///     .map(|x| u32::from(x) * 1001001)
76    ///     .or(parser::u32());
77    /// assert_eq!(
78    ///     parser.parse(b"123"),
79    ///     Ok((123123123, &b""[..]))
80    /// );
81    /// assert_eq!(
82    ///     parser.parse(b"1000"),
83    ///     Ok((1000, &b""[..]))
84    /// );
85    /// ```
86    fn or<T: for<'i> Parser<Output<'i> = Self::Output<'i>>>(self, alternative: T) -> Or<Self, T> {
87        Or {
88            first: self,
89            second: alternative,
90        }
91    }
92
93    /// Map the output of this parser using the supplied function.
94    ///
95    /// # Examples
96    /// ```
97    /// # use utils::parser::{self, Parser};
98    /// assert_eq!(
99    ///     parser::u32()
100    ///         .map(|x| x * 2)
101    ///         .parse(b"123"),
102    ///     Ok((246, &b""[..]))
103    /// );
104    /// ```
105    fn map<O, F: for<'i> Fn(Self::Output<'i>) -> O>(self, f: F) -> Map<Self, F> {
106        Map {
107            parser: self,
108            map_fn: f,
109        }
110    }
111
112    /// Map the output of this parser using the supplied fallible function.
113    ///
114    /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
115    ///
116    /// # Examples
117    /// ```
118    /// # use utils::parser::{self, ParseError, Parser};
119    /// let parser = parser::u8()
120    ///     .map_res(|x| x.checked_mul(2).ok_or("input too large"));
121    /// assert_eq!(
122    ///     parser.parse(b"123"),
123    ///     Ok((246, &b""[..]))
124    /// );
125    /// assert_eq!(
126    ///     parser.parse(b"200"),
127    ///     Err((ParseError::Custom("input too large"), &b"200"[..]))
128    /// );
129    /// ```
130    fn map_res<O, F: for<'i> Fn(Self::Output<'i>) -> Result<O, &'static str>>(
131        self,
132        f: F,
133    ) -> MapResult<Self, F> {
134        MapResult {
135            parser: self,
136            map_fn: f,
137        }
138    }
139
140    /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
141    ///
142    /// # Examples
143    /// ```
144    /// # use utils::parser::{self, ParseError, Parser};
145    /// let parser = parser::u32()
146    ///     .optional();
147    /// assert_eq!(
148    ///     parser.parse(b"123"),
149    ///     Ok((Some(123), &b""[..]))
150    /// );
151    /// assert_eq!(
152    ///     parser.parse(b"abc"),
153    ///     Ok((None, &b"abc"[..]))
154    /// );
155    /// ```
156    fn optional(self) -> Optional<Self> {
157        Optional { parser: self }
158    }
159
160    /// Repeat this parser `N` times, returning an [`array`].
161    ///
162    /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
163    /// [`repeat`](Self::repeat).
164    ///
165    /// # Examples
166    /// ```
167    /// # use utils::parser::{self, Parser};
168    /// assert_eq!(
169    ///     parser::u32()
170    ///         .repeat_n(",") // N = 3 is inferred
171    ///         .parse(b"12,34,56"),
172    ///     Ok(([12, 34, 56], &b""[..]))
173    /// );
174    /// ```
175    fn repeat_n<const N: usize, S: Parser>(self, separator: S) -> RepeatN<N, Self, S>
176    where
177        for<'i> Self::Output<'i>: Copy + Default,
178    {
179        RepeatN {
180            parser: self,
181            separator,
182        }
183    }
184
185    /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
186    ///
187    /// This parser can parse up to `N` items. If more items match, it will return an error.
188    ///
189    /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
190    /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
191    ///
192    /// # Examples
193    /// ```
194    /// # use utils::parser::{self, Parser};
195    /// let parser = parser::u32()
196    ///     .repeat(",", 3);
197    /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
198    /// assert!(parser.parse(b"12,34").is_err());
199    /// ```
200    fn repeat_arrayvec<const N: usize, S: Parser>(
201        self,
202        separator: S,
203        min_elements: usize,
204    ) -> RepeatArrayVec<N, Self, S>
205    where
206        for<'a> Self::Output<'a>: Copy + Default,
207    {
208        RepeatArrayVec {
209            parser: self,
210            separator,
211            min_elements,
212        }
213    }
214
215    /// Repeat this parser while it matches, returning a [`Vec`].
216    ///
217    /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
218    /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
219    /// of items is variable but has a known upper bound.
220    ///
221    /// # Examples
222    /// ```
223    /// # use utils::parser::{self, Parser};
224    /// let parser = parser::u32()
225    ///     .repeat(",", 3);
226    /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
227    /// assert!(parser.parse(b"12,34").is_err());
228    /// ```
229    fn repeat<S: Parser>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
230        RepeatVec {
231            parser: self,
232            separator,
233            min_elements,
234        }
235    }
236
237    /// Return the output of this parser as well as the bytes consumed.
238    ///
239    /// This can be used to map any errors that occur while processing the parsed input back to the
240    /// problematic item's position in the input.
241    ///
242    /// # Examples
243    /// ```
244    /// # use utils::parser::{self, Parser};
245    /// assert_eq!(
246    ///     parser::u32().with_consumed().parse(b"012,345,678"),
247    ///     Ok(((12, &b"012"[..]), &b",345,678"[..]))
248    /// );
249    /// ```
250    fn with_consumed(self) -> WithConsumed<Self> {
251        WithConsumed { parser: self }
252    }
253
254    /// Parse a prefix (normally a string literal) before this parser.
255    ///
256    /// The result of the prefix parser is discarded.
257    ///
258    /// # Examples
259    /// ```
260    /// # use utils::parser::{self, Parser};
261    /// assert_eq!(
262    ///     parser::u32()
263    ///         .with_prefix("abc")
264    ///         .parse(b"abc123"),
265    ///     Ok((123, &b""[..]))
266    /// );
267    /// ```
268    fn with_prefix<T: Parser>(self, prefix: T) -> WithPrefix<Self, T> {
269        WithPrefix {
270            parser: self,
271            prefix,
272        }
273    }
274
275    /// Parse a suffix (normally a string literal) after this parser.
276    ///
277    /// The result of the suffix parser is discarded.
278    ///
279    /// # Examples
280    /// ```
281    /// # use utils::parser::{self, Parser};
282    /// assert_eq!(
283    ///     parser::u32()
284    ///         .with_suffix("abc")
285    ///         .parse(b"123abc"),
286    ///     Ok((123, &b""[..]))
287    /// );
288    /// ```
289    fn with_suffix<T: Parser>(self, suffix: T) -> WithSuffix<Self, T> {
290        WithSuffix {
291            parser: self,
292            suffix,
293        }
294    }
295
296    /// Replace this parser's error message with the provided string.
297    ///
298    /// # Examples
299    /// ```
300    /// # use utils::parser::{self, ParseError, Parser};
301    /// let parser = parser::u8()
302    ///     .error_msg("expected power level");
303    /// assert_eq!(
304    ///     parser.parse(b"123"),
305    ///     Ok((123, &b""[..]))
306    /// );
307    /// assert_eq!(
308    ///     parser.parse(b"abc"),
309    ///     Err((ParseError::Custom("expected power level"), &b"abc"[..]))
310    /// );
311    /// ```
312    fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
313        WithErrorMsg {
314            parser: self,
315            message,
316        }
317    }
318
319    /// Apply this parser once, checking the provided input is fully consumed.
320    ///
321    /// # Examples
322    /// ```
323    /// # use utils::parser::{self, Parser};
324    /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
325    /// assert!(parser::u32().parse_complete("1234abc").is_err());
326    /// ```
327    fn parse_complete<'i>(&self, input: &'i str) -> Result<Self::Output<'i>, InputError> {
328        match self.parse(input.as_bytes()).map_with_input(input)? {
329            (v, []) => Ok(v),
330            (_, remaining) => Err(InputError::new(input, remaining, "expected end of input")),
331        }
332    }
333
334    /// Apply this parser repeatedly until the provided input is fully consumed.
335    ///
336    /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
337    ///
338    /// # Examples
339    /// ```
340    /// # use utils::parser::{self, Parser};
341    /// assert_eq!(
342    ///     parser::u32()
343    ///         .then(parser::u32().with_prefix("x"))
344    ///         .with_suffix(",".or(parser::eof()))
345    ///         .parse_all("1x2,3x4,1234x5678")
346    ///         .unwrap(),
347    ///     vec![
348    ///         (1, 2),
349    ///         (3, 4),
350    ///         (1234, 5678),
351    ///     ]
352    /// );
353    /// ```
354    fn parse_all<'i>(&self, input: &'i str) -> Result<Vec<Self::Output<'i>>, InputError> {
355        ParserRef(self)
356            .repeat(Constant(()), 0)
357            .parse_complete(input)
358    }
359
360    /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
361    ///
362    /// Equivalent to `parser.with_suffix(`[`parser::eol()`](super::eol)`).parse_all(input)`.
363    ///
364    /// # Examples
365    /// ```
366    /// # use utils::parser::{self, Parser};
367    /// assert_eq!(
368    ///     parser::u32()
369    ///         .then(parser::u32().with_prefix("x"))
370    ///         .parse_lines("1x2\n3x4\n1234x5678")
371    ///         .unwrap(),
372    ///     vec![
373    ///         (1, 2),
374    ///         (3, 4),
375    ///         (1234, 5678),
376    ///     ]
377    /// );
378    /// ```
379    fn parse_lines<'i>(&self, input: &'i str) -> Result<Vec<Self::Output<'i>>, InputError> {
380        ParserRef(self)
381            .with_suffix(Eol())
382            .repeat(Constant(()), 0)
383            .parse_complete(input)
384    }
385
386    /// Create an iterator which applies this parser repeatedly until the provided input is fully
387    /// consumed.
388    ///
389    /// The returned iterator will lazily parse the provided input string, producing a sequence of
390    /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
391    /// will always return [`None`].
392    ///
393    /// # Examples
394    /// ```
395    /// # use utils::input::InputError;
396    /// # use utils::parser::{self, Parser};
397    /// let iterator = parser::u32()
398    ///     .with_suffix(parser::eol())
399    ///     .parse_iterator("12\n34\n56\n78");
400    /// for item in iterator {
401    ///     println!("{}", item?);
402    /// }
403    /// # Ok::<(), InputError>(())
404    /// ```
405    ///
406    /// ```
407    /// # use utils::parser::{self, Parser};
408    /// let mut iterator = parser::u32()
409    ///     .with_suffix(parser::eol())
410    ///     .parse_iterator("12\n34\nnot a integer");
411    /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
412    /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
413    /// assert!(iterator.next().unwrap().is_err());
414    /// assert!(iterator.next().is_none());
415    /// ```
416    ///
417    /// ```
418    /// # use utils::input::InputError;
419    /// # use utils::parser::{self, Parser};
420    /// let filtered = parser::u32()
421    ///     .with_suffix(parser::eol())
422    ///     .parse_iterator("11\n22\n33\n44\n55")
423    ///     .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
424    ///     .collect::<Result<Vec<u32>, InputError>>()?;
425    /// assert_eq!(filtered, vec![22, 44]);
426    /// # Ok::<(), InputError>(())
427    /// ```
428    fn parse_iterator(self, input: &str) -> ParserIterator<Self> {
429        ParserIterator {
430            input,
431            remaining: input.as_bytes(),
432            parser: self,
433        }
434    }
435
436    /// Create an iterator which returns matches only and skips over errors.
437    ///
438    /// This is intended for cases that require extracting matches out of the input.
439    /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
440    /// match the entire input structure.
441    ///
442    /// # Examples
443    /// ```
444    /// # use utils::parser::{self, Parser};
445    /// assert_eq!(
446    ///     parser::u32()
447    ///         .matches_iterator("abc123d456efg7hi8jk9lmnop")
448    ///         .collect::<Vec<_>>(),
449    ///     vec![123, 456, 7, 8, 9]
450    /// );
451    /// ```
452    fn matches_iterator(self, input: &str) -> ParserMatchesIterator<Self> {
453        ParserMatchesIterator {
454            remaining: input.as_bytes(),
455            parser: self,
456        }
457    }
458}
459
460// Workaround to allow using methods which consume a parser in methods which take references.
461struct ParserRef<'a, P>(&'a P);
462impl<P: Parser> Parser for ParserRef<'_, P> {
463    type Output<'i> = P::Output<'i>;
464    type Then<T: Parser> = Unimplemented;
465
466    #[inline]
467    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
468        self.0.parse(input)
469    }
470}
471
472/// Matches the string literal exactly.
473///
474/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
475impl Parser for &'static str {
476    type Output<'i> = ();
477    type Then<T: Parser> = Then2<Self, T>;
478
479    #[inline]
480    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
481        // This is faster than using strip_prefix for the common case where the string is a short
482        // string literal known at compile time.
483        if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
484            Ok(((), &input[self.len()..]))
485        } else {
486            Err((ParseError::ExpectedLiteral(self), input))
487        }
488    }
489}
490
491/// Matches the byte exactly.
492///
493/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
494impl Parser for u8 {
495    type Output<'i> = ();
496    type Then<T: Parser> = Then2<Self, T>;
497
498    #[inline]
499    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
500        if input.first() == Some(self) {
501            Ok(((), &input[1..]))
502        } else {
503            Err((ParseError::ExpectedByte(*self), input))
504        }
505    }
506}
507
508/// Allow custom functions and closures to be used as parsers.
509impl<O, F: Fn(&[u8]) -> ParseResult<O>> Parser for F {
510    type Output<'i> = O;
511    type Then<T: Parser> = Then2<Self, T>;
512
513    #[inline]
514    fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
515        self(input)
516    }
517}
518
519/// Trait for types that have a canonical parser.
520pub trait Parseable {
521    type Parser: for<'i> Parser<Output<'i> = Self>;
522    const PARSER: Self::Parser;
523}