utils/parser/
base.rs

1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3    Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4    WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
/// [`Result`] type returned by [`Parser::parse`].
///
/// On success this holds the parsed value together with the remaining, unparsed bytes. On failure
/// it holds the [`ParseError`] together with the slice marking the location of the error.
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17pub trait Parser<'i>: Sized {
18    /// Type of the value produced by [`parse`](Self::parse) when successful.
19    type Output;
20
21    /// Type of the chained parser returned by [`then`](Self::then).
22    ///
23    /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
24    /// nesting tuples inside each other.
25    type Then<T: Parser<'i>>: Then<'i, Self, T>;
26
27    /// Parse the given sequence of bytes.
28    ///
29    /// Returns a tuple of the successfully parsed [`Output`](Self::Output) value and the
30    /// remaining bytes, or a tuple containing a [`ParseError`] and the location of the error.
31    ///
32    /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
33    /// (in [`parse_all`](Self::parse_all)) will panic.
34    ///
35    /// # Examples
36    /// ```
37    /// # use utils::parser::{self, Parser};
38    /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
39    /// assert!(parser::u32().parse(b"abc1234").is_err());
40    /// ```
41    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output>;
42
43    // Provided methods
44
45    /// Sequence another parser after this one.
46    ///
47    /// # Examples
48    /// ```
49    /// # use utils::parser::{self, Parser};
50    /// assert_eq!(
51    ///     parser::i32()
52    ///         .then(parser::i32())
53    ///         .parse(b"123-123"),
54    ///     Ok(((123, -123), &b""[..]))
55    /// );
56    /// ```
57    fn then<T: Parser<'i>>(self, next: T) -> Self::Then<T> {
58        Then::then(self, next)
59    }
60
61    /// Attempt to parse using this parser, followed by provided parser.
62    ///
63    /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
64    /// from the parser which parsed further into the input is returned (preferring the first error
65    /// if both errored at the same position).
66    ///
67    /// See also [`parser::one_of`](super::one_of()).
68    ///
69    /// # Examples
70    /// ```
71    /// # use utils::parser::{self, ParseError, Parser};
72    /// let parser = parser::u8()
73    ///     .map(|x| u32::from(x) * 1001001)
74    ///     .or(parser::u32());
75    /// assert_eq!(
76    ///     parser.parse(b"123"),
77    ///     Ok((123123123, &b""[..]))
78    /// );
79    /// assert_eq!(
80    ///     parser.parse(b"1000"),
81    ///     Ok((1000, &b""[..]))
82    /// );
83    /// ```
84    fn or<T: Parser<'i, Output = Self::Output>>(self, alternative: T) -> Or<Self, T> {
85        Or {
86            first: self,
87            second: alternative,
88        }
89    }
90
91    /// Map the output of this parser using the supplied function.
92    ///
93    /// # Examples
94    /// ```
95    /// # use utils::parser::{self, Parser};
96    /// assert_eq!(
97    ///     parser::u32()
98    ///         .map(|x| x * 2)
99    ///         .parse(b"123"),
100    ///     Ok((246, &b""[..]))
101    /// );
102    /// ```
103    ///
104    /// Closure that returns a value borrowing from both its input and an outer variable:
105    /// ```
106    /// # use utils::parser::{self, Parser};
107    /// let my_vec = vec![1, 2, 3];
108    /// assert_eq!(
109    ///     parser::take_while(u8::is_ascii_digit)
110    ///         .map(|x| (x, my_vec.as_slice()))
111    ///         .parse(b"123"),
112    ///     Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
113    /// );
114    /// ```
115    fn map<O, F: Fn(Self::Output) -> O>(self, f: F) -> Map<Self, F> {
116        Map {
117            parser: self,
118            map_fn: f,
119        }
120    }
121
122    /// Map the output of this parser using the supplied fallible function.
123    ///
124    /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
125    ///
126    /// # Examples
127    /// ```
128    /// # use utils::parser::{self, ParseError, Parser};
129    /// let parser = parser::u8()
130    ///     .map_res(|x| x.checked_mul(2).ok_or("input too large"));
131    /// assert_eq!(
132    ///     parser.parse(b"123"),
133    ///     Ok((246, &b""[..]))
134    /// );
135    /// assert_eq!(
136    ///     parser.parse(b"200"),
137    ///     Err((ParseError::Custom("input too large"), &b"200"[..]))
138    /// );
139    /// ```
140    ///
141    /// Closure that returns a value borrowing from both its input and an outer variable:
142    /// ```
143    /// # use utils::parser::{self, Parser};
144    /// let my_vec = vec![1, 2, 3];
145    /// assert_eq!(
146    ///     parser::take_while(u8::is_ascii_digit)
147    ///         .map_res(|x| {
148    ///             if x.len() < 100 {
149    ///                 Ok((x, my_vec.as_slice()))
150    ///             } else {
151    ///                 Err("expected more digits")
152    ///             }
153    ///         })
154    ///         .parse(b"123"),
155    ///     Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
156    /// );
157    /// ```
158    fn map_res<O, F: Fn(Self::Output) -> Result<O, &'static str>>(
159        self,
160        f: F,
161    ) -> MapResult<Self, F> {
162        MapResult {
163            parser: self,
164            map_fn: f,
165        }
166    }
167
168    /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
169    ///
170    /// # Examples
171    /// ```
172    /// # use utils::parser::{self, ParseError, Parser};
173    /// let parser = parser::u32()
174    ///     .optional();
175    /// assert_eq!(
176    ///     parser.parse(b"123"),
177    ///     Ok((Some(123), &b""[..]))
178    /// );
179    /// assert_eq!(
180    ///     parser.parse(b"abc"),
181    ///     Ok((None, &b"abc"[..]))
182    /// );
183    /// ```
184    fn optional(self) -> Optional<Self> {
185        Optional { parser: self }
186    }
187
188    /// Repeat this parser `N` times, returning an [`array`].
189    ///
190    /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
191    /// [`repeat`](Self::repeat).
192    ///
193    /// # Examples
194    /// ```
195    /// # use utils::parser::{self, Parser};
196    /// assert_eq!(
197    ///     parser::u32()
198    ///         .repeat_n(",") // N = 3 is inferred
199    ///         .parse(b"12,34,56"),
200    ///     Ok(([12, 34, 56], &b""[..]))
201    /// );
202    /// ```
203    fn repeat_n<const N: usize, S: Parser<'i>>(self, separator: S) -> RepeatN<N, Self, S>
204    where
205        Self::Output: Copy + Default,
206    {
207        RepeatN {
208            parser: self,
209            separator,
210        }
211    }
212
213    /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
214    ///
215    /// This parser can parse up to `N` items. If more items match, it will return an error.
216    ///
217    /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
218    /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
219    ///
220    /// # Examples
221    /// ```
222    /// # use utils::parser::{self, Parser};
223    /// let parser = parser::u32()
224    ///     .repeat(",", 3);
225    /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
226    /// assert!(parser.parse(b"12,34").is_err());
227    /// ```
228    fn repeat_arrayvec<const N: usize, S: Parser<'i>>(
229        self,
230        separator: S,
231        min_elements: usize,
232    ) -> RepeatArrayVec<N, Self, S>
233    where
234        Self::Output: Copy + Default,
235    {
236        RepeatArrayVec {
237            parser: self,
238            separator,
239            min_elements,
240        }
241    }
242
243    /// Repeat this parser while it matches, returning a [`Vec`].
244    ///
245    /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
246    /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
247    /// of items is variable but has a known upper bound.
248    ///
249    /// # Examples
250    /// ```
251    /// # use utils::parser::{self, Parser};
252    /// let parser = parser::u32()
253    ///     .repeat(",", 3);
254    /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
255    /// assert!(parser.parse(b"12,34").is_err());
256    /// ```
257    fn repeat<S: Parser<'i>>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
258        RepeatVec {
259            parser: self,
260            separator,
261            min_elements,
262        }
263    }
264
265    /// Return the output of this parser as well as the bytes consumed.
266    ///
267    /// This can be used to map any errors that occur while processing the parsed input back to the
268    /// problematic item's position in the input.
269    ///
270    /// # Examples
271    /// ```
272    /// # use utils::parser::{self, Parser};
273    /// assert_eq!(
274    ///     parser::u32().with_consumed().parse(b"012,345,678"),
275    ///     Ok(((12, &b"012"[..]), &b",345,678"[..]))
276    /// );
277    /// ```
278    fn with_consumed(self) -> WithConsumed<Self> {
279        WithConsumed { parser: self }
280    }
281
282    /// Parse a prefix (normally a string literal) before this parser.
283    ///
284    /// The result of the prefix parser is discarded.
285    ///
286    /// # Examples
287    /// ```
288    /// # use utils::parser::{self, Parser};
289    /// assert_eq!(
290    ///     parser::u32()
291    ///         .with_prefix("abc")
292    ///         .parse(b"abc123"),
293    ///     Ok((123, &b""[..]))
294    /// );
295    /// ```
296    fn with_prefix<T: Parser<'i>>(self, prefix: T) -> WithPrefix<Self, T> {
297        WithPrefix {
298            parser: self,
299            prefix,
300        }
301    }
302
303    /// Parse a suffix (normally a string literal) after this parser.
304    ///
305    /// The result of the suffix parser is discarded.
306    ///
307    /// # Examples
308    /// ```
309    /// # use utils::parser::{self, Parser};
310    /// assert_eq!(
311    ///     parser::u32()
312    ///         .with_suffix("abc")
313    ///         .parse(b"123abc"),
314    ///     Ok((123, &b""[..]))
315    /// );
316    /// ```
317    fn with_suffix<T: Parser<'i>>(self, suffix: T) -> WithSuffix<Self, T> {
318        WithSuffix {
319            parser: self,
320            suffix,
321        }
322    }
323
324    /// Replace this parser's error message with the provided string.
325    ///
326    /// # Examples
327    /// ```
328    /// # use utils::parser::{self, ParseError, Parser};
329    /// let parser = parser::u8()
330    ///     .error_msg("expected power level");
331    /// assert_eq!(
332    ///     parser.parse(b"123"),
333    ///     Ok((123, &b""[..]))
334    /// );
335    /// assert_eq!(
336    ///     parser.parse(b"abc"),
337    ///     Err((ParseError::Custom("expected power level"), &b"abc"[..]))
338    /// );
339    /// ```
340    fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
341        WithErrorMsg {
342            parser: self,
343            message,
344        }
345    }
346
347    /// Apply this parser once, checking the provided input is fully consumed.
348    ///
349    /// # Examples
350    /// ```
351    /// # use utils::parser::{self, Parser};
352    /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
353    /// assert!(parser::u32().parse_complete("1234abc").is_err());
354    /// ```
355    fn parse_complete(&self, input: &'i str) -> Result<Self::Output, InputError> {
356        match self.parse(input.as_bytes()).map_with_input(input)? {
357            (v, []) => Ok(v),
358            (_, remaining) => Err(InputError::new(input, remaining, ParseError::ExpectedEof())),
359        }
360    }
361
362    /// Apply this parser repeatedly until the provided input is fully consumed.
363    ///
364    /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
365    ///
366    /// # Examples
367    /// ```
368    /// # use utils::parser::{self, Parser};
369    /// assert_eq!(
370    ///     parser::u32()
371    ///         .then(parser::u32().with_prefix("x"))
372    ///         .with_suffix(",".or(parser::eof()))
373    ///         .parse_all("1x2,3x4,1234x5678")
374    ///         .unwrap(),
375    ///     vec![
376    ///         (1, 2),
377    ///         (3, 4),
378    ///         (1234, 5678),
379    ///     ]
380    /// );
381    /// ```
382    fn parse_all(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
383        ParserRef(self)
384            .repeat(Constant(()), 0)
385            .parse_complete(input)
386    }
387
388    /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
389    ///
390    /// Equivalent to `parser.with_suffix(`[`parser::eol()`](super::eol)`).parse_all(input)`.
391    ///
392    /// # Examples
393    /// ```
394    /// # use utils::parser::{self, Parser};
395    /// assert_eq!(
396    ///     parser::u32()
397    ///         .then(parser::u32().with_prefix("x"))
398    ///         .parse_lines("1x2\n3x4\n1234x5678")
399    ///         .unwrap(),
400    ///     vec![
401    ///         (1, 2),
402    ///         (3, 4),
403    ///         (1234, 5678),
404    ///     ]
405    /// );
406    /// ```
407    fn parse_lines(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
408        ParserRef(self)
409            .with_suffix(Eol())
410            .repeat(Constant(()), 0)
411            .parse_complete(input)
412    }
413
414    /// Create an iterator which applies this parser repeatedly until the provided input is fully
415    /// consumed.
416    ///
417    /// The returned iterator will lazily parse the provided input string, producing a sequence of
418    /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
419    /// will always return [`None`].
420    ///
421    /// # Examples
422    /// ```
423    /// # use utils::input::InputError;
424    /// # use utils::parser::{self, Parser};
425    /// let iterator = parser::u32()
426    ///     .with_suffix(parser::eol())
427    ///     .parse_iterator("12\n34\n56\n78");
428    /// for item in iterator {
429    ///     println!("{}", item?);
430    /// }
431    /// # Ok::<(), InputError>(())
432    /// ```
433    ///
434    /// ```
435    /// # use utils::parser::{self, Parser};
436    /// let mut iterator = parser::u32()
437    ///     .with_suffix(parser::eol())
438    ///     .parse_iterator("12\n34\nnot a integer");
439    /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
440    /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
441    /// assert!(iterator.next().unwrap().is_err());
442    /// assert!(iterator.next().is_none());
443    /// ```
444    ///
445    /// ```
446    /// # use utils::input::InputError;
447    /// # use utils::parser::{self, Parser};
448    /// let filtered = parser::u32()
449    ///     .with_suffix(parser::eol())
450    ///     .parse_iterator("11\n22\n33\n44\n55")
451    ///     .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
452    ///     .collect::<Result<Vec<u32>, InputError>>()?;
453    /// assert_eq!(filtered, vec![22, 44]);
454    /// # Ok::<(), InputError>(())
455    /// ```
456    fn parse_iterator(self, input: &str) -> ParserIterator<'_, Self> {
457        ParserIterator {
458            input,
459            remaining: input.as_bytes(),
460            parser: self,
461        }
462    }
463
464    /// Create an iterator which returns matches only and skips over errors.
465    ///
466    /// This is intended for cases that require extracting matches out of the input.
467    /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
468    /// match the entire input structure.
469    ///
470    /// # Examples
471    /// ```
472    /// # use utils::parser::{self, Parser};
473    /// assert_eq!(
474    ///     parser::u32()
475    ///         .matches_iterator("abc123d456efg7hi8jk9lmnop")
476    ///         .collect::<Vec<_>>(),
477    ///     vec![123, 456, 7, 8, 9]
478    /// );
479    /// ```
480    fn matches_iterator(self, input: &str) -> ParserMatchesIterator<'_, Self> {
481        ParserMatchesIterator {
482            remaining: input.as_bytes(),
483            parser: self,
484        }
485    }
486}
487
488// Workaround to allow using methods which consume a parser in methods which take references.
489struct ParserRef<'a, P>(&'a P);
490impl<'i, P: Parser<'i>> Parser<'i> for ParserRef<'_, P> {
491    type Output = P::Output;
492    type Then<T: Parser<'i>> = Unimplemented;
493
494    #[inline]
495    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
496        self.0.parse(input)
497    }
498}
499
500/// Matches the string literal exactly.
501///
502/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
503impl<'i> Parser<'i> for &'static str {
504    type Output = ();
505    type Then<T: Parser<'i>> = Then2<Self, T>;
506
507    #[inline]
508    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
509        // This is faster than using strip_prefix for the common case where the string is a short
510        // string literal known at compile time.
511        if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
512            Ok(((), &input[self.len()..]))
513        } else {
514            Err((ParseError::ExpectedLiteral(self), input))
515        }
516    }
517}
518
519/// Matches the byte exactly.
520///
521/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
522impl<'i> Parser<'i> for u8 {
523    type Output = ();
524    type Then<T: Parser<'i>> = Then2<Self, T>;
525
526    #[inline]
527    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
528        if input.first() == Some(self) {
529            Ok(((), &input[1..]))
530        } else {
531            Err((ParseError::ExpectedByte(*self), input))
532        }
533    }
534}
535
536/// Allow custom functions and closures to be used as parsers.
537impl<'i, O, F: Fn(&'i [u8]) -> ParseResult<'i, O>> Parser<'i> for F {
538    type Output = O;
539    type Then<T: Parser<'i>> = Then2<Self, T>;
540
541    #[inline]
542    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
543        self(input)
544    }
545}
546
/// Trait for types that have a canonical parser.
pub trait Parseable {
    /// Type of the canonical parser, which must produce `Self` for every input lifetime.
    type Parser: for<'i> Parser<'i, Output = Self>;
    /// The canonical parser for this type, provided as a constant value.
    const PARSER: Self::Parser;
}
551}