utils/parser/
base.rs

1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3    Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4    WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
/// [`Result`] type returned by [`Parser::parse`].
///
/// The `Ok` variant pairs the parsed value with a byte slice, and the `Err` variant pairs a
/// [`ParseError`] with a byte slice. Both slices share the input lifetime `'i`.
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17#[must_use]
18pub trait Parser<'i>: Sized {
    /// Type of the value produced by [`parse`](Self::parse) when successful.
    type Output;

    /// Type of the chained parser returned by [`then`](Self::then).
    ///
    /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
    /// nesting tuples inside each other.
    ///
    /// The chosen type must implement [`Then`] for this parser followed by `T`.
    type Then<T: Parser<'i>>: Then<'i, Self, T>;
27
    /// Parse the given sequence of bytes.
    ///
    /// On success, returns a tuple of the parsed [`Output`](Self::Output) value and the
    /// remaining bytes. On failure, returns a tuple containing a [`ParseError`] and the location
    /// of the error.
    ///
    /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
    /// (in [`parse_all`](Self::parse_all)) will panic.
    ///
    /// # Examples
    /// ```
    /// # use utils::parser::{self, Parser};
    /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
    /// assert!(parser::u32().parse(b"abc1234").is_err());
    /// ```
    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output>;
43
44    // Provided methods
45
46    /// Sequence another parser after this one.
47    ///
48    /// # Examples
49    /// ```
50    /// # use utils::parser::{self, Parser};
51    /// assert_eq!(
52    ///     parser::i32()
53    ///         .then(parser::i32())
54    ///         .parse(b"123-123"),
55    ///     Ok(((123, -123), &b""[..]))
56    /// );
57    /// ```
58    #[inline]
59    fn then<T: Parser<'i>>(self, next: T) -> Self::Then<T> {
60        Then::then(self, next)
61    }
62
63    /// Attempt to parse using this parser, followed by provided parser.
64    ///
65    /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
66    /// from the parser which parsed further into the input is returned (preferring the first error
67    /// if both errored at the same position).
68    ///
69    /// See also [`parser::one_of`](super::one_of()).
70    ///
71    /// # Examples
72    /// ```
73    /// # use utils::parser::{self, ParseError, Parser};
74    /// let parser = parser::u8()
75    ///     .map(|x| u32::from(x) * 1001001)
76    ///     .or(parser::u32());
77    /// assert_eq!(
78    ///     parser.parse(b"123"),
79    ///     Ok((123123123, &b""[..]))
80    /// );
81    /// assert_eq!(
82    ///     parser.parse(b"1000"),
83    ///     Ok((1000, &b""[..]))
84    /// );
85    /// ```
86    #[inline]
87    fn or<T: Parser<'i, Output = Self::Output>>(self, alternative: T) -> Or<Self, T> {
88        Or {
89            first: self,
90            second: alternative,
91        }
92    }
93
94    /// Map the output of this parser using the supplied function.
95    ///
96    /// # Examples
97    /// ```
98    /// # use utils::parser::{self, Parser};
99    /// assert_eq!(
100    ///     parser::u32()
101    ///         .map(|x| x * 2)
102    ///         .parse(b"123"),
103    ///     Ok((246, &b""[..]))
104    /// );
105    /// ```
106    ///
107    /// Closure that returns a value borrowing from both its input and an outer variable:
108    /// ```
109    /// # use utils::parser::{self, Parser};
110    /// let my_vec = vec![1, 2, 3];
111    /// assert_eq!(
112    ///     parser::take_while(u8::is_ascii_digit)
113    ///         .map(|x| (x, my_vec.as_slice()))
114    ///         .parse(b"123"),
115    ///     Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
116    /// );
117    /// ```
118    #[inline]
119    fn map<O, F: Fn(Self::Output) -> O>(self, f: F) -> Map<Self, F> {
120        Map {
121            parser: self,
122            map_fn: f,
123        }
124    }
125
126    /// Map the output of this parser using the supplied fallible function.
127    ///
128    /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
129    ///
130    /// # Examples
131    /// ```
132    /// # use utils::parser::{self, ParseError, Parser};
133    /// let parser = parser::u8()
134    ///     .map_res(|x| x.checked_mul(2).ok_or("input too large"));
135    /// assert_eq!(
136    ///     parser.parse(b"123"),
137    ///     Ok((246, &b""[..]))
138    /// );
139    /// assert_eq!(
140    ///     parser.parse(b"200"),
141    ///     Err((ParseError::Custom("input too large"), &b"200"[..]))
142    /// );
143    /// ```
144    ///
145    /// Closure that returns a value borrowing from both its input and an outer variable:
146    /// ```
147    /// # use utils::parser::{self, Parser};
148    /// let my_vec = vec![1, 2, 3];
149    /// assert_eq!(
150    ///     parser::take_while(u8::is_ascii_digit)
151    ///         .map_res(|x| {
152    ///             if x.len() < 100 {
153    ///                 Ok((x, my_vec.as_slice()))
154    ///             } else {
155    ///                 Err("expected more digits")
156    ///             }
157    ///         })
158    ///         .parse(b"123"),
159    ///     Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
160    /// );
161    /// ```
162    #[inline]
163    fn map_res<O, F: Fn(Self::Output) -> Result<O, &'static str>>(
164        self,
165        f: F,
166    ) -> MapResult<Self, F> {
167        MapResult {
168            parser: self,
169            map_fn: f,
170        }
171    }
172
173    /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
174    ///
175    /// # Examples
176    /// ```
177    /// # use utils::parser::{self, ParseError, Parser};
178    /// let parser = parser::u32()
179    ///     .optional();
180    /// assert_eq!(
181    ///     parser.parse(b"123"),
182    ///     Ok((Some(123), &b""[..]))
183    /// );
184    /// assert_eq!(
185    ///     parser.parse(b"abc"),
186    ///     Ok((None, &b"abc"[..]))
187    /// );
188    /// ```
189    #[inline]
190    fn optional(self) -> Optional<Self> {
191        Optional { parser: self }
192    }
193
194    /// Repeat this parser `N` times, returning an [`array`].
195    ///
196    /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
197    /// [`repeat`](Self::repeat).
198    ///
199    /// # Examples
200    /// ```
201    /// # use utils::parser::{self, Parser};
202    /// assert_eq!(
203    ///     parser::u32()
204    ///         .repeat_n(",") // N = 3 is inferred
205    ///         .parse(b"12,34,56"),
206    ///     Ok(([12, 34, 56], &b""[..]))
207    /// );
208    /// ```
209    #[inline]
210    fn repeat_n<const N: usize, S: Parser<'i>>(self, separator: S) -> RepeatN<N, Self, S>
211    where
212        Self::Output: Copy + Default,
213    {
214        RepeatN {
215            parser: self,
216            separator,
217        }
218    }
219
220    /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
221    ///
222    /// This parser can parse up to `N` items. If more items match, it will return an error.
223    ///
224    /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
225    /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
226    ///
227    /// # Examples
228    /// ```
229    /// # use utils::array::ArrayVec;
230    /// use utils::parser::{self, Parser};
231    /// let parser = parser::u32()
232    ///     .repeat_arrayvec::<5, _>(",", 3);
233    /// assert_eq!(
234    ///     parser.parse(b"12,34,56,78"),
235    ///     Ok((ArrayVec::from_slice(&[12, 34, 56, 78]).unwrap(), &b""[..]))
236    /// );
237    /// assert_eq!(
238    ///     parser.parse(b"12,34,56,abc"),
239    ///     Ok((ArrayVec::from_slice(&[12, 34, 56]).unwrap(), &b",abc"[..]))
240    /// );
241    /// assert!(parser.parse(b"12,34").is_err());
242    /// ```
243    #[inline]
244    fn repeat_arrayvec<const N: usize, S: Parser<'i>>(
245        self,
246        separator: S,
247        min_elements: usize,
248    ) -> RepeatArrayVec<N, Self, S>
249    where
250        Self::Output: Copy + Default,
251    {
252        RepeatArrayVec {
253            parser: self,
254            separator,
255            min_elements,
256        }
257    }
258
259    /// Repeat this parser while it matches, returning a [`Vec`].
260    ///
261    /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
262    /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
263    /// of items is variable but has a known upper bound.
264    ///
265    /// # Examples
266    /// ```
267    /// # use utils::parser::{self, Parser};
268    /// let parser = parser::u32()
269    ///     .repeat(",", 3);
270    /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
271    /// assert_eq!(parser.parse(b"12,34,56,abc"), Ok((vec![12, 34, 56], &b",abc"[..])));
272    /// assert!(parser.parse(b"12,34").is_err());
273    /// ```
274    #[inline]
275    fn repeat<S: Parser<'i>>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
276        RepeatVec {
277            parser: self,
278            separator,
279            min_elements,
280        }
281    }
282
283    /// Return the output of this parser as well as the bytes consumed.
284    ///
285    /// This can be used to map any errors that occur while processing the parsed input back to the
286    /// problematic item's position in the input.
287    ///
288    /// # Examples
289    /// ```
290    /// # use utils::parser::{self, Parser};
291    /// assert_eq!(
292    ///     parser::u32().with_consumed().parse(b"012,345,678"),
293    ///     Ok(((12, &b"012"[..]), &b",345,678"[..]))
294    /// );
295    /// ```
296    #[inline]
297    fn with_consumed(self) -> WithConsumed<Self> {
298        WithConsumed { parser: self }
299    }
300
301    /// Parse a prefix (normally a string literal) before this parser.
302    ///
303    /// The result of the prefix parser is discarded.
304    ///
305    /// # Examples
306    /// ```
307    /// # use utils::parser::{self, Parser};
308    /// assert_eq!(
309    ///     parser::u32()
310    ///         .with_prefix("abc")
311    ///         .parse(b"abc123"),
312    ///     Ok((123, &b""[..]))
313    /// );
314    /// ```
315    #[inline]
316    fn with_prefix<T: Parser<'i>>(self, prefix: T) -> WithPrefix<Self, T> {
317        WithPrefix {
318            parser: self,
319            prefix,
320        }
321    }
322
323    /// Parse a suffix (normally a string literal) after this parser.
324    ///
325    /// The result of the suffix parser is discarded.
326    ///
327    /// # Examples
328    /// ```
329    /// # use utils::parser::{self, Parser};
330    /// assert_eq!(
331    ///     parser::u32()
332    ///         .with_suffix("abc")
333    ///         .parse(b"123abc"),
334    ///     Ok((123, &b""[..]))
335    /// );
336    /// ```
337    #[inline]
338    fn with_suffix<T: Parser<'i>>(self, suffix: T) -> WithSuffix<Self, T> {
339        WithSuffix {
340            parser: self,
341            suffix,
342        }
343    }
344
345    /// Parse a end of line (or end of string) after this parser.
346    ///
347    /// Equivalent to [`parser.with_suffix`](Parser::with_suffix)`(`[`parser::eol()`](super::eol)`)`.
348    ///
349    /// # Examples
350    /// ```
351    /// # use utils::parser::{self, Parser};
352    /// assert_eq!(
353    ///     parser::u32().with_eol()
354    ///         .parse(b"123\nabc"),
355    ///     Ok((123, &b"abc"[..]))
356    /// );
357    /// ```
358    #[inline]
359    fn with_eol(self) -> WithSuffix<Self, Eol> {
360        WithSuffix {
361            parser: self,
362            suffix: Eol(),
363        }
364    }
365
366    /// Replace this parser's error message with the provided string.
367    ///
368    /// # Examples
369    /// ```
370    /// # use utils::parser::{self, ParseError, Parser};
371    /// let parser = parser::u8()
372    ///     .error_msg("expected power level");
373    /// assert_eq!(
374    ///     parser.parse(b"123"),
375    ///     Ok((123, &b""[..]))
376    /// );
377    /// assert_eq!(
378    ///     parser.parse(b"abc"),
379    ///     Err((ParseError::Custom("expected power level"), &b"abc"[..]))
380    /// );
381    /// ```
382    #[inline]
383    fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
384        WithErrorMsg {
385            parser: self,
386            message,
387        }
388    }
389
390    /// Apply this parser once, checking the provided input is fully consumed.
391    ///
392    /// # Examples
393    /// ```
394    /// # use utils::parser::{self, Parser};
395    /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
396    /// assert!(parser::u32().parse_complete("1234abc").is_err());
397    /// ```
398    #[inline]
399    fn parse_complete(&self, input: &'i str) -> Result<Self::Output, InputError> {
400        match self.parse(input.as_bytes()).map_with_input(input)? {
401            (v, []) => Ok(v),
402            (_, remaining) => Err(InputError::new(input, remaining, ParseError::ExpectedEof())),
403        }
404    }
405
406    /// Apply this parser repeatedly until the provided input is fully consumed.
407    ///
408    /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
409    ///
410    /// # Examples
411    /// ```
412    /// # use utils::parser::{self, Parser};
413    /// assert_eq!(
414    ///     parser::u32()
415    ///         .then(parser::u32().with_prefix("x"))
416    ///         .with_suffix(",".or(parser::eof()))
417    ///         .parse_all("1x2,3x4,1234x5678")
418    ///         .unwrap(),
419    ///     vec![
420    ///         (1, 2),
421    ///         (3, 4),
422    ///         (1234, 5678),
423    ///     ]
424    /// );
425    /// ```
426    #[inline]
427    fn parse_all(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
428        ParserRef(self)
429            .repeat(Constant(()), 0)
430            .parse_complete(input)
431    }
432
433    /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
434    ///
435    /// Equivalent to [`parser.with_eol()`](Parser::with_eol)`.parse_all(input)`.
436    ///
437    /// # Examples
438    /// ```
439    /// # use utils::parser::{self, Parser};
440    /// assert_eq!(
441    ///     parser::u32()
442    ///         .then(parser::u32().with_prefix("x"))
443    ///         .parse_lines("1x2\n3x4\n1234x5678")
444    ///         .unwrap(),
445    ///     vec![
446    ///         (1, 2),
447    ///         (3, 4),
448    ///         (1234, 5678),
449    ///     ]
450    /// );
451    /// ```
452    #[inline]
453    fn parse_lines(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
454        ParserRef(self)
455            .with_suffix(Eol())
456            .repeat(Constant(()), 0)
457            .parse_complete(input)
458    }
459
460    /// Create an iterator which applies this parser repeatedly until the provided input is fully
461    /// consumed.
462    ///
463    /// The returned iterator will lazily parse the provided input string, producing a sequence of
464    /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
465    /// will always return [`None`].
466    ///
467    /// # Examples
468    /// ```
469    /// # use utils::input::InputError;
470    /// # use utils::parser::{self, Parser};
471    /// let iterator = parser::u32()
472    ///     .with_eol()
473    ///     .parse_iterator("12\n34\n56\n78");
474    /// for item in iterator {
475    ///     println!("{}", item?);
476    /// }
477    /// # Ok::<(), InputError>(())
478    /// ```
479    ///
480    /// ```
481    /// # use utils::parser::{self, Parser};
482    /// let mut iterator = parser::u32()
483    ///     .with_eol()
484    ///     .parse_iterator("12\n34\nnot a integer");
485    /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
486    /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
487    /// assert!(iterator.next().unwrap().is_err());
488    /// assert!(iterator.next().is_none());
489    /// ```
490    ///
491    /// ```
492    /// # use utils::input::InputError;
493    /// # use utils::parser::{self, Parser};
494    /// let filtered = parser::u32()
495    ///     .with_eol()
496    ///     .parse_iterator("11\n22\n33\n44\n55")
497    ///     .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
498    ///     .collect::<Result<Vec<u32>, InputError>>()?;
499    /// assert_eq!(filtered, vec![22, 44]);
500    /// # Ok::<(), InputError>(())
501    /// ```
502    #[inline]
503    fn parse_iterator(self, input: &str) -> ParserIterator<'_, Self> {
504        ParserIterator {
505            input,
506            remaining: input.as_bytes(),
507            parser: self,
508        }
509    }
510
511    /// Create an iterator which returns matches only and skips over errors.
512    ///
513    /// This is intended for cases that require extracting matches out of the input.
514    /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
515    /// match the entire input structure.
516    ///
517    /// # Examples
518    /// ```
519    /// # use utils::parser::{self, Parser};
520    /// assert_eq!(
521    ///     parser::u32()
522    ///         .matches_iterator("abc123d456efg7hi8jk9lmnop")
523    ///         .collect::<Vec<_>>(),
524    ///     vec![123, 456, 7, 8, 9]
525    /// );
526    /// ```
527    #[inline]
528    fn matches_iterator(self, input: &str) -> ParserMatchesIterator<'_, Self> {
529        ParserMatchesIterator {
530            remaining: input.as_bytes(),
531            parser: self,
532        }
533    }
534}
535
536// Workaround to allow using methods which consume a parser in methods which take references.
537struct ParserRef<'a, P>(&'a P);
538impl<'i, P: Parser<'i>> Parser<'i> for ParserRef<'_, P> {
539    type Output = P::Output;
540    type Then<T: Parser<'i>> = Unimplemented;
541
542    #[inline]
543    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
544        self.0.parse(input)
545    }
546}
547
548/// Matches the string literal exactly.
549///
550/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
551impl<'i> Parser<'i> for &'static str {
552    type Output = ();
553    type Then<T: Parser<'i>> = Then2<Self, T>;
554
555    #[inline]
556    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
557        // This is faster than using strip_prefix for the common case where the string is a short
558        // string literal known at compile time.
559        if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
560            Ok(((), &input[self.len()..]))
561        } else {
562            Err((ParseError::ExpectedLiteral(self), input))
563        }
564    }
565}
566
567/// Matches the byte exactly.
568///
569/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
570impl<'i> Parser<'i> for u8 {
571    type Output = ();
572    type Then<T: Parser<'i>> = Then2<Self, T>;
573
574    #[inline]
575    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
576        if input.first() == Some(self) {
577            Ok(((), &input[1..]))
578        } else {
579            Err((ParseError::ExpectedByte(*self), input))
580        }
581    }
582}
583
584/// Allow custom functions and closures to be used as parsers.
585impl<'i, O, F: Fn(&'i [u8]) -> ParseResult<'i, O>> Parser<'i> for F {
586    type Output = O;
587    type Then<T: Parser<'i>> = Then2<Self, T>;
588
589    #[inline]
590    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
591        self(input)
592    }
593}
594
/// Trait for types that have a canonical parser.
pub trait Parseable {
    /// Type of the canonical parser, which must produce `Self` for every input lifetime.
    type Parser: for<'i> Parser<'i, Output = Self>;
    /// Constant instance of the canonical parser.
    const PARSER: Self::Parser;
}