utils/parser/base.rs
1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3 Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4 WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
/// [`Result`] type returned by [`Parser::parse`].
///
/// On success, holds the parsed value together with the remaining (unparsed) input bytes.
/// On failure, holds the [`ParseError`] together with the input slice locating the error.
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17pub trait Parser: Sized {
18 /// Type of the value produced by [`parse`](Self::parse) when successful.
19 ///
20 /// Generic over the input `'i` lifetime.
21 type Output<'i>;
22
23 /// Type of the chained parser returned by [`then`](Self::then).
24 ///
25 /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
26 /// nesting tuples inside each other.
27 type Then<T: Parser>: Then<Self, T>;
28
29 /// Parse the given sequence of bytes.
30 ///
31 /// Returns a tuple of the successfully parsed [`Output`](Self::Output) value and the
32 /// remaining bytes, or a tuple containing a [`ParseError`] and the location of the error.
33 ///
34 /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
35 /// (in [`parse_all`](Self::parse_all)) will panic.
36 ///
37 /// # Examples
38 /// ```
39 /// # use utils::parser::{self, Parser};
40 /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
41 /// assert!(parser::u32().parse(b"abc1234").is_err());
42 /// ```
43 fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>>;
44
45 // Provided methods
46
47 /// Sequence another parser after this one.
48 ///
49 /// # Examples
50 /// ```
51 /// # use utils::parser::{self, Parser};
52 /// assert_eq!(
53 /// parser::i32()
54 /// .then(parser::i32())
55 /// .parse(b"123-123"),
56 /// Ok(((123, -123), &b""[..]))
57 /// );
58 /// ```
59 fn then<T: Parser>(self, next: T) -> Self::Then<T> {
60 Then::then(self, next)
61 }
62
63 /// Attempt to parse using this parser, followed by provided parser.
64 ///
65 /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
66 /// from the parser which parsed further into the input is returned (preferring the first error
67 /// if both errored at the same position).
68 ///
69 /// See also [`parser::one_of`](super::one_of()).
70 ///
71 /// # Examples
72 /// ```
73 /// # use utils::parser::{self, ParseError, Parser};
74 /// let parser = parser::u8()
75 /// .map(|x| u32::from(x) * 1001001)
76 /// .or(parser::u32());
77 /// assert_eq!(
78 /// parser.parse(b"123"),
79 /// Ok((123123123, &b""[..]))
80 /// );
81 /// assert_eq!(
82 /// parser.parse(b"1000"),
83 /// Ok((1000, &b""[..]))
84 /// );
85 /// ```
86 fn or<T: for<'i> Parser<Output<'i> = Self::Output<'i>>>(self, alternative: T) -> Or<Self, T> {
87 Or {
88 first: self,
89 second: alternative,
90 }
91 }
92
93 /// Map the output of this parser using the supplied function.
94 ///
95 /// # Examples
96 /// ```
97 /// # use utils::parser::{self, Parser};
98 /// assert_eq!(
99 /// parser::u32()
100 /// .map(|x| x * 2)
101 /// .parse(b"123"),
102 /// Ok((246, &b""[..]))
103 /// );
104 /// ```
105 fn map<O, F: for<'i> Fn(Self::Output<'i>) -> O>(self, f: F) -> Map<Self, F> {
106 Map {
107 parser: self,
108 map_fn: f,
109 }
110 }
111
112 /// Map the output of this parser using the supplied fallible function.
113 ///
114 /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
115 ///
116 /// # Examples
117 /// ```
118 /// # use utils::parser::{self, ParseError, Parser};
119 /// let parser = parser::u8()
120 /// .map_res(|x| x.checked_mul(2).ok_or("input too large"));
121 /// assert_eq!(
122 /// parser.parse(b"123"),
123 /// Ok((246, &b""[..]))
124 /// );
125 /// assert_eq!(
126 /// parser.parse(b"200"),
127 /// Err((ParseError::Custom("input too large"), &b"200"[..]))
128 /// );
129 /// ```
130 fn map_res<O, F: for<'i> Fn(Self::Output<'i>) -> Result<O, &'static str>>(
131 self,
132 f: F,
133 ) -> MapResult<Self, F> {
134 MapResult {
135 parser: self,
136 map_fn: f,
137 }
138 }
139
140 /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
141 ///
142 /// # Examples
143 /// ```
144 /// # use utils::parser::{self, ParseError, Parser};
145 /// let parser = parser::u32()
146 /// .optional();
147 /// assert_eq!(
148 /// parser.parse(b"123"),
149 /// Ok((Some(123), &b""[..]))
150 /// );
151 /// assert_eq!(
152 /// parser.parse(b"abc"),
153 /// Ok((None, &b"abc"[..]))
154 /// );
155 /// ```
156 fn optional(self) -> Optional<Self> {
157 Optional { parser: self }
158 }
159
160 /// Repeat this parser `N` times, returning an [`array`].
161 ///
162 /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
163 /// [`repeat`](Self::repeat).
164 ///
165 /// # Examples
166 /// ```
167 /// # use utils::parser::{self, Parser};
168 /// assert_eq!(
169 /// parser::u32()
170 /// .repeat_n(",") // N = 3 is inferred
171 /// .parse(b"12,34,56"),
172 /// Ok(([12, 34, 56], &b""[..]))
173 /// );
174 /// ```
175 fn repeat_n<const N: usize, S: Parser>(self, separator: S) -> RepeatN<N, Self, S>
176 where
177 for<'i> Self::Output<'i>: Copy + Default,
178 {
179 RepeatN {
180 parser: self,
181 separator,
182 }
183 }
184
185 /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
186 ///
187 /// This parser can parse up to `N` items. If more items match, it will return an error.
188 ///
189 /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
190 /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
191 ///
192 /// # Examples
193 /// ```
194 /// # use utils::parser::{self, Parser};
195 /// let parser = parser::u32()
196 /// .repeat(",", 3);
197 /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
198 /// assert!(parser.parse(b"12,34").is_err());
199 /// ```
200 fn repeat_arrayvec<const N: usize, S: Parser>(
201 self,
202 separator: S,
203 min_elements: usize,
204 ) -> RepeatArrayVec<N, Self, S>
205 where
206 for<'a> Self::Output<'a>: Copy + Default,
207 {
208 RepeatArrayVec {
209 parser: self,
210 separator,
211 min_elements,
212 }
213 }
214
215 /// Repeat this parser while it matches, returning a [`Vec`].
216 ///
217 /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
218 /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
219 /// of items is variable but has a known upper bound.
220 ///
221 /// # Examples
222 /// ```
223 /// # use utils::parser::{self, Parser};
224 /// let parser = parser::u32()
225 /// .repeat(",", 3);
226 /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
227 /// assert!(parser.parse(b"12,34").is_err());
228 /// ```
229 fn repeat<S: Parser>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
230 RepeatVec {
231 parser: self,
232 separator,
233 min_elements,
234 }
235 }
236
237 /// Return the output of this parser as well as the bytes consumed.
238 ///
239 /// This can be used to map any errors that occur while processing the parsed input back to the
240 /// problematic item's position in the input.
241 ///
242 /// # Examples
243 /// ```
244 /// # use utils::parser::{self, Parser};
245 /// assert_eq!(
246 /// parser::u32().with_consumed().parse(b"012,345,678"),
247 /// Ok(((12, &b"012"[..]), &b",345,678"[..]))
248 /// );
249 /// ```
250 fn with_consumed(self) -> WithConsumed<Self> {
251 WithConsumed { parser: self }
252 }
253
254 /// Parse a prefix (normally a string literal) before this parser.
255 ///
256 /// The result of the prefix parser is discarded.
257 ///
258 /// # Examples
259 /// ```
260 /// # use utils::parser::{self, Parser};
261 /// assert_eq!(
262 /// parser::u32()
263 /// .with_prefix("abc")
264 /// .parse(b"abc123"),
265 /// Ok((123, &b""[..]))
266 /// );
267 /// ```
268 fn with_prefix<T: Parser>(self, prefix: T) -> WithPrefix<Self, T> {
269 WithPrefix {
270 parser: self,
271 prefix,
272 }
273 }
274
275 /// Parse a suffix (normally a string literal) after this parser.
276 ///
277 /// The result of the suffix parser is discarded.
278 ///
279 /// # Examples
280 /// ```
281 /// # use utils::parser::{self, Parser};
282 /// assert_eq!(
283 /// parser::u32()
284 /// .with_suffix("abc")
285 /// .parse(b"123abc"),
286 /// Ok((123, &b""[..]))
287 /// );
288 /// ```
289 fn with_suffix<T: Parser>(self, suffix: T) -> WithSuffix<Self, T> {
290 WithSuffix {
291 parser: self,
292 suffix,
293 }
294 }
295
296 /// Replace this parser's error message with the provided string.
297 ///
298 /// # Examples
299 /// ```
300 /// # use utils::parser::{self, ParseError, Parser};
301 /// let parser = parser::u8()
302 /// .error_msg("expected power level");
303 /// assert_eq!(
304 /// parser.parse(b"123"),
305 /// Ok((123, &b""[..]))
306 /// );
307 /// assert_eq!(
308 /// parser.parse(b"abc"),
309 /// Err((ParseError::Custom("expected power level"), &b"abc"[..]))
310 /// );
311 /// ```
312 fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
313 WithErrorMsg {
314 parser: self,
315 message,
316 }
317 }
318
319 /// Apply this parser once, checking the provided input is fully consumed.
320 ///
321 /// # Examples
322 /// ```
323 /// # use utils::parser::{self, Parser};
324 /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
325 /// assert!(parser::u32().parse_complete("1234abc").is_err());
326 /// ```
327 fn parse_complete<'i>(&self, input: &'i str) -> Result<Self::Output<'i>, InputError> {
328 match self.parse(input.as_bytes()).map_with_input(input)? {
329 (v, []) => Ok(v),
330 (_, remaining) => Err(InputError::new(input, remaining, "expected end of input")),
331 }
332 }
333
334 /// Apply this parser repeatedly until the provided input is fully consumed.
335 ///
336 /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
337 ///
338 /// # Examples
339 /// ```
340 /// # use utils::parser::{self, Parser};
341 /// assert_eq!(
342 /// parser::u32()
343 /// .then(parser::u32().with_prefix("x"))
344 /// .with_suffix(",".or(parser::eof()))
345 /// .parse_all("1x2,3x4,1234x5678")
346 /// .unwrap(),
347 /// vec![
348 /// (1, 2),
349 /// (3, 4),
350 /// (1234, 5678),
351 /// ]
352 /// );
353 /// ```
354 fn parse_all<'i>(&self, input: &'i str) -> Result<Vec<Self::Output<'i>>, InputError> {
355 ParserRef(self)
356 .repeat(Constant(()), 0)
357 .parse_complete(input)
358 }
359
360 /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
361 ///
362 /// Equivalent to `parser.with_suffix(`[`parser::eol()`](super::eol)`).parse_all(input)`.
363 ///
364 /// # Examples
365 /// ```
366 /// # use utils::parser::{self, Parser};
367 /// assert_eq!(
368 /// parser::u32()
369 /// .then(parser::u32().with_prefix("x"))
370 /// .parse_lines("1x2\n3x4\n1234x5678")
371 /// .unwrap(),
372 /// vec![
373 /// (1, 2),
374 /// (3, 4),
375 /// (1234, 5678),
376 /// ]
377 /// );
378 /// ```
379 fn parse_lines<'i>(&self, input: &'i str) -> Result<Vec<Self::Output<'i>>, InputError> {
380 ParserRef(self)
381 .with_suffix(Eol())
382 .repeat(Constant(()), 0)
383 .parse_complete(input)
384 }
385
386 /// Create an iterator which applies this parser repeatedly until the provided input is fully
387 /// consumed.
388 ///
389 /// The returned iterator will lazily parse the provided input string, producing a sequence of
390 /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
391 /// will always return [`None`].
392 ///
393 /// # Examples
394 /// ```
395 /// # use utils::input::InputError;
396 /// # use utils::parser::{self, Parser};
397 /// let iterator = parser::u32()
398 /// .with_suffix(parser::eol())
399 /// .parse_iterator("12\n34\n56\n78");
400 /// for item in iterator {
401 /// println!("{}", item?);
402 /// }
403 /// # Ok::<(), InputError>(())
404 /// ```
405 ///
406 /// ```
407 /// # use utils::parser::{self, Parser};
408 /// let mut iterator = parser::u32()
409 /// .with_suffix(parser::eol())
410 /// .parse_iterator("12\n34\nnot a integer");
411 /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
412 /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
413 /// assert!(iterator.next().unwrap().is_err());
414 /// assert!(iterator.next().is_none());
415 /// ```
416 ///
417 /// ```
418 /// # use utils::input::InputError;
419 /// # use utils::parser::{self, Parser};
420 /// let filtered = parser::u32()
421 /// .with_suffix(parser::eol())
422 /// .parse_iterator("11\n22\n33\n44\n55")
423 /// .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
424 /// .collect::<Result<Vec<u32>, InputError>>()?;
425 /// assert_eq!(filtered, vec![22, 44]);
426 /// # Ok::<(), InputError>(())
427 /// ```
428 fn parse_iterator(self, input: &str) -> ParserIterator<Self> {
429 ParserIterator {
430 input,
431 remaining: input.as_bytes(),
432 parser: self,
433 }
434 }
435
436 /// Create an iterator which returns matches only and skips over errors.
437 ///
438 /// This is intended for cases that require extracting matches out of the input.
439 /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
440 /// match the entire input structure.
441 ///
442 /// # Examples
443 /// ```
444 /// # use utils::parser::{self, Parser};
445 /// assert_eq!(
446 /// parser::u32()
447 /// .matches_iterator("abc123d456efg7hi8jk9lmnop")
448 /// .collect::<Vec<_>>(),
449 /// vec![123, 456, 7, 8, 9]
450 /// );
451 /// ```
452 fn matches_iterator(self, input: &str) -> ParserMatchesIterator<Self> {
453 ParserMatchesIterator {
454 remaining: input.as_bytes(),
455 parser: self,
456 }
457 }
458}
459
// Workaround to allow using methods which consume a parser in methods which take references.
//
// Wraps a borrowed parser so that by-value combinators (e.g. `repeat`, `with_suffix`) can be
// applied from `&self` methods such as `parse_all` and `parse_lines`.
struct ParserRef<'a, P>(&'a P);
462impl<P: Parser> Parser for ParserRef<'_, P> {
463 type Output<'i> = P::Output<'i>;
464 type Then<T: Parser> = Unimplemented;
465
466 #[inline]
467 fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
468 self.0.parse(input)
469 }
470}
471
472/// Matches the string literal exactly.
473///
474/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
475impl Parser for &'static str {
476 type Output<'i> = ();
477 type Then<T: Parser> = Then2<Self, T>;
478
479 #[inline]
480 fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
481 // This is faster than using strip_prefix for the common case where the string is a short
482 // string literal known at compile time.
483 if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
484 Ok(((), &input[self.len()..]))
485 } else {
486 Err((ParseError::ExpectedLiteral(self), input))
487 }
488 }
489}
490
491/// Matches the byte exactly.
492///
493/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
494impl Parser for u8 {
495 type Output<'i> = ();
496 type Then<T: Parser> = Then2<Self, T>;
497
498 #[inline]
499 fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
500 if input.first() == Some(self) {
501 Ok(((), &input[1..]))
502 } else {
503 Err((ParseError::ExpectedByte(*self), input))
504 }
505 }
506}
507
508/// Allow custom functions and closures to be used as parsers.
509impl<O, F: Fn(&[u8]) -> ParseResult<O>> Parser for F {
510 type Output<'i> = O;
511 type Then<T: Parser> = Then2<Self, T>;
512
513 #[inline]
514 fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
515 self(input)
516 }
517}
518
/// Trait for types that have a canonical parser.
pub trait Parseable {
    /// Type of the canonical parser, which must output `Self` for every input lifetime.
    type Parser: for<'i> Parser<Output<'i> = Self>;
    /// Constant instance of the canonical parser for this type.
    const PARSER: Self::Parser;
}