utils/parser/base.rs
1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3 Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4 WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
/// [`Result`] type returned by [`Parser::parse`].
///
/// The `Ok` variant holds the parsed value together with the remaining, unconsumed bytes. The
/// `Err` variant holds the [`ParseError`] together with the slice marking the error location.
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17#[must_use]
18pub trait Parser<'i>: Sized {
19 /// Type of the value produced by [`parse`](Self::parse) when successful.
20 type Output;
21
22 /// Type of the chained parser returned by [`then`](Self::then).
23 ///
24 /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
25 /// nesting tuples inside each other.
26 type Then<T: Parser<'i>>: Then<'i, Self, T>;
27
28 /// Parse the given sequence of bytes.
29 ///
30 /// Returns a tuple of the successfully parsed [`Output`](Self::Output) value and the
31 /// remaining bytes, or a tuple containing a [`ParseError`] and the location of the error.
32 ///
33 /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
34 /// (in [`parse_all`](Self::parse_all)) will panic.
35 ///
36 /// # Examples
37 /// ```
38 /// # use utils::parser::{self, Parser};
39 /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
40 /// assert!(parser::u32().parse(b"abc1234").is_err());
41 /// ```
42 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output>;
43
44 // Provided methods
45
46 /// Sequence another parser after this one.
47 ///
48 /// # Examples
49 /// ```
50 /// # use utils::parser::{self, Parser};
51 /// assert_eq!(
52 /// parser::i32()
53 /// .then(parser::i32())
54 /// .parse(b"123-123"),
55 /// Ok(((123, -123), &b""[..]))
56 /// );
57 /// ```
58 #[inline]
59 fn then<T: Parser<'i>>(self, next: T) -> Self::Then<T> {
60 Then::then(self, next)
61 }
62
63 /// Attempt to parse using this parser, followed by provided parser.
64 ///
65 /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
66 /// from the parser which parsed further into the input is returned (preferring the first error
67 /// if both errored at the same position).
68 ///
69 /// See also [`parser::one_of`](super::one_of()).
70 ///
71 /// # Examples
72 /// ```
73 /// # use utils::parser::{self, ParseError, Parser};
74 /// let parser = parser::u8()
75 /// .map(|x| u32::from(x) * 1001001)
76 /// .or(parser::u32());
77 /// assert_eq!(
78 /// parser.parse(b"123"),
79 /// Ok((123123123, &b""[..]))
80 /// );
81 /// assert_eq!(
82 /// parser.parse(b"1000"),
83 /// Ok((1000, &b""[..]))
84 /// );
85 /// ```
86 #[inline]
87 fn or<T: Parser<'i, Output = Self::Output>>(self, alternative: T) -> Or<Self, T> {
88 Or {
89 first: self,
90 second: alternative,
91 }
92 }
93
94 /// Map the output of this parser using the supplied function.
95 ///
96 /// # Examples
97 /// ```
98 /// # use utils::parser::{self, Parser};
99 /// assert_eq!(
100 /// parser::u32()
101 /// .map(|x| x * 2)
102 /// .parse(b"123"),
103 /// Ok((246, &b""[..]))
104 /// );
105 /// ```
106 ///
107 /// Closure that returns a value borrowing from both its input and an outer variable:
108 /// ```
109 /// # use utils::parser::{self, Parser};
110 /// let my_vec = vec![1, 2, 3];
111 /// assert_eq!(
112 /// parser::take_while(u8::is_ascii_digit)
113 /// .map(|x| (x, my_vec.as_slice()))
114 /// .parse(b"123"),
115 /// Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
116 /// );
117 /// ```
118 #[inline]
119 fn map<O, F: Fn(Self::Output) -> O>(self, f: F) -> Map<Self, F> {
120 Map {
121 parser: self,
122 map_fn: f,
123 }
124 }
125
126 /// Map the output of this parser using the supplied fallible function.
127 ///
128 /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
129 ///
130 /// # Examples
131 /// ```
132 /// # use utils::parser::{self, ParseError, Parser};
133 /// let parser = parser::u8()
134 /// .map_res(|x| x.checked_mul(2).ok_or("input too large"));
135 /// assert_eq!(
136 /// parser.parse(b"123"),
137 /// Ok((246, &b""[..]))
138 /// );
139 /// assert_eq!(
140 /// parser.parse(b"200"),
141 /// Err((ParseError::Custom("input too large"), &b"200"[..]))
142 /// );
143 /// ```
144 ///
145 /// Closure that returns a value borrowing from both its input and an outer variable:
146 /// ```
147 /// # use utils::parser::{self, Parser};
148 /// let my_vec = vec![1, 2, 3];
149 /// assert_eq!(
150 /// parser::take_while(u8::is_ascii_digit)
151 /// .map_res(|x| {
152 /// if x.len() < 100 {
153 /// Ok((x, my_vec.as_slice()))
154 /// } else {
155 /// Err("expected more digits")
156 /// }
157 /// })
158 /// .parse(b"123"),
159 /// Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
160 /// );
161 /// ```
162 #[inline]
163 fn map_res<O, F: Fn(Self::Output) -> Result<O, &'static str>>(
164 self,
165 f: F,
166 ) -> MapResult<Self, F> {
167 MapResult {
168 parser: self,
169 map_fn: f,
170 }
171 }
172
173 /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
174 ///
175 /// # Examples
176 /// ```
177 /// # use utils::parser::{self, ParseError, Parser};
178 /// let parser = parser::u32()
179 /// .optional();
180 /// assert_eq!(
181 /// parser.parse(b"123"),
182 /// Ok((Some(123), &b""[..]))
183 /// );
184 /// assert_eq!(
185 /// parser.parse(b"abc"),
186 /// Ok((None, &b"abc"[..]))
187 /// );
188 /// ```
189 #[inline]
190 fn optional(self) -> Optional<Self> {
191 Optional { parser: self }
192 }
193
194 /// Repeat this parser `N` times, returning an [`array`].
195 ///
196 /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
197 /// [`repeat`](Self::repeat).
198 ///
199 /// # Examples
200 /// ```
201 /// # use utils::parser::{self, Parser};
202 /// assert_eq!(
203 /// parser::u32()
204 /// .repeat_n(",") // N = 3 is inferred
205 /// .parse(b"12,34,56"),
206 /// Ok(([12, 34, 56], &b""[..]))
207 /// );
208 /// ```
209 #[inline]
210 fn repeat_n<const N: usize, S: Parser<'i>>(self, separator: S) -> RepeatN<N, Self, S>
211 where
212 Self::Output: Copy + Default,
213 {
214 RepeatN {
215 parser: self,
216 separator,
217 }
218 }
219
220 /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
221 ///
222 /// This parser can parse up to `N` items. If more items match, it will return an error.
223 ///
224 /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
225 /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
226 ///
227 /// # Examples
228 /// ```
229 /// # use utils::array::ArrayVec;
230 /// use utils::parser::{self, Parser};
231 /// let parser = parser::u32()
232 /// .repeat_arrayvec::<5, _>(",", 3);
233 /// assert_eq!(
234 /// parser.parse(b"12,34,56,78"),
235 /// Ok((ArrayVec::from_slice(&[12, 34, 56, 78]).unwrap(), &b""[..]))
236 /// );
237 /// assert_eq!(
238 /// parser.parse(b"12,34,56,abc"),
239 /// Ok((ArrayVec::from_slice(&[12, 34, 56]).unwrap(), &b",abc"[..]))
240 /// );
241 /// assert!(parser.parse(b"12,34").is_err());
242 /// ```
243 #[inline]
244 fn repeat_arrayvec<const N: usize, S: Parser<'i>>(
245 self,
246 separator: S,
247 min_elements: usize,
248 ) -> RepeatArrayVec<N, Self, S>
249 where
250 Self::Output: Copy + Default,
251 {
252 RepeatArrayVec {
253 parser: self,
254 separator,
255 min_elements,
256 }
257 }
258
259 /// Repeat this parser while it matches, returning a [`Vec`].
260 ///
261 /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
262 /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
263 /// of items is variable but has a known upper bound.
264 ///
265 /// # Examples
266 /// ```
267 /// # use utils::parser::{self, Parser};
268 /// let parser = parser::u32()
269 /// .repeat(",", 3);
270 /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
271 /// assert_eq!(parser.parse(b"12,34,56,abc"), Ok((vec![12, 34, 56], &b",abc"[..])));
272 /// assert!(parser.parse(b"12,34").is_err());
273 /// ```
274 #[inline]
275 fn repeat<S: Parser<'i>>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
276 RepeatVec {
277 parser: self,
278 separator,
279 min_elements,
280 }
281 }
282
283 /// Return the output of this parser as well as the bytes consumed.
284 ///
285 /// This can be used to map any errors that occur while processing the parsed input back to the
286 /// problematic item's position in the input.
287 ///
288 /// # Examples
289 /// ```
290 /// # use utils::parser::{self, Parser};
291 /// assert_eq!(
292 /// parser::u32().with_consumed().parse(b"012,345,678"),
293 /// Ok(((12, &b"012"[..]), &b",345,678"[..]))
294 /// );
295 /// ```
296 #[inline]
297 fn with_consumed(self) -> WithConsumed<Self> {
298 WithConsumed { parser: self }
299 }
300
301 /// Parse a prefix (normally a string literal) before this parser.
302 ///
303 /// The result of the prefix parser is discarded.
304 ///
305 /// # Examples
306 /// ```
307 /// # use utils::parser::{self, Parser};
308 /// assert_eq!(
309 /// parser::u32()
310 /// .with_prefix("abc")
311 /// .parse(b"abc123"),
312 /// Ok((123, &b""[..]))
313 /// );
314 /// ```
315 #[inline]
316 fn with_prefix<T: Parser<'i>>(self, prefix: T) -> WithPrefix<Self, T> {
317 WithPrefix {
318 parser: self,
319 prefix,
320 }
321 }
322
323 /// Parse a suffix (normally a string literal) after this parser.
324 ///
325 /// The result of the suffix parser is discarded.
326 ///
327 /// # Examples
328 /// ```
329 /// # use utils::parser::{self, Parser};
330 /// assert_eq!(
331 /// parser::u32()
332 /// .with_suffix("abc")
333 /// .parse(b"123abc"),
334 /// Ok((123, &b""[..]))
335 /// );
336 /// ```
337 #[inline]
338 fn with_suffix<T: Parser<'i>>(self, suffix: T) -> WithSuffix<Self, T> {
339 WithSuffix {
340 parser: self,
341 suffix,
342 }
343 }
344
345 /// Parse a end of line (or end of string) after this parser.
346 ///
347 /// Equivalent to [`parser.with_suffix`](Parser::with_suffix)`(`[`parser::eol()`](super::eol)`)`.
348 ///
349 /// # Examples
350 /// ```
351 /// # use utils::parser::{self, Parser};
352 /// assert_eq!(
353 /// parser::u32().with_eol()
354 /// .parse(b"123\nabc"),
355 /// Ok((123, &b"abc"[..]))
356 /// );
357 /// ```
358 #[inline]
359 fn with_eol(self) -> WithSuffix<Self, Eol> {
360 WithSuffix {
361 parser: self,
362 suffix: Eol(),
363 }
364 }
365
366 /// Replace this parser's error message with the provided string.
367 ///
368 /// # Examples
369 /// ```
370 /// # use utils::parser::{self, ParseError, Parser};
371 /// let parser = parser::u8()
372 /// .error_msg("expected power level");
373 /// assert_eq!(
374 /// parser.parse(b"123"),
375 /// Ok((123, &b""[..]))
376 /// );
377 /// assert_eq!(
378 /// parser.parse(b"abc"),
379 /// Err((ParseError::Custom("expected power level"), &b"abc"[..]))
380 /// );
381 /// ```
382 #[inline]
383 fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
384 WithErrorMsg {
385 parser: self,
386 message,
387 }
388 }
389
390 /// Apply this parser once, checking the provided input is fully consumed.
391 ///
392 /// # Examples
393 /// ```
394 /// # use utils::parser::{self, Parser};
395 /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
396 /// assert!(parser::u32().parse_complete("1234abc").is_err());
397 /// ```
398 #[inline]
399 fn parse_complete(&self, input: &'i str) -> Result<Self::Output, InputError> {
400 match self.parse(input.as_bytes()).map_with_input(input)? {
401 (v, []) => Ok(v),
402 (_, remaining) => Err(InputError::new(input, remaining, ParseError::ExpectedEof())),
403 }
404 }
405
406 /// Apply this parser repeatedly until the provided input is fully consumed.
407 ///
408 /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
409 ///
410 /// # Examples
411 /// ```
412 /// # use utils::parser::{self, Parser};
413 /// assert_eq!(
414 /// parser::u32()
415 /// .then(parser::u32().with_prefix("x"))
416 /// .with_suffix(",".or(parser::eof()))
417 /// .parse_all("1x2,3x4,1234x5678")
418 /// .unwrap(),
419 /// vec![
420 /// (1, 2),
421 /// (3, 4),
422 /// (1234, 5678),
423 /// ]
424 /// );
425 /// ```
426 #[inline]
427 fn parse_all(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
428 ParserRef(self)
429 .repeat(Constant(()), 0)
430 .parse_complete(input)
431 }
432
433 /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
434 ///
435 /// Equivalent to [`parser.with_eol()`](Parser::with_eol)`.parse_all(input)`.
436 ///
437 /// # Examples
438 /// ```
439 /// # use utils::parser::{self, Parser};
440 /// assert_eq!(
441 /// parser::u32()
442 /// .then(parser::u32().with_prefix("x"))
443 /// .parse_lines("1x2\n3x4\n1234x5678")
444 /// .unwrap(),
445 /// vec![
446 /// (1, 2),
447 /// (3, 4),
448 /// (1234, 5678),
449 /// ]
450 /// );
451 /// ```
452 #[inline]
453 fn parse_lines(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
454 ParserRef(self)
455 .with_suffix(Eol())
456 .repeat(Constant(()), 0)
457 .parse_complete(input)
458 }
459
460 /// Create an iterator which applies this parser repeatedly until the provided input is fully
461 /// consumed.
462 ///
463 /// The returned iterator will lazily parse the provided input string, producing a sequence of
464 /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
465 /// will always return [`None`].
466 ///
467 /// # Examples
468 /// ```
469 /// # use utils::input::InputError;
470 /// # use utils::parser::{self, Parser};
471 /// let iterator = parser::u32()
472 /// .with_eol()
473 /// .parse_iterator("12\n34\n56\n78");
474 /// for item in iterator {
475 /// println!("{}", item?);
476 /// }
477 /// # Ok::<(), InputError>(())
478 /// ```
479 ///
480 /// ```
481 /// # use utils::parser::{self, Parser};
482 /// let mut iterator = parser::u32()
483 /// .with_eol()
484 /// .parse_iterator("12\n34\nnot a integer");
485 /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
486 /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
487 /// assert!(iterator.next().unwrap().is_err());
488 /// assert!(iterator.next().is_none());
489 /// ```
490 ///
491 /// ```
492 /// # use utils::input::InputError;
493 /// # use utils::parser::{self, Parser};
494 /// let filtered = parser::u32()
495 /// .with_eol()
496 /// .parse_iterator("11\n22\n33\n44\n55")
497 /// .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
498 /// .collect::<Result<Vec<u32>, InputError>>()?;
499 /// assert_eq!(filtered, vec![22, 44]);
500 /// # Ok::<(), InputError>(())
501 /// ```
502 #[inline]
503 fn parse_iterator(self, input: &str) -> ParserIterator<'_, Self> {
504 ParserIterator {
505 input,
506 remaining: input.as_bytes(),
507 parser: self,
508 }
509 }
510
511 /// Create an iterator which returns matches only and skips over errors.
512 ///
513 /// This is intended for cases that require extracting matches out of the input.
514 /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
515 /// match the entire input structure.
516 ///
517 /// # Examples
518 /// ```
519 /// # use utils::parser::{self, Parser};
520 /// assert_eq!(
521 /// parser::u32()
522 /// .matches_iterator("abc123d456efg7hi8jk9lmnop")
523 /// .collect::<Vec<_>>(),
524 /// vec![123, 456, 7, 8, 9]
525 /// );
526 /// ```
527 #[inline]
528 fn matches_iterator(self, input: &str) -> ParserMatchesIterator<'_, Self> {
529 ParserMatchesIterator {
530 remaining: input.as_bytes(),
531 parser: self,
532 }
533 }
534}
535
536// Workaround to allow using methods which consume a parser in methods which take references.
537struct ParserRef<'a, P>(&'a P);
538impl<'i, P: Parser<'i>> Parser<'i> for ParserRef<'_, P> {
539 type Output = P::Output;
540 type Then<T: Parser<'i>> = Unimplemented;
541
542 #[inline]
543 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
544 self.0.parse(input)
545 }
546}
547
548/// Matches the string literal exactly.
549///
550/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
551impl<'i> Parser<'i> for &'static str {
552 type Output = ();
553 type Then<T: Parser<'i>> = Then2<Self, T>;
554
555 #[inline]
556 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
557 // This is faster than using strip_prefix for the common case where the string is a short
558 // string literal known at compile time.
559 if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
560 Ok(((), &input[self.len()..]))
561 } else {
562 Err((ParseError::ExpectedLiteral(self), input))
563 }
564 }
565}
566
567/// Matches the byte exactly.
568///
569/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
570impl<'i> Parser<'i> for u8 {
571 type Output = ();
572 type Then<T: Parser<'i>> = Then2<Self, T>;
573
574 #[inline]
575 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
576 if input.first() == Some(self) {
577 Ok(((), &input[1..]))
578 } else {
579 Err((ParseError::ExpectedByte(*self), input))
580 }
581 }
582}
583
584/// Allow custom functions and closures to be used as parsers.
585impl<'i, O, F: Fn(&'i [u8]) -> ParseResult<'i, O>> Parser<'i> for F {
586 type Output = O;
587 type Then<T: Parser<'i>> = Then2<Self, T>;
588
589 #[inline]
590 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
591 self(input)
592 }
593}
594
/// Trait for types that have a canonical parser.
pub trait Parseable {
    /// Parser type for `Self`: it must implement [`Parser`] for every input lifetime and
    /// produce `Self` as its output.
    type Parser: for<'i> Parser<'i, Output = Self>;
    /// Constant instance of the canonical parser.
    const PARSER: Self::Parser;
}