utils/parser/base.rs
1use crate::input::{InputError, MapWithInputExt};
2use crate::parser::combinator::{
3 Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithConsumed, WithPrefix,
4 WithSuffix,
5};
6use crate::parser::error::{ParseError, WithErrorMsg};
7use crate::parser::iterator::{ParserIterator, ParserMatchesIterator};
8use crate::parser::simple::{Constant, Eol};
9use crate::parser::then::{Then, Then2, Unimplemented};
10
/// [`Result`] type returned by [`Parser::parse`].
///
/// On success this holds the parsed value together with the remaining, unparsed bytes. On
/// failure it holds the [`ParseError`] together with a byte slice marking where the error
/// occurred.
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
13
14/// Parser trait.
15///
16/// Implementations should avoid allocating where possible.
17pub trait Parser<'i>: Sized {
18 /// Type of the value produced by [`parse`](Self::parse) when successful.
19 type Output;
20
21 /// Type of the chained parser returned by [`then`](Self::then).
22 ///
23 /// This is used to allow multiple [`then`](Self::then) calls to extend one tuple, instead of
24 /// nesting tuples inside each other.
25 type Then<T: Parser<'i>>: Then<'i, Self, T>;
26
27 /// Parse the given sequence of bytes.
28 ///
29 /// Returns a tuple of the successfully parsed [`Output`](Self::Output) value and the
30 /// remaining bytes, or a tuple containing a [`ParseError`] and the location of the error.
31 ///
32 /// The returned slices *must* be subslices of the input slice, otherwise [`InputError::new`]
33 /// (in [`parse_all`](Self::parse_all)) will panic.
34 ///
35 /// # Examples
36 /// ```
37 /// # use utils::parser::{self, Parser};
38 /// assert_eq!(parser::u32().parse(b"1234abc"), Ok((1234, &b"abc"[..])));
39 /// assert!(parser::u32().parse(b"abc1234").is_err());
40 /// ```
41 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output>;
42
43 // Provided methods
44
45 /// Sequence another parser after this one.
46 ///
47 /// # Examples
48 /// ```
49 /// # use utils::parser::{self, Parser};
50 /// assert_eq!(
51 /// parser::i32()
52 /// .then(parser::i32())
53 /// .parse(b"123-123"),
54 /// Ok(((123, -123), &b""[..]))
55 /// );
56 /// ```
57 fn then<T: Parser<'i>>(self, next: T) -> Self::Then<T> {
58 Then::then(self, next)
59 }
60
61 /// Attempt to parse using this parser, followed by provided parser.
62 ///
63 /// If this parser succeeds, the alternative provider won't be tried. If both error, the error
64 /// from the parser which parsed further into the input is returned (preferring the first error
65 /// if both errored at the same position).
66 ///
67 /// See also [`parser::one_of`](super::one_of()).
68 ///
69 /// # Examples
70 /// ```
71 /// # use utils::parser::{self, ParseError, Parser};
72 /// let parser = parser::u8()
73 /// .map(|x| u32::from(x) * 1001001)
74 /// .or(parser::u32());
75 /// assert_eq!(
76 /// parser.parse(b"123"),
77 /// Ok((123123123, &b""[..]))
78 /// );
79 /// assert_eq!(
80 /// parser.parse(b"1000"),
81 /// Ok((1000, &b""[..]))
82 /// );
83 /// ```
84 fn or<T: Parser<'i, Output = Self::Output>>(self, alternative: T) -> Or<Self, T> {
85 Or {
86 first: self,
87 second: alternative,
88 }
89 }
90
91 /// Map the output of this parser using the supplied function.
92 ///
93 /// # Examples
94 /// ```
95 /// # use utils::parser::{self, Parser};
96 /// assert_eq!(
97 /// parser::u32()
98 /// .map(|x| x * 2)
99 /// .parse(b"123"),
100 /// Ok((246, &b""[..]))
101 /// );
102 /// ```
103 ///
104 /// Closure that returns a value borrowing from both its input and an outer variable:
105 /// ```
106 /// # use utils::parser::{self, Parser};
107 /// let my_vec = vec![1, 2, 3];
108 /// assert_eq!(
109 /// parser::take_while(u8::is_ascii_digit)
110 /// .map(|x| (x, my_vec.as_slice()))
111 /// .parse(b"123"),
112 /// Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
113 /// );
114 /// ```
115 fn map<O, F: Fn(Self::Output) -> O>(self, f: F) -> Map<Self, F> {
116 Map {
117 parser: self,
118 map_fn: f,
119 }
120 }
121
122 /// Map the output of this parser using the supplied fallible function.
123 ///
124 /// Errors must be `&'static str`, which will be mapped to [`ParseError::Custom`].
125 ///
126 /// # Examples
127 /// ```
128 /// # use utils::parser::{self, ParseError, Parser};
129 /// let parser = parser::u8()
130 /// .map_res(|x| x.checked_mul(2).ok_or("input too large"));
131 /// assert_eq!(
132 /// parser.parse(b"123"),
133 /// Ok((246, &b""[..]))
134 /// );
135 /// assert_eq!(
136 /// parser.parse(b"200"),
137 /// Err((ParseError::Custom("input too large"), &b"200"[..]))
138 /// );
139 /// ```
140 ///
141 /// Closure that returns a value borrowing from both its input and an outer variable:
142 /// ```
143 /// # use utils::parser::{self, Parser};
144 /// let my_vec = vec![1, 2, 3];
145 /// assert_eq!(
146 /// parser::take_while(u8::is_ascii_digit)
147 /// .map_res(|x| {
148 /// if x.len() < 100 {
149 /// Ok((x, my_vec.as_slice()))
150 /// } else {
151 /// Err("expected more digits")
152 /// }
153 /// })
154 /// .parse(b"123"),
155 /// Ok(((&b"123"[..], &[1, 2, 3][..]), &b""[..]))
156 /// );
157 /// ```
158 fn map_res<O, F: Fn(Self::Output) -> Result<O, &'static str>>(
159 self,
160 f: F,
161 ) -> MapResult<Self, F> {
162 MapResult {
163 parser: self,
164 map_fn: f,
165 }
166 }
167
168 /// Wrap [`Output`](Self::Output) in [`Option`], returning [`None`] on error.
169 ///
170 /// # Examples
171 /// ```
172 /// # use utils::parser::{self, ParseError, Parser};
173 /// let parser = parser::u32()
174 /// .optional();
175 /// assert_eq!(
176 /// parser.parse(b"123"),
177 /// Ok((Some(123), &b""[..]))
178 /// );
179 /// assert_eq!(
180 /// parser.parse(b"abc"),
181 /// Ok((None, &b"abc"[..]))
182 /// );
183 /// ```
184 fn optional(self) -> Optional<Self> {
185 Optional { parser: self }
186 }
187
188 /// Repeat this parser `N` times, returning an [`array`].
189 ///
190 /// If the number of items is variable use [`repeat_arrayvec`](Self::repeat_arrayvec) or
191 /// [`repeat`](Self::repeat).
192 ///
193 /// # Examples
194 /// ```
195 /// # use utils::parser::{self, Parser};
196 /// assert_eq!(
197 /// parser::u32()
198 /// .repeat_n(",") // N = 3 is inferred
199 /// .parse(b"12,34,56"),
200 /// Ok(([12, 34, 56], &b""[..]))
201 /// );
202 /// ```
203 fn repeat_n<const N: usize, S: Parser<'i>>(self, separator: S) -> RepeatN<N, Self, S>
204 where
205 Self::Output: Copy + Default,
206 {
207 RepeatN {
208 parser: self,
209 separator,
210 }
211 }
212
213 /// Repeat this parser while it matches, returning a [`ArrayVec`](crate::array::ArrayVec).
214 ///
215 /// This parser can parse up to `N` items. If more items match, it will return an error.
216 ///
217 /// See [`repeat`](Self::repeat) if the upper bound is large or not known, and
218 /// [`repeat_n`](Self::repeat_n) if the number of items is consistent.
219 ///
220 /// # Examples
221 /// ```
222 /// # use utils::parser::{self, Parser};
223 /// let parser = parser::u32()
224 /// .repeat(",", 3);
225 /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
226 /// assert!(parser.parse(b"12,34").is_err());
227 /// ```
228 fn repeat_arrayvec<const N: usize, S: Parser<'i>>(
229 self,
230 separator: S,
231 min_elements: usize,
232 ) -> RepeatArrayVec<N, Self, S>
233 where
234 Self::Output: Copy + Default,
235 {
236 RepeatArrayVec {
237 parser: self,
238 separator,
239 min_elements,
240 }
241 }
242
243 /// Repeat this parser while it matches, returning a [`Vec`].
244 ///
245 /// To avoid allocating, prefer [`repeat_n`](Self::repeat_n) if the number of items is
246 /// consistent and known in advance, or [`repeat_arrayvec`](Self::repeat_arrayvec) if the number
247 /// of items is variable but has a known upper bound.
248 ///
249 /// # Examples
250 /// ```
251 /// # use utils::parser::{self, Parser};
252 /// let parser = parser::u32()
253 /// .repeat(",", 3);
254 /// assert_eq!(parser.parse(b"12,34,56,78"), Ok((vec![12, 34, 56, 78], &b""[..])));
255 /// assert!(parser.parse(b"12,34").is_err());
256 /// ```
257 fn repeat<S: Parser<'i>>(self, separator: S, min_elements: usize) -> RepeatVec<Self, S> {
258 RepeatVec {
259 parser: self,
260 separator,
261 min_elements,
262 }
263 }
264
265 /// Return the output of this parser as well as the bytes consumed.
266 ///
267 /// This can be used to map any errors that occur while processing the parsed input back to the
268 /// problematic item's position in the input.
269 ///
270 /// # Examples
271 /// ```
272 /// # use utils::parser::{self, Parser};
273 /// assert_eq!(
274 /// parser::u32().with_consumed().parse(b"012,345,678"),
275 /// Ok(((12, &b"012"[..]), &b",345,678"[..]))
276 /// );
277 /// ```
278 fn with_consumed(self) -> WithConsumed<Self> {
279 WithConsumed { parser: self }
280 }
281
282 /// Parse a prefix (normally a string literal) before this parser.
283 ///
284 /// The result of the prefix parser is discarded.
285 ///
286 /// # Examples
287 /// ```
288 /// # use utils::parser::{self, Parser};
289 /// assert_eq!(
290 /// parser::u32()
291 /// .with_prefix("abc")
292 /// .parse(b"abc123"),
293 /// Ok((123, &b""[..]))
294 /// );
295 /// ```
296 fn with_prefix<T: Parser<'i>>(self, prefix: T) -> WithPrefix<Self, T> {
297 WithPrefix {
298 parser: self,
299 prefix,
300 }
301 }
302
303 /// Parse a suffix (normally a string literal) after this parser.
304 ///
305 /// The result of the suffix parser is discarded.
306 ///
307 /// # Examples
308 /// ```
309 /// # use utils::parser::{self, Parser};
310 /// assert_eq!(
311 /// parser::u32()
312 /// .with_suffix("abc")
313 /// .parse(b"123abc"),
314 /// Ok((123, &b""[..]))
315 /// );
316 /// ```
317 fn with_suffix<T: Parser<'i>>(self, suffix: T) -> WithSuffix<Self, T> {
318 WithSuffix {
319 parser: self,
320 suffix,
321 }
322 }
323
324 /// Replace this parser's error message with the provided string.
325 ///
326 /// # Examples
327 /// ```
328 /// # use utils::parser::{self, ParseError, Parser};
329 /// let parser = parser::u8()
330 /// .error_msg("expected power level");
331 /// assert_eq!(
332 /// parser.parse(b"123"),
333 /// Ok((123, &b""[..]))
334 /// );
335 /// assert_eq!(
336 /// parser.parse(b"abc"),
337 /// Err((ParseError::Custom("expected power level"), &b"abc"[..]))
338 /// );
339 /// ```
340 fn error_msg(self, message: &'static str) -> WithErrorMsg<Self> {
341 WithErrorMsg {
342 parser: self,
343 message,
344 }
345 }
346
347 /// Apply this parser once, checking the provided input is fully consumed.
348 ///
349 /// # Examples
350 /// ```
351 /// # use utils::parser::{self, Parser};
352 /// assert_eq!(parser::u32().parse_complete("1234").unwrap(), 1234);
353 /// assert!(parser::u32().parse_complete("1234abc").is_err());
354 /// ```
355 fn parse_complete(&self, input: &'i str) -> Result<Self::Output, InputError> {
356 match self.parse(input.as_bytes()).map_with_input(input)? {
357 (v, []) => Ok(v),
358 (_, remaining) => Err(InputError::new(input, remaining, ParseError::ExpectedEof())),
359 }
360 }
361
362 /// Apply this parser repeatedly until the provided input is fully consumed.
363 ///
364 /// Equivalent to `parser.repeat(parser::noop(), 0).parse_complete(input)`.
365 ///
366 /// # Examples
367 /// ```
368 /// # use utils::parser::{self, Parser};
369 /// assert_eq!(
370 /// parser::u32()
371 /// .then(parser::u32().with_prefix("x"))
372 /// .with_suffix(",".or(parser::eof()))
373 /// .parse_all("1x2,3x4,1234x5678")
374 /// .unwrap(),
375 /// vec![
376 /// (1, 2),
377 /// (3, 4),
378 /// (1234, 5678),
379 /// ]
380 /// );
381 /// ```
382 fn parse_all(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
383 ParserRef(self)
384 .repeat(Constant(()), 0)
385 .parse_complete(input)
386 }
387
388 /// Similar to [`parse_all`](Self::parse_all) but expects a newline after each item.
389 ///
390 /// Equivalent to `parser.with_suffix(`[`parser::eol()`](super::eol)`).parse_all(input)`.
391 ///
392 /// # Examples
393 /// ```
394 /// # use utils::parser::{self, Parser};
395 /// assert_eq!(
396 /// parser::u32()
397 /// .then(parser::u32().with_prefix("x"))
398 /// .parse_lines("1x2\n3x4\n1234x5678")
399 /// .unwrap(),
400 /// vec![
401 /// (1, 2),
402 /// (3, 4),
403 /// (1234, 5678),
404 /// ]
405 /// );
406 /// ```
407 fn parse_lines(&self, input: &'i str) -> Result<Vec<Self::Output>, InputError> {
408 ParserRef(self)
409 .with_suffix(Eol())
410 .repeat(Constant(()), 0)
411 .parse_complete(input)
412 }
413
414 /// Create an iterator which applies this parser repeatedly until the provided input is fully
415 /// consumed.
416 ///
417 /// The returned iterator will lazily parse the provided input string, producing a sequence of
418 /// [`Result`] values. Once the end of input is reached, or an error is returned, the parser
419 /// will always return [`None`].
420 ///
421 /// # Examples
422 /// ```
423 /// # use utils::input::InputError;
424 /// # use utils::parser::{self, Parser};
425 /// let iterator = parser::u32()
426 /// .with_suffix(parser::eol())
427 /// .parse_iterator("12\n34\n56\n78");
428 /// for item in iterator {
429 /// println!("{}", item?);
430 /// }
431 /// # Ok::<(), InputError>(())
432 /// ```
433 ///
434 /// ```
435 /// # use utils::parser::{self, Parser};
436 /// let mut iterator = parser::u32()
437 /// .with_suffix(parser::eol())
438 /// .parse_iterator("12\n34\nnot a integer");
439 /// assert_eq!(iterator.next().unwrap().unwrap(), 12);
440 /// assert_eq!(iterator.next().unwrap().unwrap(), 34);
441 /// assert!(iterator.next().unwrap().is_err());
442 /// assert!(iterator.next().is_none());
443 /// ```
444 ///
445 /// ```
446 /// # use utils::input::InputError;
447 /// # use utils::parser::{self, Parser};
448 /// let filtered = parser::u32()
449 /// .with_suffix(parser::eol())
450 /// .parse_iterator("11\n22\n33\n44\n55")
451 /// .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
452 /// .collect::<Result<Vec<u32>, InputError>>()?;
453 /// assert_eq!(filtered, vec![22, 44]);
454 /// # Ok::<(), InputError>(())
455 /// ```
456 fn parse_iterator(self, input: &str) -> ParserIterator<'_, Self> {
457 ParserIterator {
458 input,
459 remaining: input.as_bytes(),
460 parser: self,
461 }
462 }
463
464 /// Create an iterator which returns matches only and skips over errors.
465 ///
466 /// This is intended for cases that require extracting matches out of the input.
467 /// Otherwise, [`parse_iterator`](Self::parse_iterator) should be used with a parser that can
468 /// match the entire input structure.
469 ///
470 /// # Examples
471 /// ```
472 /// # use utils::parser::{self, Parser};
473 /// assert_eq!(
474 /// parser::u32()
475 /// .matches_iterator("abc123d456efg7hi8jk9lmnop")
476 /// .collect::<Vec<_>>(),
477 /// vec![123, 456, 7, 8, 9]
478 /// );
479 /// ```
480 fn matches_iterator(self, input: &str) -> ParserMatchesIterator<'_, Self> {
481 ParserMatchesIterator {
482 remaining: input.as_bytes(),
483 parser: self,
484 }
485 }
486}
487
488// Workaround to allow using methods which consume a parser in methods which take references.
489struct ParserRef<'a, P>(&'a P);
490impl<'i, P: Parser<'i>> Parser<'i> for ParserRef<'_, P> {
491 type Output = P::Output;
492 type Then<T: Parser<'i>> = Unimplemented;
493
494 #[inline]
495 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
496 self.0.parse(input)
497 }
498}
499
500/// Matches the string literal exactly.
501///
502/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
503impl<'i> Parser<'i> for &'static str {
504 type Output = ();
505 type Then<T: Parser<'i>> = Then2<Self, T>;
506
507 #[inline]
508 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
509 // This is faster than using strip_prefix for the common case where the string is a short
510 // string literal known at compile time.
511 if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
512 Ok(((), &input[self.len()..]))
513 } else {
514 Err((ParseError::ExpectedLiteral(self), input))
515 }
516 }
517}
518
519/// Matches the byte exactly.
520///
521/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
522impl<'i> Parser<'i> for u8 {
523 type Output = ();
524 type Then<T: Parser<'i>> = Then2<Self, T>;
525
526 #[inline]
527 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
528 if input.first() == Some(self) {
529 Ok(((), &input[1..]))
530 } else {
531 Err((ParseError::ExpectedByte(*self), input))
532 }
533 }
534}
535
536/// Allow custom functions and closures to be used as parsers.
537impl<'i, O, F: Fn(&'i [u8]) -> ParseResult<'i, O>> Parser<'i> for F {
538 type Output = O;
539 type Then<T: Parser<'i>> = Then2<Self, T>;
540
541 #[inline]
542 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
543 self(input)
544 }
545}
546
/// Trait for types that have a canonical parser.
pub trait Parseable {
    /// Parser type for this type: it must implement [`Parser`] for every input lifetime and
    /// produce `Self` as its output.
    type Parser: for<'i> Parser<'i, Output = Self>;
    /// Constant instance of the canonical parser.
    const PARSER: Self::Parser;
}