utils/parser/
simple.rs

1use crate::parser::then::{Then2, Unimplemented};
2use crate::parser::{ParseError, ParseResult, Parser};
3use std::ops::RangeInclusive;
4
5#[derive(Copy, Clone)]
6pub struct Byte();
7impl<'i> Parser<'i> for Byte {
8    type Output = u8;
9    type Then<T: Parser<'i>> = Then2<Self, T>;
10
11    #[inline]
12    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
13        if let [byte, remaining @ ..] = input {
14            Ok((*byte, remaining))
15        } else {
16            Err((ParseError::Expected("byte"), input))
17        }
18    }
19}
20
21/// Parser that consumes a single byte.
22///
23/// Not to be confused with [`u8`](super::u8), which parses a number in the range 0-255.
24///
25/// # Examples
26/// ```
27/// # use utils::parser::{self, Parser};
28/// assert_eq!(
29///     parser::byte().parse(b"abcdef"),
30///     Ok((b'a', &b"bcdef"[..]))
31/// );
32/// assert_eq!(
33///     parser::byte().parse(b"123"),
34///     Ok((b'1', &b"23"[..]))
35/// );
36/// ```
37#[inline]
38#[must_use]
39pub fn byte() -> Byte {
40    Byte()
41}
42
43#[derive(Copy, Clone)]
44pub struct ByteLut<'a, O> {
45    lut: &'a [Option<O>; 256],
46    error: ParseError,
47}
48impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
49    type Output = O;
50    type Then<T: Parser<'i>> = Then2<Self, T>;
51
52    #[inline]
53    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
54        if let [byte, remaining @ ..] = input
55            && let Some(output) = self.lut[*byte as usize]
56        {
57            Ok((output, remaining))
58        } else {
59            Err((self.error, input))
60        }
61    }
62}
63
64/// Parser that consumes a single byte and maps it using a lookup table.
65///
66/// Equivalent to `parser::byte().map_res(|b| LOOKUP[b as usize].ok_or("expected ..."))`, which is
67/// usually faster than an equivalent match statement in the closure.
68///
69/// See also [`parser::byte_map!`](crate::parser::byte_map!) which wraps this function, allowing a
70/// match-like syntax to be used to define the lookup table.
71///
72/// # Examples
73/// ```
74/// # use utils::parser::{self, Parser, ParseError};
75/// const LOOKUP: [Option<bool>; 256] = {
76///     let mut x = [None; 256];
77///     x['#' as usize] = Some(true);
78///     x['.' as usize] = Some(false);
79///     x
80/// };
81///
82/// let parser = parser::byte_lut(&LOOKUP, ParseError::Custom("expected '#' or '.'"));
83/// assert_eq!(parser.parse(b"#..##"), Ok((true, &b"..##"[..])));
84/// assert_eq!(parser.parse(b"..##"), Ok((false, &b".##"[..])));
85/// assert_eq!(parser.parse(b"abc"), Err((ParseError::Custom("expected '#' or '.'"), &b"abc"[..])));
86/// ```
87#[inline]
88#[must_use]
89pub fn byte_lut<T: Copy>(lut: &'_ [Option<T>; 256], error: ParseError) -> ByteLut<'_, T> {
90    ByteLut { lut, error }
91}
92
93#[derive(Copy, Clone)]
94pub struct ByteRange {
95    min: u8,
96    max: u8,
97}
98impl<'i> Parser<'i> for ByteRange {
99    type Output = u8;
100    type Then<T: Parser<'i>> = Then2<Self, T>;
101
102    #[inline]
103    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
104        if let [byte, remaining @ ..] = input {
105            if *byte >= self.min && *byte <= self.max {
106                Ok((*byte, remaining))
107            } else {
108                Err((ParseError::ExpectedByteRange(self.min, self.max), input))
109            }
110        } else {
111            Err((ParseError::Expected("byte"), input))
112        }
113    }
114}
115
116/// Parser that consumes a single byte in the supplied range.
117///
118/// See also [`number_range`](super::number_range) and [`byte`].
119///
120/// # Examples
121/// ```
122/// # use utils::parser::{self, Parser};
123/// assert_eq!(
124///     parser::byte_range(b'a'..=b'z').parse(b"hello world"),
125///     Ok((b'h', &b"ello world"[..]))
126/// );
127/// ```
128#[inline]
129#[must_use]
130pub fn byte_range(range: RangeInclusive<u8>) -> ByteRange {
131    let min = *range.start();
132    let max = *range.end();
133    assert!(min <= max);
134    ByteRange { min, max }
135}
136
137#[derive(Copy, Clone)]
138pub struct Constant<V: Copy>(pub(super) V);
139impl<'i, V: Copy> Parser<'i> for Constant<V> {
140    type Output = V;
141    type Then<T: Parser<'i>> = Then2<Self, T>;
142
143    #[inline]
144    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
145        Ok((self.0, input))
146    }
147}
148
149/// Parser that consumes no input and always succeeds, returning the provided value.
150///
151/// # Examples
152/// ```
153/// # use utils::parser::{self, Parser};
154/// assert_eq!(
155///     parser::constant(1).parse(b"abc"),
156///     Ok((1, &b"abc"[..]))
157/// );
158/// ```
159#[inline]
160#[must_use]
161pub fn constant<T: Copy>(v: T) -> Constant<T> {
162    Constant(v)
163}
164
165/// Parser that consumes no input and always succeeds, returning [`()`](unit).
166///
167/// # Examples
168/// ```
169/// # use utils::parser::{self, Parser};
170/// assert_eq!(
171///     parser::noop().parse(b"abc"),
172///     Ok(((), &b"abc"[..]))
173/// );
174/// ```
175#[inline]
176#[must_use]
177pub fn noop() -> Constant<()> {
178    const {
179        assert!(size_of::<Constant<()>>() == 0);
180    }
181    Constant(())
182}
183
184#[derive(Copy, Clone)]
185pub struct Eof();
186impl<'i> Parser<'i> for Eof {
187    type Output = ();
188    type Then<T: Parser<'i>> = Unimplemented;
189
190    #[inline]
191    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
192        match input {
193            [] => Ok(((), input)),
194            _ => Err((ParseError::ExpectedEof(), input)),
195        }
196    }
197
198    fn then<T: Parser<'i>>(self, _next: T) -> Self::Then<T> {
199        panic!("chaining after eof will never match");
200    }
201}
202
203/// Parser which matches the end of the input.
204///
205/// Useful when parsing a list and each item is separated by a separator, unless it is at the end of
206/// the input.
207///
208/// # Examples
209/// ```
210/// # use utils::parser::{self, Parser};
211/// assert_eq!(
212///     parser::eof().parse(b""),
213///     Ok(((), &b""[..]))
214/// );
215/// assert_eq!(
216///     parser::u32()
217///         .with_suffix(b','.or(parser::eof()))
218///         .parse_all("12,34,56")
219///         .unwrap(),
220///     vec![12, 34, 56],
221/// );
222/// ```
223#[inline]
224#[must_use]
225pub fn eof() -> Eof {
226    Eof()
227}
228
229#[derive(Copy, Clone)]
230pub struct Eol();
231impl<'i> Parser<'i> for Eol {
232    type Output = ();
233    type Then<T: Parser<'i>> = Then2<Self, T>;
234
235    #[inline]
236    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
237        match input {
238            [b'\n', remaining @ ..] | [b'\r', b'\n', remaining @ ..] => Ok(((), remaining)),
239            [] => Ok(((), input)),
240            _ => Err((ParseError::Expected("newline or end of input"), input)),
241        }
242    }
243}
244
245/// Parser which matches newlines or the end of the input.
246///
247/// Matches both LF and CRLF line endings.
248///
249/// # Examples
250/// ```
251/// # use utils::parser::{self, Parser};
252/// assert_eq!(
253///     parser::eol().parse(b"\nabc"),
254///     Ok(((), &b"abc"[..]))
255/// );
256/// assert_eq!(
257///     parser::eol().parse(b"\r\nabc"),
258///     Ok(((), &b"abc"[..]))
259/// );
260/// assert_eq!(
261///     parser::eol().parse(b""),
262///     Ok(((), &b""[..]))
263/// );
264/// ```
265#[inline]
266#[must_use]
267pub fn eol() -> Eol {
268    Eol()
269}
270
271#[derive(Copy, Clone)]
272pub struct TakeWhile<const N: usize>(fn(&u8) -> bool);
273impl<'i, const N: usize> Parser<'i> for TakeWhile<N> {
274    type Output = &'i [u8];
275    type Then<T: Parser<'i>> = Then2<Self, T>;
276
277    #[inline]
278    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
279        let mut end = 0;
280        while end < input.len() && self.0(&input[end]) {
281            end += 1;
282        }
283        if end >= N {
284            Ok(input.split_at(end))
285        } else {
286            Err((ParseError::ExpectedMatches(N), &input[end..]))
287        }
288    }
289}
290
291/// Parser for substrings consisting of bytes matching the provided function.
292///
293/// # Examples
294/// ```
295/// # use utils::parser::{self, Parser};
296/// let parser = parser::take_while(u8::is_ascii_lowercase);
297/// assert_eq!(
298///     parser.parse(b"abc def"),
299///     Ok((&b"abc"[..], &b" def"[..]))
300/// );
301/// assert_eq!(
302///     parser.parse(b"ABC"),
303///     Ok((&b""[..], &b"ABC"[..]))
304/// );
305/// ```
306#[inline]
307#[must_use]
308pub fn take_while(f: fn(&u8) -> bool) -> TakeWhile<0> {
309    TakeWhile(f)
310}
311
312/// Parser for non-empty substrings consisting of bytes matching the provided function.
313///
314/// # Examples
315/// ```
316/// # use utils::parser::{self, Parser};
317/// let parser = parser::take_while1(u8::is_ascii_lowercase);
318/// assert_eq!(
319///     parser.parse(b"abc def"),
320///     Ok((&b"abc"[..], &b" def"[..]))
321/// );
322/// assert!(parser.parse(b"ABC").is_err());
323/// ```
324#[inline]
325#[must_use]
326pub fn take_while1(f: fn(&u8) -> bool) -> TakeWhile<1> {
327    TakeWhile(f)
328}