utils/parser/
simple.rs

1use crate::parser::then::{Then2, Unimplemented};
2use crate::parser::{ParseError, ParseResult, Parser};
3use std::ops::RangeInclusive;
4
5#[derive(Copy, Clone)]
6pub struct Byte();
7impl<'i> Parser<'i> for Byte {
8    type Output = u8;
9    type Then<T: Parser<'i>> = Then2<Self, T>;
10
11    #[inline]
12    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
13        if let [byte, remaining @ ..] = input {
14            Ok((*byte, remaining))
15        } else {
16            Err((ParseError::Expected("byte"), input))
17        }
18    }
19}
20
21/// Parser that consumes a single byte.
22///
23/// Not to be confused with [`u8`](super::u8), which parses a number in the range 0-255.
24///
25/// # Examples
26/// ```
27/// # use utils::parser::{self, Parser};
28/// assert_eq!(
29///     parser::byte().parse(b"abcdef"),
30///     Ok((b'a', &b"bcdef"[..]))
31/// );
32/// assert_eq!(
33///     parser::byte().parse(b"123"),
34///     Ok((b'1', &b"23"[..]))
35/// );
36/// ```
37#[must_use]
38pub fn byte() -> Byte {
39    Byte()
40}
41
42#[derive(Copy, Clone)]
43pub struct ByteLut<'a, O> {
44    lut: &'a [Option<O>; 256],
45    error: &'static str,
46}
47impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
48    type Output = O;
49    type Then<T: Parser<'i>> = Then2<Self, T>;
50
51    #[inline]
52    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
53        if let [byte, remaining @ ..] = input
54            && let Some(output) = self.lut[*byte as usize]
55        {
56            Ok((output, remaining))
57        } else {
58            Err((ParseError::Custom(self.error), input))
59        }
60    }
61}
62
63/// Parser that consumes a single byte and maps it using a lookup table.
64///
65/// Equivalent to `parser::byte().map_res(|b| LOOKUP[b as usize].ok_or("expected ..."))`, which is
66/// usually faster than an equivalent match statement in the closure.
67///
68/// See also [`parser::byte_map!`](crate::parser::byte_map!) which wraps this function, allowing a
69/// match-like syntax to be used to define the lookup table.
70///
71/// # Examples
72/// ```
73/// # use utils::parser::{self, Parser, ParseError};
74/// const LOOKUP: [Option<bool>; 256] = {
75///     let mut x = [None; 256];
76///     x['#' as usize] = Some(true);
77///     x['.' as usize] = Some(false);
78///     x
79/// };
80///
81/// let parser = parser::byte_lut(&LOOKUP, "expected '#' or '.'");
82/// assert_eq!(parser.parse(b"#..##"), Ok((true, &b"..##"[..])));
83/// assert_eq!(parser.parse(b"..##"), Ok((false, &b".##"[..])));
84/// assert_eq!(parser.parse(b"abc"), Err((ParseError::Custom("expected '#' or '.'"), &b"abc"[..])));
85/// ```
86#[inline]
87#[must_use]
88pub fn byte_lut<'a, T: Copy>(lut: &'a [Option<T>; 256], error: &'static str) -> ByteLut<'a, T> {
89    ByteLut { lut, error }
90}
91
92#[derive(Copy, Clone)]
93pub struct ByteRange {
94    min: u8,
95    max: u8,
96}
97impl<'i> Parser<'i> for ByteRange {
98    type Output = u8;
99    type Then<T: Parser<'i>> = Then2<Self, T>;
100
101    #[inline]
102    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
103        if let [byte, remaining @ ..] = input {
104            if *byte >= self.min && *byte <= self.max {
105                Ok((*byte, remaining))
106            } else {
107                Err((ParseError::ExpectedByteRange(self.min, self.max), input))
108            }
109        } else {
110            Err((ParseError::Expected("byte"), input))
111        }
112    }
113}
114
115/// Parser that consumes a single byte in the supplied range.
116///
117/// See also [`number_range`](super::number_range) and [`byte`].
118///
119/// # Examples
120/// ```
121/// # use utils::parser::{self, Parser};
122/// assert_eq!(
123///     parser::byte_range(b'a'..=b'z').parse(b"hello world"),
124///     Ok((b'h', &b"ello world"[..]))
125/// );
126/// ```
127#[inline]
128#[must_use]
129pub fn byte_range(range: RangeInclusive<u8>) -> ByteRange {
130    let min = *range.start();
131    let max = *range.end();
132    assert!(min <= max);
133    ByteRange { min, max }
134}
135
136#[derive(Copy, Clone)]
137pub struct Constant<V: Copy>(pub(super) V);
138impl<'i, V: Copy> Parser<'i> for Constant<V> {
139    type Output = V;
140    type Then<T: Parser<'i>> = Then2<Self, T>;
141
142    #[inline]
143    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
144        Ok((self.0, input))
145    }
146}
147
148/// Parser that consumes no input and always succeeds, returning the provided value.
149///
150/// # Examples
151/// ```
152/// # use utils::parser::{self, Parser};
153/// assert_eq!(
154///     parser::constant(1).parse(b"abc"),
155///     Ok((1, &b"abc"[..]))
156/// );
157/// ```
158#[must_use]
159pub fn constant<T: Copy>(v: T) -> Constant<T> {
160    Constant(v)
161}
162
163/// Parser that consumes no input and always succeeds, returning [`()`](unit).
164///
165/// # Examples
166/// ```
167/// # use utils::parser::{self, Parser};
168/// assert_eq!(
169///     parser::noop().parse(b"abc"),
170///     Ok(((), &b"abc"[..]))
171/// );
172/// ```
173#[must_use]
174pub fn noop() -> Constant<()> {
175    const {
176        assert!(size_of::<Constant<()>>() == 0);
177    }
178    Constant(())
179}
180
181#[derive(Copy, Clone)]
182pub struct Eof();
183impl<'i> Parser<'i> for Eof {
184    type Output = ();
185    type Then<T: Parser<'i>> = Unimplemented;
186
187    #[inline]
188    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
189        match input {
190            [] => Ok(((), input)),
191            _ => Err((ParseError::ExpectedEof(), input)),
192        }
193    }
194
195    fn then<T: Parser<'i>>(self, _next: T) -> Self::Then<T> {
196        panic!("chaining after eof will never match");
197    }
198}
199
200/// Parser which matches the end of the input.
201///
202/// Useful when parsing a list and each item is separated by a separator, unless it is at the end of
203/// the input.
204///
205/// # Examples
206/// ```
207/// # use utils::parser::{self, Parser};
208/// assert_eq!(
209///     parser::eof().parse(b""),
210///     Ok(((), &b""[..]))
211/// );
212/// assert_eq!(
213///     parser::u32()
214///         .with_suffix(b','.or(parser::eof()))
215///         .parse_all("12,34,56")
216///         .unwrap(),
217///     vec![12, 34, 56],
218/// );
219/// ```
220#[must_use]
221pub fn eof() -> Eof {
222    Eof()
223}
224
225#[derive(Copy, Clone)]
226pub struct Eol();
227impl<'i> Parser<'i> for Eol {
228    type Output = ();
229    type Then<T: Parser<'i>> = Then2<Self, T>;
230
231    #[inline]
232    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
233        match input {
234            [b'\n', remaining @ ..] | [b'\r', b'\n', remaining @ ..] => Ok(((), remaining)),
235            [] => Ok(((), input)),
236            _ => Err((ParseError::Expected("newline or end of input"), input)),
237        }
238    }
239}
240
241/// Parser which matches newlines or the end of the input.
242///
243/// Matches both LF and CRLF line endings.
244///
245/// # Examples
246/// ```
247/// # use utils::parser::{self, Parser};
248/// assert_eq!(
249///     parser::eol().parse(b"\nabc"),
250///     Ok(((), &b"abc"[..]))
251/// );
252/// assert_eq!(
253///     parser::eol().parse(b"\r\nabc"),
254///     Ok(((), &b"abc"[..]))
255/// );
256/// assert_eq!(
257///     parser::eol().parse(b""),
258///     Ok(((), &b""[..]))
259/// );
260/// ```
261#[must_use]
262pub fn eol() -> Eol {
263    Eol()
264}
265
266#[derive(Copy, Clone)]
267pub struct TakeWhile<const N: usize>(fn(&u8) -> bool);
268impl<'i, const N: usize> Parser<'i> for TakeWhile<N> {
269    type Output = &'i [u8];
270    type Then<T: Parser<'i>> = Then2<Self, T>;
271
272    #[inline]
273    fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
274        let mut end = 0;
275        while end < input.len() && self.0(&input[end]) {
276            end += 1;
277        }
278        if end >= N {
279            Ok(input.split_at(end))
280        } else {
281            Err((ParseError::ExpectedMatches(N), &input[end..]))
282        }
283    }
284}
285
286/// Parser for substrings consisting of bytes matching the provided function.
287///
288/// # Examples
289/// ```
290/// # use utils::parser::{self, Parser};
291/// let parser = parser::take_while(u8::is_ascii_lowercase);
292/// assert_eq!(
293///     parser.parse(b"abc def"),
294///     Ok((&b"abc"[..], &b" def"[..]))
295/// );
296/// assert_eq!(
297///     parser.parse(b"ABC"),
298///     Ok((&b""[..], &b"ABC"[..]))
299/// );
300/// ```
301#[must_use]
302pub fn take_while(f: fn(&u8) -> bool) -> TakeWhile<0> {
303    TakeWhile(f)
304}
305
306/// Parser for non-empty substrings consisting of bytes matching the provided function.
307///
308/// # Examples
309/// ```
310/// # use utils::parser::{self, Parser};
311/// let parser = parser::take_while1(u8::is_ascii_lowercase);
312/// assert_eq!(
313///     parser.parse(b"abc def"),
314///     Ok((&b"abc"[..], &b" def"[..]))
315/// );
316/// assert!(parser.parse(b"ABC").is_err());
317/// ```
318#[must_use]
319pub fn take_while1(f: fn(&u8) -> bool) -> TakeWhile<1> {
320    TakeWhile(f)
321}