utils/parser/simple.rs
1use crate::parser::then::{Then2, Unimplemented};
2use crate::parser::{ParseError, ParseResult, Parser};
3use std::ops::RangeInclusive;
4
5#[derive(Copy, Clone)]
6pub struct Byte();
7impl<'i> Parser<'i> for Byte {
8 type Output = u8;
9 type Then<T: Parser<'i>> = Then2<Self, T>;
10
11 #[inline]
12 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
13 if let [byte, remaining @ ..] = input {
14 Ok((*byte, remaining))
15 } else {
16 Err((ParseError::Expected("byte"), input))
17 }
18 }
19}
20
21/// Parser that consumes a single byte.
22///
23/// Not to be confused with [`u8`](super::u8), which parses a number in the range 0-255.
24///
25/// # Examples
26/// ```
27/// # use utils::parser::{self, Parser};
28/// assert_eq!(
29/// parser::byte().parse(b"abcdef"),
30/// Ok((b'a', &b"bcdef"[..]))
31/// );
32/// assert_eq!(
33/// parser::byte().parse(b"123"),
34/// Ok((b'1', &b"23"[..]))
35/// );
36/// ```
37#[must_use]
38pub fn byte() -> Byte {
39 Byte()
40}
41
42#[derive(Copy, Clone)]
43pub struct ByteLut<'a, O> {
44 lut: &'a [Option<O>; 256],
45 error: &'static str,
46}
47impl<'i, O: Copy> Parser<'i> for ByteLut<'_, O> {
48 type Output = O;
49 type Then<T: Parser<'i>> = Then2<Self, T>;
50
51 #[inline]
52 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
53 if let [byte, remaining @ ..] = input
54 && let Some(output) = self.lut[*byte as usize]
55 {
56 Ok((output, remaining))
57 } else {
58 Err((ParseError::Custom(self.error), input))
59 }
60 }
61}
62
63/// Parser that consumes a single byte and maps it using a lookup table.
64///
65/// Equivalent to `parser::byte().map_res(|b| LOOKUP[b as usize].ok_or("expected ..."))`, which is
66/// usually faster than an equivalent match statement in the closure.
67///
68/// See also [`parser::byte_map!`](crate::parser::byte_map!) which wraps this function, allowing a
69/// match-like syntax to be used to define the lookup table.
70///
71/// # Examples
72/// ```
73/// # use utils::parser::{self, Parser, ParseError};
74/// const LOOKUP: [Option<bool>; 256] = {
75/// let mut x = [None; 256];
76/// x['#' as usize] = Some(true);
77/// x['.' as usize] = Some(false);
78/// x
79/// };
80///
81/// let parser = parser::byte_lut(&LOOKUP, "expected '#' or '.'");
82/// assert_eq!(parser.parse(b"#..##"), Ok((true, &b"..##"[..])));
83/// assert_eq!(parser.parse(b"..##"), Ok((false, &b".##"[..])));
84/// assert_eq!(parser.parse(b"abc"), Err((ParseError::Custom("expected '#' or '.'"), &b"abc"[..])));
85/// ```
86#[inline]
87#[must_use]
88pub fn byte_lut<'a, T: Copy>(lut: &'a [Option<T>; 256], error: &'static str) -> ByteLut<'a, T> {
89 ByteLut { lut, error }
90}
91
92#[derive(Copy, Clone)]
93pub struct ByteRange {
94 min: u8,
95 max: u8,
96}
97impl<'i> Parser<'i> for ByteRange {
98 type Output = u8;
99 type Then<T: Parser<'i>> = Then2<Self, T>;
100
101 #[inline]
102 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
103 if let [byte, remaining @ ..] = input {
104 if *byte >= self.min && *byte <= self.max {
105 Ok((*byte, remaining))
106 } else {
107 Err((ParseError::ExpectedByteRange(self.min, self.max), input))
108 }
109 } else {
110 Err((ParseError::Expected("byte"), input))
111 }
112 }
113}
114
115/// Parser that consumes a single byte in the supplied range.
116///
117/// See also [`number_range`](super::number_range) and [`byte`].
118///
119/// # Examples
120/// ```
121/// # use utils::parser::{self, Parser};
122/// assert_eq!(
123/// parser::byte_range(b'a'..=b'z').parse(b"hello world"),
124/// Ok((b'h', &b"ello world"[..]))
125/// );
126/// ```
127#[inline]
128#[must_use]
129pub fn byte_range(range: RangeInclusive<u8>) -> ByteRange {
130 let min = *range.start();
131 let max = *range.end();
132 assert!(min <= max);
133 ByteRange { min, max }
134}
135
136#[derive(Copy, Clone)]
137pub struct Constant<V: Copy>(pub(super) V);
138impl<'i, V: Copy> Parser<'i> for Constant<V> {
139 type Output = V;
140 type Then<T: Parser<'i>> = Then2<Self, T>;
141
142 #[inline]
143 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
144 Ok((self.0, input))
145 }
146}
147
148/// Parser that consumes no input and always succeeds, returning the provided value.
149///
150/// # Examples
151/// ```
152/// # use utils::parser::{self, Parser};
153/// assert_eq!(
154/// parser::constant(1).parse(b"abc"),
155/// Ok((1, &b"abc"[..]))
156/// );
157/// ```
158#[must_use]
159pub fn constant<T: Copy>(v: T) -> Constant<T> {
160 Constant(v)
161}
162
163/// Parser that consumes no input and always succeeds, returning [`()`](unit).
164///
165/// # Examples
166/// ```
167/// # use utils::parser::{self, Parser};
168/// assert_eq!(
169/// parser::noop().parse(b"abc"),
170/// Ok(((), &b"abc"[..]))
171/// );
172/// ```
173#[must_use]
174pub fn noop() -> Constant<()> {
175 const {
176 assert!(size_of::<Constant<()>>() == 0);
177 }
178 Constant(())
179}
180
181#[derive(Copy, Clone)]
182pub struct Eof();
183impl<'i> Parser<'i> for Eof {
184 type Output = ();
185 type Then<T: Parser<'i>> = Unimplemented;
186
187 #[inline]
188 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
189 match input {
190 [] => Ok(((), input)),
191 _ => Err((ParseError::ExpectedEof(), input)),
192 }
193 }
194
195 fn then<T: Parser<'i>>(self, _next: T) -> Self::Then<T> {
196 panic!("chaining after eof will never match");
197 }
198}
199
200/// Parser which matches the end of the input.
201///
202/// Useful when parsing a list and each item is separated by a separator, unless it is at the end of
203/// the input.
204///
205/// # Examples
206/// ```
207/// # use utils::parser::{self, Parser};
208/// assert_eq!(
209/// parser::eof().parse(b""),
210/// Ok(((), &b""[..]))
211/// );
212/// assert_eq!(
213/// parser::u32()
214/// .with_suffix(b','.or(parser::eof()))
215/// .parse_all("12,34,56")
216/// .unwrap(),
217/// vec![12, 34, 56],
218/// );
219/// ```
220#[must_use]
221pub fn eof() -> Eof {
222 Eof()
223}
224
225#[derive(Copy, Clone)]
226pub struct Eol();
227impl<'i> Parser<'i> for Eol {
228 type Output = ();
229 type Then<T: Parser<'i>> = Then2<Self, T>;
230
231 #[inline]
232 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
233 match input {
234 [b'\n', remaining @ ..] | [b'\r', b'\n', remaining @ ..] => Ok(((), remaining)),
235 [] => Ok(((), input)),
236 _ => Err((ParseError::Expected("newline or end of input"), input)),
237 }
238 }
239}
240
241/// Parser which matches newlines or the end of the input.
242///
243/// Matches both LF and CRLF line endings.
244///
245/// # Examples
246/// ```
247/// # use utils::parser::{self, Parser};
248/// assert_eq!(
249/// parser::eol().parse(b"\nabc"),
250/// Ok(((), &b"abc"[..]))
251/// );
252/// assert_eq!(
253/// parser::eol().parse(b"\r\nabc"),
254/// Ok(((), &b"abc"[..]))
255/// );
256/// assert_eq!(
257/// parser::eol().parse(b""),
258/// Ok(((), &b""[..]))
259/// );
260/// ```
261#[must_use]
262pub fn eol() -> Eol {
263 Eol()
264}
265
266#[derive(Copy, Clone)]
267pub struct TakeWhile<const N: usize>(fn(&u8) -> bool);
268impl<'i, const N: usize> Parser<'i> for TakeWhile<N> {
269 type Output = &'i [u8];
270 type Then<T: Parser<'i>> = Then2<Self, T>;
271
272 #[inline]
273 fn parse(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output> {
274 let mut end = 0;
275 while end < input.len() && self.0(&input[end]) {
276 end += 1;
277 }
278 if end >= N {
279 Ok(input.split_at(end))
280 } else {
281 Err((ParseError::ExpectedMatches(N), &input[end..]))
282 }
283 }
284}
285
286/// Parser for substrings consisting of bytes matching the provided function.
287///
288/// # Examples
289/// ```
290/// # use utils::parser::{self, Parser};
291/// let parser = parser::take_while(u8::is_ascii_lowercase);
292/// assert_eq!(
293/// parser.parse(b"abc def"),
294/// Ok((&b"abc"[..], &b" def"[..]))
295/// );
296/// assert_eq!(
297/// parser.parse(b"ABC"),
298/// Ok((&b""[..], &b"ABC"[..]))
299/// );
300/// ```
301#[must_use]
302pub fn take_while(f: fn(&u8) -> bool) -> TakeWhile<0> {
303 TakeWhile(f)
304}
305
306/// Parser for non-empty substrings consisting of bytes matching the provided function.
307///
308/// # Examples
309/// ```
310/// # use utils::parser::{self, Parser};
311/// let parser = parser::take_while1(u8::is_ascii_lowercase);
312/// assert_eq!(
313/// parser.parse(b"abc def"),
314/// Ok((&b"abc"[..], &b" def"[..]))
315/// );
316/// assert!(parser.parse(b"ABC").is_err());
317/// ```
318#[must_use]
319pub fn take_while1(f: fn(&u8) -> bool) -> TakeWhile<1> {
320 TakeWhile(f)
321}