utils/
str.rs

1//! String helpers.
2use std::fmt::{Debug, Display};
3use std::fmt::{Formatter, Write};
4use std::hash::Hash;
5use std::num::NonZero;
6
7/// 2 byte [`TinyStr`] using a [`u16`].
8pub type TinyStr2 = TinyStr<NonZero<u16>>;
9/// 4 byte [`TinyStr`] using a [`u32`].
10pub type TinyStr4 = TinyStr<NonZero<u32>>;
11/// 8 byte [`TinyStr`] using a [`u64`].
12pub type TinyStr8 = TinyStr<NonZero<u64>>;
13
14/// A short string packed into a big-endian [`NonZero`] integer.
15///
16/// `TinyStr` stores up to `N` bytes in a single value that fits in a register, enabling
17/// single-instruction equality and ordering comparisons.
18/// The big-endian layout means normal integer comparisons result in lexicographic order.
19///
20/// Strings are NUL-padded. Trailing NUL bytes are indistinguishable from padding, so two inputs
21/// only differing in trailing NULs will be equal.
22///
23/// Empty strings or strings containing all NUL bytes are not representable.
24///
25/// # Examples
26/// ```
27/// # use utils::str::{TinyStr4, TinyStr8};
28/// let s4 = TinyStr4::new(b"abc").unwrap();
29/// assert_eq!(s4.len(), 3);
30/// assert_eq!(format!("{s4}"), "abc");
31/// assert!(s4 < TinyStr4::from_const(b"abd"));
32/// assert!(s4 > TinyStr4::from_const(b"abb"));
33///
34/// let s8 = TinyStr8::new(b"abcdefg").unwrap();
35/// assert_eq!(s8.len(), 7);
36/// assert_eq!(format!("{s8}"), "abcdefg");
37/// assert_eq!(s8, const { TinyStr8::from_const(b"abcdefg") });
38/// ```
39#[must_use]
40#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
41pub struct TinyStr<T: TinyStrInt>(T);
42
43impl<T: TinyStrInt> TinyStr<T> {
44    /// Creates a new `TinyStr` from a byte slice.
45    ///
46    /// Returns [`None`] if the slice is empty, only contains NUL bytes, or is too long.
47    ///
48    /// # Examples
49    /// ```
50    /// # use utils::str::TinyStr4;
51    /// let s = TinyStr4::new(b"abc").unwrap();
52    /// assert_eq!(s.len(), 3);
53    /// assert_eq!(format!("{s}"), "abc");
54    ///
55    /// assert!(TinyStr4::new(b"").is_none());
56    /// assert!(TinyStr4::new(b"abcde").is_none());
57    /// ```
58    pub fn new(s: &[u8]) -> Option<Self> {
59        if s.is_empty() || s.len() > T::LEN {
60            return None;
61        }
62
63        let mut accumulator = T::Raw::default();
64        let mut i = 0;
65        while i < T::LEN && i < s.len() {
66            accumulator = T::set_raw(accumulator, s[i], i);
67            i += 1;
68        }
69
70        T::from_raw(accumulator).map(Self)
71    }
72
73    /// Creates a new `TinyStr` from a raw [`NonZero`] integer.
74    ///
75    /// # Examples
76    /// ```
77    /// # use utils::str::TinyStr4;
78    /// # use std::num::NonZero;
79    /// let raw = NonZero::new(0x61626300).unwrap();
80    /// let s = TinyStr4::from_raw(raw);
81    /// assert_eq!(s.len(), 3);
82    /// assert_eq!(format!("{s}"), "abc");
83    /// ```
84    #[inline]
85    pub const fn from_raw(raw: T) -> Self {
86        TinyStr(raw)
87    }
88
89    /// Returns the number of bytes in the string.
90    ///
91    /// # Examples
92    /// ```
93    /// # use utils::str::TinyStr4;
94    /// assert_eq!(TinyStr4::new(b"a").unwrap().len(), 1);
95    /// assert_eq!(TinyStr4::new(b"ab").unwrap().len(), 2);
96    /// assert_eq!(TinyStr4::new(b"abc").unwrap().len(), 3);
97    /// assert_eq!(TinyStr4::new(b"abcd").unwrap().len(), 4);
98    /// ```
99    #[inline]
100    #[must_use]
101    #[expect(clippy::len_without_is_empty, reason = "TinyStr is never empty")]
102    pub fn len(self) -> usize {
103        let len = T::LEN - (self.0.trailing_zeros() as usize) / 8;
104        #[cfg(feature = "unsafe")]
105        unsafe {
106            std::hint::assert_unchecked(len >= 1);
107            std::hint::assert_unchecked(len <= T::LEN);
108        }
109        len
110    }
111}
112
113/// Writes the string, replacing any invalid UTF-8 sequences with the replacement character.
114///
115/// This is a comparatively expensive operation, requiring the value to be copied onto the stack
116/// and UTF-8 validation.
117///
118/// # Examples
119/// ```
120/// # use utils::str::TinyStr4;
121/// let s = TinyStr4::new(b"abc").unwrap();
122/// assert_eq!(format!("{s}"), "abc");
123///
124/// let invalid = TinyStr4::from_raw(std::num::NonZero::new(0x61FF6200).unwrap());
125/// assert_eq!(format!("{invalid}"), "a\u{FFFD}b");
126/// ```
127impl<T: TinyStrInt> Display for TinyStr<T> {
128    #[cold]
129    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130        let bytes = self.0.to_be_bytes();
131        let slice = &bytes.as_ref()[..self.len()];
132        for chunk in slice.utf8_chunks() {
133            f.write_str(chunk.valid())?;
134            if !chunk.invalid().is_empty() {
135                f.write_char(char::REPLACEMENT_CHARACTER)?;
136            }
137        }
138        Ok(())
139    }
140}
141
142/// Writes a debug representation of the string.
143///
144/// # Examples
145/// ```
146/// # use utils::str::TinyStr4;
147/// let s = TinyStr4::new(b"abc").unwrap();
148/// assert_eq!(format!("{s:?}"), "TinyStr(\"abc\")");
149///
150/// let invalid = TinyStr4::from_raw(std::num::NonZero::new(0x61FF6200).unwrap());
151/// assert_eq!(format!("{invalid:?}"), "TinyStr([97, 255, 98])");
152/// ```
153impl<T: TinyStrInt> Debug for TinyStr<T> {
154    #[cold]
155    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
156        let bytes = self.0.to_be_bytes();
157        let slice = &bytes.as_ref()[..self.len()];
158        match std::str::from_utf8(slice) {
159            Ok(s) => f.debug_tuple("TinyStr").field(&s).finish(),
160            Err(_) => f.debug_tuple("TinyStr").field(&slice).finish(),
161        }
162    }
163}
164
/// Storage abstraction over the integer types that can back a [`TinyStr`].
///
/// This module implements it for `NonZero<u16>`, `NonZero<u32>` and `NonZero<u64>`.
pub trait TinyStrInt: Copy + Clone + Eq + Ord + Hash + Sized {
    /// Number of bytes the storage can hold, i.e. the maximum string length.
    const LEN: usize;

    /// Plain value used as the accumulator while a string is being packed byte by byte.
    ///
    /// Packing starts from [`Default::default`].
    type Raw: Copy + Default;
    /// Byte buffer returned by [`to_be_bytes`](Self::to_be_bytes).
    type Bytes: Copy + Default + AsMut<[u8]> + AsRef<[u8]>;

    /// Returns the stored value as bytes in big-endian order (first string byte first).
    fn to_be_bytes(self) -> Self::Bytes;

    /// Returns the number of trailing zero bits of the stored value.
    ///
    /// [`TinyStr::len`] divides this by 8 to count the padding bytes.
    fn trailing_zeros(self) -> u32;

    /// Returns `accumulator` with `byte` merged in at byte position `pos`.
    ///
    /// Position 0 is the first byte of the string. The callers in this module only pass
    /// `pos < Self::LEN`.
    fn set_raw(accumulator: Self::Raw, byte: u8, pos: usize) -> Self::Raw;

    /// Converts a fully packed raw value into the storage type.
    ///
    /// Returns [`None`] when `raw` is not representable; for the `NonZero` implementations in
    /// this module that is the value zero.
    fn from_raw(raw: Self::Raw) -> Option<Self>;
}
176
177macro_rules! int_impl {
178    ($t:ident) => {
179        impl TinyStrInt for NonZero<$t> {
180            type Raw = $t;
181            type Bytes = [u8; Self::LEN];
182            const LEN: usize = $t::BITS as usize / 8;
183
184            #[inline]
185            fn to_be_bytes(self) -> Self::Bytes {
186                self.get().to_be_bytes()
187            }
188            #[inline]
189            fn trailing_zeros(self) -> u32 {
190                self.get().trailing_zeros()
191            }
192            #[inline]
193            fn set_raw(accumulator: Self::Raw, byte: u8, pos: usize) -> Self::Raw {
194                accumulator | ($t::from(byte) << ((Self::LEN - 1 - pos) * 8))
195            }
196            #[inline]
197            fn from_raw(raw: Self::Raw) -> Option<Self> {
198                NonZero::new(raw)
199            }
200        }
201
202        // Workaround for const trait limitations
203        impl TinyStr<NonZero<$t>> {
204            #[doc = concat!(
205                "Creates a `TinyStr` from a byte slice at compile time, panicking if the string is invalid.\n",
206                "\n",
207                "# Examples\n",
208                "```\n",
209                "# use utils::str::TinyStr;\n",
210                "# use std::num::NonZero;\n",
211                "const S: TinyStr<NonZero<", stringify!($t), ">> = TinyStr::<NonZero<", stringify!($t), ">>::from_const(b\"ab\");\n",
212                "assert_eq!(S, TinyStr::new(b\"ab\").unwrap());\n",
213                "assert_eq!(S.to_string(), \"ab\");\n",
214                "assert_eq!(S.len(), 2);\n",
215                "```"
216            )]
217            pub const fn from_const(s: &[u8]) -> Self {
218                const LEN: usize = $t::BITS as usize / 8;
219
220                assert!(!s.is_empty(), "string is empty");
221                assert!(s.len() <= LEN, "string is too long");
222
223                let mut accumulator: $t = 0;
224                let mut i = 0;
225                while i < s.len() {
226                    accumulator |= (s[i] as $t) << ((LEN - 1 - i) * 8);
227                    i += 1;
228                }
229
230                match NonZero::new(accumulator) {
231                    Some(v) => Self::from_raw(v),
232                    None => panic!("string only contains zero bytes"),
233                }
234            }
235        }
236    };
237}
238int_impl!(u16);
239int_impl!(u32);
240int_impl!(u64);
241
242/// Helper trait to map between length `N` and the smallest [`TinyStrInt`] type that can store it.
243///
244/// See [`parser::tinystr`](crate::parser::tinystr).
245pub trait TinyStrLen<const N: usize> {
246    type Int: TinyStrInt;
247}
248impl TinyStrLen<2> for () {
249    type Int = NonZero<u16>;
250}
251impl TinyStrLen<3> for () {
252    type Int = NonZero<u32>;
253}
254impl TinyStrLen<4> for () {
255    type Int = NonZero<u32>;
256}
257impl TinyStrLen<5> for () {
258    type Int = NonZero<u64>;
259}
260impl TinyStrLen<6> for () {
261    type Int = NonZero<u64>;
262}
263impl TinyStrLen<7> for () {
264    type Int = NonZero<u64>;
265}
266impl TinyStrLen<8> for () {
267    type Int = NonZero<u64>;
268}