utils/str.rs
1//! String helpers.
2use std::fmt::{Debug, Display};
3use std::fmt::{Formatter, Write};
4use std::hash::Hash;
5use std::num::NonZero;
6
7/// 2 byte [`TinyStr`] using a [`u16`].
8pub type TinyStr2 = TinyStr<NonZero<u16>>;
9/// 4 byte [`TinyStr`] using a [`u32`].
10pub type TinyStr4 = TinyStr<NonZero<u32>>;
11/// 8 byte [`TinyStr`] using a [`u64`].
12pub type TinyStr8 = TinyStr<NonZero<u64>>;
13
14/// A short string packed into a big-endian [`NonZero`] integer.
15///
16/// `TinyStr` stores up to `N` bytes in a single value that fits in a register, enabling
17/// single-instruction equality and ordering comparisons.
18/// The big-endian layout means normal integer comparisons result in lexicographic order.
19///
20/// Strings are NUL-padded. Trailing NUL bytes are indistinguishable from padding, so two inputs
21/// only differing in trailing NULs will be equal.
22///
23/// Empty strings or strings containing all NUL bytes are not representable.
24///
25/// # Examples
26/// ```
27/// # use utils::str::{TinyStr4, TinyStr8};
28/// let s4 = TinyStr4::new(b"abc").unwrap();
29/// assert_eq!(s4.len(), 3);
30/// assert_eq!(format!("{s4}"), "abc");
31/// assert!(s4 < TinyStr4::from_const(b"abd"));
32/// assert!(s4 > TinyStr4::from_const(b"abb"));
33///
34/// let s8 = TinyStr8::new(b"abcdefg").unwrap();
35/// assert_eq!(s8.len(), 7);
36/// assert_eq!(format!("{s8}"), "abcdefg");
37/// assert_eq!(s8, const { TinyStr8::from_const(b"abcdefg") });
38/// ```
39#[must_use]
40#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
41pub struct TinyStr<T: TinyStrInt>(T);
42
43impl<T: TinyStrInt> TinyStr<T> {
44 /// Creates a new `TinyStr` from a byte slice.
45 ///
46 /// Returns [`None`] if the slice is empty, only contains NUL bytes, or is too long.
47 ///
48 /// # Examples
49 /// ```
50 /// # use utils::str::TinyStr4;
51 /// let s = TinyStr4::new(b"abc").unwrap();
52 /// assert_eq!(s.len(), 3);
53 /// assert_eq!(format!("{s}"), "abc");
54 ///
55 /// assert!(TinyStr4::new(b"").is_none());
56 /// assert!(TinyStr4::new(b"abcde").is_none());
57 /// ```
58 pub fn new(s: &[u8]) -> Option<Self> {
59 if s.is_empty() || s.len() > T::LEN {
60 return None;
61 }
62
63 let mut accumulator = T::Raw::default();
64 let mut i = 0;
65 while i < T::LEN && i < s.len() {
66 accumulator = T::set_raw(accumulator, s[i], i);
67 i += 1;
68 }
69
70 T::from_raw(accumulator).map(Self)
71 }
72
73 /// Creates a new `TinyStr` from a raw [`NonZero`] integer.
74 ///
75 /// # Examples
76 /// ```
77 /// # use utils::str::TinyStr4;
78 /// # use std::num::NonZero;
79 /// let raw = NonZero::new(0x61626300).unwrap();
80 /// let s = TinyStr4::from_raw(raw);
81 /// assert_eq!(s.len(), 3);
82 /// assert_eq!(format!("{s}"), "abc");
83 /// ```
84 #[inline]
85 pub const fn from_raw(raw: T) -> Self {
86 TinyStr(raw)
87 }
88
89 /// Returns the number of bytes in the string.
90 ///
91 /// # Examples
92 /// ```
93 /// # use utils::str::TinyStr4;
94 /// assert_eq!(TinyStr4::new(b"a").unwrap().len(), 1);
95 /// assert_eq!(TinyStr4::new(b"ab").unwrap().len(), 2);
96 /// assert_eq!(TinyStr4::new(b"abc").unwrap().len(), 3);
97 /// assert_eq!(TinyStr4::new(b"abcd").unwrap().len(), 4);
98 /// ```
99 #[inline]
100 #[must_use]
101 #[expect(clippy::len_without_is_empty, reason = "TinyStr is never empty")]
102 pub fn len(self) -> usize {
103 let len = T::LEN - (self.0.trailing_zeros() as usize) / 8;
104 #[cfg(feature = "unsafe")]
105 unsafe {
106 std::hint::assert_unchecked(len >= 1);
107 std::hint::assert_unchecked(len <= T::LEN);
108 }
109 len
110 }
111}
112
113/// Writes the string, replacing any invalid UTF-8 sequences with the replacement character.
114///
115/// This is a comparatively expensive operation, requiring the value to be copied onto the stack
116/// and UTF-8 validation.
117///
118/// # Examples
119/// ```
120/// # use utils::str::TinyStr4;
121/// let s = TinyStr4::new(b"abc").unwrap();
122/// assert_eq!(format!("{s}"), "abc");
123///
124/// let invalid = TinyStr4::from_raw(std::num::NonZero::new(0x61FF6200).unwrap());
125/// assert_eq!(format!("{invalid}"), "a\u{FFFD}b");
126/// ```
127impl<T: TinyStrInt> Display for TinyStr<T> {
128 #[cold]
129 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130 let bytes = self.0.to_be_bytes();
131 let slice = &bytes.as_ref()[..self.len()];
132 for chunk in slice.utf8_chunks() {
133 f.write_str(chunk.valid())?;
134 if !chunk.invalid().is_empty() {
135 f.write_char(char::REPLACEMENT_CHARACTER)?;
136 }
137 }
138 Ok(())
139 }
140}
141
142/// Writes a debug representation of the string.
143///
144/// # Examples
145/// ```
146/// # use utils::str::TinyStr4;
147/// let s = TinyStr4::new(b"abc").unwrap();
148/// assert_eq!(format!("{s:?}"), "TinyStr(\"abc\")");
149///
150/// let invalid = TinyStr4::from_raw(std::num::NonZero::new(0x61FF6200).unwrap());
151/// assert_eq!(format!("{invalid:?}"), "TinyStr([97, 255, 98])");
152/// ```
153impl<T: TinyStrInt> Debug for TinyStr<T> {
154 #[cold]
155 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
156 let bytes = self.0.to_be_bytes();
157 let slice = &bytes.as_ref()[..self.len()];
158 match std::str::from_utf8(slice) {
159 Ok(s) => f.debug_tuple("TinyStr").field(&s).finish(),
160 Err(_) => f.debug_tuple("TinyStr").field(&slice).finish(),
161 }
162 }
163}
164
/// Helper trait implemented by the integer types that can back a [`TinyStr`].
pub trait TinyStrInt: Copy + Clone + Eq + Ord + Hash + Sized {
    /// Number of bytes the storage type can hold.
    const LEN: usize;

    /// Plain accumulator type the packed value is built up in before conversion to `Self`.
    type Raw: Copy + Default;
    /// Byte-array form of the storage type, as returned by [`to_be_bytes`](Self::to_be_bytes).
    type Bytes: Copy + Default + AsMut<[u8]> + AsRef<[u8]>;

    /// Returns `accumulator` with `byte` stored at byte position `pos`, where position 0 is
    /// the first byte of the string.
    fn set_raw(accumulator: Self::Raw, byte: u8, pos: usize) -> Self::Raw;
    /// Converts a raw accumulator into the storage type, or [`None`] if it is not
    /// representable.
    fn from_raw(raw: Self::Raw) -> Option<Self>;

    /// Returns the value as big-endian bytes, first byte of the string first.
    fn to_be_bytes(self) -> Self::Bytes;
    /// Returns the number of trailing zero bits in the value.
    fn trailing_zeros(self) -> u32;
}
176
177macro_rules! int_impl {
178 ($t:ident) => {
179 impl TinyStrInt for NonZero<$t> {
180 type Raw = $t;
181 type Bytes = [u8; Self::LEN];
182 const LEN: usize = $t::BITS as usize / 8;
183
184 #[inline]
185 fn to_be_bytes(self) -> Self::Bytes {
186 self.get().to_be_bytes()
187 }
188 #[inline]
189 fn trailing_zeros(self) -> u32 {
190 self.get().trailing_zeros()
191 }
192 #[inline]
193 fn set_raw(accumulator: Self::Raw, byte: u8, pos: usize) -> Self::Raw {
194 accumulator | ($t::from(byte) << ((Self::LEN - 1 - pos) * 8))
195 }
196 #[inline]
197 fn from_raw(raw: Self::Raw) -> Option<Self> {
198 NonZero::new(raw)
199 }
200 }
201
202 // Workaround for const trait limitations
203 impl TinyStr<NonZero<$t>> {
204 #[doc = concat!(
205 "Creates a `TinyStr` from a byte slice at compile time, panicking if the string is invalid.\n",
206 "\n",
207 "# Examples\n",
208 "```\n",
209 "# use utils::str::TinyStr;\n",
210 "# use std::num::NonZero;\n",
211 "const S: TinyStr<NonZero<", stringify!($t), ">> = TinyStr::<NonZero<", stringify!($t), ">>::from_const(b\"ab\");\n",
212 "assert_eq!(S, TinyStr::new(b\"ab\").unwrap());\n",
213 "assert_eq!(S.to_string(), \"ab\");\n",
214 "assert_eq!(S.len(), 2);\n",
215 "```"
216 )]
217 pub const fn from_const(s: &[u8]) -> Self {
218 const LEN: usize = $t::BITS as usize / 8;
219
220 assert!(!s.is_empty(), "string is empty");
221 assert!(s.len() <= LEN, "string is too long");
222
223 let mut accumulator: $t = 0;
224 let mut i = 0;
225 while i < s.len() {
226 accumulator |= (s[i] as $t) << ((LEN - 1 - i) * 8);
227 i += 1;
228 }
229
230 match NonZero::new(accumulator) {
231 Some(v) => Self::from_raw(v),
232 None => panic!("string only contains zero bytes"),
233 }
234 }
235 }
236 };
237}
238int_impl!(u16);
239int_impl!(u32);
240int_impl!(u64);
241
242/// Helper trait to map between length `N` and the smallest [`TinyStrInt`] type that can store it.
243///
244/// See [`parser::tinystr`](crate::parser::tinystr).
245pub trait TinyStrLen<const N: usize> {
246 type Int: TinyStrInt;
247}
248impl TinyStrLen<2> for () {
249 type Int = NonZero<u16>;
250}
251impl TinyStrLen<3> for () {
252 type Int = NonZero<u32>;
253}
254impl TinyStrLen<4> for () {
255 type Int = NonZero<u32>;
256}
257impl TinyStrLen<5> for () {
258 type Int = NonZero<u64>;
259}
260impl TinyStrLen<6> for () {
261 type Int = NonZero<u64>;
262}
263impl TinyStrLen<7> for () {
264 type Int = NonZero<u64>;
265}
266impl TinyStrLen<8> for () {
267 type Int = NonZero<u64>;
268}