// Source file: hickory_proto/rr/domain/label.rs

// Copyright 2015-2018 Benjamin Fry <benjaminfry@me.com>
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

//! Labels are used as the internal components of a Name.
//!
//! A label is stored internally as ASCII; unicode input is converted to punycode when the label is built.

12use alloc::{string::String, vec::Vec};
13use core::borrow::Borrow;
14use core::cmp::{Ordering, PartialEq};
15use core::fmt::{self, Debug, Display, Formatter, Write};
16use core::hash::{Hash, Hasher};
17
18use idna::uts46::{AsciiDenyList, DnsLength, Hyphens, Uts46};
19use tinyvec::TinyVec;
20use tracing::debug;
21
22use crate::error::*;
23use crate::serialize::binary::DecodeError;
24
/// Raw bytes of the wildcard label, `*`.
const WILDCARD: &[u8] = b"*";
/// ASCII prefix checked by the `Display` impl to decide whether a label should be decoded via IDNA.
const IDNA_PREFIX: &[u8] = b"xn--";
27
/// A single label of a domain name.
///
/// Labels are always stored as ASCII, unicode characters must be encoded with punycode.
///
/// Equality, ordering and hashing of labels (see the trait impls in this file) all ignore
/// ASCII case.
#[derive(Clone, Eq)]
pub struct Label(TinyVec<[u8; 24]>);
31
32impl Label {
33    /// These must only be ASCII, with unicode encoded to PunyCode, or other such transformation.
34    ///
35    /// This uses the bytes as raw ascii values, with nothing escaped on the wire.
36    /// Generally users should use `from_str` or `from_ascii`
37    pub fn from_raw_bytes(bytes: &[u8]) -> ProtoResult<Self> {
38        // Check for label validity.
39        // RFC 2181, Section 11 "Name Syntax".
40        // > The length of any one label is limited to between 1 and 63 octets.
41        if bytes.is_empty() {
42            return Err("Label requires a minimum length of 1".into());
43        }
44        if bytes.len() > 63 {
45            return Err(DecodeError::LabelBytesTooLong(bytes.len()).into());
46        };
47        Ok(Self(TinyVec::from(bytes)))
48    }
49
50    /// Translates this string into IDNA safe name, encoding to punycode as necessary.
51    pub fn from_utf8(s: &str) -> ProtoResult<Self> {
52        if s.as_bytes() == WILDCARD {
53            return Ok(Self::wildcard());
54        }
55
56        // special case for SRV type records
57        if s.starts_with('_') {
58            return Self::from_ascii(s);
59        }
60
61        // length don't exceeding 63 is done in `from_ascii`
62        // on puny encoded string
63        // idna error are opaque so early failure is not possible.
64        match Uts46::new().to_ascii(
65            s.as_bytes(),
66            AsciiDenyList::STD3,
67            Hyphens::Allow,
68            DnsLength::Ignore,
69        ) {
70            Ok(puny) => Self::from_ascii(&puny),
71            e => Err(format!("Label contains invalid characters: {e:?}").into()),
72        }
73    }
74
75    /// Takes the ascii string and returns a new label.
76    ///
77    /// This will return an Error if the label is not an ascii string
78    pub fn from_ascii(s: &str) -> ProtoResult<Self> {
79        if s.len() > 63 {
80            return Err(DecodeError::LabelBytesTooLong(s.len()).into());
81        }
82
83        if s.as_bytes() == WILDCARD {
84            return Ok(Self::wildcard());
85        }
86
87        if !s.is_empty()
88            && s.is_ascii()
89            && s.chars().take(1).all(|c| is_safe_ascii(c, true, false))
90            && s.chars().skip(1).all(|c| is_safe_ascii(c, false, false))
91        {
92            Self::from_raw_bytes(s.as_bytes())
93        } else {
94            Err(format!("Malformed label: {s}").into())
95        }
96    }
97
98    /// Returns a new Label of the Wildcard, i.e. "*"
99    pub fn wildcard() -> Self {
100        Self(TinyVec::from(WILDCARD))
101    }
102
103    /// Converts this label to lowercase
104    pub fn to_lowercase(&self) -> Self {
105        // TODO: replace case conversion when (ascii_ctype #39658) stabilizes
106        if let Some((idx, _)) = self
107            .0
108            .iter()
109            .enumerate()
110            .find(|&(_, c)| *c != c.to_ascii_lowercase())
111        {
112            let mut lower_label: Vec<u8> = self.0.to_vec();
113            lower_label[idx..].make_ascii_lowercase();
114            Self(TinyVec::from(lower_label.as_slice()))
115        } else {
116            self.clone()
117        }
118    }
119
120    /// Returns true if this label is the wildcard, '*', label
121    pub fn is_wildcard(&self) -> bool {
122        self.as_bytes() == WILDCARD
123    }
124
125    /// Returns the length in bytes of this label
126    pub fn len(&self) -> usize {
127        self.0.len()
128    }
129
130    /// True if the label contains no characters
131    pub fn is_empty(&self) -> bool {
132        self.0.is_empty()
133    }
134
135    /// Returns the raw bytes of the label, this is good for writing to the wire.
136    ///
137    /// See [`Display`] for presentation version (unescaped from punycode, etc)
138    pub fn as_bytes(&self) -> &[u8] {
139        &self.0
140    }
141
142    /// Performs the equivalence operation disregarding case
143    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
144        self.0.eq_ignore_ascii_case(&other.0)
145    }
146
147    /// compares with the other label, ignoring case
148    pub fn cmp_with_f<F: LabelCmp>(&self, other: &Self) -> Ordering {
149        let s = self.0.iter();
150        let o = other.0.iter();
151
152        for (s, o) in s.zip(o) {
153            match F::cmp_u8(*s, *o) {
154                Ordering::Equal => continue,
155                not_eq => return not_eq,
156            }
157        }
158
159        self.0.len().cmp(&other.0.len())
160    }
161
162    /// Performs the conversion to utf8 from IDNA as necessary, see `fmt` for more details
163    pub fn to_utf8(&self) -> String {
164        format!("{self}")
165    }
166
167    /// Converts this label to safe ascii, escaping characters as necessary
168    ///
169    /// If this is an IDNA, punycode, label, then the xn-- prefix will be maintained as ascii
170    pub fn to_ascii(&self) -> String {
171        let mut ascii = String::with_capacity(self.as_bytes().len());
172
173        self.write_ascii(&mut ascii)
174            .expect("should never fail to write a new string");
175        ascii
176    }
177
178    /// Writes this label to safe ascii, escaping characters as necessary
179    pub fn write_ascii<W: Write>(&self, f: &mut W) -> Result<(), fmt::Error> {
180        // We can't guarantee that the same input will always translate to the same output
181        fn escape_non_ascii<W: Write>(
182            byte: u8,
183            f: &mut W,
184            is_first: bool,
185        ) -> Result<(), fmt::Error> {
186            let to_triple_escape = |ch: u8| format!("\\{ch:03o}");
187            let to_single_escape = |ch: char| format!("\\{ch}");
188
189            match char::from(byte) {
190                c if is_safe_ascii(c, is_first, true) => f.write_char(c)?,
191                // it's not a control and is printable as well as inside the standard ascii range
192                c if byte > b'\x20' && byte < b'\x7f' => f.write_str(&to_single_escape(c))?,
193                _ => f.write_str(&to_triple_escape(byte))?,
194            }
195
196            Ok(())
197        }
198
199        // traditional ascii case...
200        let mut chars = self.as_bytes().iter();
201        if let Some(ch) = chars.next() {
202            escape_non_ascii(*ch, f, true)?;
203        }
204
205        for ch in chars {
206            escape_non_ascii(*ch, f, false)?;
207        }
208
209        Ok(())
210    }
211}
212
213impl AsRef<[u8]> for Label {
214    fn as_ref(&self) -> &[u8] {
215        self.as_bytes()
216    }
217}
218
219impl Borrow<[u8]> for Label {
220    fn borrow(&self) -> &[u8] {
221        &self.0
222    }
223}
224
/// Decides whether `c` may appear unescaped in a label.
///
/// * ASCII letters, digits and `_` (SRV like labels) are always safe.
/// * `-` is safe anywhere except as the first character.
/// * `*` is safe only as the first character (wildcard).
/// * `.` is safe only when not encoding for output (`for_encoding == false`), which allows
///   dots for things like email addresses.
/// * Everything else, including all non-ASCII characters, is unsafe.
fn is_safe_ascii(c: char, is_first: bool, for_encoding: bool) -> bool {
    if c.is_ascii_alphanumeric() || c == '_' {
        return true;
    }

    match c {
        '-' => !is_first,
        '*' => is_first,
        '.' => !for_encoding,
        _ => false,
    }
}
236
237impl Display for Label {
238    /// outputs characters in a safe string manner.
239    ///
240    /// if the string is punycode, i.e. starts with `xn--`, otherwise it translates to a safe ascii string
241    ///   escaping characters as necessary.
242    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
243        if self.as_bytes().starts_with(IDNA_PREFIX) {
244            // this should never be outside the ascii codes...
245            let label = String::from_utf8_lossy(self.borrow());
246            let (label, e) =
247                Uts46::new().to_unicode(label.as_bytes(), AsciiDenyList::EMPTY, Hyphens::Allow);
248
249            if e.is_ok() {
250                return f.write_str(&label);
251            } else {
252                debug!(
253                    "xn-- prefixed string did not translate via IDNA properly: {:?}",
254                    e
255                )
256            }
257        }
258
259        // it wasn't known to be utf8
260        self.write_ascii(f)
261    }
262}
263
264impl Debug for Label {
265    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
266        let label = String::from_utf8_lossy(self.borrow());
267        f.write_str(&label)
268    }
269}
270
271impl PartialEq<Self> for Label {
272    fn eq(&self, other: &Self) -> bool {
273        self.eq_ignore_ascii_case(other)
274    }
275}
276
277impl PartialOrd<Self> for Label {
278    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
279        Some(self.cmp(other))
280    }
281}
282
283impl Ord for Label {
284    fn cmp(&self, other: &Self) -> Ordering {
285        self.cmp_with_f::<CaseInsensitive>(other)
286    }
287}
288
289impl Hash for Label {
290    fn hash<H>(&self, state: &mut H)
291    where
292        H: Hasher,
293    {
294        for b in self.borrow() as &[u8] {
295            state.write_u8(b.to_ascii_lowercase());
296        }
297    }
298}
299
/// Label comparison trait for case sensitive or insensitive comparisons
///
/// Implementors define how two individual label bytes are ordered; `Label::cmp_with_f`
/// applies that ordering byte by byte.
pub trait LabelCmp {
    /// Orders byte `l` (from the left label) relative to byte `r` (from the right label).
    ///
    /// this should mimic the cmp method from [`PartialOrd`]
    fn cmp_u8(l: u8, r: u8) -> Ordering;
}
305
306/// For case sensitive comparisons
307pub(super) struct CaseSensitive;
308
309impl LabelCmp for CaseSensitive {
310    fn cmp_u8(l: u8, r: u8) -> Ordering {
311        l.cmp(&r)
312    }
313}
314
315/// For case insensitive comparisons
316pub(super) struct CaseInsensitive;
317
318impl LabelCmp for CaseInsensitive {
319    fn cmp_u8(l: u8, r: u8) -> Ordering {
320        l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase())
321    }
322}
323
/// Conversion into a Label
///
/// Implemented in this file for labels, strings and raw byte containers.
pub trait IntoLabel: Sized {
    /// Convert this into Label
    ///
    /// Returns an error when the value cannot be represented as a valid label.
    fn into_label(self) -> ProtoResult<Label>;
}
329
330impl IntoLabel for &Label {
331    fn into_label(self) -> ProtoResult<Label> {
332        Ok(self.clone())
333    }
334}
335
336impl IntoLabel for Label {
337    fn into_label(self) -> ProtoResult<Label> {
338        Ok(self)
339    }
340}
341
342impl IntoLabel for &str {
343    fn into_label(self) -> ProtoResult<Label> {
344        Label::from_utf8(self)
345    }
346}
347
348impl IntoLabel for String {
349    fn into_label(self) -> ProtoResult<Label> {
350        Label::from_utf8(&self)
351    }
352}
353
354impl IntoLabel for &[u8] {
355    fn into_label(self) -> ProtoResult<Label> {
356        Label::from_raw_bytes(self)
357    }
358}
359
360impl IntoLabel for Vec<u8> {
361    fn into_label(self) -> ProtoResult<Label> {
362        Label::from_raw_bytes(&self)
363    }
364}
365
#[cfg(test)]
mod tests {
    #![allow(clippy::dbg_macro, clippy::print_stdout)]

    use alloc::string::ToString;
    #[cfg(feature = "std")]
    use std::{eprintln, println};

    use super::*;

    /// An ASCII label that is longer than the 63 octet limit.
    const TOO_LONG: &str = "alwaystestingcodewithatoolonglabeltoolongtofitin63bytesisagoodhabit";

    /// Builds a label from raw bytes, panicking on failure.
    fn raw(bytes: &[u8]) -> Label {
        Label::from_raw_bytes(bytes).unwrap()
    }

    /// Builds a label from a UTF-8 string, panicking on failure.
    fn utf8(s: &str) -> Label {
        Label::from_utf8(s).unwrap()
    }

    /// Builds a label from an ASCII string, panicking on failure.
    fn ascii(s: &str) -> Label {
        Label::from_ascii(s).unwrap()
    }

    #[test]
    fn test_encoding() {
        assert_eq!(utf8("abc"), raw(b"abc"));
        // case insensitive, this works...
        assert_eq!(utf8("ABC"), raw(b"ABC"));
        assert_eq!(utf8("🦀"), raw(b"xn--zs9h"));
        assert_eq!(utf8("rust-🦀-icon"), raw(b"xn--rust--icon-9447i"));
        assert_eq!(ascii("ben.fry"), raw(b"ben.fry"));

        let crab = utf8("🦀");
        assert_eq!(crab.to_utf8(), "🦀");
        assert_eq!(crab.to_ascii(), "xn--zs9h");
    }

    /// Asserts that `error` is a `LabelBytesTooLong` error reporting exactly `len` bytes.
    fn assert_panic_label_too_long(error: ProtoResult<Label>, len: usize) {
        // poor man's debug, since ProtoResult doesn't implement PartialEq due to ssl errors.
        #[cfg(feature = "std")]
        eprintln!("{error:?}");
        assert!(error.is_err());

        let reported = match error.unwrap_err() {
            ProtoError::Decode(DecodeError::LabelBytesTooLong(n)) => n,
            _ => panic!("Should have returned a LabelTooLongError"),
        };
        assert!(
            reported == len,
            "LabelTooLongError error don't report expected size {} of the label provided.",
            reported
        );
    }

    #[test]
    fn test_label_too_long_ascii_with_utf8() {
        assert_panic_label_too_long(Label::from_utf8(TOO_LONG), TOO_LONG.len());
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji() {
        // emoji only; the punycode form is expected to be reported as 64 octets
        let emoji_case = "💜🦀🏖️🖥️😨🚀✨🤖💚🦾🦿😱😨✉️👺📚💻🗓️🤡🦀😈🚀💀⚡🦄";
        assert_panic_label_too_long(Label::from_utf8(emoji_case), 64);
    }

    #[test]
    fn test_label_too_long_utf8_puny_emoji_mixed() {
        // mixed scripts; the punycode form is expected to be reported as 65 octets
        // Something international to say
        // "Hello I like autumn coffee 🦀 interesting"
        let emoji_case = "こんにちは-I-mögen-jesień-café-🦀-intéressant";
        assert_panic_label_too_long(Label::from_utf8(emoji_case), 65);
    }

    #[test]
    fn test_label_too_long_utf8_puny_mixed() {
        // edge case 64 octet long.
        // xn--testwithalonglabelinutf8tofitin63octetsisagoodhabit-f2106cqb
        let edge_case = "🦀testwithalonglabelinutf8tofitin63octetsisagoodhabit🦀";
        assert_panic_label_too_long(Label::from_utf8(edge_case), 64);
    }

    #[test]
    fn test_label_too_long_raw() {
        assert_panic_label_too_long(Label::from_raw_bytes(TOO_LONG.as_bytes()), TOO_LONG.len());
    }

    #[test]
    fn test_label_too_long_ascii() {
        assert_panic_label_too_long(Label::from_ascii(TOO_LONG), TOO_LONG.len());
    }

    #[test]
    fn test_decoding() {
        assert_eq!(raw(b"abc").to_string(), "abc");
        assert_eq!(raw(b"xn--zs9h").to_string(), "🦀");
        assert_eq!(raw(b"xn--rust--icon-9447i").to_string(), "rust-🦀-icon");
    }

    #[test]
    fn test_from_ascii_adversial_utf8() {
        // non-ASCII input must be rejected by `from_ascii`
        assert!(Label::from_ascii("🦀").is_err());
    }

    #[test]
    fn test_to_lowercase() {
        // construction preserves the original case...
        assert_ne!(ascii("ABC").to_string(), "abc");
        assert_ne!(ascii("abcDEF").to_string(), "abcdef");
        // ...and `to_lowercase` folds it
        assert_eq!(ascii("ABC").to_lowercase().to_string(), "abc");
        assert_eq!(ascii("abcDEF").to_lowercase().to_string(), "abcdef");
    }

    #[test]
    fn test_to_cmp_f() {
        assert_eq!(
            ascii("ABC").cmp_with_f::<CaseInsensitive>(&ascii("abc")),
            Ordering::Equal
        );
        assert_eq!(
            ascii("abcDEF").cmp_with_f::<CaseInsensitive>(&ascii("abcdef")),
            Ordering::Equal
        );
        assert_eq!(
            ascii("ABC").cmp_with_f::<CaseSensitive>(&ascii("abc")),
            Ordering::Less
        );
        assert_eq!(
            ascii("abcDEF").cmp_with_f::<CaseSensitive>(&ascii("abcdef")),
            Ordering::Less
        );
    }

    #[test]
    fn test_partial_cmp() {
        // every left label must order strictly before its right label
        let comparisons: Vec<(Label, Label)> = vec![
            (raw(b"yljkjljk"), raw(b"Z")),
            (raw(b"Z"), raw(b"zABC")),
            (raw(&[1]), raw(b"*")),
            (raw(b"*"), raw(&[200])),
        ];

        for (left, right) in comparisons {
            #[cfg(feature = "std")]
            println!("left: {left}, right: {right}");
            assert_eq!(left.cmp(&right), Ordering::Less);
        }
    }

    #[test]
    fn test_is_wildcard() {
        assert!(raw(b"*").is_wildcard());
        assert!(ascii("*").is_wildcard());
        assert!(utf8("*").is_wildcard());
        assert!(!raw(b"abc").is_wildcard());
    }

    #[test]
    fn test_ascii_escape() {
        assert_eq!(raw(&[0o200]).to_string(), "\\200");
        assert_eq!(raw(&[0o001]).to_string(), "\\001");
        assert_eq!(ascii(".").to_ascii(), "\\.");
        assert_eq!(ascii("ben.fry").to_string(), "ben\\.fry");
        assert_eq!(raw(&[0o200]).to_ascii(), "\\200");
    }
}