Skip to main content

hickory_proto/serialize/txt/
zone.rs

1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use alloc::{
9    borrow::Cow,
10    collections::btree_map::{BTreeMap, Entry},
11    string::String,
12    vec::Vec,
13};
14use core::{mem, str::FromStr};
15use std::{
16    fs,
17    path::{Path, PathBuf},
18};
19
20use crate::{
21    rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
22    serialize::txt::{
23        ParseError, ParseResult, parse_ttl,
24        zone_lex::{Lexer, Token},
25    },
26};
27
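/// A parser for DNS zone files in the text format described by RFC 1035, section 5.
///
/// The relevant part of the specification is quoted below:
///
/// ```text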
29/// 5. ZONE FILES
30///
31/// Zone files are text files that contain RRs in text form.  Since the
32/// contents of a zone can be expressed in the form of a list of RRs a
33/// Zone File is most often used to define a zone, though it can be used
34/// to list a cache's contents.  Hence, this section first discusses the
35/// format of RRs in a Zone File, and then the special considerations when
36/// a Zone File is used to create a zone in some name server.
37///
38/// 5.1. Format
39///
40/// The format of these files is a sequence of entries.  Entries are
41/// predominantly line-oriented, though parentheses can be used to continue
42/// a list of items across a line boundary, and text literals can contain
43/// CRLF within the text.  Any combination of tabs and spaces act as a
44/// delimiter between the separate items that make up an entry.  The end of
45/// any line in the Zone File can end with a comment.  The comment starts
46/// with a ";" (semicolon).
47///
48/// The following entries are defined:
49///
50///     <blank>[<comment>]
51///
52///     $ORIGIN <domain-name> [<comment>]
53///
54///     $INCLUDE <file-name> [<domain-name>] [<comment>]
55///
56///     <domain-name><rr> [<comment>]
57///
58///     <blank><rr> [<comment>]
59///
60/// Blank lines, with or without comments, are allowed anywhere in the file.
61///
62/// Two control entries are defined: $ORIGIN and $INCLUDE.  $ORIGIN is
63/// followed by a domain name, and resets the current origin for relative
64/// domain names to the stated name.  $INCLUDE inserts the named file into
65/// the current file, and may optionally specify a domain name that sets the
66/// relative domain name origin for the included file.  $INCLUDE may also
67/// have a comment.  Note that a $INCLUDE entry never changes the relative
68/// origin of the parent file, regardless of changes to the relative origin
69/// made within the included file.
70///
71/// The last two forms represent RRs.  If an entry for an RR begins with a
72/// blank, then the RR is assumed to be owned by the last stated owner.  If
73/// an RR entry begins with a <domain-name>, then the owner name is reset.
74///
75/// <rr> contents take one of the following forms:
76///
77///     [<TTL>] [<class>] <type> <RDATA>
78///
79///     [<class>] [<TTL>] <type> <RDATA>
80///
81/// The RR begins with optional TTL and class fields, followed by a type and
82/// RDATA field appropriate to the type and class.  Class and type use the
83/// standard mnemonics, TTL is a decimal integer.  Omitted class and TTL
84/// values are default to the last explicitly stated values.  Since type and
85/// class mnemonics are disjoint, the parse is unique.  (Note that this
86/// order is different from the order used in examples and the order used in
87/// the actual RRs; the given order allows easier parsing and defaulting.)
88///
89/// <domain-name>s make up a large share of the data in the Zone File.
90/// The labels in the domain name are expressed as character strings and
91/// separated by dots.  Quoting conventions allow arbitrary characters to be
92/// stored in domain names.  Domain names that end in a dot are called
93/// absolute, and are taken as complete.  Domain names which do not end in a
94/// dot are called relative; the actual domain name is the concatenation of
95/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
96/// an argument to the Zone File loading routine.  A relative name is an
97/// error when no origin is available.
98///
99/// <character-string> is expressed in one or two ways: as a contiguous set
100/// of characters without interior spaces, or as a string beginning with a "
101/// and ending with a ".  Inside a " delimited string any character can
102/// occur, except for a " itself, which must be quoted using \ (back slash).
103///
104/// Because these files are text files several special encodings are
105/// necessary to allow arbitrary data to be loaded.  In particular:
106///
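/// .               A free standing dot is used to refer to the domain name
///                 of the root.  (The first line of this entry is absent
///                 from the published RFC text; reconstructed here.)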
108///
109/// @               A free standing @ is used to denote the current origin.
110///
111/// \X              where X is any character other than a digit (0-9), is
112///                 used to quote that character so that its special meaning
113///                 does not apply.  For example, "\." can be used to place
114///                 a dot character in a label.
115///
116/// \DDD            where each D is a digit is the octet corresponding to
117///                 the decimal number described by DDD.  The resulting
118///                 octet is assumed to be text and is not checked for
119///                 special meaning.
120///
121/// ( )             Parentheses are used to group data that crosses a line
122///                 boundary.  In effect, line terminations are not
123///                 recognized within parentheses.
124///
125/// ;               Semicolon is used to start a comment; the remainder of
126///                 the line is ignored.
127/// ```
128pub struct Parser<'a> {
129    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
130    origin: Option<Name>,
131}
132
133impl<'a> Parser<'a> {
134    /// Returns a new Zone file parser
135    ///
136    /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
137    /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
138    pub fn new(
139        input: impl Into<Cow<'a, str>>,
140        path: Option<PathBuf>,
141        mut origin: Option<Name>,
142    ) -> Self {
143        if let Some(origin) = &mut origin {
144            origin.set_fqdn(true);
145        }
146        Self {
147            lexers: vec![(Lexer::new(input), path)],
148            origin,
149        }
150    }
151
152    /// Parse a file from the Lexer
153    ///
154    /// # Return
155    ///
156    /// A pair of the Zone origin name and a map of all Keys to RecordSets
157    pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
158        let mut cx = Context::new(self.origin);
159        let mut state = State::StartLine;
160        let mut stack = self.lexers.len();
161
162        'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
163            while let Some(t) = lexer.next_token()? {
164                state = match state {
165                    State::StartLine => {
166                        // current_name is not reset on the next line b/c it might be needed from the previous
167                        cx.rtype = None;
168
169                        match t {
170                            // if Dollar, then $INCLUDE or $ORIGIN
171                            Token::Include => State::Include(None),
172                            Token::Origin => State::Origin,
173                            Token::Ttl => State::Ttl,
174
175                            // if CharData, then Name then ttl_class_type
176                            Token::CharData(data) => {
177                                cx.current_name = Some(Name::parse(&data, cx.origin.as_ref())?);
178                                State::TtlClassType
179                            }
180
181                            // @ is a placeholder for specifying the current origin
182                            Token::At => {
183                                cx.current_name.clone_from(&cx.origin); // TODO a COW or RC would reduce copies...
184                                State::TtlClassType
185                            }
186
187                            // if blank, then nothing or ttl_class_type
188                            Token::Blank => State::TtlClassType,
189                            Token::EOL => State::StartLine, // probably a comment
190                            _ => return Err(ParseError::UnexpectedToken(t)),
191                        }
192                    }
193                    State::Ttl => match t {
194                        Token::CharData(data) => {
195                            cx.ttl.default = Some(parse_ttl(&data)?);
196                            State::StartLine
197                        }
198                        _ => return Err(ParseError::UnexpectedToken(t)),
199                    },
200                    State::Origin => {
201                        match t {
202                            Token::CharData(data) => {
203                                // TODO an origin was specified, should this be legal? definitely confusing...
204                                cx.origin = Some(Name::parse(&data, None)?);
205                                State::StartLine
206                            }
207                            _ => return Err(ParseError::UnexpectedToken(t)),
208                        }
209                    }
210                    State::Include(include_path) => match (t, include_path) {
211                        (Token::CharData(data), None) => State::Include(Some(data)),
212                        (Token::EOL, Some(include_path)) => {
213                            // RFC1035 (section 5) does not specify how filename for $INCLUDE
214                            // should be resolved into file path. The underlying code implements the
215                            // following:
216                            // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
217                            // * otherwise, it joins the path with parent root of the current file
218                            //
219                            // TODO: Inlining files specified using non-relative path might potentially introduce
220                            // security issue in some cases (e.g. when working with zone files from untrusted sources)
221                            // and should probably be configurable by user.
222
223                            if stack > MAX_INCLUDE_LEVEL {
224                                return Err(ParseError::Message(
225                                    "Max depth level for nested $INCLUDE is reached",
226                                ));
227                            }
228
229                            let include = Path::new(&include_path);
230                            let include = match (include.is_absolute(), path) {
231                                (true, _) => include.to_path_buf(),
232                                (false, Some(path)) => path
233                                    .parent()
234                                    .expect("file has to have parent folder")
235                                    .join(include),
236                                (false, None) => {
237                                    return Err(ParseError::Message(
238                                        "Relative $INCLUDE is not supported",
239                                    ));
240                                }
241                            };
242
243                            let input = fs::read_to_string(&include)?;
244                            let lexer = Lexer::new(input);
245                            self.lexers.push((lexer, Some(include)));
246                            stack += 1;
247                            state = State::StartLine;
248                            continue 'outer;
249                        }
250                        (Token::CharData(_), Some(_)) => {
251                            return Err(ParseError::Message(
252                                "Domain name for $INCLUDE is not supported",
253                            ));
254                        }
255                        (t, _) => {
256                            return Err(ParseError::UnexpectedToken(t));
257                        }
258                    },
259                    State::TtlClassType => {
260                        match t {
261                            // if number, TTL
262                            // Token::Number(num) => ttl = Some(*num),
263                            // One of Class or Type (these cannot be overlapping!)
264                            Token::CharData(mut data) => {
265                                // if it's a number it's a ttl
266                                let result: ParseResult<u32> = parse_ttl(&data);
267                                if let Ok(ttl) = result {
268                                    cx.ttl.this = Some(ttl);
269                                    State::TtlClassType // hm, should this go to just ClassType?
270                                } else {
271                                    // if can parse DNSClass, then class
272                                    data.make_ascii_uppercase();
273                                    let result = DNSClass::from_str(&data);
274                                    if let Ok(parsed) = result {
275                                        cx.class = parsed;
276                                        State::TtlClassType
277                                    } else {
278                                        // if can parse RecordType, then RecordType
279                                        cx.rtype = Some(RecordType::from_str(&data)?);
280                                        State::Record(vec![])
281                                    }
282                                }
283                            }
284                            // could be nothing if started with blank and is a comment, i.e. EOL
285                            Token::EOL => {
286                                State::StartLine // next line
287                            }
288                            _ => return Err(ParseError::UnexpectedToken(t)),
289                        }
290                    }
291                    State::Record(record_parts) => {
292                        // b/c of ownership rules, perhaps, just collect all the RData components as a list of
293                        //  tokens to pass into the processor
294                        match t {
295                            Token::EOL => {
296                                cx.insert(record_parts)?;
297                                State::StartLine
298                            }
299                            Token::CharData(part) => {
300                                let mut record_parts = record_parts;
301                                record_parts.push(part);
302                                State::Record(record_parts)
303                            }
304                            // TODO: we should not tokenize the list...
305                            Token::List(list) => {
306                                let mut record_parts = record_parts;
307                                record_parts.extend(list);
308                                State::Record(record_parts)
309                            }
310                            _ => return Err(ParseError::UnexpectedToken(t)),
311                        }
312                    }
313                };
314            }
315
316            // Extra flush at the end for the case of missing endline
317            if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
318                cx.insert(record_parts)?;
319            }
320
321            stack -= 1;
322            self.lexers.pop();
323        }
324
325        //
326        // build the Authority and return.
327        let origin = cx
328            .origin
329            .ok_or(ParseError::Message("$ORIGIN was not specified"))?;
330        Ok((origin, cx.records))
331    }
332}
333
/// TTL state tracked while parsing; see [`Ttl::take`] for how a record's TTL is chosen.
#[derive(Default)]
struct Ttl {
    /// Value of the most recent `$TTL` entry, if any.
    default: Option<u32>,
    /// The explicit TTL most recently handed out by [`Ttl::take`].
    last: Option<u32>,
    /// TTL written explicitly on the record currently being parsed, if any.
    this: Option<u32>,
}

impl Ttl {
    /// Returns the TTL for the record being completed, or `None` if none is known.
    ///
    /// An explicit TTL (`this`) wins; it is consumed and remembered as `last`. Without one, the
    /// `$TTL` default is used, and failing that the last explicit TTL.
    fn take(&mut self) -> Option<u32> {
        match self.this.take() {
            Some(explicit) => {
                self.last = Some(explicit);
                Some(explicit)
            }
            None => self.default.or(self.last),
        }
    }
}
357
358struct Context {
359    origin: Option<Name>,
360    records: BTreeMap<RrKey, RecordSet>,
361    class: DNSClass,
362    current_name: Option<Name>,
363    rtype: Option<RecordType>,
364    ttl: Ttl,
365}
366
367impl Context {
368    fn new(origin: Option<Name>) -> Self {
369        Self {
370            origin,
371            records: BTreeMap::default(),
372            class: DNSClass::IN,
373            current_name: None,
374            rtype: None,
375            ttl: Ttl::default(),
376        }
377    }
378
379    fn insert(&mut self, record_parts: Vec<String>) -> ParseResult<()> {
380        // call out to parsers for difference record types
381        // all tokens as part of the Record should be chardata...
382        let rtype = self
383            .rtype
384            .ok_or_else(|| ParseError::from("record type not specified"))?;
385
386        let rdata = RData::from_tokens(
387            rtype,
388            record_parts.iter().map(AsRef::as_ref),
389            self.origin.as_ref(),
390        )?;
391
392        // verify that we have everything we need for the record
393        // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
394        //  might want to wait until RC.weak() stabilizes, as that would be needed for global
395        //  memory where you want
396        let mut name = self
397            .current_name
398            .clone()
399            .ok_or_else(|| ParseError::from("record name not specified"))?;
400
401        let ttl = self
402            .ttl
403            .take()
404            .ok_or_else(|| ParseError::from("record ttl not specified"))?;
405
406        // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
407
408        // move the rdata into record...
409        name.set_fqdn(true);
410        let mut record = Record::from_rdata(name, ttl, rdata);
411        record.dns_class = self.class;
412
413        // add to the map
414        let entry = self.records.entry(RrKey::new(
415            LowerName::new(&record.name),
416            record.record_type(),
417        ));
418        match (rtype, entry) {
419            (RecordType::SOA, Entry::Occupied(_)) => {
420                return Err(ParseError::from("SOA is already specified"));
421            }
422            (_, Entry::Vacant(entry)) => {
423                entry.insert(RecordSet::from(record));
424            }
425            (_, Entry::Occupied(mut entry)) => {
426                entry.get_mut().insert(record, 0);
427            }
428        };
429
430        Ok(())
431    }
432}
433
/// Position of the parser within the zone file entry currently being read.
enum State {
    /// start of line, @, $<WORD>, Name, Blank
    StartLine,
    /// [<TTL>] [<class>] <type>,
    TtlClassType,
    /// $TTL <time>
    Ttl,
    /// Collecting the RDATA tokens of a record; holds the tokens gathered so far.
    Record(Vec<String>),
    /// $INCLUDE <filename>; holds the file name once it has been read.
    Include(Option<String>),
    /// $ORIGIN <domain-name>
    Origin,
}
442
/// Max traversal depth for $INCLUDE files
///
/// A `$INCLUDE` is rejected once the number of files currently being read, counting the
/// top-level one, is greater than this value.
const MAX_INCLUDE_LEVEL: usize = 256;
445
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use super::*;

    /// A record whose type mnemonic is unknown must be rejected, and the error must name the
    /// offending mnemonic in upper case.
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // The record line starts with a blank on purpose, so that its first word is read as
        // TTL/class/type rather than as an owner name.
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // Match on the result instead of combining `is_err()` and `unwrap_err()` with `&`: the
        // non-short-circuit operator evaluated `unwrap_err()` even on success, which panicked
        // before the assertion message could be reported.
        match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(error) => {
                let message = error.to_string();
                assert!(
                    message.contains("FAULTY-RECORD-TYPE"),
                    "unexpected error message: {message}"
                );
            }
        }
    }
}
473}