hickory_proto/serialize/txt/zone.rs
1// Copyright 2015-2023 Benjamin Fry <benjaminfry@me.com>
2//
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// https://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// https://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8use alloc::{
9 borrow::Cow,
10 collections::btree_map::{BTreeMap, Entry},
11 string::String,
12 vec::Vec,
13};
14use core::{mem, str::FromStr};
15use std::{
16 fs,
17 path::{Path, PathBuf},
18};
19
20use crate::{
21 rr::{DNSClass, LowerName, Name, RData, Record, RecordSet, RecordType, RrKey},
22 serialize::txt::{
23 ParseError, ParseResult, parse_ttl,
24 zone_lex::{Lexer, Token},
25 },
26};
27
28/// ```text
29/// 5. ZONE FILES
30///
31/// Zone files are text files that contain RRs in text form. Since the
32/// contents of a zone can be expressed in the form of a list of RRs a
33/// Zone File is most often used to define a zone, though it can be used
34/// to list a cache's contents. Hence, this section first discusses the
35/// format of RRs in a Zone File, and then the special considerations when
36/// a Zone File is used to create a zone in some name server.
37///
38/// 5.1. Format
39///
40/// The format of these files is a sequence of entries. Entries are
41/// predominantly line-oriented, though parentheses can be used to continue
42/// a list of items across a line boundary, and text literals can contain
43/// CRLF within the text. Any combination of tabs and spaces act as a
44/// delimiter between the separate items that make up an entry. The end of
45/// any line in the Zone File can end with a comment. The comment starts
46/// with a ";" (semicolon).
47///
48/// The following entries are defined:
49///
50/// <blank>[<comment>]
51///
52/// $ORIGIN <domain-name> [<comment>]
53///
54/// $INCLUDE <file-name> [<domain-name>] [<comment>]
55///
56/// <domain-name><rr> [<comment>]
57///
58/// <blank><rr> [<comment>]
59///
60/// Blank lines, with or without comments, are allowed anywhere in the file.
61///
62/// Two control entries are defined: $ORIGIN and $INCLUDE. $ORIGIN is
63/// followed by a domain name, and resets the current origin for relative
64/// domain names to the stated name. $INCLUDE inserts the named file into
65/// the current file, and may optionally specify a domain name that sets the
66/// relative domain name origin for the included file. $INCLUDE may also
67/// have a comment. Note that a $INCLUDE entry never changes the relative
68/// origin of the parent file, regardless of changes to the relative origin
69/// made within the included file.
70///
71/// The last two forms represent RRs. If an entry for an RR begins with a
72/// blank, then the RR is assumed to be owned by the last stated owner. If
73/// an RR entry begins with a <domain-name>, then the owner name is reset.
74///
75/// <rr> contents take one of the following forms:
76///
77/// [<TTL>] [<class>] <type> <RDATA>
78///
79/// [<class>] [<TTL>] <type> <RDATA>
80///
81/// The RR begins with optional TTL and class fields, followed by a type and
82/// RDATA field appropriate to the type and class. Class and type use the
83/// standard mnemonics, TTL is a decimal integer. Omitted class and TTL
84/// values are default to the last explicitly stated values. Since type and
85/// class mnemonics are disjoint, the parse is unique. (Note that this
86/// order is different from the order used in examples and the order used in
87/// the actual RRs; the given order allows easier parsing and defaulting.)
88///
89/// <domain-name>s make up a large share of the data in the Zone File.
90/// The labels in the domain name are expressed as character strings and
91/// separated by dots. Quoting conventions allow arbitrary characters to be
92/// stored in domain names. Domain names that end in a dot are called
93/// absolute, and are taken as complete. Domain names which do not end in a
94/// dot are called relative; the actual domain name is the concatenation of
95/// the relative part with an origin specified in a $ORIGIN, $INCLUDE, or as
96/// an argument to the Zone File loading routine. A relative name is an
97/// error when no origin is available.
98///
99/// <character-string> is expressed in one or two ways: as a contiguous set
100/// of characters without interior spaces, or as a string beginning with a "
101/// and ending with a ". Inside a " delimited string any character can
102/// occur, except for a " itself, which must be quoted using \ (back slash).
103///
104/// Because these files are text files several special encodings are
105/// necessary to allow arbitrary data to be loaded. In particular:
106///
107/// of the root.
108///
109/// @ A free standing @ is used to denote the current origin.
110///
111/// \X where X is any character other than a digit (0-9), is
112/// used to quote that character so that its special meaning
113/// does not apply. For example, "\." can be used to place
114/// a dot character in a label.
115///
116/// \DDD where each D is a digit is the octet corresponding to
117/// the decimal number described by DDD. The resulting
118/// octet is assumed to be text and is not checked for
119/// special meaning.
120///
121/// ( ) Parentheses are used to group data that crosses a line
122/// boundary. In effect, line terminations are not
123/// recognized within parentheses.
124///
125/// ; Semicolon is used to start a comment; the remainder of
126/// the line is ignored.
127/// ```
pub struct Parser<'a> {
    /// Stack of active lexers, each paired with the path of the file it reads (if any).
    ///
    /// `new` seeds it with the lexer for the top-level input; `parse` pushes one entry per
    /// `$INCLUDE` and pops it once that lexer is exhausted. The last entry is the one
    /// currently being read, and its path's parent directory is used to resolve relative
    /// `$INCLUDE` file names.
    lexers: Vec<(Lexer<'a>, Option<PathBuf>)>,
    /// Origin handed to `new` (marked fully qualified there); used as the starting origin
    /// when `parse` runs.
    origin: Option<Name>,
}
132
133impl<'a> Parser<'a> {
134 /// Returns a new Zone file parser
135 ///
136 /// The `path` argument's parent directory is used to resolve relative `$INCLUDE` paths.
137 /// Relative `$INCLUDE` paths will yield an error if `path` is `None`.
138 pub fn new(
139 input: impl Into<Cow<'a, str>>,
140 path: Option<PathBuf>,
141 mut origin: Option<Name>,
142 ) -> Self {
143 if let Some(origin) = &mut origin {
144 origin.set_fqdn(true);
145 }
146 Self {
147 lexers: vec![(Lexer::new(input), path)],
148 origin,
149 }
150 }
151
152 /// Parse a file from the Lexer
153 ///
154 /// # Return
155 ///
156 /// A pair of the Zone origin name and a map of all Keys to RecordSets
157 pub fn parse(mut self) -> ParseResult<(Name, BTreeMap<RrKey, RecordSet>)> {
158 let mut cx = Context::new(self.origin);
159 let mut state = State::StartLine;
160 let mut stack = self.lexers.len();
161
162 'outer: while let Some((lexer, path)) = self.lexers.last_mut() {
163 while let Some(t) = lexer.next_token()? {
164 state = match state {
165 State::StartLine => {
166 // current_name is not reset on the next line b/c it might be needed from the previous
167 cx.rtype = None;
168
169 match t {
170 // if Dollar, then $INCLUDE or $ORIGIN
171 Token::Include => State::Include(None),
172 Token::Origin => State::Origin,
173 Token::Ttl => State::Ttl,
174
175 // if CharData, then Name then ttl_class_type
176 Token::CharData(data) => {
177 cx.current_name = Some(Name::parse(&data, cx.origin.as_ref())?);
178 State::TtlClassType
179 }
180
181 // @ is a placeholder for specifying the current origin
182 Token::At => {
183 cx.current_name.clone_from(&cx.origin); // TODO a COW or RC would reduce copies...
184 State::TtlClassType
185 }
186
187 // if blank, then nothing or ttl_class_type
188 Token::Blank => State::TtlClassType,
189 Token::EOL => State::StartLine, // probably a comment
190 _ => return Err(ParseError::UnexpectedToken(t)),
191 }
192 }
193 State::Ttl => match t {
194 Token::CharData(data) => {
195 cx.ttl.default = Some(parse_ttl(&data)?);
196 State::StartLine
197 }
198 _ => return Err(ParseError::UnexpectedToken(t)),
199 },
200 State::Origin => {
201 match t {
202 Token::CharData(data) => {
203 // TODO an origin was specified, should this be legal? definitely confusing...
204 cx.origin = Some(Name::parse(&data, None)?);
205 State::StartLine
206 }
207 _ => return Err(ParseError::UnexpectedToken(t)),
208 }
209 }
210 State::Include(include_path) => match (t, include_path) {
211 (Token::CharData(data), None) => State::Include(Some(data)),
212 (Token::EOL, Some(include_path)) => {
213 // RFC1035 (section 5) does not specify how filename for $INCLUDE
214 // should be resolved into file path. The underlying code implements the
215 // following:
216 // * if the path is absolute (relies on Path::is_absolute), it uses normalized path
217 // * otherwise, it joins the path with parent root of the current file
218 //
219 // TODO: Inlining files specified using non-relative path might potentially introduce
220 // security issue in some cases (e.g. when working with zone files from untrusted sources)
221 // and should probably be configurable by user.
222
223 if stack > MAX_INCLUDE_LEVEL {
224 return Err(ParseError::Message(
225 "Max depth level for nested $INCLUDE is reached",
226 ));
227 }
228
229 let include = Path::new(&include_path);
230 let include = match (include.is_absolute(), path) {
231 (true, _) => include.to_path_buf(),
232 (false, Some(path)) => path
233 .parent()
234 .expect("file has to have parent folder")
235 .join(include),
236 (false, None) => {
237 return Err(ParseError::Message(
238 "Relative $INCLUDE is not supported",
239 ));
240 }
241 };
242
243 let input = fs::read_to_string(&include)?;
244 let lexer = Lexer::new(input);
245 self.lexers.push((lexer, Some(include)));
246 stack += 1;
247 state = State::StartLine;
248 continue 'outer;
249 }
250 (Token::CharData(_), Some(_)) => {
251 return Err(ParseError::Message(
252 "Domain name for $INCLUDE is not supported",
253 ));
254 }
255 (t, _) => {
256 return Err(ParseError::UnexpectedToken(t));
257 }
258 },
259 State::TtlClassType => {
260 match t {
261 // if number, TTL
262 // Token::Number(num) => ttl = Some(*num),
263 // One of Class or Type (these cannot be overlapping!)
264 Token::CharData(mut data) => {
265 // if it's a number it's a ttl
266 let result: ParseResult<u32> = parse_ttl(&data);
267 if let Ok(ttl) = result {
268 cx.ttl.this = Some(ttl);
269 State::TtlClassType // hm, should this go to just ClassType?
270 } else {
271 // if can parse DNSClass, then class
272 data.make_ascii_uppercase();
273 let result = DNSClass::from_str(&data);
274 if let Ok(parsed) = result {
275 cx.class = parsed;
276 State::TtlClassType
277 } else {
278 // if can parse RecordType, then RecordType
279 cx.rtype = Some(RecordType::from_str(&data)?);
280 State::Record(vec![])
281 }
282 }
283 }
284 // could be nothing if started with blank and is a comment, i.e. EOL
285 Token::EOL => {
286 State::StartLine // next line
287 }
288 _ => return Err(ParseError::UnexpectedToken(t)),
289 }
290 }
291 State::Record(record_parts) => {
292 // b/c of ownership rules, perhaps, just collect all the RData components as a list of
293 // tokens to pass into the processor
294 match t {
295 Token::EOL => {
296 cx.insert(record_parts)?;
297 State::StartLine
298 }
299 Token::CharData(part) => {
300 let mut record_parts = record_parts;
301 record_parts.push(part);
302 State::Record(record_parts)
303 }
304 // TODO: we should not tokenize the list...
305 Token::List(list) => {
306 let mut record_parts = record_parts;
307 record_parts.extend(list);
308 State::Record(record_parts)
309 }
310 _ => return Err(ParseError::UnexpectedToken(t)),
311 }
312 }
313 };
314 }
315
316 // Extra flush at the end for the case of missing endline
317 if let State::Record(record_parts) = mem::replace(&mut state, State::StartLine) {
318 cx.insert(record_parts)?;
319 }
320
321 stack -= 1;
322 self.lexers.pop();
323 }
324
325 //
326 // build the Authority and return.
327 let origin = cx
328 .origin
329 .ok_or(ParseError::Message("$ORIGIN was not specified"))?;
330 Ok((origin, cx.records))
331 }
332}
333
/// TTL bookkeeping used while parsing a zone file.
#[derive(Default)]
struct Ttl {
    /// Value of the most recent `$TTL` directive, if any.
    default: Option<u32>,
    /// Most recent explicit per-record TTL that `take` has handed out.
    last: Option<u32>,
    /// Explicit TTL read on the record currently being parsed, if any.
    this: Option<u32>,
}

impl Ttl {
    /// Picks the TTL for the record being finished.
    ///
    /// An explicit TTL on the record wins; it is consumed and remembered as `last`.
    /// Otherwise the `$TTL` default is used, and failing that the last explicit TTL.
    /// Returns `None` when none of the three is available.
    fn take(&mut self) -> Option<u32> {
        match self.this.take() {
            Some(explicit) => {
                self.last = Some(explicit);
                Some(explicit)
            }
            None => self.default.or(self.last),
        }
    }
}
357
/// Mutable state carried through `Parser::parse` and across lines of the zone file.
struct Context {
    /// Current origin: starts as the origin given to the parser and is replaced by each
    /// `$ORIGIN` directive. Used to resolve owner names and `@`, and returned by `parse`.
    origin: Option<Name>,
    /// Record sets collected so far, keyed by owner name and record type.
    records: BTreeMap<RrKey, RecordSet>,
    /// Class assigned to inserted records; starts as `IN` and is replaced whenever a class
    /// token is read.
    class: DNSClass,
    /// Owner name of the most recent record line; kept across lines so that a line
    /// starting with a blank reuses it.
    current_name: Option<Name>,
    /// Record type of the line being parsed; cleared at the start of every line.
    rtype: Option<RecordType>,
    /// `$TTL` default, last explicit TTL and the TTL of the current record.
    ttl: Ttl,
}
366
367impl Context {
368 fn new(origin: Option<Name>) -> Self {
369 Self {
370 origin,
371 records: BTreeMap::default(),
372 class: DNSClass::IN,
373 current_name: None,
374 rtype: None,
375 ttl: Ttl::default(),
376 }
377 }
378
379 fn insert(&mut self, record_parts: Vec<String>) -> ParseResult<()> {
380 // call out to parsers for difference record types
381 // all tokens as part of the Record should be chardata...
382 let rtype = self
383 .rtype
384 .ok_or_else(|| ParseError::from("record type not specified"))?;
385
386 let rdata = RData::from_tokens(
387 rtype,
388 record_parts.iter().map(AsRef::as_ref),
389 self.origin.as_ref(),
390 )?;
391
392 // verify that we have everything we need for the record
393 // TODO COW or RC would reduce mem usage, perhaps Name should have an intern()...
394 // might want to wait until RC.weak() stabilizes, as that would be needed for global
395 // memory where you want
396 let mut name = self
397 .current_name
398 .clone()
399 .ok_or_else(|| ParseError::from("record name not specified"))?;
400
401 let ttl = self
402 .ttl
403 .take()
404 .ok_or_else(|| ParseError::from("record ttl not specified"))?;
405
406 // TODO: validate record, e.g. the name of SRV record allows _ but others do not.
407
408 // move the rdata into record...
409 name.set_fqdn(true);
410 let mut record = Record::from_rdata(name, ttl, rdata);
411 record.dns_class = self.class;
412
413 // add to the map
414 let entry = self.records.entry(RrKey::new(
415 LowerName::new(&record.name),
416 record.record_type(),
417 ));
418 match (rtype, entry) {
419 (RecordType::SOA, Entry::Occupied(_)) => {
420 return Err(ParseError::from("SOA is already specified"));
421 }
422 (_, Entry::Vacant(entry)) => {
423 entry.insert(RecordSet::from(record));
424 }
425 (_, Entry::Occupied(mut entry)) => {
426 entry.get_mut().insert(record, 0);
427 }
428 };
429
430 Ok(())
431 }
432}
433
/// States of the line-oriented state machine driven by `Parser::parse`.
enum State {
    StartLine,    // start of line, @, $<WORD>, Name, Blank
    TtlClassType, // [<TTL>] [<class>] <type>,
    Ttl,          // $TTL <time>
    /// Record type has been read; collects the RDATA tokens until the end of the line.
    Record(Vec<String>),
    /// `None` until the file name token has been read, then `Some(file name)`.
    Include(Option<String>), // $INCLUDE <filename>
    /// `$ORIGIN` has been read; the next token is expected to be the domain name.
    Origin,
}
442
/// Max traversal depth for $INCLUDE files
///
/// `Parser::parse` rejects a further `$INCLUDE` once the depth of its lexer stack is
/// greater than this value.
const MAX_INCLUDE_LEVEL: usize = 256;
445
#[cfg(test)]
mod tests {
    use alloc::string::ToString;

    use super::*;

    /// An unknown record type must be reported as an error that names the offending token.
    #[test]
    fn test_zone_parse() {
        let domain = Name::from_str("parameter.origin.org.").unwrap();

        // The leading blank on the record line makes the first word be read as
        // TTL/class/type rather than as an owner name.
        let zone_data = r#"$ORIGIN parsed.zone.origin.org.
 faulty-record-type 60 IN A 1.2.3.4
"#;

        // Handle success and failure separately so that each case reports its own message;
        // the previous `is_err() & unwrap_err()` form panicked inside `unwrap_err` on success.
        let err = match Parser::new(zone_data, None, Some(domain)).parse() {
            Ok(parsed) => panic!("unexpected success: {parsed:#?}"),
            Err(err) => err,
        };

        let message = err.to_string();
        assert!(
            message.contains("FAULTY-RECORD-TYPE"),
            "unexpected error: {message}"
        );
    }
}
473}