Skip to main content

tor_geoip/
dense_range_map.rs

1//! Helper type: a frozen RangeMap where most ranges do not have gaps between them.
2//!
3//! Ordinary RangeMaps store a start and end for each range.  But if the
4//! there are not gaps between a pair of ranges, then the range end is redundant.
5//!
6//! This trick lets us save about 40%-50% of the total database size, for
7//! a savings of around 6 MiB.  (Data checked as of April 2026)
8
9use std::borrow::Cow;
10use std::ops::RangeInclusive;
11
12/// An object that has a single next element.
13pub(crate) trait Successor: Sized {
14    /// Return the next element after this one.
15    ///
16    /// Returns None if this is the maximum value
17    fn next(&self) -> Option<Self>;
18}
19
20impl Successor for u32 {
21    fn next(&self) -> Option<Self> {
22        self.checked_add(1)
23    }
24}
25
26impl Successor for u128 {
27    fn next(&self) -> Option<Self> {
28        self.checked_add(1)
29    }
30}
31
32/// An immutable map from ranges to `Option<V1>`, `Option<V2>`-pairs.
33///
34/// This type is optimized for reasonable O(lg N) performance,
35/// and for space efficiency in the case where:
36/// - `Option<V>` is the same size as V, or at least not much larger.
37/// - most or all ranges have no gaps between them.
38/// - we might not want to record any values for V2 at all.
39/// - we are willing to treat "no entry" and "maps to None" as equivalent.
40///
41/// That is to say, we get our space efficiency wins in the case where,
42/// if some range (K..=V) in the map,
43/// (V+1..=K2) is also likely to be in the map.
44///
45/// (This is true for around like 99% of our IPv4 ranges and around 91% of our
46/// IPv6 ranges.)
47///
48/// This type is crate-internal because its functionality is limited,
49/// and because we may want to switch to some other approach to geoip in the future.
50///
51/// ## Overview
52///
53/// We consider a map of disjoint ranges `(S..=E) => V`
54/// as a _dense_ map from `(S'..=E') => Option<V1, V2>`,
55/// such that there is is exactly one `(S'..E')` range covering every value from
56/// `min(S)` through `K::MAX` inclusive.
57///
58/// Because this map is dense, we can encode these ranges as a sorted list of S',
59/// and then use a binary search to find which `Option<V1, V2>` corresponds
60/// to any given value.
61///
62/// ## Invariants
63///
64/// - `starts` is sorted and contains no duplicates.
65/// - `values1.len() == starts.len()`
66/// - If `values2` is present, `values2.len() == starts.len()`
67///
68/// ## Semantics:
69///
70/// This table maps keys to value as follows:
71///
72/// If `starts` is empty, then every key maps to None.
73///
74/// Otherwise:
75///    - Every key such that `K::min <= key < starts[0]` maps to None.
76///    - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
77///      `values[idx]`, which may be Some or None.
78///    - Every key such that `key >= starts.last()` maps to values.last(),
79///      which may be Some or None.
80///
81/// If `values2` is present, then keys map to the same indices in `values2`
82/// as they do in `values1`.
83#[derive(Clone, Debug, Eq, PartialEq)]
84pub(crate) struct DenseRangeMap<K, V1, V2>
85where
86    K: Clone + 'static,
87    V1: Clone + 'static,
88    V2: Clone + 'static,
89{
90    /// A list of the starting points of each range or gap.
91    starts: Cow<'static, [K]>,
92    /// A list of values.
93    ///
94    /// If `starts[i]` is the start of a range, then `values[i]` is Some(v)
95    /// where v is the value of that range for every
96    values1: Cow<'static, [Option<V1>]>,
97
98    /// An optional list of secondary values.
99    ///
100    values2: Option<Cow<'static, [Option<V2>]>>,
101}
102
103impl<K, V1, V2> Default for DenseRangeMap<K, V1, V2>
104where
105    K: Clone + 'static,
106    V1: Clone + 'static,
107    V2: Clone + 'static,
108{
109    fn default() -> Self {
110        Self {
111            starts: Default::default(),
112            values1: Default::default(),
113            values2: None,
114        }
115    }
116}
117
118/// A helper type to create a [`DenseRangeMap`] from a sorted list of disjoint ranges.
119///
120/// ## Invariants
121///
122/// - `starts.len() == values.len()`
123/// - `starts` is sorted and contains no duplicates.
124/// - `starts` is empty if and only if `prev_end` is None.
125///
126/// ## Semantics
127///
128/// If `starts` is empty, nothing has been added to this Builder.
129///
130/// Otherwise:
131///    - Every key such that `K::min <= key < starts[0]` maps to None.
132///    - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
133///      `values[idx]`, which may be Some or None.
134///    - Every key such that `key <= starts.last() <= prev_end` maps to
135///      `values.last()`.
136///    - No mappings have been added for any range S..=E such that
137///      'S > prev_end`.
138struct DenseRangeMapBuilder<K, V1, V2> {
139    /// A list of range starts so far.
140    starts: Vec<K>,
141    /// A list of values so far.
142    values1: Vec<Option<V1>>,
143
144    /// A list of secondary values so far.
145    ///
146    /// None if we're ignoring secondary values.
147    values2: Option<Vec<Option<V2>>>,
148
149    /// The last element of the most recently added range.
150    prev_end: Option<K>,
151}
152
153/// An error that occurred while building a [`DenseRangeMap`]
154#[derive(Debug, Clone, thiserror::Error)]
155pub(crate) enum Error {
156    /// Some range was invalid
157    #[error("Found an entry with an invalid range")]
158    BadEntry,
159
160    /// The entries in the database were not sorted.
161    #[error("Entries were not sorted")]
162    Unsorted,
163}
164
165impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMapBuilder<K, V1, V2>
166where
167    K: Clone + 'static,
168    V1: Clone + 'static,
169    V2: Clone + 'static,
170{
171    /// Construct a new empty builder.
172    fn new() -> Self {
173        Self {
174            starts: Vec::new(),
175            values1: Vec::new(),
176            values2: Some(Vec::new()),
177            prev_end: None,
178        }
179    }
180
181    /// Add a single entry to this builder.
182    fn push(&mut self, start: K, v1: Option<V1>, v2: Option<V2>) {
183        self.starts.push(start);
184        self.values1.push(v1);
185        if let Some(values2) = self.values2.as_mut() {
186            values2.push(v2);
187        }
188    }
189
190    /// Consume this builder and return a DenseRangeMap with the same values.
191    fn build(mut self) -> DenseRangeMap<K, V1, V2> {
192        if let Some(prev_end) = self.prev_end.take()
193            && let Some(next_range_start) = prev_end.next()
194        {
195            // There is empty space after the last range, so we need to
196            // represent that with a gap entry.
197            self.push(next_range_start, None, None);
198        }
199        // See if we can reclaim any space.
200        self.starts.shrink_to_fit();
201        self.values1.shrink_to_fit();
202        if let Some(values2) = self.values2.as_mut() {
203            values2.shrink_to_fit();
204        }
205
206        let map = DenseRangeMap {
207            starts: self.starts.into(),
208            values1: self.values1.into(),
209            values2: self.values2.map(Into::into),
210        };
211
212        #[cfg(test)]
213        map.assert_valid();
214
215        map
216    }
217
218    /// Add an entry to this [`DenseRangeMapBuilder`].
219    ///
220    /// Returns an error if `range` is not in strictly ascending order with respect
221    /// to all previous ranges.
222    fn add_entry(
223        &mut self,
224        range: RangeInclusive<K>,
225        value1: Option<V1>,
226        value2: Option<V2>,
227    ) -> Result<(), Error> {
228        if value1.is_none() && value2.is_none() {
229            return Ok(());
230        }
231
232        // NOTE: We _could_ coalesce our ranges if two abutting ranges have the
233        // same value.  But our geoip data processing tools already do that.
234        use std::cmp::Ordering::*;
235
236        let (start, end) = range.into_inner();
237        if start > end {
238            return Err(Error::BadEntry);
239        }
240
241        // Set "next_range_start" to the place that this range would start if
242        // there is no gap after the last range.
243        if let Some(prev_end) = self.prev_end.take() {
244            // This is not the first entry, so we might need to add a gap entry.
245
246            // Find the start of a possible gap entry.
247            // (If there is not a successor to the end of the last range, then
248            // any entry we are trying to push is unsorted!)
249            let gap_start = prev_end.next().ok_or(Error::Unsorted)?;
250
251            // Compare the start of the possible gap to the start of this range.
252            match gap_start.cmp(&start) {
253                Less => {
254                    // There is a gap between the end of the last entry and the
255                    // start of this one.  Add a representation of that gap.
256                    self.push(gap_start, None, None);
257                }
258                Equal => {
259                    // There is no gap, so we don't have to represent it. Cool!
260                }
261                Greater => {
262                    // We aren't sorted; give up.
263                    return Err(Error::Unsorted);
264                }
265            }
266        }
267        // Add this entry.
268        self.push(start, value1, value2);
269        self.prev_end = Some(end);
270
271        Ok(())
272    }
273}
274
275impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMap<K, V1, V2>
276where
277    K: Clone + 'static,
278    V1: Clone + 'static,
279    V2: Clone + 'static,
280{
281    /// Create a new map from its raw parts.
282    ///
283    /// Requires that the slices are actually the results of calling
284    /// `export()` on a map of this type.
285    pub(crate) fn from_static_parts(
286        starts: &'static [K],
287        values1: &'static [Option<V1>],
288        values2: Option<&'static [Option<V2>]>,
289    ) -> Self {
290        let map = Self {
291            starts: starts.into(),
292            values1: values1.into(),
293            values2: values2.map(Into::into),
294        };
295        #[cfg(test)]
296        map.assert_valid();
297
298        map
299    }
300
301    /// Construct a [`DenseRangeMap`] from an iterator of `(range,v1)` tuples.
302    ///
303    /// The ranges must be disjoint and sorted in ascending order by their start.
304    #[cfg(test)]
305    pub(crate) fn from_sorted_inclusive_ranges<S>(iter: S) -> Result<Self, Error>
306    where
307        S: Iterator<Item = (RangeInclusive<K>, V1)>,
308    {
309        let discard_v2 = true;
310        Self::try_from_sorted_inclusive_ranges(
311            iter.map(|(r, v1)| Ok((r, Some(v1), None))),
312            discard_v2,
313        )
314    }
315
316    /// Construct a [`DenseRangeMap`] from an iterator of
317    /// `Result<(range,optvalue1,optvalue2)>` tuples.
318    ///
319    /// The ranges must be disjoint and sorted in ascending order by their
320    /// start.
321    pub(crate) fn try_from_sorted_inclusive_ranges<S, E>(
322        iter: S,
323        discard_v2: bool,
324    ) -> Result<Self, E>
325    where
326        S: Iterator<Item = Result<(RangeInclusive<K>, Option<V1>, Option<V2>), E>>,
327        E: From<Error>,
328    {
329        let mut b = DenseRangeMapBuilder::new();
330        if discard_v2 {
331            b.values2 = None;
332        }
333        for entry in iter {
334            let (range, value1, mut value2) = entry?;
335            if discard_v2 {
336                value2 = None;
337            }
338            b.add_entry(range, value1, value2)?;
339        }
340
341        Ok(b.build())
342    }
343
344    /// Return the index for the values corresponding to `key`.
345    pub(crate) fn index_for_key(&self, key: &K) -> Option<usize> {
346        match self.starts.binary_search(key) {
347            Ok(v) => Some(v),
348            Err(0) => None,
349            Err(v) => Some(v - 1),
350        }
351    }
352
353    /// Return the value, if any, associated with the given `key`.
354    pub(crate) fn get1(&self, key: &K) -> Option<&V1> {
355        self.index_for_key(key)
356            .and_then(|index| self.values1[index].as_ref())
357    }
358
359    /// Return the secondary value, if any, associated with the given `key`.
360    pub(crate) fn get2(&self, key: &K) -> Option<&V2> {
361        if let Some(values2) = self.values2.as_ref() {
362            self.index_for_key(key)
363                .and_then(|index| values2[index].as_ref())
364        } else {
365            None
366        }
367    }
368
369    /// Testing only: Assert that this object obeys its invariants.
370    #[cfg(test)]
371    fn assert_valid(&self) {
372        // We don't use `is_sorted` here since it allows duplicates.
373        for pair in self.starts.windows(2) {
374            assert!(pair[0] < pair[1]);
375        }
376        assert_eq!(self.values1.len(), self.starts.len());
377        if let Some(values2) = &self.values2 {
378            assert_eq!(values2.len(), self.starts.len());
379        }
380    }
381
382    /// Testing only: return a `Vec` of `(start, Option<V1, V2>)` to represent
383    /// this entries table.
384    ///
385    /// This is more convenient to inspect than looking at `starts` and `values`
386    /// manually.
387    ///
388    /// (We store `starts` and `values` in separate lists to avoid padding issues.)
389    #[cfg(test)]
390    fn rep(&self) -> Vec<(K, Option<V1>)>
391    where
392        K: Clone + 'static,
393        V1: Clone + 'static,
394    {
395        self.starts
396            .iter()
397            .cloned()
398            .zip(self.values1.iter().cloned())
399            .collect()
400    }
401
402    /// Return a triple of slices that encode this map.
403    #[cfg(feature = "export")]
404    #[allow(clippy::type_complexity)]
405    pub(crate) fn export(&self) -> (&[K], &[Option<V1>], Option<&[Option<V2>]>) {
406        (
407            self.starts.as_ref(),
408            self.values1.as_ref(),
409            self.values2.as_ref().map(|v2| v2.as_ref()),
410        )
411    }
412}
413
414#[cfg(test)]
415mod test {
416    // @@ begin test lint list maintained by maint/add_warning @@
417    #![allow(clippy::bool_assert_comparison)]
418    #![allow(clippy::clone_on_copy)]
419    #![allow(clippy::dbg_macro)]
420    #![allow(clippy::mixed_attributes_style)]
421    #![allow(clippy::print_stderr)]
422    #![allow(clippy::print_stdout)]
423    #![allow(clippy::single_char_pattern)]
424    #![allow(clippy::unwrap_used)]
425    #![allow(clippy::unchecked_time_subtraction)]
426    #![allow(clippy::useless_vec)]
427    #![allow(clippy::needless_pass_by_value)]
428    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
429
430    use super::*;
431    use proptest::prelude::*;
432
433    type M = DenseRangeMap<u32, &'static str, ()>;
434
435    #[test]
436    fn empty() {
437        let map = M::from_sorted_inclusive_ranges(std::iter::empty()).unwrap();
438        assert_eq!(map.get1(&0), None);
439        assert_eq!(map.get1(&1), None);
440        assert_eq!(map.get1(&50), None);
441        assert_eq!(map.get1(&(u32::MAX - 1)), None);
442        assert_eq!(map.get1(&(u32::MAX)), None);
443    }
444
445    #[test]
446    fn both_ends_open() {
447        // construct a map that has gaps at both ends.
448        let map = M::from_sorted_inclusive_ranges(
449            [
450                //
451                (5..=10, "small"),
452                (11..=90, "medium"),
453                (100..=1000, "big"),
454            ]
455            .into_iter(),
456        )
457        .unwrap();
458        map.assert_valid();
459
460        assert_eq!(
461            map.rep()[..],
462            [
463                (5, Some("small")),
464                (11, Some("medium")),
465                (91, None),
466                (100, Some("big")),
467                (1001, None),
468            ]
469        );
470
471        assert_eq!(map.get1(&0), None);
472        assert_eq!(map.get1(&1), None);
473        assert_eq!(map.get1(&5), Some(&"small"));
474        assert_eq!(map.get1(&10), Some(&"small"));
475        assert_eq!(map.get1(&11), Some(&"medium"));
476        assert_eq!(map.get1(&85), Some(&"medium"));
477        assert_eq!(map.get1(&90), Some(&"medium"));
478        assert_eq!(map.get1(&91), None);
479        assert_eq!(map.get1(&99), None);
480        assert_eq!(map.get1(&100), Some(&"big"));
481        assert_eq!(map.get1(&500), Some(&"big"));
482        assert_eq!(map.get1(&1000), Some(&"big"));
483        assert_eq!(map.get1(&1001), None);
484        assert_eq!(map.get1(&(u32::MAX - 1)), None);
485        assert_eq!(map.get1(&u32::MAX), None);
486    }
487
488    #[test]
489    fn both_ends_filled_map() {
490        // construct a map that has no gap at either end.
491        let map = M::from_sorted_inclusive_ranges(
492            [
493                //
494                (0..=10, "small"),
495                (11..=90, "medium"),
496                (100..=u32::MAX, "big"),
497            ]
498            .into_iter(),
499        )
500        .unwrap();
501
502        assert_eq!(
503            map.rep()[..],
504            [
505                (0, Some("small")),
506                (11, Some("medium")),
507                (91, None),
508                (100, Some("big")),
509            ]
510        );
511
512        assert_eq!(map.get1(&0), Some(&"small"));
513        assert_eq!(map.get1(&1), Some(&"small"));
514        assert_eq!(map.get1(&5), Some(&"small"));
515        assert_eq!(map.get1(&10), Some(&"small"));
516        assert_eq!(map.get1(&11), Some(&"medium"));
517        assert_eq!(map.get1(&85), Some(&"medium"));
518        assert_eq!(map.get1(&90), Some(&"medium"));
519        assert_eq!(map.get1(&91), None);
520        assert_eq!(map.get1(&99), None);
521        assert_eq!(map.get1(&100), Some(&"big"));
522        assert_eq!(map.get1(&500), Some(&"big"));
523        assert_eq!(map.get1(&1000), Some(&"big"));
524        assert_eq!(map.get1(&1001), Some(&"big"));
525        assert_eq!(map.get1(&(u32::MAX - 1)), Some(&"big"));
526        assert_eq!(map.get1(&u32::MAX), Some(&"big"));
527    }
528
529    proptest! {
530        // Property test: build a RangeIncluseiveMap at random, then construct a new
531        // DenseRangeMap from that map, and make sure they give the same outputs.
532        #[test]
533        fn matches_rangemap(ranges: Vec<RangeInclusive<u32>>, probes: Vec<u32>) {
534            let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
535            for (n, range) in ranges.into_iter().enumerate() {
536                rangemap.insert(range, n);
537            }
538            let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
539                rangemap.iter().map(|(k,v)| (k.clone(), *v))
540            ).unwrap();
541
542            for probe in probes.iter() {
543                assert_eq!(rangemap.get(probe), dense_map.get1(probe));
544            }
545        }
546
547        // Property test: construct a disjoint list of ranges in ascending
548        // order, use that list to construct a RangeInclusiveMap and a
549        // DenseRangeMap and make sure they give the same outputs.
550        #[test]
551        fn matches_rangemap2(r: Vec<(u32,u32)>, probes: Vec<u32>) {
552            let mut ranges = vec![];
553            let mut next = 0_u32;
554            for (gap,len) in r {
555                let Some(start) = next.checked_add(gap) else {break;};
556                let end = start.saturating_add(len);
557                ranges.push(start..=end);
558                if let Some(n) = end.checked_add(1) {
559                    next = n;
560                } else {
561                    break;
562                }
563            }
564
565            let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
566            for (n, range) in ranges.iter().enumerate() {
567                rangemap.insert(range.clone(), n);
568            }
569
570            let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
571                ranges.into_iter().enumerate().map(|(n, r)| (r, n))
572            ).unwrap();
573
574            for probe in probes.iter() {
575                assert_eq!(rangemap.get(probe), dense_map.get1(probe));
576            }
577        }
578    }
579}