tor_geoip/dense_range_map.rs
1//! Helper type: a frozen RangeMap where most ranges do not have gaps between them.
2//!
3//! Ordinary RangeMaps store a start and end for each range. But if the
4//! there are not gaps between a pair of ranges, then the range end is redundant.
5//!
6//! This trick lets us save about 40%-50% of the total database size, for
7//! a savings of around 6 MiB. (Data checked as of April 2026)
8
9use std::borrow::Cow;
10use std::ops::RangeInclusive;
11
12/// An object that has a single next element.
13pub(crate) trait Successor: Sized {
14 /// Return the next element after this one.
15 ///
16 /// Returns None if this is the maximum value
17 fn next(&self) -> Option<Self>;
18}
19
20impl Successor for u32 {
21 fn next(&self) -> Option<Self> {
22 self.checked_add(1)
23 }
24}
25
26impl Successor for u128 {
27 fn next(&self) -> Option<Self> {
28 self.checked_add(1)
29 }
30}
31
32/// An immutable map from ranges to `Option<V1>`, `Option<V2>`-pairs.
33///
34/// This type is optimized for reasonable O(lg N) performance,
35/// and for space efficiency in the case where:
36/// - `Option<V>` is the same size as V, or at least not much larger.
37/// - most or all ranges have no gaps between them.
38/// - we might not want to record any values for V2 at all.
39/// - we are willing to treat "no entry" and "maps to None" as equivalent.
40///
41/// That is to say, we get our space efficiency wins in the case where,
42/// if some range (K..=V) in the map,
43/// (V+1..=K2) is also likely to be in the map.
44///
45/// (This is true for around like 99% of our IPv4 ranges and around 91% of our
46/// IPv6 ranges.)
47///
48/// This type is crate-internal because its functionality is limited,
49/// and because we may want to switch to some other approach to geoip in the future.
50///
51/// ## Overview
52///
53/// We consider a map of disjoint ranges `(S..=E) => V`
54/// as a _dense_ map from `(S'..=E') => Option<V1, V2>`,
55/// such that there is is exactly one `(S'..E')` range covering every value from
56/// `min(S)` through `K::MAX` inclusive.
57///
58/// Because this map is dense, we can encode these ranges as a sorted list of S',
59/// and then use a binary search to find which `Option<V1, V2>` corresponds
60/// to any given value.
61///
62/// ## Invariants
63///
64/// - `starts` is sorted and contains no duplicates.
65/// - `values1.len() == starts.len()`
66/// - If `values2` is present, `values2.len() == starts.len()`
67///
68/// ## Semantics:
69///
70/// This table maps keys to value as follows:
71///
72/// If `starts` is empty, then every key maps to None.
73///
74/// Otherwise:
75/// - Every key such that `K::min <= key < starts[0]` maps to None.
76/// - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
77/// `values[idx]`, which may be Some or None.
78/// - Every key such that `key >= starts.last()` maps to values.last(),
79/// which may be Some or None.
80///
81/// If `values2` is present, then keys map to the same indices in `values2`
82/// as they do in `values1`.
83#[derive(Clone, Debug, Eq, PartialEq)]
84pub(crate) struct DenseRangeMap<K, V1, V2>
85where
86 K: Clone + 'static,
87 V1: Clone + 'static,
88 V2: Clone + 'static,
89{
90 /// A list of the starting points of each range or gap.
91 starts: Cow<'static, [K]>,
92 /// A list of values.
93 ///
94 /// If `starts[i]` is the start of a range, then `values[i]` is Some(v)
95 /// where v is the value of that range for every
96 values1: Cow<'static, [Option<V1>]>,
97
98 /// An optional list of secondary values.
99 ///
100 values2: Option<Cow<'static, [Option<V2>]>>,
101}
102
103impl<K, V1, V2> Default for DenseRangeMap<K, V1, V2>
104where
105 K: Clone + 'static,
106 V1: Clone + 'static,
107 V2: Clone + 'static,
108{
109 fn default() -> Self {
110 Self {
111 starts: Default::default(),
112 values1: Default::default(),
113 values2: None,
114 }
115 }
116}
117
118/// A helper type to create a [`DenseRangeMap`] from a sorted list of disjoint ranges.
119///
120/// ## Invariants
121///
122/// - `starts.len() == values.len()`
123/// - `starts` is sorted and contains no duplicates.
124/// - `starts` is empty if and only if `prev_end` is None.
125///
126/// ## Semantics
127///
128/// If `starts` is empty, nothing has been added to this Builder.
129///
130/// Otherwise:
131/// - Every key such that `K::min <= key < starts[0]` maps to None.
132/// - Every key such that `starts[idx] <= key < starts[idx+1]` maps to
133/// `values[idx]`, which may be Some or None.
134/// - Every key such that `key <= starts.last() <= prev_end` maps to
135/// `values.last()`.
136/// - No mappings have been added for any range S..=E such that
137/// 'S > prev_end`.
138struct DenseRangeMapBuilder<K, V1, V2> {
139 /// A list of range starts so far.
140 starts: Vec<K>,
141 /// A list of values so far.
142 values1: Vec<Option<V1>>,
143
144 /// A list of secondary values so far.
145 ///
146 /// None if we're ignoring secondary values.
147 values2: Option<Vec<Option<V2>>>,
148
149 /// The last element of the most recently added range.
150 prev_end: Option<K>,
151}
152
153/// An error that occurred while building a [`DenseRangeMap`]
154#[derive(Debug, Clone, thiserror::Error)]
155pub(crate) enum Error {
156 /// Some range was invalid
157 #[error("Found an entry with an invalid range")]
158 BadEntry,
159
160 /// The entries in the database were not sorted.
161 #[error("Entries were not sorted")]
162 Unsorted,
163}
164
165impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMapBuilder<K, V1, V2>
166where
167 K: Clone + 'static,
168 V1: Clone + 'static,
169 V2: Clone + 'static,
170{
171 /// Construct a new empty builder.
172 fn new() -> Self {
173 Self {
174 starts: Vec::new(),
175 values1: Vec::new(),
176 values2: Some(Vec::new()),
177 prev_end: None,
178 }
179 }
180
181 /// Add a single entry to this builder.
182 fn push(&mut self, start: K, v1: Option<V1>, v2: Option<V2>) {
183 self.starts.push(start);
184 self.values1.push(v1);
185 if let Some(values2) = self.values2.as_mut() {
186 values2.push(v2);
187 }
188 }
189
190 /// Consume this builder and return a DenseRangeMap with the same values.
191 fn build(mut self) -> DenseRangeMap<K, V1, V2> {
192 if let Some(prev_end) = self.prev_end.take()
193 && let Some(next_range_start) = prev_end.next()
194 {
195 // There is empty space after the last range, so we need to
196 // represent that with a gap entry.
197 self.push(next_range_start, None, None);
198 }
199 // See if we can reclaim any space.
200 self.starts.shrink_to_fit();
201 self.values1.shrink_to_fit();
202 if let Some(values2) = self.values2.as_mut() {
203 values2.shrink_to_fit();
204 }
205
206 let map = DenseRangeMap {
207 starts: self.starts.into(),
208 values1: self.values1.into(),
209 values2: self.values2.map(Into::into),
210 };
211
212 #[cfg(test)]
213 map.assert_valid();
214
215 map
216 }
217
218 /// Add an entry to this [`DenseRangeMapBuilder`].
219 ///
220 /// Returns an error if `range` is not in strictly ascending order with respect
221 /// to all previous ranges.
222 fn add_entry(
223 &mut self,
224 range: RangeInclusive<K>,
225 value1: Option<V1>,
226 value2: Option<V2>,
227 ) -> Result<(), Error> {
228 if value1.is_none() && value2.is_none() {
229 return Ok(());
230 }
231
232 // NOTE: We _could_ coalesce our ranges if two abutting ranges have the
233 // same value. But our geoip data processing tools already do that.
234 use std::cmp::Ordering::*;
235
236 let (start, end) = range.into_inner();
237 if start > end {
238 return Err(Error::BadEntry);
239 }
240
241 // Set "next_range_start" to the place that this range would start if
242 // there is no gap after the last range.
243 if let Some(prev_end) = self.prev_end.take() {
244 // This is not the first entry, so we might need to add a gap entry.
245
246 // Find the start of a possible gap entry.
247 // (If there is not a successor to the end of the last range, then
248 // any entry we are trying to push is unsorted!)
249 let gap_start = prev_end.next().ok_or(Error::Unsorted)?;
250
251 // Compare the start of the possible gap to the start of this range.
252 match gap_start.cmp(&start) {
253 Less => {
254 // There is a gap between the end of the last entry and the
255 // start of this one. Add a representation of that gap.
256 self.push(gap_start, None, None);
257 }
258 Equal => {
259 // There is no gap, so we don't have to represent it. Cool!
260 }
261 Greater => {
262 // We aren't sorted; give up.
263 return Err(Error::Unsorted);
264 }
265 }
266 }
267 // Add this entry.
268 self.push(start, value1, value2);
269 self.prev_end = Some(end);
270
271 Ok(())
272 }
273}
274
275impl<K: Eq + Ord + Successor, V1, V2> DenseRangeMap<K, V1, V2>
276where
277 K: Clone + 'static,
278 V1: Clone + 'static,
279 V2: Clone + 'static,
280{
281 /// Create a new map from its raw parts.
282 ///
283 /// Requires that the slices are actually the results of calling
284 /// `export()` on a map of this type.
285 pub(crate) fn from_static_parts(
286 starts: &'static [K],
287 values1: &'static [Option<V1>],
288 values2: Option<&'static [Option<V2>]>,
289 ) -> Self {
290 let map = Self {
291 starts: starts.into(),
292 values1: values1.into(),
293 values2: values2.map(Into::into),
294 };
295 #[cfg(test)]
296 map.assert_valid();
297
298 map
299 }
300
301 /// Construct a [`DenseRangeMap`] from an iterator of `(range,v1)` tuples.
302 ///
303 /// The ranges must be disjoint and sorted in ascending order by their start.
304 #[cfg(test)]
305 pub(crate) fn from_sorted_inclusive_ranges<S>(iter: S) -> Result<Self, Error>
306 where
307 S: Iterator<Item = (RangeInclusive<K>, V1)>,
308 {
309 let discard_v2 = true;
310 Self::try_from_sorted_inclusive_ranges(
311 iter.map(|(r, v1)| Ok((r, Some(v1), None))),
312 discard_v2,
313 )
314 }
315
316 /// Construct a [`DenseRangeMap`] from an iterator of
317 /// `Result<(range,optvalue1,optvalue2)>` tuples.
318 ///
319 /// The ranges must be disjoint and sorted in ascending order by their
320 /// start.
321 pub(crate) fn try_from_sorted_inclusive_ranges<S, E>(
322 iter: S,
323 discard_v2: bool,
324 ) -> Result<Self, E>
325 where
326 S: Iterator<Item = Result<(RangeInclusive<K>, Option<V1>, Option<V2>), E>>,
327 E: From<Error>,
328 {
329 let mut b = DenseRangeMapBuilder::new();
330 if discard_v2 {
331 b.values2 = None;
332 }
333 for entry in iter {
334 let (range, value1, mut value2) = entry?;
335 if discard_v2 {
336 value2 = None;
337 }
338 b.add_entry(range, value1, value2)?;
339 }
340
341 Ok(b.build())
342 }
343
344 /// Return the index for the values corresponding to `key`.
345 pub(crate) fn index_for_key(&self, key: &K) -> Option<usize> {
346 match self.starts.binary_search(key) {
347 Ok(v) => Some(v),
348 Err(0) => None,
349 Err(v) => Some(v - 1),
350 }
351 }
352
353 /// Return the value, if any, associated with the given `key`.
354 pub(crate) fn get1(&self, key: &K) -> Option<&V1> {
355 self.index_for_key(key)
356 .and_then(|index| self.values1[index].as_ref())
357 }
358
359 /// Return the secondary value, if any, associated with the given `key`.
360 pub(crate) fn get2(&self, key: &K) -> Option<&V2> {
361 if let Some(values2) = self.values2.as_ref() {
362 self.index_for_key(key)
363 .and_then(|index| values2[index].as_ref())
364 } else {
365 None
366 }
367 }
368
369 /// Testing only: Assert that this object obeys its invariants.
370 #[cfg(test)]
371 fn assert_valid(&self) {
372 // We don't use `is_sorted` here since it allows duplicates.
373 for pair in self.starts.windows(2) {
374 assert!(pair[0] < pair[1]);
375 }
376 assert_eq!(self.values1.len(), self.starts.len());
377 if let Some(values2) = &self.values2 {
378 assert_eq!(values2.len(), self.starts.len());
379 }
380 }
381
382 /// Testing only: return a `Vec` of `(start, Option<V1, V2>)` to represent
383 /// this entries table.
384 ///
385 /// This is more convenient to inspect than looking at `starts` and `values`
386 /// manually.
387 ///
388 /// (We store `starts` and `values` in separate lists to avoid padding issues.)
389 #[cfg(test)]
390 fn rep(&self) -> Vec<(K, Option<V1>)>
391 where
392 K: Clone + 'static,
393 V1: Clone + 'static,
394 {
395 self.starts
396 .iter()
397 .cloned()
398 .zip(self.values1.iter().cloned())
399 .collect()
400 }
401
402 /// Return a triple of slices that encode this map.
403 #[cfg(feature = "export")]
404 #[allow(clippy::type_complexity)]
405 pub(crate) fn export(&self) -> (&[K], &[Option<V1>], Option<&[Option<V2>]>) {
406 (
407 self.starts.as_ref(),
408 self.values1.as_ref(),
409 self.values2.as_ref().map(|v2| v2.as_ref()),
410 )
411 }
412}
413
414#[cfg(test)]
415mod test {
416 // @@ begin test lint list maintained by maint/add_warning @@
417 #![allow(clippy::bool_assert_comparison)]
418 #![allow(clippy::clone_on_copy)]
419 #![allow(clippy::dbg_macro)]
420 #![allow(clippy::mixed_attributes_style)]
421 #![allow(clippy::print_stderr)]
422 #![allow(clippy::print_stdout)]
423 #![allow(clippy::single_char_pattern)]
424 #![allow(clippy::unwrap_used)]
425 #![allow(clippy::unchecked_time_subtraction)]
426 #![allow(clippy::useless_vec)]
427 #![allow(clippy::needless_pass_by_value)]
428 //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->
429
430 use super::*;
431 use proptest::prelude::*;
432
433 type M = DenseRangeMap<u32, &'static str, ()>;
434
435 #[test]
436 fn empty() {
437 let map = M::from_sorted_inclusive_ranges(std::iter::empty()).unwrap();
438 assert_eq!(map.get1(&0), None);
439 assert_eq!(map.get1(&1), None);
440 assert_eq!(map.get1(&50), None);
441 assert_eq!(map.get1(&(u32::MAX - 1)), None);
442 assert_eq!(map.get1(&(u32::MAX)), None);
443 }
444
445 #[test]
446 fn both_ends_open() {
447 // construct a map that has gaps at both ends.
448 let map = M::from_sorted_inclusive_ranges(
449 [
450 //
451 (5..=10, "small"),
452 (11..=90, "medium"),
453 (100..=1000, "big"),
454 ]
455 .into_iter(),
456 )
457 .unwrap();
458 map.assert_valid();
459
460 assert_eq!(
461 map.rep()[..],
462 [
463 (5, Some("small")),
464 (11, Some("medium")),
465 (91, None),
466 (100, Some("big")),
467 (1001, None),
468 ]
469 );
470
471 assert_eq!(map.get1(&0), None);
472 assert_eq!(map.get1(&1), None);
473 assert_eq!(map.get1(&5), Some(&"small"));
474 assert_eq!(map.get1(&10), Some(&"small"));
475 assert_eq!(map.get1(&11), Some(&"medium"));
476 assert_eq!(map.get1(&85), Some(&"medium"));
477 assert_eq!(map.get1(&90), Some(&"medium"));
478 assert_eq!(map.get1(&91), None);
479 assert_eq!(map.get1(&99), None);
480 assert_eq!(map.get1(&100), Some(&"big"));
481 assert_eq!(map.get1(&500), Some(&"big"));
482 assert_eq!(map.get1(&1000), Some(&"big"));
483 assert_eq!(map.get1(&1001), None);
484 assert_eq!(map.get1(&(u32::MAX - 1)), None);
485 assert_eq!(map.get1(&u32::MAX), None);
486 }
487
488 #[test]
489 fn both_ends_filled_map() {
490 // construct a map that has no gap at either end.
491 let map = M::from_sorted_inclusive_ranges(
492 [
493 //
494 (0..=10, "small"),
495 (11..=90, "medium"),
496 (100..=u32::MAX, "big"),
497 ]
498 .into_iter(),
499 )
500 .unwrap();
501
502 assert_eq!(
503 map.rep()[..],
504 [
505 (0, Some("small")),
506 (11, Some("medium")),
507 (91, None),
508 (100, Some("big")),
509 ]
510 );
511
512 assert_eq!(map.get1(&0), Some(&"small"));
513 assert_eq!(map.get1(&1), Some(&"small"));
514 assert_eq!(map.get1(&5), Some(&"small"));
515 assert_eq!(map.get1(&10), Some(&"small"));
516 assert_eq!(map.get1(&11), Some(&"medium"));
517 assert_eq!(map.get1(&85), Some(&"medium"));
518 assert_eq!(map.get1(&90), Some(&"medium"));
519 assert_eq!(map.get1(&91), None);
520 assert_eq!(map.get1(&99), None);
521 assert_eq!(map.get1(&100), Some(&"big"));
522 assert_eq!(map.get1(&500), Some(&"big"));
523 assert_eq!(map.get1(&1000), Some(&"big"));
524 assert_eq!(map.get1(&1001), Some(&"big"));
525 assert_eq!(map.get1(&(u32::MAX - 1)), Some(&"big"));
526 assert_eq!(map.get1(&u32::MAX), Some(&"big"));
527 }
528
529 proptest! {
530 // Property test: build a RangeIncluseiveMap at random, then construct a new
531 // DenseRangeMap from that map, and make sure they give the same outputs.
532 #[test]
533 fn matches_rangemap(ranges: Vec<RangeInclusive<u32>>, probes: Vec<u32>) {
534 let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
535 for (n, range) in ranges.into_iter().enumerate() {
536 rangemap.insert(range, n);
537 }
538 let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
539 rangemap.iter().map(|(k,v)| (k.clone(), *v))
540 ).unwrap();
541
542 for probe in probes.iter() {
543 assert_eq!(rangemap.get(probe), dense_map.get1(probe));
544 }
545 }
546
547 // Property test: construct a disjoint list of ranges in ascending
548 // order, use that list to construct a RangeInclusiveMap and a
549 // DenseRangeMap and make sure they give the same outputs.
550 #[test]
551 fn matches_rangemap2(r: Vec<(u32,u32)>, probes: Vec<u32>) {
552 let mut ranges = vec![];
553 let mut next = 0_u32;
554 for (gap,len) in r {
555 let Some(start) = next.checked_add(gap) else {break;};
556 let end = start.saturating_add(len);
557 ranges.push(start..=end);
558 if let Some(n) = end.checked_add(1) {
559 next = n;
560 } else {
561 break;
562 }
563 }
564
565 let mut rangemap: rangemap::RangeInclusiveMap<u32, usize> = Default::default();
566 for (n, range) in ranges.iter().enumerate() {
567 rangemap.insert(range.clone(), n);
568 }
569
570 let dense_map = DenseRangeMap::<u32, usize, ()>::from_sorted_inclusive_ranges(
571 ranges.into_iter().enumerate().map(|(n, r)| (r, n))
572 ).unwrap();
573
574 for probe in probes.iter() {
575 assert_eq!(rangemap.get(probe), dense_map.get1(probe));
576 }
577 }
578 }
579}