Merge branch 'guards_as_bridges_part3' into 'main'

GuardMgr: decouple NetDir from guards and sample code.

See merge request tpo/core/arti!815
This commit is contained in:
Nick Mathewson 2022-11-02 12:41:03 +00:00
commit 66f34ee774
6 changed files with 558 additions and 155 deletions

View File

@ -5,16 +5,24 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::SystemTime;
use crate::bridge::BridgeConfig;
use crate::{
bridge::BridgeConfig,
sample::{Candidate, CandidateStatus, Universe, WeightThreshold},
};
use dyn_clone::DynClone;
use futures::stream::BoxStream;
use num_enum::{IntoPrimitive, TryFromPrimitive};
use strum::{EnumCount, EnumIter};
use tor_error::{HasKind, HasRetryTime};
use tor_linkspec::{ChanTarget, HasChanMethod, HasRelayIds, OwnedChanTarget};
use tor_llcrypto::pk::{ed25519::Ed25519Identity, rsa::RsaIdentity};
use tor_netdir::RelayWeight;
use tor_netdoc::doc::routerdesc::RouterDesc;
use super::BridgeRelay;
/// A router descriptor that can be used to build circuits through a bridge.
///
/// These descriptors are fetched from the bridges themselves, and used in
@ -122,3 +130,161 @@ dyn_clone::clone_trait_object!(BridgeDescError);
/// A set of bridge descriptors, managed and modified by a BridgeDescProvider.
pub type BridgeDescList = HashMap<Arc<BridgeConfig>, Result<BridgeDesc, Box<dyn BridgeDescError>>>;
/// A collection of bridges, possibly with their descriptors.
///
/// A `BridgeSet` pairs the list of configured bridges with whatever
/// descriptors are currently known for them.  It implements [`Universe`], so
/// guards can be sampled from it and checked against it.
//
// TODO pt-client: I doubt that this type is in its final form.
#[derive(Debug, Clone)]
pub(crate) struct BridgeSet {
/// When did this BridgeSet last change its listed bridges?
///
/// Initialized to the current time by [`BridgeSet::new`], and reported as
/// this universe's timestamp.
config_last_changed: SystemTime,
/// The configured bridges.
config: Vec<Arc<BridgeConfig>>,
/// A map from those bridges to their descriptors. It may contain elements
/// that are not in `config`.
///
/// Each value is a `Result`; only `Ok` entries are treated as a known
/// descriptor when building a `BridgeRelay`.
descs: Arc<BridgeDescList>,
}
impl BridgeSet {
    /// Build a `BridgeSet` holding the given configured bridges.
    ///
    /// The new set knows no descriptors yet, and records the current time as
    /// the moment at which its configuration last changed.
    #[allow(dead_code)] // TODO pt-client remove
    pub(crate) fn new(config: Vec<Arc<BridgeConfig>>) -> Self {
        let config_last_changed = SystemTime::now();
        let descs = Arc::new(BridgeDescList::default());
        Self {
            config_last_changed,
            config,
            descs,
        }
    }

    /// Find the first configured bridge that is compatible with `guard`.
    ///
    /// A bridge is compatible when the guard has every identity that the
    /// bridge line lists _and_ both use the same channel method: the channel
    /// method is part of what distinguishes one bridge line from another.
    ///
    /// The guard may carry more identities than the bridge line does, so the
    /// result is only a best match; callers that need certainty must check
    /// for a closer match themselves.
    fn bridge_by_guard<T>(&self, guard: &T) -> Option<&Arc<BridgeConfig>>
    where
        T: ChanTarget,
    {
        for bridge in &self.config {
            let ids_match = guard.has_all_relay_ids_from(bridge.as_ref());
            if ids_match && guard.chan_method() == bridge.chan_method() {
                return Some(bridge);
            }
        }
        None
    }

    /// Wrap `bridge` in a `BridgeRelay`, attaching its descriptor if we
    /// have one.
    fn relay_by_bridge(&self, bridge: &Arc<BridgeConfig>) -> BridgeRelay {
        // A missing entry and a failed entry are handled alike: in both cases
        // the resulting relay simply has no descriptor.
        let desc = self
            .descs
            .get(bridge)
            .and_then(|entry| entry.as_ref().ok())
            .cloned();
        BridgeRelay::new(Arc::clone(bridge), desc)
    }

    /// Find the `BridgeRelay` that corresponds to `guard`, and report how
    /// sure we are about the match.
    fn bridge_relay_by_guard<T: tor_linkspec::ChanTarget>(
        &self,
        guard: &T,
    ) -> CandidateStatus<BridgeRelay> {
        let bridge = match self.bridge_by_guard(guard) {
            Some(bridge) => bridge,
            // No configured bridge matches this guard's identities, so we
            // can declare it absent.
            None => return CandidateStatus::Absent,
        };

        let bridge_relay = self.relay_by_bridge(bridge);
        if bridge_relay.has_all_relay_ids_from(guard) {
            // Every identity of the guard appears either in the bridge line
            // or in the descriptor, so the match is exact.
            return CandidateStatus::Present(bridge_relay);
        }

        if bridge_relay.has_descriptor() {
            // The match is not exact even though we have a descriptor, so
            // this is _not_ a real match.
            CandidateStatus::Absent
        } else {
            // Without a descriptor we cannot tell; fetching one might make
            // the match precise.
            CandidateStatus::Uncertain
        }
    }
}
impl Universe for BridgeSet {
fn contains<T: tor_linkspec::ChanTarget>(&self, guard: &T) -> Option<bool> {
match self.bridge_relay_by_guard(guard) {
CandidateStatus::Present(_) => Some(true),
CandidateStatus::Absent => Some(false),
CandidateStatus::Uncertain => None,
}
}
fn status<T: tor_linkspec::ChanTarget>(&self, guard: &T) -> CandidateStatus<Candidate> {
match self.bridge_relay_by_guard(guard) {
CandidateStatus::Present(bridge_relay) => CandidateStatus::Present(Candidate {
listed_as_guard: true,
is_dir_cache: true, // all bridges are directory caches.
full_dir_info: bridge_relay.has_descriptor(),
owned_target: OwnedChanTarget::from_chan_target(&bridge_relay),
}),
CandidateStatus::Absent => CandidateStatus::Absent,
CandidateStatus::Uncertain => CandidateStatus::Uncertain,
}
}
fn timestamp(&self) -> std::time::SystemTime {
self.config_last_changed
}
fn weight_threshold<T>(
&self,
_sample: &tor_linkspec::ByRelayIds<T>,
_params: &crate::GuardParams,
) -> WeightThreshold
where
T: HasRelayIds,
{
WeightThreshold {
current_weight: RelayWeight::from(0),
maximum_weight: RelayWeight::from(u64::MAX),
}
}
fn sample<T>(
&self,
pre_existing: &tor_linkspec::ByRelayIds<T>,
filter: &crate::GuardFilter,
n: usize,
) -> Vec<(Candidate, tor_netdir::RelayWeight)>
where
T: HasRelayIds,
{
use rand::seq::IteratorRandom;
self.config
.iter()
.filter(|bridge_conf| {
filter.permits(bridge_conf.as_ref())
&& pre_existing
.all_overlapping(bridge_conf.as_ref())
.is_empty()
})
.choose_multiple(&mut rand::thread_rng(), n)
.into_iter()
.map(|bridge_config| {
let relay = self.relay_by_bridge(bridge_config);
(
Candidate {
listed_as_guard: true,
is_dir_cache: true,
full_dir_info: relay.has_descriptor(),
owned_target: OwnedChanTarget::from_chan_target(&relay),
},
RelayWeight::from(0),
)
})
.collect()
}
}

View File

@ -38,6 +38,11 @@ pub struct BridgeRelayWithDesc<'a>(
);
impl BridgeRelay {
/// Construct a new BridgeRelay from its parts.
pub(crate) fn new(bridge_line: Arc<BridgeConfig>, desc: Option<BridgeDesc>) -> Self {
Self { bridge_line, desc }
}
/// Return true if this BridgeRelay has a known descriptor and can be used for relays.
pub fn has_descriptor(&self) -> bool {
self.desc.is_some()

View File

@ -1,7 +1,6 @@
//! Code to represent a single guard node and track its status.
use tor_basic_utils::retry::RetryDelay;
use tor_netdir::{NetDir, Relay, RelayWeight};
use educe::Educe;
use serde::{Deserialize, Serialize};
@ -11,11 +10,14 @@ use std::time::{Duration, Instant, SystemTime};
use tracing::{trace, warn};
use crate::dirstatus::DirStatus;
use crate::sample::Candidate;
use crate::skew::SkewObservation;
use crate::util::randomize_time;
use crate::{ids::GuardId, GuardParams, GuardRestriction, GuardUsage};
use crate::{ExternalActivity, GuardSetSelector, GuardUsageKind};
use tor_linkspec::{HasAddrs, HasRelayIds, RelayIds};
use crate::{sample, ExternalActivity, GuardSetSelector, GuardUsageKind};
use tor_linkspec::{
ChanTarget, ChannelMethod, HasAddrs, HasChanMethod, HasRelayIds, PtTarget, RelayIds,
};
use tor_persist::{Futureproof, JsonValue};
/// Tri-state to represent whether a guard is believed to be reachable or not.
@ -70,8 +72,8 @@ impl CrateId {
///
/// A Guard is a Tor relay that clients use for the first hop of their circuits.
/// It doesn't need to be a relay that's currently on the network (that is, one
/// that we could represent as a [`Relay`]): guards might be temporarily
/// unlisted.
/// that we could represent as a [`Relay`](tor_netdir::Relay)): guards might be
/// temporarily unlisted.
///
/// Some fields in guards are persistent; others are reset with every process.
///
@ -91,10 +93,29 @@ pub(crate) struct Guard {
/// The identity keys for this guard.
id: GuardId,
/// The most recently seen addresses for making OR connections to this
/// guard.
/// The most recently seen addresses for this guard. If `pt_targets` is
/// empty, these are the addresses we use for making OR connections to this
/// guard directly. If `pt_targets` is nonempty, these are addresses at
/// which the server is "located" (q.v. [`HasAddrs`]), but not ways to
/// connect to it.
orports: Vec<SocketAddr>,
/// Any `PtTarget` instances that we know about for connecting to this guard
/// over a pluggable transport.
///
/// If this is empty, then this guard only supports direct connections, at
/// the locations in `orports`.
///
/// (Currently, this is always empty, or a singleton. If we find more than
/// one, we only look at the first. It is a vector only for forward
/// compatibility.)
//
// TODO: We may want to replace pt_targets and orports with a new structure;
// maybe a PtAddress and a list of SocketAddr. But we'll keep them like
// this for now to keep backward compatibility.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pt_targets: Vec<PtTarget>,
/// When, approximately, did we first add this guard to our sample?
#[serde(with = "humantime_serde")]
added_at: SystemTime,
@ -125,7 +146,7 @@ pub(crate) struct Guard {
/// True if this guard is listed in our latest directory information, but we
/// are missing some directory information (such as a microdescriptor) for it.
#[serde(skip)]
microdescriptor_missing: bool,
dir_info_missing: bool,
/// When did we last give out this guard in response to a request?
#[serde(skip)]
@ -211,35 +232,71 @@ pub(crate) enum NewlyConfirmed {
}
impl Guard {
/// Create a new unused [`Guard`] from a [`Relay`].
/// Create a new unused [`Guard`] from a [`Candidate`].
pub(crate) fn from_candidate(
    candidate: Candidate,
    now: SystemTime,
    params: &GuardParams,
) -> Self {
    // Start from a guard built out of the candidate's channel target...
    let mut guard = Self::from_chan_target(&candidate.owned_target, now, params);
    // ...then copy over what the candidate tells us about directory usage.
    // (`listed_as_guard` is deliberately not consulted here.)
    guard.is_dir_cache = candidate.is_dir_cache;
    guard.dir_info_missing = !candidate.full_dir_info;
    guard
}
/// Create a new unused [`Guard`] from a [`ChanTarget`].
///
/// This function doesn't check whether the provided relay is a
/// suitable guard node or not: that's up to the caller to decide.
pub(crate) fn from_relay(relay: &Relay<'_>, now: SystemTime, params: &GuardParams) -> Self {
fn from_chan_target<T>(relay: &T, now: SystemTime, params: &GuardParams) -> Self
where
T: ChanTarget,
{
let added_at = randomize_time(
&mut rand::thread_rng(),
now,
params.lifetime_unconfirmed / 10,
);
let pt_target = match relay.chan_method() {
#[cfg(feature = "pt-client")]
ChannelMethod::Pluggable(pt) => Some(pt),
_ => None,
};
Self::new(
GuardId::from_relay_ids(relay),
relay.addrs().into(),
pt_target,
added_at,
)
}
/// Return a new, manually constructed [`Guard`].
fn new(id: GuardId, orports: Vec<SocketAddr>, added_at: SystemTime) -> Self {
fn new(
id: GuardId,
orports: Vec<SocketAddr>,
pt_target: Option<PtTarget>,
added_at: SystemTime,
) -> Self {
Guard {
id,
orports,
pt_targets: pt_target.into_iter().collect(),
added_at,
added_by: CrateId::this_crate(),
disabled: None,
confirmed_at: None,
unlisted_since: None,
microdescriptor_missing: false,
dir_info_missing: false,
last_tried_to_connect_at: None,
reachable: Reachable::Unknown,
retry_at: None,
@ -279,8 +336,9 @@ impl Guard {
}
}
/// Return true if this guard is listed in the latest NetDir, and hasn't
/// been turned off for some other reason.
/// Return true if this guard is usable and working according to our latest
/// configuration and directory information, and hasn't been turned off for
/// some other reason.
pub(crate) fn usable(&self) -> bool {
    // A guard is usable when it is neither marked as unlisted nor disabled.
    let listed = self.unlisted_since.is_none();
    let enabled = self.disabled.is_none();
    listed && enabled
}
@ -324,6 +382,7 @@ impl Guard {
Guard {
// All other persistent fields are taken from `self`.
id: self.id,
pt_targets: self.pt_targets,
orports: self.orports,
added_at: self.added_at,
added_by: self.added_by,
@ -339,7 +398,7 @@ impl Guard {
reachable: other.reachable,
is_dir_cache: other.is_dir_cache,
exploratory_circ_pending: other.exploratory_circ_pending,
microdescriptor_missing: other.microdescriptor_missing,
dir_info_missing: other.dir_info_missing,
circ_history: other.circ_history,
suspicious_behavior_warned: other.suspicious_behavior_warned,
dir_status: other.dir_status,
@ -429,7 +488,7 @@ impl Guard {
GuardUsageKind::Data => {
// We need a "definitely listed" guard to build a multihop
// circuit.
if self.microdescriptor_missing {
if self.dir_info_missing {
return false;
}
}
@ -437,66 +496,73 @@ impl Guard {
self.obeys_restrictions(&usage.restrictions[..])
}
/// Check whether this guard is listed in the provided [`NetDir`].
/// Check whether this guard is listed in the provided [`sample::Universe`].
///
/// Returns `Some(true)` if it is definitely listed, and `Some(false)` if it
/// is definitely not listed. A `None` return indicates that we need to
/// download another microdescriptor before we can be certain whether this
/// guard is listed or not.
pub(crate) fn listed_in(&self, netdir: &NetDir) -> Option<bool> {
netdir.ids_listed(&self.id.0)
/// download more directory information about this guard before we can be
/// certain whether this guard is listed or not.
pub(crate) fn listed_in<U: sample::Universe>(&self, universe: &U) -> Option<bool> {
universe.contains(self)
}
/// Change this guard's status based on a newly received or newly updated
/// [`NetDir`].
/// [`sample::Universe`].
///
/// A guard may become "listed" or "unlisted": a listed guard is one that
/// appears in the consensus with the Guard flag.
///
/// A guard may acquire additional identities if we learned them from the
/// netdir.
/// guard, either directly or via an authenticated directory document.
///
/// Additionally, a guard's orports may change, if the directory lists a new
/// address for the relay.
pub(crate) fn update_from_netdir(&mut self, netdir: &NetDir) {
// This is a tricky check, since if we're missing a microdescriptor
// for the RSA id, we won't know whether the ed25519 id is listed or
// not.
let listed_as_guard = match self.listed_in(netdir) {
Some(true) => {
// Definitely listed.
let relay = netdir
.by_ids(&self.id.0)
.expect("Couldn't get a listed relay?!");
/// Additionally, a guard's `orports` or `pt_targets` may change, if the
/// `universe` lists a new address for the relay.
pub(crate) fn update_from_universe<U: sample::Universe>(&mut self, universe: &U) {
// This is a tricky check, since if we're missing directory information
// for the guard, we won't know its full set of identities.
use sample::CandidateStatus::*;
let listed_as_guard = match universe.status(self) {
Present(Candidate {
listed_as_guard,
is_dir_cache,
full_dir_info,
owned_target,
}) => {
// Update address information.
self.orports = relay.addrs().into();
self.orports = owned_target.addrs().into();
// Update Pt information.
self.pt_targets = match owned_target.chan_method() {
#[cfg(feature = "pt-client")]
ChannelMethod::Pluggable(pt) => vec![pt],
_ => Vec::new(),
};
// Check whether we can currently use it as a directory cache.
self.is_dir_cache = relay.is_dir_cache();
self.is_dir_cache = is_dir_cache;
// Update our IDs: the Relay will have strictly more.
assert!(relay.has_all_relay_ids_from(self));
self.id = GuardId(RelayIds::from_relay_ids(&relay));
assert!(owned_target.has_all_relay_ids_from(self));
self.id = GuardId(RelayIds::from_relay_ids(&owned_target));
self.dir_info_missing = !full_dir_info;
relay.is_flagged_guard()
listed_as_guard
}
Some(false) => false, // Definitely not listed.
None => {
// We can't tell if this is listed: The RSA id is present, but
// the microdescriptor is missing so we don't know the Ed25519 ID.
self.microdescriptor_missing = true;
Absent => false, // Definitely not listed.
Uncertain => {
// We can't tell if this is listed without more directory information.
self.dir_info_missing = true;
return;
}
};
// We got a definite answer, so we aren't missing directory information
// for this guard.
self.microdescriptor_missing = false;
self.dir_info_missing = false;
if listed_as_guard {
// Definitely listed, so clear unlisted_since.
self.mark_listed();
} else {
// Unlisted or not a guard; mark it unlisted.
self.mark_unlisted(netdir.lifetime().valid_after());
self.mark_unlisted(universe.timestamp());
}
}
@ -679,14 +745,6 @@ impl Guard {
}
}
/// Return the weight of this guard (if any) according to `dir`.
///
/// We use this information to decide whether we are about to sample
/// too much of the network as guards.
pub(crate) fn get_weight(&self, dir: &NetDir) -> Option<RelayWeight> {
dir.weight_by_rsa_id(self.id.0.rsa_identity()?, tor_netdir::WeightRole::Guard)
}
/// Return a [`FirstHop`](crate::FirstHop) object to represent this guard.
pub(crate) fn get_external_rep(&self, selection: GuardSetSelector) -> crate::FirstHop {
crate::FirstHop {
@ -731,7 +789,17 @@ impl tor_linkspec::HasRelayIds for Guard {
}
}
impl tor_linkspec::DirectChanMethodsHelper for Guard {}
impl tor_linkspec::HasChanMethod for Guard {
/// Return the method to use when opening a channel to this guard.
///
/// If `pt_targets` is nonempty, only its first entry is considered;
/// otherwise the guard is reached directly at its `orports`.
fn chan_method(&self) -> ChannelMethod {
match &self.pt_targets[..] {
// With pluggable-transport support, connect via the first known target.
#[cfg(feature = "pt-client")]
[first, ..] => ChannelMethod::Pluggable(first.clone()),
// Without pluggable-transport support, report a direct method with no
// addresses at all.
#[cfg(not(feature = "pt-client"))]
[_first, ..] => ChannelMethod::Direct(vec![]), // can't connect to this; no pt support.
// No pluggable-transport targets: connect directly to the known addresses.
[] => ChannelMethod::Direct(self.orports.clone()),
}
}
}
impl tor_linkspec::ChanTarget for Guard {}
@ -838,7 +906,7 @@ mod test {
let id = basic_id();
let ports = vec!["127.0.0.7:7777".parse().unwrap()];
let added = SystemTime::now();
Guard::new(id, ports, added)
Guard::new(id, ports, None, added)
}
#[test]
@ -912,7 +980,7 @@ mod test {
assert!(g.conforms_to_usage(&dir_usage));
let mut g2 = g.clone();
g2.microdescriptor_missing = true;
g2.dir_info_missing = true;
assert!(!g2.conforms_to_usage(&data_usage));
assert!(g2.conforms_to_usage(&dir_usage));
@ -1049,28 +1117,24 @@ mod test {
// Construct a guard from a relay from the netdir.
let relay22 = netdir.by_id(&Ed25519Identity::from([22; 32])).unwrap();
let guard22 = Guard::from_relay(&relay22, now, &params);
let guard22 = Guard::from_chan_target(&relay22, now, &params);
assert!(guard22.same_relay_ids(&relay22));
assert!(Some(guard22.added_at) <= Some(now));
// Can we still get the relay back?
let id = FirstHopId::in_sample(GuardSetSelector::Default, guard22.id.clone());
let id = FirstHopId::in_sample(GuardSetSelector::Default, guard22.id);
let r = id.get_relay(&netdir).unwrap();
assert!(r.same_relay_ids(&relay22));
// Can we check on the guard's weight?
let w = guard22.get_weight(&netdir).unwrap();
assert_eq!(w, 3000.into());
// Now try a guard that isn't in the netdir.
let guard255 = Guard::new(
GuardId::new([255; 32].into(), [255; 20].into()),
vec![],
None,
now,
);
let id = FirstHopId::in_sample(GuardSetSelector::Default, guard255.id.clone());
let id = FirstHopId::in_sample(GuardSetSelector::Default, guard255.id);
assert!(id.get_relay(&netdir).is_none());
assert!(guard255.get_weight(&netdir).is_none());
}
#[test]
@ -1105,11 +1169,12 @@ mod test {
let mut guard255 = Guard::new(
GuardId::new([255; 32].into(), [255; 20].into()),
vec!["8.8.8.8:53".parse().unwrap()],
None,
now,
);
assert_eq!(guard255.unlisted_since, None);
assert_eq!(guard255.listed_in(&netdir), Some(false));
guard255.update_from_netdir(&netdir);
guard255.update_from_universe(&netdir);
assert_eq!(
guard255.unlisted_since,
Some(netdir.lifetime().valid_after())
@ -1117,28 +1182,38 @@ mod test {
assert!(!guard255.orports.is_empty());
// Try a guard that is in netdir, but not netdir2.
let mut guard22 = Guard::new(GuardId::new([22; 32].into(), [22; 20].into()), vec![], now);
let mut guard22 = Guard::new(
GuardId::new([22; 32].into(), [22; 20].into()),
vec![],
None,
now,
);
let id22: FirstHopId = FirstHopId::in_sample(GuardSetSelector::Default, guard22.id.clone());
let relay22 = id22.get_relay(&netdir).unwrap();
assert_eq!(guard22.listed_in(&netdir), Some(true));
guard22.update_from_netdir(&netdir);
guard22.update_from_universe(&netdir);
assert_eq!(guard22.unlisted_since, None); // It's listed.
assert_eq!(&guard22.orports, relay22.addrs()); // Addrs are set.
assert_eq!(guard22.listed_in(&netdir2), Some(false));
guard22.update_from_netdir(&netdir2);
guard22.update_from_universe(&netdir2);
assert_eq!(
guard22.unlisted_since,
Some(netdir2.lifetime().valid_after())
);
assert_eq!(&guard22.orports, relay22.addrs()); // Addrs still set.
assert!(!guard22.microdescriptor_missing);
assert!(!guard22.dir_info_missing);
// Now see what happens for a guard that's in the consensus, but missing an MD.
let mut guard23 = Guard::new(GuardId::new([23; 32].into(), [23; 20].into()), vec![], now);
let mut guard23 = Guard::new(
GuardId::new([23; 32].into(), [23; 20].into()),
vec![],
None,
now,
);
assert_eq!(guard23.listed_in(&netdir2), Some(true));
assert_eq!(guard23.listed_in(&netdir3), None);
guard23.update_from_netdir(&netdir3);
assert!(guard23.microdescriptor_missing);
guard23.update_from_universe(&netdir3);
assert!(guard23.dir_info_missing);
assert!(guard23.is_dir_cache);
}

View File

@ -385,7 +385,7 @@ impl<R: Runtime> GuardMgr<R> {
inner
.guards
.active_guards_mut()
.missing_primary_microdescriptors(netdir)
.n_primary_without_dir_info(netdir)
== 0
}
@ -722,7 +722,7 @@ impl GuardMgrInner {
if self
.guards
.active_guards_mut()
.missing_primary_microdescriptors(netdir)
.n_primary_without_dir_info(netdir)
> 0
{
// We are missing primary guard descriptors, so we shouldn't update our guard
@ -731,7 +731,7 @@ impl GuardMgrInner {
}
self.guards
.active_guards_mut()
.update_status_from_netdir(netdir);
.update_status_from_dir(netdir);
self.guards
.active_guards_mut()
.extend_sample_as_needed(now, &self.params, netdir);

View File

@ -5,6 +5,8 @@
// - allow use of BridgeList in place of NetDir, possibly via a trait implemented by both.
// - allow Guard to be constructed from a Bridge rather than a Relay
mod candidate;
use crate::filter::GuardFilter;
use crate::guard::{Guard, NewlyConfirmed, Reachable};
use crate::skew::SkewObservation;
@ -14,7 +16,6 @@ use crate::{
use crate::{FirstHop, GuardSetSelector};
use tor_basic_utils::iter::{FilterCount, IteratorExt as _};
use tor_linkspec::{ByRelayIds, HasRelayIds};
use tor_netdir::{NetDir, Relay};
use itertools::Itertools;
use rand::seq::SliceRandom;
@ -24,6 +25,8 @@ use std::collections::{HashMap, HashSet};
use std::time::{Instant, SystemTime};
use tracing::{debug, info};
pub(crate) use candidate::{Candidate, CandidateStatus, Universe, WeightThreshold};
/// A set of sampled guards, along with various orderings on subsets
/// of the sample.
///
@ -336,12 +339,6 @@ impl GuardSet {
guard_set
}
/// Return false if `relay` (or some other relay that shares an ID with it)
/// is a member if this set.
fn can_add_relay(&self, relay: &Relay<'_>) -> bool {
self.guards.all_overlapping(relay).is_empty()
}
/// Return `Ok(true)` if `id` is definitely a member of this set, and
/// `Ok(false)` if it is definitely not a member.
///
@ -371,11 +368,11 @@ impl GuardSet {
/// Guards always start out un-confirmed.
///
/// Return true if any guards were added.
pub(crate) fn extend_sample_as_needed(
pub(crate) fn extend_sample_as_needed<U: Universe>(
&mut self,
now: SystemTime,
params: &GuardParams,
dir: &NetDir,
dir: &U,
) -> bool {
let mut any_added = false;
while self.extend_sample_inner(now, params, dir) {
@ -393,7 +390,12 @@ impl GuardSet {
/// this function will add fewer filter-permitted guards than we had wanted.
/// Because of that, this is a separate function, and
/// extend_sample_as_needed runs it in a loop until it returns false.
fn extend_sample_inner(&mut self, now: SystemTime, params: &GuardParams, dir: &NetDir) -> bool {
fn extend_sample_inner<U: Universe>(
&mut self,
now: SystemTime,
params: &GuardParams,
dir: &U,
) -> bool {
self.assert_consistency();
let n_filtered_usable = self
.guards
@ -416,62 +418,33 @@ impl GuardSet {
let want_to_add = params.min_filtered_sample_size - n_filtered_usable;
let n_to_add = std::cmp::min(max_to_add, want_to_add);
// What's the most weight we're willing to have in the sample?
let target_weight = {
let total_weight = dir.total_weight(tor_netdir::WeightRole::Guard, |r| {
r.is_flagged_guard() && r.is_dir_cache()
});
total_weight
.ratio(params.max_sample_bw_fraction)
.unwrap_or(total_weight)
};
let mut current_weight: tor_netdir::RelayWeight = self
.guards
.values()
.filter_map(|guard| guard.get_weight(dir))
.sum();
if current_weight >= target_weight {
return false; // Can't add any more weight.
}
let candidate::WeightThreshold {
mut current_weight,
maximum_weight,
} = dir.weight_threshold(&self.guards, params);
// Ask the netdir for a set of guards we could use.
let n_candidates = if self.filter_is_restrictive || self.active_filter.is_unfiltered() {
n_to_add
} else {
// The filter will probably reject a bunch of guards, but we sample
// before filtering, so we make this larger on an ad-hoc basis.
n_to_add * 3
};
let candidates = dir.pick_n_relays(
&mut rand::thread_rng(),
n_candidates,
tor_netdir::WeightRole::Guard,
|relay| {
let filter_ok = if self.filter_is_restrictive {
// If we have a very restrictive filter, we only add
// relays permitted by that filter.
self.active_filter.permits(relay)
} else {
// Otherwise we add any relay to the sample.
true
};
filter_ok
&& relay.is_flagged_guard()
&& relay.is_dir_cache()
&& self.can_add_relay(relay)
},
);
let no_filter = GuardFilter::unfiltered();
let (n_candidates, pre_filter) =
if self.filter_is_restrictive || self.active_filter.is_unfiltered() {
(n_to_add, &self.active_filter)
} else {
// The filter will probably reject a bunch of guards, but we sample
// before filtering, so we make this larger on an ad-hoc basis.
(n_to_add * 3, &no_filter)
};
// Add those candidates to the sample, up to our maximum weight.
let candidates = dir.sample(&self.guards, pre_filter, n_candidates);
// Add those candidates to the sample.
let mut any_added = false;
let mut n_filtered_usable = n_filtered_usable;
for candidate in candidates {
if current_weight >= target_weight
for (candidate, weight) in candidates {
// Don't add any more if we have met the minimal sample size, and we
// have added too much weight.
if current_weight >= maximum_weight
&& self.guards.len() >= params.min_filtered_sample_size
{
// Can't add any more weight. (We only enforce target_weight
// if we have at least 'min_filtered_sample_size' in
// our total sample.)
break;
}
if self.guards.len() >= params.max_sample_size {
@ -482,15 +455,13 @@ impl GuardSet {
// We've reached our target; no need to add more.
break;
}
let candidate_weight = dir.relay_weight(&candidate, tor_netdir::WeightRole::Guard);
if self.active_filter.permits(&candidate) {
if self.active_filter.permits(&candidate.owned_target) {
n_filtered_usable += 1;
}
current_weight += candidate_weight;
self.add_guard(&candidate, now, params);
current_weight += weight;
self.add_guard(candidate, now, params);
any_added = true;
}
self.assert_consistency();
any_added
}
@ -498,21 +469,21 @@ impl GuardSet {
/// Add `relay` as a new guard.
///
/// Does nothing if it is already a guard.
fn add_guard(&mut self, relay: &Relay<'_>, now: SystemTime, params: &GuardParams) {
let id = GuardId::from_relay_ids(relay);
fn add_guard(&mut self, relay: Candidate, now: SystemTime, params: &GuardParams) {
let id = GuardId::from_relay_ids(&relay.owned_target);
if self.guards.by_all_ids(&id).is_some() {
return;
}
debug!(guard_id=?id, "Adding guard to sample.");
let guard = Guard::from_relay(relay, now, params);
let guard = Guard::from_candidate(relay, now, params);
self.guards.insert(guard);
self.sample.push(id);
self.primary_guards_invalidated = true;
}
/// Return the number of our primary guards are missing their
/// microdescriptors in `dir`.
pub(crate) fn missing_primary_microdescriptors(&mut self, dir: &NetDir) -> usize {
/// Return the number of our primary guards that are missing directory
/// information in `universe`.
pub(crate) fn n_primary_without_dir_info<U: Universe>(&mut self, universe: &U) -> usize {
self.primary
.iter()
.filter(|id| {
@ -520,19 +491,18 @@ impl GuardSet {
.guards
.by_all_ids(*id)
.expect("Inconsistent guard state");
g.listed_in(dir).is_none()
g.listed_in(universe).is_none()
})
.count()
}
/// Update the status of every guard in this sample from a network
/// directory.
pub(crate) fn update_status_from_netdir(&mut self, dir: &NetDir) {
/// Update the status of every guard in this sample from a given source.
pub(crate) fn update_status_from_dir<U: Universe>(&mut self, dir: &U) {
let old_guards = std::mem::take(&mut self.guards);
self.guards = old_guards
.into_values()
.map(|mut guard| {
guard.update_from_netdir(dir);
guard.update_from_universe(dir);
guard
})
.collect();
@ -1009,6 +979,7 @@ impl<'a> From<GuardSample<'a>> for GuardSet {
mod test {
#![allow(clippy::unwrap_used)]
use tor_linkspec::{HasRelayIds, RelayIdType};
use tor_netdir::{NetDir, Relay};
use tor_netdoc::doc::netstatus::{RelayFlags, RelayWeight};
use super::*;
@ -1446,7 +1417,7 @@ mod test {
.pick_guard_id(&usage, &params, Instant::now())
.unwrap();
guards.record_success(&p_id1, &params, None, SystemTime::now());
assert_eq!(guards.missing_primary_microdescriptors(&netdir), 0);
assert_eq!(guards.n_primary_without_dir_info(&netdir), 0);
use tor_netdir::testnet;
let netdir2 = testnet::construct_custom_netdir(|_idx, bld| {
@ -1459,7 +1430,7 @@ mod test {
.unwrap_if_sufficient()
.unwrap();
assert_eq!(guards.missing_primary_microdescriptors(&netdir2), 1);
assert_eq!(guards.n_primary_without_dir_info(&netdir2), 1);
}
#[test]

View File

@ -0,0 +1,186 @@
//! This module defines and implements traits used to create a guard sample from
//! either bridges or relays.
use std::time::SystemTime;
use tor_linkspec::{ByRelayIds, ChanTarget, HasRelayIds, OwnedChanTarget};
use tor_netdir::{NetDir, Relay, RelayWeight};
use crate::{GuardFilter, GuardParams};
/// A "Universe" is a source from which guard candidates are drawn, and from
/// which guards are updated.
///
/// This module implements it for [`NetDir`]; other sources of guards can
/// implement it as well.
pub(crate) trait Universe {
/// Check whether this universe contains a candidate for the given guard.
///
/// Return `Some(true)` if it definitely does; `Some(false)` if it
/// definitely does not, and `None` if we cannot tell without downloading
/// more information.
fn contains<T: ChanTarget>(&self, guard: &T) -> Option<bool>;
/// Return full information about a member of this universe for a given guard.
///
/// The result is [`CandidateStatus::Present`] with the member's details if
/// it is definitely in this universe, [`CandidateStatus::Absent`] if it
/// definitely is not, and [`CandidateStatus::Uncertain`] if we cannot tell
/// without more directory information.
fn status<T: ChanTarget>(&self, guard: &T) -> CandidateStatus<Candidate>;
/// Return the time at which this Universe last changed. This can be
/// approximate.
fn timestamp(&self) -> SystemTime;
/// Return information about how much of this universe has been added to
/// `sample`, and how much we're willing to add according to `params`.
fn weight_threshold<T>(&self, sample: &ByRelayIds<T>, params: &GuardParams) -> WeightThreshold
where
T: HasRelayIds;
/// Return up to `n` new candidate guards from this Universe, each paired
/// with its weight.
///
/// Only return elements that have no conflicts with identities in
/// `pre_existing`, and which obey `filter`.
fn sample<T>(
&self,
pre_existing: &ByRelayIds<T>,
filter: &GuardFilter,
n: usize,
) -> Vec<(Candidate, RelayWeight)>
where
T: HasRelayIds;
}
/// Information about a single guard candidate, as returned by
/// [`Universe::status`].
///
/// The type parameter `T` is the payload carried when the candidate is
/// present; [`Universe::status`] uses [`Candidate`] for it.
#[derive(Clone, Debug)]
pub(crate) enum CandidateStatus<T> {
/// The candidate is definitely present in some form.
Present(T),
/// The candidate is definitely not in the [`Universe`].
Absent,
/// We would need to download more directory information to be sure whether
/// this candidate is in the [`Universe`].
Uncertain,
}
/// Information about a candidate that we have selected as a guard.
///
/// Values of this type are produced by [`Universe::status`] and
/// [`Universe::sample`].
#[derive(Clone, Debug)]
pub(crate) struct Candidate {
/// True if the candidate is not currently disabled for use as a guard.
///
/// For a relay taken from a [`NetDir`], this is whether the relay is
/// flagged as a guard.
pub(crate) listed_as_guard: bool,
/// True if the candidate can be used as a directory cache.
pub(crate) is_dir_cache: bool,
/// True if we have complete directory information about this candidate.
pub(crate) full_dir_info: bool,
/// Information about connecting to the candidate and using it to build
/// a channel.
pub(crate) owned_target: OwnedChanTarget,
}
/// Information about how much of the universe we are using in a guard sample,
/// and how much we are allowed to use.
///
/// We use this to avoid adding the whole network to our guard sample.
#[derive(Debug, Clone)]
pub(crate) struct WeightThreshold {
/// The amount of the universe that we are using, in [`RelayWeight`].
pub(crate) current_weight: RelayWeight,
/// The greatest amount that we are willing to use, in [`RelayWeight`].
///
/// We can violate this maximum if it's necessary in order to meet our
/// minimum number of guards; otherwise, we're willing to add a _single_
/// guard that exceeds this threshold, but no more.
pub(crate) maximum_weight: RelayWeight,
}
impl Universe for NetDir {
fn timestamp(&self) -> SystemTime {
NetDir::lifetime(self).valid_after()
}
fn contains<T: ChanTarget>(&self, guard: &T) -> Option<bool> {
NetDir::ids_listed(self, guard)
}
fn status<T: ChanTarget>(&self, guard: &T) -> CandidateStatus<Candidate> {
match NetDir::by_ids(self, guard) {
Some(relay) => CandidateStatus::Present(Candidate {
listed_as_guard: relay.is_flagged_guard(),
is_dir_cache: relay.is_dir_cache(),
owned_target: OwnedChanTarget::from_chan_target(&relay),
full_dir_info: true,
}),
None => match NetDir::ids_listed(self, guard) {
Some(true) => panic!("ids_listed said true, but by_ids said none!"),
Some(false) => CandidateStatus::Absent,
None => CandidateStatus::Uncertain,
},
}
}
fn weight_threshold<T>(&self, sample: &ByRelayIds<T>, params: &GuardParams) -> WeightThreshold
where
T: HasRelayIds,
{
// When adding from a netdir, we impose a limit on the fraction of the
// universe we're willing to add.
let maximum_weight = {
let total_weight = self.total_weight(tor_netdir::WeightRole::Guard, |r| {
r.is_flagged_guard() && r.is_dir_cache()
});
total_weight
.ratio(params.max_sample_bw_fraction)
.unwrap_or(total_weight)
};
let current_weight: tor_netdir::RelayWeight = sample
.values()
.filter_map(|guard| {
self.weight_by_rsa_id(guard.rsa_identity()?, tor_netdir::WeightRole::Guard)
})
.sum();
WeightThreshold {
current_weight,
maximum_weight,
}
}
fn sample<T>(
&self,
pre_existing: &ByRelayIds<T>,
filter: &GuardFilter,
n: usize,
) -> Vec<(Candidate, RelayWeight)>
where
T: HasRelayIds,
{
/// Return the weight for this relay, if we can find it.
///
/// (We should always be able to find it as netdirs are constructed
/// today.)
fn weight(dir: &NetDir, relay: &Relay<'_>) -> Option<RelayWeight> {
dir.weight_by_rsa_id(relay.rsa_identity()?, tor_netdir::WeightRole::Guard)
}
self.pick_n_relays(
&mut rand::thread_rng(),
n,
tor_netdir::WeightRole::Guard,
|relay| {
filter.permits(relay)
&& relay.is_flagged_guard()
&& relay.is_dir_cache()
&& pre_existing.all_overlapping(relay).is_empty()
},
)
.iter()
.map(|relay| {
(
Candidate {
listed_as_guard: true,
is_dir_cache: true,
full_dir_info: true,
owned_target: OwnedChanTarget::from_chan_target(relay),
},
weight(self, relay).unwrap_or_else(|| RelayWeight::from(0)),
)
})
.collect()
}
}