From f15cde80de0b5f6f81a7426addba35c3c7506112 Mon Sep 17 00:00:00 2001
From: Nick Mathewson <nickm@torproject.org>
Date: Wed, 13 Oct 2021 11:24:37 -0400
Subject: [PATCH] Use better reporting for guard status.

The previous code would report all failures to build a circuit as
failures of the guard.  But of course that's not right:  If we
fail to extend to the second or third hop, that might or might not
be the guard's fault.

Now we use the "pending status" feature of the GuardMonitor type so
that an early failure is attributed to the guard, but a later
failure is attributed as "Indeterminate".  Only a complete circuit
is called a success.  We use a new "GuardStatusHandle" type here so
that we can report the status early if there is a timeout.
---
 crates/tor-circmgr/src/build.rs             | 53 +++++++++++++++----
 crates/tor-circmgr/src/build/guardstatus.rs | 57 +++++++++++++++++++++
 crates/tor-circmgr/src/impls.rs             | 31 ++++++-----
 crates/tor-guardmgr/src/lib.rs              |  4 +-
 crates/tor-guardmgr/src/pending.rs          |  6 +++
 5 files changed, 127 insertions(+), 24 deletions(-)
 create mode 100644 crates/tor-circmgr/src/build/guardstatus.rs
diff --git a/crates/tor-circmgr/src/build.rs b/crates/tor-circmgr/src/build.rs
index ce5a52497..059639dda 100644
--- a/crates/tor-circmgr/src/build.rs
+++ b/crates/tor-circmgr/src/build.rs
@@ -15,11 +15,16 @@ use std::sync::{
 };
 use std::time::{Duration, Instant};
 use tor_chanmgr::ChanMgr;
+use tor_guardmgr::GuardStatus;
 use tor_linkspec::{ChanTarget, OwnedChanTarget, OwnedCircTarget};
 use tor_proto::circuit::{CircParameters, ClientCirc, PendingClientCirc};
 use tor_rtcompat::{Runtime, SleepProviderExt};
 use tracing::warn;
 
+mod guardstatus;
+
+pub(crate) use guardstatus::GuardStatusHandle;
+
 /// Represents an objects that can be constructed in a circuit-like way.
 ///
 /// This is only a separate trait for testing purposes, so that we can swap
@@ -159,7 +164,11 @@ impl<
 
     /// Build a circuit, without performing any timeout operations.
     ///
-    /// After each hop is built, increments n_hops_built. (TODO: Find
+    /// After each hop is built, increments n_hops_built.  Make sure that
+    /// `guard_status` has its pending status set correctly to correspond
+    /// to a circuit failure at any given stage.
+    ///
+    /// (TODO: Find
     /// a better design there.)
     async fn build_notimeout<RNG: CryptoRng + Rng + Send>(
         self: Arc<Self>,
@@ -168,9 +177,12 @@ impl<
         start_time: Instant,
         n_hops_built: Arc<AtomicU32>,
         mut rng: RNG,
+        guard_status: Arc<GuardStatusHandle>,
     ) -> Result<C> {
         match path {
             OwnedPath::ChannelOnly(target) => {
+                // If we fail now, it's the guard's fault.
+                guard_status.pending(GuardStatus::Failure);
                 let circ =
                     C::create_chantarget(&self.chanmgr, &self.runtime, &mut rng, &target, &params)
                         .await?;
@@ -182,10 +194,15 @@ impl<
             OwnedPath::Normal(p) => {
                 assert!(!p.is_empty());
                 let n_hops = p.len() as u8;
+                // If we fail now, it's the guard's fault.
+                guard_status.pending(GuardStatus::Failure);
                 let circ =
                     C::create(&self.chanmgr, &self.runtime, &mut rng, &p[0], &params).await?;
                 self.timeouts
                     .note_hop_completed(0, self.runtime.now() - start_time, n_hops == 0);
+                // If we fail after this point, we can't tell whether it's
+                // the fault of the guard or some later relay.
+                guard_status.pending(GuardStatus::Indeterminate);
                 n_hops_built.fetch_add(1, Ordering::SeqCst);
                 let mut hop_num = 1;
                 for relay in p[1..].iter() {
@@ -209,6 +226,7 @@ impl<
         path: OwnedPath,
         params: &CircParameters,
         rng: RNG,
+        guard_status: Arc<GuardStatusHandle>,
     ) -> Result<C> {
         let action = Action::BuildCircuit { length: path.len() };
         let (timeout, abandon_timeout) = self.timeouts.timeouts(&action);
@@ -222,8 +240,14 @@ impl<
         let self_clone = Arc::clone(self);
         let params = params.clone();
 
-        let circuit_future =
-            self_clone.build_notimeout(path, params, start_time, Arc::clone(&hops_built), rng);
+        let circuit_future = self_clone.build_notimeout(
+            path,
+            params,
+            start_time,
+            Arc::clone(&hops_built),
+            rng,
+            guard_status,
+        );
 
         match double_timeout(&self.runtime, circuit_future, timeout, abandon_timeout).await {
             Ok(circuit) => Ok(circuit),
@@ -308,8 +332,11 @@ impl<R: Runtime> CircuitBuilder<R> {
         path: OwnedPath,
         params: &CircParameters,
         rng: RNG,
+        guard_status: Arc<GuardStatusHandle>,
     ) -> Result<Arc<ClientCirc>> {
-        self.builder.build_owned(path, params, rng).await
+        self.builder
+            .build_owned(path, params, rng, guard_status)
+            .await
     }
 
     /// Try to construct a new circuit from a given path, using appropriate
@@ -326,7 +353,8 @@ impl<R: Runtime> CircuitBuilder<R> {
     ) -> Result<Arc<ClientCirc>> {
         let rng = StdRng::from_rng(rng).expect("couldn't construct temporary rng");
         let owned = path.try_into()?;
-        self.build_owned(owned, params, rng).await
+        self.build_owned(owned, params, rng, Arc::new(None.into()))
+            .await
     }
 
     /// Return the path configuration used by this builder.
@@ -400,6 +428,11 @@ mod test {
     use tor_llcrypto::pk::ed25519::Ed25519Identity;
     use tor_rtcompat::{test_with_all_runtimes, SleepProvider};
 
+    /// Make a new nonfunctional `Arc<GuardStatusHandle>`
+    fn gs() -> Arc<GuardStatusHandle> {
+        Arc::new(None.into())
+    }
+
     #[test]
     #[ignore]
     // TODO: re-enable this test after arti#149 is fixed. For now, it
@@ -624,7 +657,9 @@ mod test {
                 StdRng::from_rng(rand::thread_rng()).expect("couldn't construct temporary rng");
             let params = CircParameters::default();
 
-            let outcome = rt.wait_for(builder.build_owned(p1, &params, rng)).await;
+            let outcome = rt
+                .wait_for(builder.build_owned(p1, &params, rng, gs()))
+                .await;
 
             let circ = outcome.unwrap().into_inner().unwrap();
             assert!(circ.onehop);
@@ -633,7 +668,7 @@ mod test {
             let rng =
                 StdRng::from_rng(rand::thread_rng()).expect("couldn't construct temporary rng");
             let outcome = rt
-                .wait_for(builder.build_owned(p2.clone(), &params, rng))
+                .wait_for(builder.build_owned(p2.clone(), &params, rng, gs()))
                 .await;
             let circ = outcome.unwrap().into_inner().unwrap();
             assert!(!circ.onehop);
@@ -661,7 +696,7 @@ mod test {
             let rng =
                 StdRng::from_rng(rand::thread_rng()).expect("couldn't construct temporary rng");
             let outcome = rt
-                .wait_for(builder.build_owned(p2.clone(), &params, rng))
+                .wait_for(builder.build_owned(p2.clone(), &params, rng, gs()))
                 .await;
             assert!(outcome.is_err());
 
@@ -678,7 +713,7 @@ mod test {
             let rng =
                 StdRng::from_rng(rand::thread_rng()).expect("couldn't construct temporary rng");
             let outcome = rt
-                .wait_for(builder.build_owned(p2.clone(), &params, rng))
+                .wait_for(builder.build_owned(p2.clone(), &params, rng, gs()))
                 .await;
             assert!(outcome.is_err());
             // "wait" a while longer to make sure that we eventually
diff --git a/crates/tor-circmgr/src/build/guardstatus.rs b/crates/tor-circmgr/src/build/guardstatus.rs
new file mode 100644
index 000000000..e7cf3865f
--- /dev/null
+++ b/crates/tor-circmgr/src/build/guardstatus.rs
@@ -0,0 +1,57 @@
+//! Helpers for reporting information about guard status to the guard manager.
+
+use std::sync::Mutex;
+use tor_guardmgr::{GuardMonitor, GuardStatus};
+
+/// A shareable object that we can use to report guard status to the guard
+/// manager.
+pub(crate) struct GuardStatusHandle {
+    /// An inner guard monitor.
+    ///
+    /// If this is None, then either we aren't using the guard
+    /// manager, or we already reported a status to it.
+    mon: Mutex<Option<GuardMonitor>>,
+}
+
+impl From<Option<GuardMonitor>> for GuardStatusHandle {
+    fn from(mon: Option<GuardMonitor>) -> Self {
+        Self {
+            mon: Mutex::new(mon),
+        }
+    }
+}
+
+impl GuardStatusHandle {
+    /// Finalize this guard status handle, and report its pending status
+    /// to the guard manager.
+    ///
+    /// Future calls to methods on this object will do nothing.
+    pub(crate) fn commit(&self) {
+        let mut mon = self.mon.lock().expect("Poisoned lock");
+        if let Some(mon) = mon.take() {
+            mon.commit();
+        }
+    }
+
+    /// Change the pending status on this guard.
+    ///
+    /// Note that the pending status will not be sent to the guard manager
+    /// immediately: only committing this GuardStatusHandle, or dropping it,
+    /// will do so.
+    pub(crate) fn pending(&self, status: GuardStatus) {
+        let mut mon = self.mon.lock().expect("Poisoned lock");
+        if let Some(mon) = mon.as_mut() {
+            mon.pending_status(status);
+        }
+    }
+
+    /// Report the provided status to the guard manager.
+    ///
+    /// Future cals to methods on this object will do nothing.
+    pub(crate) fn report(&self, status: GuardStatus) {
+        let mut mon = self.mon.lock().expect("Poisoned lock");
+        if let Some(mon) = mon.take() {
+            mon.report(status);
+        }
+    }
+}
diff --git a/crates/tor-circmgr/src/impls.rs b/crates/tor-circmgr/src/impls.rs
index 869dadc52..014855e66 100644
--- a/crates/tor-circmgr/src/impls.rs
+++ b/crates/tor-circmgr/src/impls.rs
@@ -68,6 +68,8 @@ impl<R: Runtime> crate::mgr::AbstractCircBuilder for crate::build::CircuitBuilde
     }
 
     async fn build_circuit(&self, plan: Plan) -> Result<(SupportedCircUsage, Arc<ClientCirc>)> {
+        use crate::build::GuardStatusHandle;
+        use tor_guardmgr::GuardStatus;
         let Plan {
             final_spec,
             path,
@@ -78,6 +80,9 @@ impl<R: Runtime> crate::mgr::AbstractCircBuilder for crate::build::CircuitBuilde
         let rng = StdRng::from_rng(rand::thread_rng()).expect("couldn't construct temporary rng");
 
         let guard_usable: OptionFuture<_> = guard_usable.into();
+        let guard_status: Arc<GuardStatusHandle> = Arc::new(guard_status.into());
+
+        guard_status.pending(GuardStatus::AttemptAbandoned);
 
         // TODO: We may want to lower the logic for handling
         // guard_status and guard_usable into build.rs, so that they
@@ -85,15 +90,15 @@ impl<R: Runtime> crate::mgr::AbstractCircBuilder for crate::build::CircuitBuilde
         //
         // This will probably require a different API for circuit
         // construction.
-        match self.build_owned(path, &params, rng).await {
+        match self
+            .build_owned(path, &params, rng, Arc::clone(&guard_status))
+            .await
+        {
             Ok(circuit) => {
-                if let Some(mon) = guard_status {
-                    // Report success to the guard manager, so it knows that
-                    // this guard is reachable.
-                    // TODO: We may someday want to report two-hop circuits
-                    // as successful. But not today.
-                    mon.succeeded();
-                }
+                // Report success to the guard manager, so it knows that
+                // this guard is reachable.
+                guard_status.report(GuardStatus::Success);
+
                 // We have to wait for the guard manager to tell us whether
                 // this guard is actually _usable_ or not.  Possibly,
                 // it is a speculative guard that we're only trying out
@@ -108,11 +113,11 @@ impl<R: Runtime> crate::mgr::AbstractCircBuilder for crate::build::CircuitBuilde
                 Ok((final_spec, circuit))
             }
             Err(e) => {
-                if let Some(mon) = guard_status {
-                    // Report failure to the guard manager, so it knows
-                    // not to use this guard in the future.
-                    mon.failed();
-                }
+                // The attempt failed; the builder should have set the
+                // pending status on the guard to some value which will
+                // tell the guard manager whether to blame the guard or not.
+                guard_status.commit();
+
                 Err(e)
             }
         }
diff --git a/crates/tor-guardmgr/src/lib.rs b/crates/tor-guardmgr/src/lib.rs
index c383d79ed..04a9cc794 100644
--- a/crates/tor-guardmgr/src/lib.rs
+++ b/crates/tor-guardmgr/src/lib.rs
@@ -151,10 +151,10 @@ mod sample;
 mod util;
 
 pub use filter::GuardFilter;
-pub use pending::{GuardMonitor, GuardUsable};
+pub use pending::{GuardMonitor, GuardStatus, GuardUsable};
 pub use sample::PickGuardError;
 
-use pending::{GuardStatus, PendingRequest, RequestId};
+use pending::{PendingRequest, RequestId};
 use sample::GuardSet;
 
 /// A "guard manager" that selects and remembers a persistent set of
diff --git a/crates/tor-guardmgr/src/pending.rs b/crates/tor-guardmgr/src/pending.rs
index 907240308..fbdce2ce7 100644
--- a/crates/tor-guardmgr/src/pending.rs
+++ b/crates/tor-guardmgr/src/pending.rs
@@ -164,6 +164,12 @@ impl GuardMonitor {
             .expect("GuardMonitor initialized with no sender")
             .send(daemon::Msg::Status(self.id, msg));
     }
+
+    /// Report the pending message for his guard, whatever it is.
+    pub fn commit(self) {
+        let status = self.pending_status;
+        self.report(status);
+    }
 }
 
 impl Drop for GuardMonitor {