circmgr: use AllGuardsDown to retry better

If all guards are down and they won't be retriable for a while, try
waiting that long to get whichever guard _is_ retriable.

Additionally, if we are making multiple circuit plans in parallel,
only report our planning as having failed if we failed at making
_all_ the plans.  Previously we treated any failure as fatal for the
other plans, which could lead to trouble in the case when guards
were all down or pending.

Part of #407.
This commit is contained in:
Nick Mathewson 2022-03-16 14:15:29 -04:00
parent eed1f06662
commit 451a53a5bf
2 changed files with 36 additions and 8 deletions

View File

@ -146,7 +146,7 @@ impl HasKind for Error {
E::Protocol(e) => e.kind(),
E::State(e) => e.kind(),
E::GuardMgr(e) => e.kind(),
E::Guard(_) => EK::NoPath,
E::Guard(e) => e.kind(),
E::ExpiredConsensus => EK::DirectoryExpired,
E::Spawn { cause, .. } => cause.kind(),
}

View File

@ -780,11 +780,27 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> {
}
}
Err(e) => {
// We couldn't pick the action! This is unusual; wait
// a little while before we try again.
// We couldn't pick the action!
info!("Couldn't pick action for circuit attempt {}: {}", n, &e);
let small_delay = Duration::from_millis(50);
let wait_for_action = match &e {
Error::Guard(tor_guardmgr::PickGuardError::AllGuardsDown {
retry_at: Some(instant),
}) => {
// If we failed because all guards are down, that's fine: we just wait until
// the next guard is retriable.
instant.saturating_duration_since(self.runtime.now()) + small_delay
}
_ => {
// Any other errors are pretty unusual; wait a little while, then try again.
small_delay
}
};
retry_err.push(e);
let wait_for_action = Duration::from_millis(50);
if remaining < wait_for_action {
// We can't wait long enough. Call this failed now.
break;
}
self.runtime
.sleep(std::cmp::min(remaining, wait_for_action))
.await;
@ -869,12 +885,24 @@ impl<B: AbstractCircBuilder + 'static, R: Runtime> AbstractCircMgr<B, R> {
// Okay, we need to launch circuits here.
let parallelism = std::cmp::max(1, self.builder.launch_parallelism(usage));
let mut plans = Vec::new();
let mut errs = Vec::new();
for _ in 0..parallelism {
let (pending, plan) = self.plan_by_usage(dir, usage)?;
list.add_pending_circ(pending);
plans.push(plan);
match self.plan_by_usage(dir, usage) {
Ok((pending, plan)) => {
list.add_pending_circ(pending);
plans.push(plan);
}
Err(e) => errs.push(e),
}
}
if !plans.is_empty() {
Ok(Action::Build(plans))
} else if let Some(last_err) = errs.pop() {
Err(last_err)
} else {
// we didn't even try to plan anything!
Err(internal!("no plans were built, but no errors were found").into())
}
Ok(Action::Build(plans))
}
/// Execute an action returned by pick-action, and return the