From 3f2e164bc5efe6356286f9df6655bd1435d9ef21 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 8 Jun 2022 14:53:05 +0100 Subject: [PATCH] tor-proto: padding: Test padding timer distribution --- Cargo.lock | 110 +++++++++++++++++++++ crates/tor-proto/Cargo.toml | 2 + crates/tor-proto/src/channel/padding.rs | 123 ++++++++++++++++++++++++ 3 files changed, 235 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index c988f85f1..b5e3cacdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,6 +63,15 @@ version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "arrayref" version = "0.3.6" @@ -1801,6 +1810,15 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +[[package]] +name = "matrixmultiply" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84" +dependencies = [ + "rawpointer", +] + [[package]] name = "memchr" version = "2.5.0" @@ -1898,6 +1916,35 @@ dependencies = [ "ws2_32-sys", ] +[[package]] +name = "nalgebra" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462fffe4002f4f2e1f6a9dcf12cc1a6fc0e15989014efc02a941d3e0f5dc2120" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand 0.8.5", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "native-tls" version = "0.2.10" @@ -1995,6 +2042,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -2016,6 +2072,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +dependencies = [ + "autocfg 1.1.0", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -2478,6 +2545,16 @@ dependencies = [ "getrandom 0.2.6", ] +[[package]] +name = "rand_distr" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -2487,6 +2564,12 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "redox_syscall" version = "0.2.13" @@ -2925,6 +3008,18 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f054c6c1a6e95179d6f23ed974060dcefb2d9388bb7256900badad682c499de4" +[[package]] +name = "simba" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e82063457853d00243beda9952e910b82593e4b07ae9f721b9278a99a0d3d5c" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", +] + [[package]] name = "simple_asn1" version = "0.6.1" @@ -2986,6 +3081,19 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "statrs" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05bdbb8e4e78216a85785a85d3ec3183144f98d0097b9281802c019bb07a6f05" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand 0.8.5", +] + [[package]] name = "strsim" version = "0.8.0" @@ -3721,9 +3829,11 @@ dependencies = [ "hex-literal", "hkdf", "hmac", + "itertools", "pin-project", "rand 0.8.5", "rand_core 0.6.3", + "statrs", "subtle", "thiserror", "tokio", diff --git a/crates/tor-proto/Cargo.toml b/crates/tor-proto/Cargo.toml index 09e206f31..7cd50b40d 100644 --- a/crates/tor-proto/Cargo.toml +++ b/crates/tor-proto/Cargo.toml @@ -54,5 +54,7 @@ zeroize = "1" [dev-dependencies] hex = "0.4" hex-literal = "0.3" +itertools = "0.10.1" +statrs = "0.15.0" tokio-crate = { package = "tokio", version = "1.7", features = ["full"] } tor-rtcompat = { path = "../tor-rtcompat", version = "0.4.0", features = ["tokio", "native-tls"] } diff --git a/crates/tor-proto/src/channel/padding.rs b/crates/tor-proto/src/channel/padding.rs index ddf6ae832..ed4eb7cf3 100644 --- a/crates/tor-proto/src/channel/padding.rs +++ b/crates/tor-proto/src/channel/padding.rs @@ -300,6 +300,8 @@ mod test { use super::*; use futures::future::ready; use futures::select_biased; + use itertools::{izip, Itertools}; + use statrs::distribution::ContinuousCDF; use tokio::pin; use tokio_crate as tokio; use tor_rtcompat::*; @@ -400,4 +402,125 @@ mod test { assert_eq! { false, timer.is_enabled() } }); } + + #[test] + fn timeout_distribution() { + // Test that the distribution of padding intervals is as we expect. This is not so + // straightforward. We need to deal with true randomness (since we can't plumb a + // testing RNG into the padding timer, and perhaps don't even *want* to make that a + // mockable interface). Measuring a distribution of random variables involves some + // statistics. + + // The overall approach is: + // Use a fixed (but nontrivial) low to high range + // Sample N times into n equal sized buckes + // Calculate the expected number of samples in each bucket + // Do a chi^2 test. If it doesn't spot a potential difference, declare OK. + // If the chi^2 test does definitely declare a difference, declare failure. + // Otherwise increase N and go round again. + // + // This allows most runs to be fast without having an appreciable possibility of a + // false test failure and while being able to detect even quite small deviations. + + // Notation from + // https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test#Calculating_the_test-statistic + // I haven't done a formal power calculation but empirically + // this detects the following most of the time: + // deviation of the CDF power from B^2 to B^1.98 + // wrong minimum value by 25ms out of 12s, low_ms = min + 25 + // wrong maximum value by 10ms out of 12s, high_ms = max -1 - 10 + + #[allow(non_snake_case)] + let mut N = 100_0000; + + #[allow(non_upper_case_globals)] + const n: usize = 100; + + const P_GOOD: f64 = 0.05; // Investigate further 5% of times (if all is actually well) + const P_BAD: f64 = 1e-12; + + loop { + eprintln!("padding distribution test, n={} N={}", n, N); + + let min = 5000; + let max = 17000; // Exclusive + assert_eq!(0, (max - min) % (n as u32)); // buckets must match up to integer boundaries + + let cdf = (0..=n) + .into_iter() + .map(|bi| { + let b = (bi as f64) / (n as f64); + // expected distribution: + // with B = bi / n + // P(X) < B == B + // P(max(X1,X1)) < B = B^2 + b.powi(2) + }) + .collect_vec(); + + let pdf = cdf + .iter() + .cloned() + .tuple_windows() + .map(|(p, q)| q - p) + .collect_vec(); + let exp = pdf.iter().cloned().map(|p| p * f64::from(N)).collect_vec(); + + // chi-squared test only valid if every cell expects at lesat 5 + assert!(exp.iter().cloned().all(|ei| ei >= 5.)); + + let mut obs = [0_u32; n]; + + let params = Parameters { + low_ms: min, + high_ms: max - 1, // convert exclusive to inclusive + } + .prepare(); + + for _ in 0..N { + let xx = params.select_timeout(); + let ms = xx.as_millis(); + let ms = u32::try_from(ms).unwrap(); + assert!(ms >= min); + assert!(ms < max); + // Integer arithmetic ensures that we classify exactly + let bi = ((ms - min) * (n as u32)) / (max - min); + obs[bi as usize] += 1; + } + + let chi2 = izip!(&obs, &exp) + .map(|(&oi, &ei)| (f64::from(oi) - ei).powi(2) / ei) + .sum::(); + + // n degrees of freedom, one-tailed test + // (since distro parameters are all fixed, not estimated from the sample) + let chi2_distr = statrs::distribution::ChiSquared::new(n as _).unwrap(); + + // probability of good code generating a result at least this bad + let p = 1. - chi2_distr.cdf(chi2); + + eprintln!( + "padding distribution test, n={} N={} chi2={} p={}", + n, N, chi2, p + ); + + if p >= P_GOOD { + break; + } + + for (i, (&oi, &ei)) in izip!(&obs, &exp).enumerate() { + eprintln!("bi={:4} OI={:4} EI={}", i, oi, ei); + } + + if p < P_BAD { + panic!("distribution is wrong (p < {:e})", P_BAD); + } + + // This is statistically rather cheaty: we keep trying until we get a definite + // answer! But we radically increase the power of the test each time. + // If the distribution is really wrong, this test ought to find it soon enough, + // especially since we run this repeatedly in CI. + N *= 10; + } + } }