From cd2432474c8b274ed5de8ed3672b1d1a5fbd22e4 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 16:09:34 +0100 Subject: [PATCH 01/20] tor-config sources: Remove some unneeded .to_string() from tests --- crates/tor-config/src/sources.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 0a36364f4..aecc759b2 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -233,7 +233,7 @@ world = \"nonsense\" let c = load_nodefaults(&files, Default::default()).unwrap(); assert!(c.get_string("hello.friends").is_err()); - assert_eq!(c.get_string("hello.world").unwrap(), "nonsense".to_string()); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); } #[test] @@ -247,9 +247,9 @@ world = \"nonsense\" let v2 = vec!["other.var=present".to_string()]; let c = load_nodefaults(&v, &v2).unwrap(); - assert_eq!(c.get_string("hello.friends").unwrap(), "4242".to_string()); - assert_eq!(c.get_string("hello.world").unwrap(), "nonsense".to_string()); - assert_eq!(c.get_string("other.var").unwrap(), "present".to_string()); + assert_eq!(c.get_string("hello.friends").unwrap(), "4242"); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); + assert_eq!(c.get_string("other.var").unwrap(), "present"); } #[test] From 25b5a5395352cf5b916994ec006841b3d3abc956 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 12:52:22 +0100 Subject: [PATCH 02/20] config: Do process hardening on reconfigure even if not watching These blocks were in the wrong order. Previously, if you tried to turn on process hardening in the config and then reloaded rather than restarting, it wouldn't take effect. --- crates/arti/semver.md | 2 +- crates/arti/src/watch_cfg.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/arti/semver.md b/crates/arti/semver.md index 8614f1725..277b2f093 100644 --- a/crates/arti/semver.md +++ b/crates/arti/semver.md @@ -1,4 +1,4 @@ BREAKING: Most functions are now only available behind an experimental-api feature. BREAKING: main_main() now takes a set of command-line arguments. - +FIXED: Bugfixes to configuration auto-reload arrangements. diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 57bdac9ee..e078fcdec 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -98,15 +98,15 @@ fn reconfigure( } client.reconfigure(&client_config, Reconfigure::WarnOnFailures)?; - if !config.application().watch_configuration { - // Stop watching for configuration changes. - return Ok(true); - } if !config.application().permit_debugging { #[cfg(feature = "harden")] crate::process::enable_process_hardening()?; } + if !config.application().watch_configuration { + // Stop watching for configuration changes. + return Ok(true); + } Ok(false) } From 8e86599df4c396c4cced769ee9c5303d69406e66 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 13:53:31 +0100 Subject: [PATCH 03/20] config watch: Make the mpsc channel part of FileWatcher The previous approach (inherited from the API of notify) was kind of odd. Soon we are going to want to be able to drop the watcher and replace it. That really wants the same object to contain all the things that ought to be dropped together. (notify's watchers stop generating events and give EOF on the channel, when dropped.) --- crates/arti/src/watch_cfg.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index e078fcdec..73b136c8e 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -27,8 +27,7 @@ pub(crate) fn watch_for_config_changes( original: ArtiConfig, client: TorClient, ) -> anyhow::Result<()> { - let (tx, rx) = std_channel(); - let mut watcher = FileWatcher::new(tx, POLL_INTERVAL)?; + let mut watcher = FileWatcher::new(POLL_INTERVAL)?; for file in sources.files() { watcher.watch_file(file)?; @@ -39,7 +38,7 @@ pub(crate) fn watch_for_config_changes( // `arti` application, we probably don't want to have this thread own // the FileWatcher. debug!("Waiting for FS events"); - while let Ok(event) = rx.recv() { + while let Ok(event) = watcher.rx().recv() { if !watcher.event_matched(&event) { // NOTE: Sadly, it's not safe to log in this case. If the user // has put a configuration file and a logfile in the same @@ -47,7 +46,7 @@ pub(crate) fn watch_for_config_changes( // every time we log, and fill up the filesystem. continue; } - while let Ok(_ignore) = rx.try_recv() { + while let Ok(_ignore) = watcher.rx().try_recv() { // Discard other events, so that we only reload once. // // We can afford to treat both error cases from try_recv [Empty @@ -114,6 +113,8 @@ fn reconfigure( /// directories in order to learn about changes in some specific files that they /// contain. /// +/// The wrapper contains the `Watcher` and also the channel for receiving events. +/// /// The `Watcher` implementation in `notify` has a weakness: it gives sensible /// results when you're watching directories, but if you start watching /// non-directory files, it won't notice when those files get replaced. That's @@ -129,6 +130,8 @@ fn reconfigure( /// to mess around with `std::sync::mpsc` and filter out the events they want /// using `FileWatcher::event_matched`. struct FileWatcher { + /// The channel we receive events on + rx: std::sync::mpsc::Receiver, /// An underlying `notify` watcher that tells us about directory changes. watcher: notify::RecommendedWatcher, /// The list of directories that we're currently watching. @@ -139,18 +142,22 @@ struct FileWatcher { impl FileWatcher { /// Like `notify::watcher`, but create a FileWatcher instead. - fn new( - tx: std::sync::mpsc::Sender, - interval: Duration, - ) -> anyhow::Result { + fn new(interval: Duration) -> anyhow::Result { + let (tx, rx) = std_channel(); let watcher = notify::watcher(tx, interval)?; Ok(Self { + rx, watcher, watching_dirs: HashSet::new(), watching_files: HashSet::new(), }) } + /// Access the channel - use for receiving events + fn rx(&self) -> &std::sync::mpsc::Receiver { + &self.rx + } + /// Watch a single file (not a directory). Does nothing if we're already watching that file. fn watch_file>(&mut self, path: P) -> anyhow::Result<()> { // Make the path absolute (without necessarily making it canonical). From 0f9bf12a7f5816e1517c91e3fcca081cfd6a5f6d Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 14:12:04 +0100 Subject: [PATCH 04/20] config watch: Break out prepare_watcher This is going to become more complicated, and gain another call site. --- crates/arti/src/watch_cfg.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 73b136c8e..ba501f27c 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -17,6 +17,15 @@ use crate::{ArtiCombinedConfig, ArtiConfig}; /// How long (worst case) should we take to learn about configuration changes? const POLL_INTERVAL: Duration = Duration::from_secs(10); +/// Prepare a watcher for the configuration files +fn prepare_watcher(sources: &ConfigurationSources) -> anyhow::Result { + let mut watcher = FileWatcher::new(POLL_INTERVAL)?; + for file in sources.files() { + watcher.watch_file(file)?; + } + Ok(watcher) +} + /// Launch a thread to watch our configuration files. /// /// Whenever one or more files in `files` changes, try to reload our @@ -27,11 +36,7 @@ pub(crate) fn watch_for_config_changes( original: ArtiConfig, client: TorClient, ) -> anyhow::Result<()> { - let mut watcher = FileWatcher::new(POLL_INTERVAL)?; - - for file in sources.files() { - watcher.watch_file(file)?; - } + let watcher = prepare_watcher(&sources)?; std::thread::spawn(move || { // TODO: If someday we make this facility available outside of the From a7bb3a73b4dfb0e8e0f36994de3d31389d4997b9 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 14:19:00 +0100 Subject: [PATCH 05/20] config watch: Rescan once on startup That way if the config changes after we read it initially, but before we set up the watcher, we will still pick it up. Fixes #544 --- crates/arti/src/watch_cfg.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index ba501f27c..04968cb51 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -1,6 +1,7 @@ //! Code to watch configuration files for any changes. use std::collections::HashSet; +use std::iter; use std::path::{Path, PathBuf}; use std::sync::mpsc::channel as std_channel; use std::time::Duration; @@ -17,7 +18,7 @@ use crate::{ArtiCombinedConfig, ArtiConfig}; /// How long (worst case) should we take to learn about configuration changes? const POLL_INTERVAL: Duration = Duration::from_secs(10); -/// Prepare a watcher for the configuration files +/// Prepare a watcher for the configuration files (config must be read after this point) fn prepare_watcher(sources: &ConfigurationSources) -> anyhow::Result { let mut watcher = FileWatcher::new(POLL_INTERVAL)?; for file in sources.files() { @@ -38,12 +39,16 @@ pub(crate) fn watch_for_config_changes( ) -> anyhow::Result<()> { let watcher = prepare_watcher(&sources)?; + // If watching, we must reload the config once right away, because + // we have set up the watcher *after* loading it the first time. + let mut first_reload = iter::once(notify::DebouncedEvent::Rescan); + std::thread::spawn(move || { // TODO: If someday we make this facility available outside of the // `arti` application, we probably don't want to have this thread own // the FileWatcher. - debug!("Waiting for FS events"); - while let Ok(event) = watcher.rx().recv() { + debug!("Entering FS event loop"); + while let Some(event) = first_reload.next().or_else(|| watcher.rx().recv().ok()) { if !watcher.event_matched(&event) { // NOTE: Sadly, it's not safe to log in this case. If the user // has put a configuration file and a logfile in the same From 863c66159b395a127a47658ff4d2b8a8d521f3fb Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 14:26:57 +0100 Subject: [PATCH 06/20] config watch: Re-establish watcher on each iteration This is going to be needed in a moment. --- crates/arti/src/watch_cfg.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 04968cb51..32b287a17 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -11,7 +11,7 @@ use arti_client::TorClient; use notify::Watcher; use tor_config::ConfigurationSources; use tor_rtcompat::Runtime; -use tracing::{debug, info, warn}; +use tracing::{debug, error, info, warn}; use crate::{ArtiCombinedConfig, ArtiConfig}; @@ -37,7 +37,7 @@ pub(crate) fn watch_for_config_changes( original: ArtiConfig, client: TorClient, ) -> anyhow::Result<()> { - let watcher = prepare_watcher(&sources)?; + let mut watcher = prepare_watcher(&sources)?; // If watching, we must reload the config once right away, because // we have set up the watcher *after* loading it the first time. @@ -65,6 +65,18 @@ pub(crate) fn watch_for_config_changes( // call recv() in the outer loop. } debug!("FS event {:?}: reloading configuration.", event); + + watcher = match prepare_watcher(&sources) { + Ok(y) => y, + Err(e) => { + error!( + "FS watch: failed to rescan config and re-establish watch: {}", + e + ); + break; + } + }; + match reconfigure(&sources, &original, &client) { Ok(exit) => { info!("Successfully reloaded configuration."); From 587fa5f4189033bdd67da6c8dd94686e80fe6032 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 17:22:09 +0100 Subject: [PATCH 07/20] config watch: Provide watch_dir No call site just yet; that will come shortly. This requires a bit of reorganisation first. --- crates/arti/src/watch_cfg.rs | 45 +++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 32b287a17..f3dbc587e 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -180,8 +180,28 @@ impl FileWatcher { &self.rx } - /// Watch a single file (not a directory). Does nothing if we're already watching that file. + /// Watch a single file (not a directory). + /// + /// Idempotent: does nothing if we're already watching that file. fn watch_file>(&mut self, path: P) -> anyhow::Result<()> { + self.watch_just_parents(path.as_ref())?; + Ok(()) + } + + /// Watch a directory (but not any subdirs). + /// + /// Idempotent. + fn watch_dir>(&mut self, path: P) -> anyhow::Result<()> { + let path = self.watch_just_parents(path.as_ref())?; + self.watch_just_abs_dir(&path) + } + + /// Watch the parents of `path`. + /// + /// Returns the absolute path of `path`. + /// + /// Idempotent. + fn watch_just_parents(&mut self, path: &Path) -> anyhow::Result { // Make the path absolute (without necessarily making it canonical). // // We do this because `notify` reports all of its events in terms of @@ -189,7 +209,7 @@ impl FileWatcher { // relative path, we'd get reports about the absolute paths of the files // in that directory. let cwd = std::env::current_dir()?; - let path = cwd.join(path.as_ref()); + let path = cwd.join(path); debug_assert!(path.is_absolute()); // See what directory we should watch in order to watch this file. @@ -202,18 +222,27 @@ impl FileWatcher { None => path.as_ref(), }; - // Start watching this directory, if we're not already watching it. + self.watch_just_abs_dir(watch_target)?; + + // Note this file as one that we're watching, so that we can see changes + // to it later on. + self.watching_files.insert(path.clone()); + + Ok(path) + } + + /// Watch just this (absolute) directory. + /// + /// Does not watch any of the parents. + /// + /// Idempotent. + fn watch_just_abs_dir(&mut self, watch_target: &Path) -> anyhow::Result<()> { if !self.watching_dirs.contains(watch_target) { self.watcher .watch(watch_target, notify::RecursiveMode::NonRecursive)?; self.watching_dirs.insert(watch_target.into()); } - - // Note this file as one that we're watching, so that we can see changes - // to it later on. - self.watching_files.insert(path); - Ok(()) } From 2fa75be6606301d59ef5c48f5b7abdaefefd620e Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Thu, 18 Aug 2022 15:16:24 +0100 Subject: [PATCH 08/20] tor-basic-utils: Provide IoErrorExt is_not_a_directory() We're going to want this functionality, which isn't in the stable stdlib. --- Cargo.lock | 1 + crates/tor-basic-utils/Cargo.toml | 3 +++ crates/tor-basic-utils/semver.md | 1 + crates/tor-basic-utils/src/lib.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 32 insertions(+) create mode 100644 crates/tor-basic-utils/semver.md diff --git a/Cargo.lock b/Cargo.lock index 8df231f42..4e71c84cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3395,6 +3395,7 @@ dependencies = [ "futures", "futures-await-test", "hex", + "libc", "pin-project", "postage", "rand 0.8.5", diff --git a/crates/tor-basic-utils/Cargo.toml b/crates/tor-basic-utils/Cargo.toml index eca4ed724..ee994b24f 100644 --- a/crates/tor-basic-utils/Cargo.toml +++ b/crates/tor-basic-utils/Cargo.toml @@ -22,6 +22,9 @@ rand_chacha = "0.3" thiserror = "1" void = "1" +[target.'cfg(unix)'.dependencies] +libc = { version = "0.2", default-features = false } + [dev-dependencies] derive_more = "0.99" educe = "0.4.6" diff --git a/crates/tor-basic-utils/semver.md b/crates/tor-basic-utils/semver.md new file mode 100644 index 000000000..c69e79300 --- /dev/null +++ b/crates/tor-basic-utils/semver.md @@ -0,0 +1 @@ +ADDED: IoErrorExt, with fn is_not_a_directory() diff --git a/crates/tor-basic-utils/src/lib.rs b/crates/tor-basic-utils/src/lib.rs index ee5fe9fe8..bf78f9dc0 100644 --- a/crates/tor-basic-utils/src/lib.rs +++ b/crates/tor-basic-utils/src/lib.rs @@ -83,6 +83,33 @@ pub fn skip_fmt(_: &T, f: &mut fmt::Formatter) -> fmt::Result { // ---------------------------------------------------------------------- +/// Implementation of `ErrorKind::NotADirectory` that doesn't require Nightly +pub trait IoErrorExt { + /// Is this `io::ErrorKind::NotADirectory` ? + fn is_not_a_directory(&self) -> bool; +} +impl IoErrorExt for std::io::Error { + fn is_not_a_directory(&self) -> bool { + self.raw_os_error() + == Some( + #[cfg(target_family = "unix")] + libc::ENOTDIR, + #[cfg(target_family = "windows")] + { + /// Obtained from Rust stdlib source code + /// See also: + /// + /// (although the documentation is anaemic) and + /// + const ERROR_DIRECTORY: i32 = 267; + ERROR_DIRECTORY + }, + ) + } +} + +// ---------------------------------------------------------------------- + /// Define an "accessor trait", which describes structs that have fields of certain types /// /// This can be useful if a large struct, living high up in the dependency graph, From 7d088cf8df3a2da569dae98e04cf9e1248d8d91d Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 14:44:05 +0100 Subject: [PATCH 09/20] config sources: Introduce scan() and FoundConfigFiles We're going to need to do config file reading in two phases. Right now this isn't actually necessary, because the set of files is fixed since we don't support dynamically scanning directories. But the new API will be needed in a moment. Code motion and API changes, but no overall functional change. Review with `git show -b` may be helpful. The new API also provides for dealing with directories, but right now that doesn't happen. --- crates/arti/src/watch_cfg.rs | 36 +++++-- crates/tor-config/semver.md | 1 + crates/tor-config/src/sources.rs | 179 +++++++++++++++++++++++-------- 3 files changed, 160 insertions(+), 56 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index f3dbc587e..3b526faaf 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -9,7 +9,7 @@ use std::time::Duration; use arti_client::config::Reconfigure; use arti_client::TorClient; use notify::Watcher; -use tor_config::ConfigurationSources; +use tor_config::{sources::FoundConfigFiles, ConfigurationSources}; use tor_rtcompat::Runtime; use tracing::{debug, error, info, warn}; @@ -18,13 +18,23 @@ use crate::{ArtiCombinedConfig, ArtiConfig}; /// How long (worst case) should we take to learn about configuration changes? const POLL_INTERVAL: Duration = Duration::from_secs(10); -/// Prepare a watcher for the configuration files (config must be read after this point) -fn prepare_watcher(sources: &ConfigurationSources) -> anyhow::Result { +/// Find the configuration files and prepare a watcher +/// +/// For the watching to be reliably effective (race-free), the config must be read +/// *after* this point, using the returned `FoundConfigFiles`. +fn prepare_watcher( + sources: &ConfigurationSources, +) -> anyhow::Result<(FileWatcher, FoundConfigFiles)> { let mut watcher = FileWatcher::new(POLL_INTERVAL)?; - for file in sources.files() { - watcher.watch_file(file)?; + let files = sources.scan()?; + for file in files.iter() { + if file.was_dir() { + watcher.watch_dir(file)?; + } else { + watcher.watch_file(file)?; + } } - Ok(watcher) + Ok((watcher, files)) } /// Launch a thread to watch our configuration files. @@ -37,10 +47,13 @@ pub(crate) fn watch_for_config_changes( original: ArtiConfig, client: TorClient, ) -> anyhow::Result<()> { - let mut watcher = prepare_watcher(&sources)?; + let (mut watcher, found_files) = prepare_watcher(&sources)?; // If watching, we must reload the config once right away, because // we have set up the watcher *after* loading it the first time. + // + // That means we safely drop the found_files without races, since we're going to rescan. + drop(found_files); let mut first_reload = iter::once(notify::DebouncedEvent::Rescan); std::thread::spawn(move || { @@ -66,7 +79,7 @@ pub(crate) fn watch_for_config_changes( } debug!("FS event {:?}: reloading configuration.", event); - watcher = match prepare_watcher(&sources) { + let (new_watcher, found_files) = match prepare_watcher(&sources) { Ok(y) => y, Err(e) => { error!( @@ -76,8 +89,9 @@ pub(crate) fn watch_for_config_changes( break; } }; + watcher = new_watcher; - match reconfigure(&sources, &original, &client) { + match reconfigure(found_files, &original, &client) { Ok(exit) => { info!("Successfully reloaded configuration."); if exit { @@ -102,11 +116,11 @@ pub(crate) fn watch_for_config_changes( /// /// Return true if we should stop watching for configuration changes. fn reconfigure( - sources: &ConfigurationSources, + found_files: FoundConfigFiles<'_>, original: &ArtiConfig, client: &TorClient, ) -> anyhow::Result { - let config = sources.load()?; + let config = found_files.load()?; let (config, client_config) = tor_config::resolve::(config)?; if config.proxy() != original.proxy() { warn!("Can't (yet) reconfigure proxy settings while arti is running."); diff --git a/crates/tor-config/semver.md b/crates/tor-config/semver.md index b3400b90e..fec42cdc2 100644 --- a/crates/tor-config/semver.md +++ b/crates/tor-config/semver.md @@ -1,3 +1,4 @@ ADDED: ReconfigureError::Bug enum variant ADDED: misc::PaddingLevel ADDED: resolve_option_general +ADDED: sources::FoundConfigFiles diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index aecc759b2..69df849c5 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -13,6 +13,14 @@ //! perhaps [`set_mistrust`](ConfigurationSources::set_mistrust), //! and finally [`load`](ConfigurationSources::load). //! The resulting [`config::Config`] can then be deserialized. +//! +//! If you want to watch for config file changes, +//! use [`ConfigurationSources::scan()`], +//! to obtain a [`FoundConfigFiles`], +//! start watching the paths returned by [`FoundConfigFiles::iter()`], +//! and then call [`FoundConfigFiles::load()`]. +//! (This ordering starts watching the files before you read them, +//! which is necessary to avoid possibly missing changes.) use crate::CmdLine; @@ -50,6 +58,50 @@ enum MustRead { MustRead, } +/// Configuration files we found in the filesystem +/// +/// Result of [`ConfigurationSources::scan`]. +/// +/// When loading configuration files and also watching for filesystem updates, +/// this type encapsulates all the actual filesystem objects that need watching. +#[derive(Debug)] +pub struct FoundConfigFiles<'srcs> { + /// The things we found + files: Vec, + + /// Our parent, which contains details we need for `load` + sources: &'srcs ConfigurationSources, +} + +/// A configuration source file, found or not found on the filesystem +#[derive(Debug, Clone)] +pub struct FoundConfigFile { + /// The path of the (putative) object + path: PathBuf, + + /// Were we expecting this to definitely exist + must_read: MustRead, +} + +impl FoundConfigFile { + /// Get the path + pub fn path(&self) -> &Path { + &self.path + } + + /// Was this a directory, when we found it ? + pub fn was_dir(&self) -> bool { + // TODO: we don't support directories right now + false + } +} + +impl AsRef for FoundConfigFile { + fn as_ref(&self) -> &Path { + self.path() + } +} + impl ConfigurationSources { /// Create a new empty [`ConfigurationSources`]. pub fn new_empty() -> Self { @@ -132,63 +184,89 @@ impl ConfigurationSources { &self.mistrust } - /// Return an iterator over the files that we care about. - pub fn files(&self) -> impl Iterator { - self.files.iter().map(|(f, _)| f.as_path()) + /// Scan for files and load the configuration into a new [`config::Config`]. + /// + /// This is a convenience method for [`scan()`](Self::scan) + /// followed by [`files.load`]. + pub fn load(&self) -> Result { + let files = self.scan()?; + files.load() } - /// Load the configuration into a new [`config::Config`]. - pub fn load(&self) -> Result { - let mut builder = config::Config::builder(); - builder = add_sources(builder, &self.mistrust, &self.files, &self.options)?; - builder.build() + /// Scan for configuration source files (including scanning any directories) + // + // Currently this does not actually look at the filesystem, but it will do. + pub fn scan(&self) -> Result { + let mut out = vec![]; + + for &(ref found, must_read) in &self.files { + out.push(FoundConfigFile { + path: found.clone(), + must_read, + }); + } + + Ok(FoundConfigFiles { + files: out, + sources: self, + }) } } -/// Add every file and commandline source to `builder`, returning a new -/// builder. -fn add_sources

( - mut builder: ConfigBuilder, - mistrust: &fs_mistrust::Mistrust, - files: &[(P, MustRead)], - opts: &[String], -) -> Result -where - P: AsRef, -{ - for (path, must_read) in files { - let required = must_read == &MustRead::MustRead; +impl FoundConfigFiles<'_> { + /// Iterate over the filesystem objects that the scan found + // + // This ought really to be `impl IntoIterator for &Self` but that's awkward without TAIT + pub fn iter(&self) -> impl Iterator { + self.files.iter() + } - match mistrust - .verifier() - .permit_readable() - .require_file() - .check(&path) - { - Ok(()) => {} - Err(fs_mistrust::Error::NotFound(_)) if !required => {} - Err(e) => return Err(ConfigError::Foreign(e.into())), + /// Add every file and commandline source to `builder`, returning a new + /// builder. + fn add_sources(self, mut builder: ConfigBuilder) -> Result { + for FoundConfigFile { path, must_read } in self.files { + let required = must_read == MustRead::MustRead; + + match self + .sources + .mistrust + .verifier() + .permit_readable() + .check(&path) + { + Ok(()) => {} + Err(fs_mistrust::Error::NotFound(_)) if !required => {} + Err(e) => return Err(ConfigError::Foreign(e.into())), + } + + // Not going to use File::with_name here, since it doesn't + // quite do what we want. + let f: config::File<_, _> = path.into(); + builder = builder.add_source(f.format(config::FileFormat::Toml).required(required)); } - // Not going to use File::with_name here, since it doesn't - // quite do what we want. - let f: config::File<_, _> = path.as_ref().into(); - builder = builder.add_source(f.format(config::FileFormat::Toml).required(required)); + let mut cmdline = CmdLine::new(); + for opt in &self.sources.options { + cmdline.push_toml_line(opt.clone()); + } + builder = builder.add_source(cmdline); + + Ok(builder) } - let mut cmdline = CmdLine::new(); - for opt in opts { - cmdline.push_toml_line(opt.clone()); + /// Load the configuration into a new [`config::Config`]. + pub fn load(self) -> Result { + let mut builder = config::Config::builder(); + builder = self.add_sources(builder)?; + builder.build() } - builder = builder.add_source(cmdline); - - Ok(builder) } #[cfg(test)] mod test { #![allow(clippy::unwrap_used)] use super::*; + use itertools::Itertools; use tempfile::tempdir; static EX_TOML: &str = " @@ -204,10 +282,17 @@ friends = 4242 opts: &[String], ) -> Result { let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone(); - - add_sources(config::Config::builder(), &mistrust, files, opts) - .unwrap() - .build() + let files = files + .iter() + .map(|(p, m)| (p.as_ref().to_owned(), *m)) + .collect_vec(); + let options = opts.iter().cloned().collect_vec(); + ConfigurationSources { + files, + options, + mistrust, + } + .load() } #[test] @@ -260,7 +345,11 @@ world = \"nonsense\" ["/family/yor.toml", "/family/anya.toml"], ["decade=1960", "snack=peanuts"], ); - let files: Vec<_> = sources.files().map(|path| path.to_str().unwrap()).collect(); + let files: Vec<_> = sources + .files + .iter() + .map(|file| file.0.to_str().unwrap()) + .collect(); assert_eq!(files, vec!["/family/yor.toml", "/family/anya.toml"]); assert_eq!(sources.files[0].1, MustRead::MustRead); assert_eq!( From 08767f59d8645d69db54c8c37fbbe0494a7b5df9 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 19:21:41 +0100 Subject: [PATCH 10/20] config sources: Supporting reading directories --- crates/arti/src/watch_cfg.rs | 8 ++- crates/tor-config/Cargo.toml | 1 + crates/tor-config/src/sources.rs | 118 +++++++++++++++++++++++++++---- 3 files changed, 112 insertions(+), 15 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 3b526faaf..0f07b422e 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -263,16 +263,18 @@ impl FileWatcher { /// Return true if the provided event describes a change affecting one of /// the files that we care about. fn event_matched(&self, event: ¬ify::DebouncedEvent) -> bool { - let watching = |f| self.watching_files.contains(f); + let watching = |f: &_| self.watching_files.contains(f); + let watching_or_parent = + |f: &Path| watching(f) || f.parent().map(watching).unwrap_or(false); match event { notify::DebouncedEvent::NoticeWrite(f) => watching(f), notify::DebouncedEvent::NoticeRemove(f) => watching(f), - notify::DebouncedEvent::Create(f) => watching(f), + notify::DebouncedEvent::Create(f) => watching_or_parent(f), notify::DebouncedEvent::Write(f) => watching(f), notify::DebouncedEvent::Chmod(f) => watching(f), notify::DebouncedEvent::Remove(f) => watching(f), - notify::DebouncedEvent::Rename(f1, f2) => watching(f1) || watching(f2), + notify::DebouncedEvent::Rename(f1, f2) => watching(f1) || watching_or_parent(f2), notify::DebouncedEvent::Rescan => { // We've missed some events: no choice but to reload. true diff --git a/crates/tor-config/Cargo.toml b/crates/tor-config/Cargo.toml index 511d3d19e..f6684f3d4 100644 --- a/crates/tor-config/Cargo.toml +++ b/crates/tor-config/Cargo.toml @@ -16,6 +16,7 @@ default = ["expand-paths"] expand-paths = ["shellexpand", "directories"] [dependencies] +anyhow = "1.0.23" config = { version = "0.13", default-features = false, features = ["toml"] } derive_builder = { version = "0.11.2", package = "derive_builder_fork_arti" } directories = { version = "4", optional = true } diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 69df849c5..5ae505c06 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -22,9 +22,12 @@ //! (This ordering starts watching the files before you read them, //! which is necessary to avoid possibly missing changes.) +use std::{fs, io}; + use crate::CmdLine; use config::ConfigError; +use tor_basic_utils::IoErrorExt as _; /// The synchronous configuration builder type we use. /// @@ -58,7 +61,7 @@ enum MustRead { MustRead, } -/// Configuration files we found in the filesystem +/// Configuration files and directories we found in the filesystem /// /// Result of [`ConfigurationSources::scan`]. /// @@ -67,13 +70,21 @@ enum MustRead { #[derive(Debug)] pub struct FoundConfigFiles<'srcs> { /// The things we found + /// + /// This includes both: + /// * Files which ought to be read + /// * Directories, which may or may not contain any currently-relevant files + /// + /// The directories are retained for the purpose of watching for config changes: + /// we will want to detect files being created within them, + /// so our caller needs to discover them (via [`FoundConfigFiles::iter()`]). files: Vec, /// Our parent, which contains details we need for `load` sources: &'srcs ConfigurationSources, } -/// A configuration source file, found or not found on the filesystem +/// A configuration source file or directory, found or not found on the filesystem #[derive(Debug, Clone)] pub struct FoundConfigFile { /// The path of the (putative) object @@ -81,6 +92,18 @@ pub struct FoundConfigFile { /// Were we expecting this to definitely exist must_read: MustRead, + + /// What happened when we looked for it + ty: FoundType, +} + +/// Was this filesystem object a file or a directory? +#[derive(Debug, Copy, Clone)] +enum FoundType { + /// File + File, + /// Directory + Dir, } impl FoundConfigFile { @@ -91,8 +114,10 @@ impl FoundConfigFile { /// Was this a directory, when we found it ? pub fn was_dir(&self) -> bool { - // TODO: we don't support directories right now - false + match self.ty { + FoundType::Dir => true, + FoundType::File => false, + } } } @@ -112,13 +137,17 @@ impl ConfigurationSources { /// /// The caller should have parsed the program's command line, and extracted (inter alia) /// - /// * `config_files_options`: Paths of config file(s) + /// * `config_files_options`: Paths of config file(s) (or directories of `.toml` files) /// * `cmdline_toml_override_options`: Overrides ("key=value") /// /// The caller should also provide `default_config_file`, the default location of the /// configuration file. This is used if no file(s) are specified on the command line. /// /// `mistrust` is used to check whether the configuration files have appropriate permissions. + /// + /// Configuration file locations that turn out to be directories, + /// will be scanned for files whose name ends in `.toml`. + /// All those files (if any) will be read (in lexical order by filename). pub fn from_cmdline( default_config_file: impl Into, config_files_options: impl IntoIterator, @@ -194,16 +223,71 @@ impl ConfigurationSources { } /// Scan for configuration source files (including scanning any directories) - // - // Currently this does not actually look at the filesystem, but it will do. pub fn scan(&self) -> Result { let mut out = vec![]; for &(ref found, must_read) in &self.files { - out.push(FoundConfigFile { - path: found.clone(), - must_read, - }); + let required = must_read == MustRead::MustRead; + + // Returns Err(error) if we shuold bail, + // or Ok(()) if we should ignore the error and skip the file. + let handle_io_error = |e: io::Error| { + if e.kind() == io::ErrorKind::NotFound && !required { + Ok(()) + } else { + Err(ConfigError::Foreign( + anyhow::anyhow!(format!( + "unable to access config path: {:?}: {}", + &found, e + )) + .into(), + )) + } + }; + + match fs::read_dir(&found) { + Ok(dir) => { + out.push(FoundConfigFile { + path: found.clone(), + must_read, + ty: FoundType::Dir, + }); + // Rebinding `found` avoids using the directory name by mistake. + let mut entries = vec![]; + for found in dir { + // reuse map_io_err, which embeds the directory name, + // since if we have Err we don't have an entry name. + let found = match found { + Ok(y) => y, + Err(e) => { + handle_io_error(e)?; + continue; + } + }; + let leaf = found.file_name(); + let leaf: &Path = leaf.as_ref(); + match leaf.extension() { + Some(e) if e == "toml" => {} + _ => continue, + } + entries.push(found.path()); + } + entries.sort(); + out.extend(entries.into_iter().map(|path| FoundConfigFile { + path, + must_read: MustRead::TolerateAbsence, + ty: FoundType::File, + })); + } + Err(e) if e.is_not_a_directory() => { + out.push(FoundConfigFile { + path: found.clone(), + must_read, + ty: FoundType::File, + }); + } + Err(e) => handle_io_error(e)?, + } } Ok(FoundConfigFiles { @@ -224,9 +308,19 @@ impl FoundConfigFiles<'_> { /// Add every file and commandline source to `builder`, returning a new /// builder. fn add_sources(self, mut builder: ConfigBuilder) -> Result { - for FoundConfigFile { path, must_read } in self.files { + for FoundConfigFile { + path, + must_read, + ty, + } in self.files + { let required = must_read == MustRead::MustRead; + match ty { + FoundType::File => {} + FoundType::Dir => continue, + } + match self .sources .mistrust From e4fea3e1ea833abe2b4786f4564fb5694ce5bd09 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 15:42:53 +0100 Subject: [PATCH 11/20] config sources tests: Introduce test of reading directory --- crates/tor-config/src/sources.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 5ae505c06..4aa3d742b 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -415,6 +415,35 @@ world = \"nonsense\" assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); } + #[test] + fn dir_with_some() { + let td = tempdir().unwrap(); + let cf = td.path().join("1.toml"); + let d = td.path().join("extra.d"); + let df = d.join("2.toml"); + let xd = td.path().join("nonexistent.d"); + std::fs::create_dir(&d).unwrap(); + std::fs::write(&cf, EX_TOML).unwrap(); + std::fs::write(&df, EX2_TOML).unwrap(); + std::fs::write(d.join("not-toml"), "SYNTAX ERROR").unwrap(); + + let files = vec![ + (cf, MustRead::MustRead), + (d, MustRead::MustRead), + (xd.clone(), MustRead::TolerateAbsence), + ]; + let c = load_nodefaults(&files, Default::default()).unwrap(); + + assert_eq!(c.get_string("hello.friends").unwrap(), "4242"); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); + + let files = vec![(xd, MustRead::MustRead)]; + let e = load_nodefaults(&files, Default::default()) + .unwrap_err() + .to_string(); + assert!(dbg!(e).contains("nonexistent.d")); + } + #[test] fn load_two_files_with_cmdline() { let td = tempdir().unwrap(); From b700816eefa0f3920f6d1719184f52d69d2b64fc Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 19:24:03 +0100 Subject: [PATCH 12/20] config sources tests: Break out sources_nodefaults --- crates/tor-config/src/sources.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 4aa3d742b..eb8c9e157 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -369,12 +369,11 @@ world = \"stuff\" friends = 4242 "; - /// Load from a set of files and option strings, without taking - /// the arti defaults into account. - fn load_nodefaults>( + /// Make a ConfigurationSources (that doesn't include the arti defaults) + fn sources_nodefaults>( files: &[(P, MustRead)], opts: &[String], - ) -> Result { + ) -> ConfigurationSources { let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone(); let files = files .iter() @@ -386,7 +385,15 @@ friends = 4242 options, mistrust, } - .load() + } + + /// Load from a set of files and option strings, without taking + /// the arti defaults into account. + fn load_nodefaults>( + files: &[(P, MustRead)], + opts: &[String], + ) -> Result { + sources_nodefaults(files, opts).load() } #[test] From ba94c4a4fab655a385fec96cc2a1041725af7806 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 16:32:17 +0100 Subject: [PATCH 13/20] config sources tests: Test results of directory scan --- crates/tor-config/src/sources.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index eb8c9e157..cb3033634 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -439,7 +439,18 @@ world = \"nonsense\" (d, MustRead::MustRead), (xd.clone(), MustRead::TolerateAbsence), ]; - let c = load_nodefaults(&files, Default::default()).unwrap(); + let c = sources_nodefaults(&files, Default::default()); + let found = c.scan().unwrap(); + + assert_eq!( + found + .iter() + .map(|p| p.path().strip_prefix(&td).unwrap().to_str().unwrap()) + .collect_vec(), + &["1.toml", "extra.d", "extra.d/2.toml"] + ); + + let c = found.load().unwrap(); assert_eq!(c.get_string("hello.friends").unwrap(), "4242"); assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); From 7d8b3e2f2fbd1bc8e479978eabb240739df4c1f2 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 19:24:38 +0100 Subject: [PATCH 14/20] config sources: Read arti.d as well as arti.toml Fixes #474 aka #271 --- crates/arti-client/src/config.rs | 15 ++++++++++----- crates/arti/src/lib.rs | 6 +++--- crates/tor-config/src/sources.rs | 18 ++++++++++-------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/crates/arti-client/src/config.rs b/crates/arti-client/src/config.rs index f838a5f0c..30846dbd2 100644 --- a/crates/arti-client/src/config.rs +++ b/crates/arti-client/src/config.rs @@ -378,9 +378,12 @@ impl TorClientConfigBuilder { } } -/// Return a filename for the default user configuration file. -pub fn default_config_file() -> Result { - CfgPath::new("${ARTI_CONFIG}/arti.toml".into()).path() +/// Return the filenames for the default user configuration files +pub fn default_config_files() -> Result, CfgPathError> { + ["${ARTI_CONFIG}/arti.toml", "${ARTI_CONFIG}/arti.d"] + .into_iter() + .map(|f| CfgPath::new(f.into()).path()) + .collect() } /// The environment variable we look at when deciding whether to disable FS permissions checking. @@ -461,7 +464,9 @@ mod test { // We don't want to second-guess the directories crate too much // here, so we'll just make sure it does _something_ plausible. - let dflt = default_config_file().unwrap(); - assert!(dflt.ends_with("arti.toml")); + let dflt = default_config_files().unwrap(); + assert!(dflt[0].ends_with("arti.toml")); + assert!(dflt[1].ends_with("arti.d")); + assert_eq!(dflt.len(), 2); } } diff --git a/crates/arti/src/lib.rs b/crates/arti/src/lib.rs index ef6dcebc6..b454c61cf 100644 --- a/crates/arti/src/lib.rs +++ b/crates/arti/src/lib.rs @@ -194,7 +194,7 @@ pub use cfg::{ }; pub use logging::{LoggingConfig, LoggingConfigBuilder}; -use arti_client::config::default_config_file; +use arti_client::config::default_config_files; use arti_client::{TorClient, TorClientConfig}; use safelog::with_safe_logging_suppressed; use tor_config::ConfigurationSources; @@ -334,7 +334,7 @@ where // correct behavior is different depending on whether the filename is given // explicitly or not. let mut config_file_help = "Specify which config file(s) to read.".to_string(); - if let Ok(default) = default_config_file() { + if let Ok(default) = default_config_files() { // If we couldn't resolve the default config file, then too bad. If something // actually tries to use it, it will produce an error, but don't fail here // just for that reason. @@ -468,7 +468,7 @@ where let cfg_sources = { let mut cfg_sources = ConfigurationSources::from_cmdline( - default_config_file()?, + default_config_files()?, matches.values_of_os("config-files").unwrap_or_default(), override_options, ); diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index cb3033634..4f0accd97 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -140,20 +140,21 @@ impl ConfigurationSources { /// * `config_files_options`: Paths of config file(s) (or directories of `.toml` files) /// * `cmdline_toml_override_options`: Overrides ("key=value") /// - /// The caller should also provide `default_config_file`, the default location of the - /// configuration file. This is used if no file(s) are specified on the command line. + /// The caller should also provide `default_config_files`, the default locations of the + /// configuration files. This is used if no file(s) are specified on the command line. /// /// `mistrust` is used to check whether the configuration files have appropriate permissions. /// /// Configuration file locations that turn out to be directories, /// will be scanned for files whose name ends in `.toml`. /// All those files (if any) will be read (in lexical order by filename). - pub fn from_cmdline( - default_config_file: impl Into, + pub fn from_cmdline( + default_config_files: impl IntoIterator, config_files_options: impl IntoIterator, cmdline_toml_override_options: impl IntoIterator, ) -> Self where + P: Into, F: Into, O: Into, { @@ -166,8 +167,9 @@ impl ConfigurationSources { any_files = true; } if !any_files { - let default = default_config_file.into(); - cfg_sources.push_optional_file(default); + for default in default_config_files { + cfg_sources.push_optional_file(default.into()); + } } for s in cmdline_toml_override_options { @@ -482,7 +484,7 @@ world = \"nonsense\" fn from_cmdline() { // Try one with specified files let sources = ConfigurationSources::from_cmdline( - "/etc/loid.toml", + ["/etc/loid.toml"], ["/family/yor.toml", "/family/anya.toml"], ["decade=1960", "snack=peanuts"], ); @@ -500,7 +502,7 @@ world = \"nonsense\" // Try once with default only. let sources = ConfigurationSources::from_cmdline( - "/etc/loid.toml", + ["/etc/loid.toml"], Vec::::new(), ["decade=1960", "snack=peanuts"], ); From 7c0637ad441bd48eaffa5a61cbd8e9c4a8d56647 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Fri, 19 Aug 2022 19:22:35 +0100 Subject: [PATCH 15/20] config watch: Fix and reduce debounce interval The parameter to FileWatcher::new is not a polling time fallback; it is a "debounce time". Events are always delayed by at least this much. 10s is much too long for this. 1s is more appropriate. --- crates/arti-client/semver.md | 2 ++ crates/arti/src/watch_cfg.rs | 6 +++--- crates/tor-config/semver.md | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/arti-client/semver.md b/crates/arti-client/semver.md index ecec63212..8bf6b0566 100644 --- a/crates/arti-client/semver.md +++ b/crates/arti-client/semver.md @@ -1 +1,3 @@ ADDED: `channel` configuration, and `channnel.padding` config variable. +ADDED: default_config_files: returns a Vec; return value includes an `arti.d` directory +BREAKING: default_config_file: removed diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 0f07b422e..1bcfa8d93 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -15,8 +15,8 @@ use tracing::{debug, error, info, warn}; use crate::{ArtiCombinedConfig, ArtiConfig}; -/// How long (worst case) should we take to learn about configuration changes? -const POLL_INTERVAL: Duration = Duration::from_secs(10); +/// How long to wait after a file is created, before we try to read it. +const DEBOUNCE_INTERVAL: Duration = Duration::from_secs(1); /// Find the configuration files and prepare a watcher /// @@ -25,7 +25,7 @@ const POLL_INTERVAL: Duration = Duration::from_secs(10); fn prepare_watcher( sources: &ConfigurationSources, ) -> anyhow::Result<(FileWatcher, FoundConfigFiles)> { - let mut watcher = FileWatcher::new(POLL_INTERVAL)?; + let mut watcher = FileWatcher::new(DEBOUNCE_INTERVAL)?; let files = sources.scan()?; for file in files.iter() { if file.was_dir() { diff --git a/crates/tor-config/semver.md b/crates/tor-config/semver.md index fec42cdc2..1f0e9891c 100644 --- a/crates/tor-config/semver.md +++ b/crates/tor-config/semver.md @@ -2,3 +2,4 @@ ADDED: ReconfigureError::Bug enum variant ADDED: misc::PaddingLevel ADDED: resolve_option_general ADDED: sources::FoundConfigFiles +BREAKING: ConfigurationSources::from_cmdline wants an iterator of default config locations From a3005d8c0a801deafcc574dde4c5c47797097e62 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 24 Aug 2022 18:58:21 +0100 Subject: [PATCH 16/20] tor-config: MustRead: Make public I think this ought to be exhaustive. --- crates/tor-config/src/sources.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 4f0accd97..2ba8b199b 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -53,7 +53,8 @@ pub struct ConfigurationSources { /// aren't present. Others (like those specified on the command line) really /// need to be there. #[derive(Clone, Debug, Copy, Eq, PartialEq)] -enum MustRead { +#[allow(clippy::exhaustive_enums)] +pub enum MustRead { /// This file is okay to skip if it isn't present, TolerateAbsence, From e98bdf6004c0064ba9597740ce4251c248aa16e8 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 24 Aug 2022 18:56:02 +0100 Subject: [PATCH 17/20] tor-config: Provide is_syntactically_directory helper function --- crates/tor-config/src/sources.rs | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 2ba8b199b..2ec9a6a09 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -359,6 +359,32 @@ impl FoundConfigFiles<'_> { } } +/// Does it end in a slash? (Or some other way of saying this is a directory.) +fn is_syntactically_directory(p: &Path) -> bool { + use std::path::Component as PC; + + match p.components().rev().next() { + None => false, + Some(PC::Prefix(_)) | Some(PC::RootDir) | Some(PC::CurDir) | Some(PC::ParentDir) => true, + Some(PC::Normal(_)) => { + // Does it end in a slash? + let l = p.components().count(); + + // stdlib doesn't let us tell if the thing ends in a path separator. + // components() normalises, so doesn't give us an empty component + // But, if it ends in a path separator, adding a path component char will + // mean adding a component. + // This will work regardless of the path separator, on any platform where + // paths naming directories are like those for files. + // It would even work on some others, eg VMS. + let mut appended = OsString::from(p); + appended.push("a"); + let l2 = PathBuf::from(appended).components().count(); + l2 != l + } + } +} + #[cfg(test)] mod test { #![allow(clippy::unwrap_used)] @@ -512,4 +538,26 @@ world = \"nonsense\" &vec![("/etc/loid.toml".into(), MustRead::TolerateAbsence)] ); } + + #[test] + fn dir_syntax() { + let chk = |tf, s: &str| assert_eq!(tf, is_syntactically_directory(s.as_ref()), "{:?}", s); + + chk(false, ""); + chk(false, "1"); + chk(false, "1/2"); + chk(false, "/1"); + chk(false, "/1/2"); + + chk(true, "/"); + chk(true, "."); + chk(true, "./"); + chk(true, ".."); + chk(true, "../"); + chk(true, "/"); + chk(true, "1/"); + chk(true, "1/2/"); + chk(true, "/1/"); + chk(true, "/1/2/"); + } } From 9c00ec7da432a6ecd434fcb5227f50fc349c7162 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 24 Aug 2022 18:56:27 +0100 Subject: [PATCH 18/20] tor-config: Replace dir detection with ConfigurationSource enum As per https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/682#note_2830860 And subsequent IRC discussion. Having done the work as per review comments, I don't much like the result. It's quite un-ergonomiuc. If we can't have fs autodetection, I think syntactic autodetection within sources.rs would be nearly as nice. However, I seem to be outvoted. At least the externally visible functionality (of an arti binary, say) is reasonably ergonomic. --- crates/arti-bench/src/main.rs | 9 ++- crates/arti-client/src/config.rs | 15 ++-- crates/arti-testing/src/config.rs | 9 ++- crates/tor-config/src/lib.rs | 2 +- crates/tor-config/src/sources.rs | 110 ++++++++++++++++++++++-------- 5 files changed, 104 insertions(+), 41 deletions(-) diff --git a/crates/arti-bench/src/main.rs b/crates/arti-bench/src/main.rs index 09d09d54f..5522a6507 100644 --- a/crates/arti-bench/src/main.rs +++ b/crates/arti-bench/src/main.rs @@ -63,7 +63,7 @@ use std::thread::JoinHandle; use std::time::SystemTime; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio_socks::tcp::Socks5Stream; -use tor_config::ConfigurationSources; +use tor_config::{ConfigurationSource, ConfigurationSources}; use tor_rtcompat::Runtime; use tracing::info; @@ -366,7 +366,12 @@ fn main() -> Result<()> { matches .values_of_os("arti-config") .unwrap_or_default() - .for_each(|f| config_sources.push_file(f)); + .for_each(|f| { + config_sources.push_source( + ConfigurationSource::from_path(f), + tor_config::sources::MustRead::MustRead, + ); + }); // TODO really we ought to get this from the arti configuration, or something. // But this is OK for now since we are a benchmarking tool. diff --git a/crates/arti-client/src/config.rs b/crates/arti-client/src/config.rs index 30846dbd2..a3cd89b75 100644 --- a/crates/arti-client/src/config.rs +++ b/crates/arti-client/src/config.rs @@ -21,7 +21,7 @@ use std::path::PathBuf; use std::time::Duration; pub use tor_chanmgr::{ChannelConfig, ChannelConfigBuilder}; pub use tor_config::impl_standard_builder; -pub use tor_config::{CfgPath, CfgPathError, ConfigBuildError, Reconfigure}; +pub use tor_config::{CfgPath, CfgPathError, ConfigBuildError, ConfigurationSource, Reconfigure}; /// Types for configuring how Tor circuits are built. pub mod circ { @@ -379,10 +379,13 @@ impl TorClientConfigBuilder { } /// Return the filenames for the default user configuration files -pub fn default_config_files() -> Result, CfgPathError> { - ["${ARTI_CONFIG}/arti.toml", "${ARTI_CONFIG}/arti.d"] +pub fn default_config_files() -> Result, CfgPathError> { + ["${ARTI_CONFIG}/arti.toml", "${ARTI_CONFIG}/arti.d/"] .into_iter() - .map(|f| CfgPath::new(f.into()).path()) + .map(|f| { + let path = CfgPath::new(f.into()).path()?; + Ok(ConfigurationSource::from_path(path)) + }) .collect() } @@ -465,8 +468,8 @@ mod test { // here, so we'll just make sure it does _something_ plausible. let dflt = default_config_files().unwrap(); - assert!(dflt[0].ends_with("arti.toml")); - assert!(dflt[1].ends_with("arti.d")); + assert!(dflt[0].as_path().ends_with("arti.toml")); + assert!(dflt[1].as_path().ends_with("arti.d")); assert_eq!(dflt.len(), 2); } } diff --git a/crates/arti-testing/src/config.rs b/crates/arti-testing/src/config.rs index 5f36d173e..18632030c 100644 --- a/crates/arti-testing/src/config.rs +++ b/crates/arti-testing/src/config.rs @@ -8,7 +8,7 @@ use clap::{App, AppSettings, Arg, SubCommand}; use std::str::FromStr; use std::time::Duration; -use tor_config::ConfigurationSources; +use tor_config::{ConfigurationSource, ConfigurationSources}; /// Helper: parse an optional string as a number of seconds. fn int_str_to_secs(s: Option<&str>) -> Result> { @@ -134,7 +134,12 @@ pub(crate) fn parse_cmdline() -> Result { // Maybe change this later on if we decide it's silly. return Err(anyhow!("Sorry, you need to give me a configuration file.")); } else { - config_files.for_each(|f| cfg_sources.push_file(f)); + config_files.for_each(|f| { + cfg_sources.push_source( + ConfigurationSource::from_path(f), + tor_config::sources::MustRead::MustRead, + ); + }); } matches diff --git a/crates/tor-config/src/lib.rs b/crates/tor-config/src/lib.rs index 926abd46e..ceacc9900 100644 --- a/crates/tor-config/src/lib.rs +++ b/crates/tor-config/src/lib.rs @@ -112,7 +112,7 @@ pub use mut_cfg::MutCfg; pub use paste::paste; pub use path::{CfgPath, CfgPathError}; pub use serde; -pub use sources::ConfigurationSources; +pub use sources::{ConfigurationSource, ConfigurationSources}; pub use tor_basic_utils::macro_first_nonempty; diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 2ec9a6a09..cae59081c 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -22,12 +22,12 @@ //! (This ordering starts watching the files before you read them, //! which is necessary to avoid possibly missing changes.) +use std::ffi::OsString; use std::{fs, io}; use crate::CmdLine; use config::ConfigError; -use tor_basic_utils::IoErrorExt as _; /// The synchronous configuration builder type we use. /// @@ -40,7 +40,7 @@ use std::path::{Path, PathBuf}; #[derive(Clone, Debug, Default)] pub struct ConfigurationSources { /// List of files to read (in order). - files: Vec<(PathBuf, MustRead)>, + files: Vec<(ConfigurationSource, MustRead)>, /// A list of command-line options to apply after parsing the files. options: Vec, /// We will check all files we read @@ -62,6 +62,52 @@ pub enum MustRead { MustRead, } +/// A configuration file or directory, for use by a `ConfigurationSources` +/// +/// You can make one out of a `PathBuf`, examining its syntax like `arti` does, +/// using `ConfigurationSource::from_path`. +#[derive(Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[allow(clippy::exhaustive_enums)] // Callers will need to understand this +pub enum ConfigurationSource { + /// A plain file + File(PathBuf), + + /// A directory + Dir(PathBuf), +} + +impl ConfigurationSource { + /// Interpret a path (or string) as a configuration file or directory spec + /// + /// If the path syntactically specifies a directory + /// (i.e., can be seen to be a directory without accessing the filesystem, + /// for example because it ends in a directory separator such as `/`) + /// it is treated as specifying a directory. + pub fn from_path>(p: P) -> ConfigurationSource { + use ConfigurationSource as CS; + let p = p.into(); + if is_syntactically_directory(&p) { + CS::Dir(p) + } else { + CS::File(p) + } + } + + /// Return a reference to the inner `Path` + pub fn as_path(&self) -> &Path { + self.as_ref() + } +} + +impl AsRef for ConfigurationSource { + fn as_ref(&self) -> &PathBuf { + use ConfigurationSource as CS; + match self { + CS::File(p) | CS::Dir(p) => p, + } + } +} + /// Configuration files and directories we found in the filesystem /// /// Result of [`ConfigurationSources::scan`]. @@ -146,16 +192,15 @@ impl ConfigurationSources { /// /// `mistrust` is used to check whether the configuration files have appropriate permissions. /// - /// Configuration file locations that turn out to be directories, + /// `ConfigurationSource::Dir`s /// will be scanned for files whose name ends in `.toml`. /// All those files (if any) will be read (in lexical order by filename). - pub fn from_cmdline( - default_config_files: impl IntoIterator, + pub fn from_cmdline( + default_config_files: impl IntoIterator, config_files_options: impl IntoIterator, cmdline_toml_override_options: impl IntoIterator, ) -> Self where - P: Into, F: Into, O: Into, { @@ -164,12 +209,12 @@ impl ConfigurationSources { let mut any_files = false; for f in config_files_options { let f = f.into(); - cfg_sources.push_file(f); + cfg_sources.push_source(ConfigurationSource::from_path(f), MustRead::MustRead); any_files = true; } if !any_files { for default in default_config_files { - cfg_sources.push_optional_file(default.into()); + cfg_sources.push_source(default, MustRead::TolerateAbsence); } } @@ -180,20 +225,14 @@ impl ConfigurationSources { cfg_sources } - /// Add `p` to the list of files that we want to read configuration from. + /// Add `src` to the list of files or directories that we want to read configuration from. /// /// Configuration files are loaded and applied in the order that they are /// added to this object. /// /// If the listed file is absent, loading the configuration won't succeed. - pub fn push_file(&mut self, p: impl Into) { - self.files.push((p.into(), MustRead::MustRead)); - } - - /// As `push_file`, but if the listed file can't be loaded, loading the - /// configuration can still succeed. - pub fn push_optional_file(&mut self, p: impl Into) { - self.files.push((p.into(), MustRead::TolerateAbsence)); + pub fn push_source(&mut self, src: ConfigurationSource, must_read: MustRead) { + self.files.push((src, must_read)); } /// Add `s` to the list of overridden options to apply to our configuration. @@ -241,19 +280,28 @@ impl ConfigurationSources { Err(ConfigError::Foreign( anyhow::anyhow!(format!( "unable to access config path: {:?}: {}", - &found, e + &found.as_path(), + e )) .into(), )) } }; - match fs::read_dir(&found) { - Ok(dir) => { + use ConfigurationSource as CS; + match &found { + CS::Dir(found) => { + let dir = match fs::read_dir(&found) { + Ok(y) => y, + Err(e) => { + handle_io_error(e)?; + continue; + } + }; out.push(FoundConfigFile { path: found.clone(), - must_read, ty: FoundType::Dir, + must_read, }); // Rebinding `found` avoids using the directory name by mistake. let mut entries = vec![]; @@ -282,14 +330,13 @@ impl ConfigurationSources { ty: FoundType::File, })); } - Err(e) if e.is_not_a_directory() => { + CS::File(found) => { out.push(FoundConfigFile { path: found.clone(), must_read, ty: FoundType::File, }); } - Err(e) => handle_io_error(e)?, } } @@ -406,7 +453,7 @@ friends = 4242 let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone(); let files = files .iter() - .map(|(p, m)| (p.as_ref().to_owned(), *m)) + .map(|(p, m)| (ConfigurationSource::from_path(p.as_ref()), *m)) .collect_vec(); let options = opts.iter().cloned().collect_vec(); ConfigurationSources { @@ -455,9 +502,9 @@ world = \"nonsense\" fn dir_with_some() { let td = tempdir().unwrap(); let cf = td.path().join("1.toml"); - let d = td.path().join("extra.d"); + let d = td.path().join("extra.d/"); let df = d.join("2.toml"); - let xd = td.path().join("nonexistent.d"); + let xd = td.path().join("nonexistent.d/"); std::fs::create_dir(&d).unwrap(); std::fs::write(&cf, EX_TOML).unwrap(); std::fs::write(&df, EX2_TOML).unwrap(); @@ -511,14 +558,14 @@ world = \"nonsense\" fn from_cmdline() { // Try one with specified files let sources = ConfigurationSources::from_cmdline( - ["/etc/loid.toml"], + [ConfigurationSource::from_path("/etc/loid.toml")], ["/family/yor.toml", "/family/anya.toml"], ["decade=1960", "snack=peanuts"], ); let files: Vec<_> = sources .files .iter() - .map(|file| file.0.to_str().unwrap()) + .map(|file| file.0.as_ref().to_str().unwrap()) .collect(); assert_eq!(files, vec!["/family/yor.toml", "/family/anya.toml"]); assert_eq!(sources.files[0].1, MustRead::MustRead); @@ -529,13 +576,16 @@ world = \"nonsense\" // Try once with default only. let sources = ConfigurationSources::from_cmdline( - ["/etc/loid.toml"], + [ConfigurationSource::from_path("/etc/loid.toml")], Vec::::new(), ["decade=1960", "snack=peanuts"], ); assert_eq!( &sources.files, - &vec![("/etc/loid.toml".into(), MustRead::TolerateAbsence)] + &vec![( + ConfigurationSource::from_path("/etc/loid.toml"), + MustRead::TolerateAbsence + )] ); } From 2662fd0d7197bc2d11f6d4553990a33a88c20baf Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Wed, 24 Aug 2022 19:43:48 +0100 Subject: [PATCH 19/20] tor-config source: just ConfigurationSource, not FoundConfigFile FoundConfigFile existed to hide something that ConfigurationSource now exposes. --- crates/arti/src/watch_cfg.rs | 15 +++--- crates/tor-config/src/sources.rs | 81 +++++++++----------------------- 2 files changed, 28 insertions(+), 68 deletions(-) diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 1bcfa8d93..d24ac4584 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -9,7 +9,7 @@ use std::time::Duration; use arti_client::config::Reconfigure; use arti_client::TorClient; use notify::Watcher; -use tor_config::{sources::FoundConfigFiles, ConfigurationSources}; +use tor_config::{sources::FoundConfigFiles, ConfigurationSource, ConfigurationSources}; use tor_rtcompat::Runtime; use tracing::{debug, error, info, warn}; @@ -26,15 +26,14 @@ fn prepare_watcher( sources: &ConfigurationSources, ) -> anyhow::Result<(FileWatcher, FoundConfigFiles)> { let mut watcher = FileWatcher::new(DEBOUNCE_INTERVAL)?; - let files = sources.scan()?; - for file in files.iter() { - if file.was_dir() { - watcher.watch_dir(file)?; - } else { - watcher.watch_file(file)?; + let sources = sources.scan()?; + for source in sources.iter() { + match source { + ConfigurationSource::Dir(dir) => watcher.watch_dir(dir)?, + ConfigurationSource::File(file) => watcher.watch_file(file)?, } } - Ok((watcher, files)) + Ok((watcher, sources)) } /// Launch a thread to watch our configuration files. diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index cae59081c..d8af66b07 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -133,45 +133,12 @@ pub struct FoundConfigFiles<'srcs> { /// A configuration source file or directory, found or not found on the filesystem #[derive(Debug, Clone)] -pub struct FoundConfigFile { +struct FoundConfigFile { /// The path of the (putative) object - path: PathBuf, + source: ConfigurationSource, /// Were we expecting this to definitely exist must_read: MustRead, - - /// What happened when we looked for it - ty: FoundType, -} - -/// Was this filesystem object a file or a directory? -#[derive(Debug, Copy, Clone)] -enum FoundType { - /// File - File, - /// Directory - Dir, -} - -impl FoundConfigFile { - /// Get the path - pub fn path(&self) -> &Path { - &self.path - } - - /// Was this a directory, when we found it ? - pub fn was_dir(&self) -> bool { - match self.ty { - FoundType::Dir => true, - FoundType::File => false, - } - } -} - -impl AsRef for FoundConfigFile { - fn as_ref(&self) -> &Path { - self.path() - } } impl ConfigurationSources { @@ -268,7 +235,7 @@ impl ConfigurationSources { pub fn scan(&self) -> Result { let mut out = vec![]; - for &(ref found, must_read) in &self.files { + for &(ref source, must_read) in &self.files { let required = must_read == MustRead::MustRead; // Returns Err(error) if we shuold bail, @@ -280,7 +247,7 @@ impl ConfigurationSources { Err(ConfigError::Foreign( anyhow::anyhow!(format!( "unable to access config path: {:?}: {}", - &found.as_path(), + &source.as_path(), e )) .into(), @@ -289,7 +256,7 @@ impl ConfigurationSources { }; use ConfigurationSource as CS; - match &found { + match &source { CS::Dir(found) => { let dir = match fs::read_dir(&found) { Ok(y) => y, @@ -299,8 +266,7 @@ impl ConfigurationSources { } }; out.push(FoundConfigFile { - path: found.clone(), - ty: FoundType::Dir, + source: source.clone(), must_read, }); // Rebinding `found` avoids using the directory name by mistake. @@ -325,16 +291,14 @@ impl ConfigurationSources { } entries.sort(); out.extend(entries.into_iter().map(|path| FoundConfigFile { - path, + source: CS::File(path), must_read: MustRead::TolerateAbsence, - ty: FoundType::File, })); } - CS::File(found) => { + CS::File(_) => { out.push(FoundConfigFile { - path: found.clone(), + source: source.clone(), must_read, - ty: FoundType::File, }); } } @@ -351,32 +315,29 @@ impl FoundConfigFiles<'_> { /// Iterate over the filesystem objects that the scan found // // This ought really to be `impl IntoIterator for &Self` but that's awkward without TAIT - pub fn iter(&self) -> impl Iterator { - self.files.iter() + pub fn iter(&self) -> impl Iterator { + self.files.iter().map(|f| &f.source) } /// Add every file and commandline source to `builder`, returning a new /// builder. fn add_sources(self, mut builder: ConfigBuilder) -> Result { - for FoundConfigFile { - path, - must_read, - ty, - } in self.files - { + for FoundConfigFile { source, must_read } in self.files { + use ConfigurationSource as CS; + let required = must_read == MustRead::MustRead; - match ty { - FoundType::File => {} - FoundType::Dir => continue, - } + let file = match source { + CS::File(file) => file, + CS::Dir(_) => continue, + }; match self .sources .mistrust .verifier() .permit_readable() - .check(&path) + .check(&file) { Ok(()) => {} Err(fs_mistrust::Error::NotFound(_)) if !required => {} @@ -385,7 +346,7 @@ impl FoundConfigFiles<'_> { // Not going to use File::with_name here, since it doesn't // quite do what we want. - let f: config::File<_, _> = path.into(); + let f: config::File<_, _> = file.into(); builder = builder.add_source(f.format(config::FileFormat::Toml).required(required)); } @@ -521,7 +482,7 @@ world = \"nonsense\" assert_eq!( found .iter() - .map(|p| p.path().strip_prefix(&td).unwrap().to_str().unwrap()) + .map(|p| p.as_path().strip_prefix(&td).unwrap().to_str().unwrap()) .collect_vec(), &["1.toml", "extra.d", "extra.d/2.toml"] ); From ae5ca4377956494644e1f4b4c22db57f9ea71f84 Mon Sep 17 00:00:00 2001 From: Ian Jackson Date: Thu, 25 Aug 2022 13:00:08 +0100 Subject: [PATCH 20/20] tor-config: semver.md: Document change to ConfigurationSource enum --- crates/tor-config/semver.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/tor-config/semver.md b/crates/tor-config/semver.md index 1f0e9891c..6d0fb2d24 100644 --- a/crates/tor-config/semver.md +++ b/crates/tor-config/semver.md @@ -2,4 +2,5 @@ ADDED: ReconfigureError::Bug enum variant ADDED: misc::PaddingLevel ADDED: resolve_option_general ADDED: sources::FoundConfigFiles -BREAKING: ConfigurationSources::from_cmdline wants an iterator of default config locations +BREAKING: ConfigurationSources takes ConfigurationSource for files, not Paths +BREAKING: ConfigurationSources::from_cmdline wants an iterator of defaults