diff --git a/Cargo.lock b/Cargo.lock index 8df231f42..4e71c84cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3395,6 +3395,7 @@ dependencies = [ "futures", "futures-await-test", "hex", + "libc", "pin-project", "postage", "rand 0.8.5", diff --git a/crates/arti-bench/src/main.rs b/crates/arti-bench/src/main.rs index 09d09d54f..5522a6507 100644 --- a/crates/arti-bench/src/main.rs +++ b/crates/arti-bench/src/main.rs @@ -63,7 +63,7 @@ use std::thread::JoinHandle; use std::time::SystemTime; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio_socks::tcp::Socks5Stream; -use tor_config::ConfigurationSources; +use tor_config::{ConfigurationSource, ConfigurationSources}; use tor_rtcompat::Runtime; use tracing::info; @@ -366,7 +366,12 @@ fn main() -> Result<()> { matches .values_of_os("arti-config") .unwrap_or_default() - .for_each(|f| config_sources.push_file(f)); + .for_each(|f| { + config_sources.push_source( + ConfigurationSource::from_path(f), + tor_config::sources::MustRead::MustRead, + ); + }); // TODO really we ought to get this from the arti configuration, or something. // But this is OK for now since we are a benchmarking tool. diff --git a/crates/arti-client/semver.md b/crates/arti-client/semver.md index ecec63212..8bf6b0566 100644 --- a/crates/arti-client/semver.md +++ b/crates/arti-client/semver.md @@ -1 +1,3 @@ ADDED: `channel` configuration, and `channnel.padding` config variable. +ADDED: default_config_files: returns a Vec; return value includes an `arti.d` directory +BREAKING: default_config_file: removed diff --git a/crates/arti-client/src/config.rs b/crates/arti-client/src/config.rs index f838a5f0c..a3cd89b75 100644 --- a/crates/arti-client/src/config.rs +++ b/crates/arti-client/src/config.rs @@ -21,7 +21,7 @@ use std::path::PathBuf; use std::time::Duration; pub use tor_chanmgr::{ChannelConfig, ChannelConfigBuilder}; pub use tor_config::impl_standard_builder; -pub use tor_config::{CfgPath, CfgPathError, ConfigBuildError, Reconfigure}; +pub use tor_config::{CfgPath, CfgPathError, ConfigBuildError, ConfigurationSource, Reconfigure}; /// Types for configuring how Tor circuits are built. pub mod circ { @@ -378,9 +378,15 @@ impl TorClientConfigBuilder { } } -/// Return a filename for the default user configuration file. -pub fn default_config_file() -> Result { - CfgPath::new("${ARTI_CONFIG}/arti.toml".into()).path() +/// Return the filenames for the default user configuration files +pub fn default_config_files() -> Result, CfgPathError> { + ["${ARTI_CONFIG}/arti.toml", "${ARTI_CONFIG}/arti.d/"] + .into_iter() + .map(|f| { + let path = CfgPath::new(f.into()).path()?; + Ok(ConfigurationSource::from_path(path)) + }) + .collect() } /// The environment variable we look at when deciding whether to disable FS permissions checking. @@ -461,7 +467,9 @@ mod test { // We don't want to second-guess the directories crate too much // here, so we'll just make sure it does _something_ plausible. - let dflt = default_config_file().unwrap(); - assert!(dflt.ends_with("arti.toml")); + let dflt = default_config_files().unwrap(); + assert!(dflt[0].as_path().ends_with("arti.toml")); + assert!(dflt[1].as_path().ends_with("arti.d")); + assert_eq!(dflt.len(), 2); } } diff --git a/crates/arti-testing/src/config.rs b/crates/arti-testing/src/config.rs index 5f36d173e..18632030c 100644 --- a/crates/arti-testing/src/config.rs +++ b/crates/arti-testing/src/config.rs @@ -8,7 +8,7 @@ use clap::{App, AppSettings, Arg, SubCommand}; use std::str::FromStr; use std::time::Duration; -use tor_config::ConfigurationSources; +use tor_config::{ConfigurationSource, ConfigurationSources}; /// Helper: parse an optional string as a number of seconds. fn int_str_to_secs(s: Option<&str>) -> Result> { @@ -134,7 +134,12 @@ pub(crate) fn parse_cmdline() -> Result { // Maybe change this later on if we decide it's silly. return Err(anyhow!("Sorry, you need to give me a configuration file.")); } else { - config_files.for_each(|f| cfg_sources.push_file(f)); + config_files.for_each(|f| { + cfg_sources.push_source( + ConfigurationSource::from_path(f), + tor_config::sources::MustRead::MustRead, + ); + }); } matches diff --git a/crates/arti/semver.md b/crates/arti/semver.md index 8614f1725..277b2f093 100644 --- a/crates/arti/semver.md +++ b/crates/arti/semver.md @@ -1,4 +1,4 @@ BREAKING: Most functions are now only available behind an experimental-api feature. BREAKING: main_main() now takes a set of command-line arguments. - +FIXED: Bugfixes to configuration auto-reload arrangements. diff --git a/crates/arti/src/lib.rs b/crates/arti/src/lib.rs index ef6dcebc6..b454c61cf 100644 --- a/crates/arti/src/lib.rs +++ b/crates/arti/src/lib.rs @@ -194,7 +194,7 @@ pub use cfg::{ }; pub use logging::{LoggingConfig, LoggingConfigBuilder}; -use arti_client::config::default_config_file; +use arti_client::config::default_config_files; use arti_client::{TorClient, TorClientConfig}; use safelog::with_safe_logging_suppressed; use tor_config::ConfigurationSources; @@ -334,7 +334,7 @@ where // correct behavior is different depending on whether the filename is given // explicitly or not. let mut config_file_help = "Specify which config file(s) to read.".to_string(); - if let Ok(default) = default_config_file() { + if let Ok(default) = default_config_files() { // If we couldn't resolve the default config file, then too bad. If something // actually tries to use it, it will produce an error, but don't fail here // just for that reason. @@ -468,7 +468,7 @@ where let cfg_sources = { let mut cfg_sources = ConfigurationSources::from_cmdline( - default_config_file()?, + default_config_files()?, matches.values_of_os("config-files").unwrap_or_default(), override_options, ); diff --git a/crates/arti/src/watch_cfg.rs b/crates/arti/src/watch_cfg.rs index 57bdac9ee..d24ac4584 100644 --- a/crates/arti/src/watch_cfg.rs +++ b/crates/arti/src/watch_cfg.rs @@ -1,6 +1,7 @@ //! Code to watch configuration files for any changes. use std::collections::HashSet; +use std::iter; use std::path::{Path, PathBuf}; use std::sync::mpsc::channel as std_channel; use std::time::Duration; @@ -8,14 +9,32 @@ use std::time::Duration; use arti_client::config::Reconfigure; use arti_client::TorClient; use notify::Watcher; -use tor_config::ConfigurationSources; +use tor_config::{sources::FoundConfigFiles, ConfigurationSource, ConfigurationSources}; use tor_rtcompat::Runtime; -use tracing::{debug, info, warn}; +use tracing::{debug, error, info, warn}; use crate::{ArtiCombinedConfig, ArtiConfig}; -/// How long (worst case) should we take to learn about configuration changes? -const POLL_INTERVAL: Duration = Duration::from_secs(10); +/// How long to wait after a file is created, before we try to read it. +const DEBOUNCE_INTERVAL: Duration = Duration::from_secs(1); + +/// Find the configuration files and prepare a watcher +/// +/// For the watching to be reliably effective (race-free), the config must be read +/// *after* this point, using the returned `FoundConfigFiles`. +fn prepare_watcher( + sources: &ConfigurationSources, +) -> anyhow::Result<(FileWatcher, FoundConfigFiles)> { + let mut watcher = FileWatcher::new(DEBOUNCE_INTERVAL)?; + let sources = sources.scan()?; + for source in sources.iter() { + match source { + ConfigurationSource::Dir(dir) => watcher.watch_dir(dir)?, + ConfigurationSource::File(file) => watcher.watch_file(file)?, + } + } + Ok((watcher, sources)) +} /// Launch a thread to watch our configuration files. /// @@ -27,19 +46,21 @@ pub(crate) fn watch_for_config_changes( original: ArtiConfig, client: TorClient, ) -> anyhow::Result<()> { - let (tx, rx) = std_channel(); - let mut watcher = FileWatcher::new(tx, POLL_INTERVAL)?; + let (mut watcher, found_files) = prepare_watcher(&sources)?; - for file in sources.files() { - watcher.watch_file(file)?; - } + // If watching, we must reload the config once right away, because + // we have set up the watcher *after* loading it the first time. + // + // That means we safely drop the found_files without races, since we're going to rescan. + drop(found_files); + let mut first_reload = iter::once(notify::DebouncedEvent::Rescan); std::thread::spawn(move || { // TODO: If someday we make this facility available outside of the // `arti` application, we probably don't want to have this thread own // the FileWatcher. - debug!("Waiting for FS events"); - while let Ok(event) = rx.recv() { + debug!("Entering FS event loop"); + while let Some(event) = first_reload.next().or_else(|| watcher.rx().recv().ok()) { if !watcher.event_matched(&event) { // NOTE: Sadly, it's not safe to log in this case. If the user // has put a configuration file and a logfile in the same @@ -47,7 +68,7 @@ pub(crate) fn watch_for_config_changes( // every time we log, and fill up the filesystem. continue; } - while let Ok(_ignore) = rx.try_recv() { + while let Ok(_ignore) = watcher.rx().try_recv() { // Discard other events, so that we only reload once. // // We can afford to treat both error cases from try_recv [Empty @@ -56,7 +77,20 @@ pub(crate) fn watch_for_config_changes( // call recv() in the outer loop. } debug!("FS event {:?}: reloading configuration.", event); - match reconfigure(&sources, &original, &client) { + + let (new_watcher, found_files) = match prepare_watcher(&sources) { + Ok(y) => y, + Err(e) => { + error!( + "FS watch: failed to rescan config and re-establish watch: {}", + e + ); + break; + } + }; + watcher = new_watcher; + + match reconfigure(found_files, &original, &client) { Ok(exit) => { info!("Successfully reloaded configuration."); if exit { @@ -81,11 +115,11 @@ pub(crate) fn watch_for_config_changes( /// /// Return true if we should stop watching for configuration changes. fn reconfigure( - sources: &ConfigurationSources, + found_files: FoundConfigFiles<'_>, original: &ArtiConfig, client: &TorClient, ) -> anyhow::Result { - let config = sources.load()?; + let config = found_files.load()?; let (config, client_config) = tor_config::resolve::(config)?; if config.proxy() != original.proxy() { warn!("Can't (yet) reconfigure proxy settings while arti is running."); @@ -98,15 +132,15 @@ fn reconfigure( } client.reconfigure(&client_config, Reconfigure::WarnOnFailures)?; - if !config.application().watch_configuration { - // Stop watching for configuration changes. - return Ok(true); - } if !config.application().permit_debugging { #[cfg(feature = "harden")] crate::process::enable_process_hardening()?; } + if !config.application().watch_configuration { + // Stop watching for configuration changes. + return Ok(true); + } Ok(false) } @@ -114,6 +148,8 @@ fn reconfigure( /// directories in order to learn about changes in some specific files that they /// contain. /// +/// The wrapper contains the `Watcher` and also the channel for receiving events. +/// /// The `Watcher` implementation in `notify` has a weakness: it gives sensible /// results when you're watching directories, but if you start watching /// non-directory files, it won't notice when those files get replaced. That's @@ -129,6 +165,8 @@ fn reconfigure( /// to mess around with `std::sync::mpsc` and filter out the events they want /// using `FileWatcher::event_matched`. struct FileWatcher { + /// The channel we receive events on + rx: std::sync::mpsc::Receiver, /// An underlying `notify` watcher that tells us about directory changes. watcher: notify::RecommendedWatcher, /// The list of directories that we're currently watching. @@ -139,20 +177,44 @@ struct FileWatcher { impl FileWatcher { /// Like `notify::watcher`, but create a FileWatcher instead. - fn new( - tx: std::sync::mpsc::Sender, - interval: Duration, - ) -> anyhow::Result { + fn new(interval: Duration) -> anyhow::Result { + let (tx, rx) = std_channel(); let watcher = notify::watcher(tx, interval)?; Ok(Self { + rx, watcher, watching_dirs: HashSet::new(), watching_files: HashSet::new(), }) } - /// Watch a single file (not a directory). Does nothing if we're already watching that file. + /// Access the channel - use for receiving events + fn rx(&self) -> &std::sync::mpsc::Receiver { + &self.rx + } + + /// Watch a single file (not a directory). + /// + /// Idempotent: does nothing if we're already watching that file. fn watch_file>(&mut self, path: P) -> anyhow::Result<()> { + self.watch_just_parents(path.as_ref())?; + Ok(()) + } + + /// Watch a directory (but not any subdirs). + /// + /// Idempotent. + fn watch_dir>(&mut self, path: P) -> anyhow::Result<()> { + let path = self.watch_just_parents(path.as_ref())?; + self.watch_just_abs_dir(&path) + } + + /// Watch the parents of `path`. + /// + /// Returns the absolute path of `path`. + /// + /// Idempotent. + fn watch_just_parents(&mut self, path: &Path) -> anyhow::Result { // Make the path absolute (without necessarily making it canonical). // // We do this because `notify` reports all of its events in terms of @@ -160,7 +222,7 @@ impl FileWatcher { // relative path, we'd get reports about the absolute paths of the files // in that directory. let cwd = std::env::current_dir()?; - let path = cwd.join(path.as_ref()); + let path = cwd.join(path); debug_assert!(path.is_absolute()); // See what directory we should watch in order to watch this file. @@ -173,34 +235,45 @@ impl FileWatcher { None => path.as_ref(), }; - // Start watching this directory, if we're not already watching it. + self.watch_just_abs_dir(watch_target)?; + + // Note this file as one that we're watching, so that we can see changes + // to it later on. + self.watching_files.insert(path.clone()); + + Ok(path) + } + + /// Watch just this (absolute) directory. + /// + /// Does not watch any of the parents. + /// + /// Idempotent. + fn watch_just_abs_dir(&mut self, watch_target: &Path) -> anyhow::Result<()> { if !self.watching_dirs.contains(watch_target) { self.watcher .watch(watch_target, notify::RecursiveMode::NonRecursive)?; self.watching_dirs.insert(watch_target.into()); } - - // Note this file as one that we're watching, so that we can see changes - // to it later on. - self.watching_files.insert(path); - Ok(()) } /// Return true if the provided event describes a change affecting one of /// the files that we care about. fn event_matched(&self, event: ¬ify::DebouncedEvent) -> bool { - let watching = |f| self.watching_files.contains(f); + let watching = |f: &_| self.watching_files.contains(f); + let watching_or_parent = + |f: &Path| watching(f) || f.parent().map(watching).unwrap_or(false); match event { notify::DebouncedEvent::NoticeWrite(f) => watching(f), notify::DebouncedEvent::NoticeRemove(f) => watching(f), - notify::DebouncedEvent::Create(f) => watching(f), + notify::DebouncedEvent::Create(f) => watching_or_parent(f), notify::DebouncedEvent::Write(f) => watching(f), notify::DebouncedEvent::Chmod(f) => watching(f), notify::DebouncedEvent::Remove(f) => watching(f), - notify::DebouncedEvent::Rename(f1, f2) => watching(f1) || watching(f2), + notify::DebouncedEvent::Rename(f1, f2) => watching(f1) || watching_or_parent(f2), notify::DebouncedEvent::Rescan => { // We've missed some events: no choice but to reload. true diff --git a/crates/tor-basic-utils/Cargo.toml b/crates/tor-basic-utils/Cargo.toml index eca4ed724..ee994b24f 100644 --- a/crates/tor-basic-utils/Cargo.toml +++ b/crates/tor-basic-utils/Cargo.toml @@ -22,6 +22,9 @@ rand_chacha = "0.3" thiserror = "1" void = "1" +[target.'cfg(unix)'.dependencies] +libc = { version = "0.2", default-features = false } + [dev-dependencies] derive_more = "0.99" educe = "0.4.6" diff --git a/crates/tor-basic-utils/semver.md b/crates/tor-basic-utils/semver.md new file mode 100644 index 000000000..c69e79300 --- /dev/null +++ b/crates/tor-basic-utils/semver.md @@ -0,0 +1 @@ +ADDED: IoErrorExt, with fn is_not_a_directory() diff --git a/crates/tor-basic-utils/src/lib.rs b/crates/tor-basic-utils/src/lib.rs index ee5fe9fe8..bf78f9dc0 100644 --- a/crates/tor-basic-utils/src/lib.rs +++ b/crates/tor-basic-utils/src/lib.rs @@ -83,6 +83,33 @@ pub fn skip_fmt(_: &T, f: &mut fmt::Formatter) -> fmt::Result { // ---------------------------------------------------------------------- +/// Implementation of `ErrorKind::NotADirectory` that doesn't require Nightly +pub trait IoErrorExt { + /// Is this `io::ErrorKind::NotADirectory` ? + fn is_not_a_directory(&self) -> bool; +} +impl IoErrorExt for std::io::Error { + fn is_not_a_directory(&self) -> bool { + self.raw_os_error() + == Some( + #[cfg(target_family = "unix")] + libc::ENOTDIR, + #[cfg(target_family = "windows")] + { + /// Obtained from Rust stdlib source code + /// See also: + /// + /// (although the documentation is anaemic) and + /// + const ERROR_DIRECTORY: i32 = 267; + ERROR_DIRECTORY + }, + ) + } +} + +// ---------------------------------------------------------------------- + /// Define an "accessor trait", which describes structs that have fields of certain types /// /// This can be useful if a large struct, living high up in the dependency graph, diff --git a/crates/tor-config/Cargo.toml b/crates/tor-config/Cargo.toml index 511d3d19e..f6684f3d4 100644 --- a/crates/tor-config/Cargo.toml +++ b/crates/tor-config/Cargo.toml @@ -16,6 +16,7 @@ default = ["expand-paths"] expand-paths = ["shellexpand", "directories"] [dependencies] +anyhow = "1.0.23" config = { version = "0.13", default-features = false, features = ["toml"] } derive_builder = { version = "0.11.2", package = "derive_builder_fork_arti" } directories = { version = "4", optional = true } diff --git a/crates/tor-config/semver.md b/crates/tor-config/semver.md index b3400b90e..6d0fb2d24 100644 --- a/crates/tor-config/semver.md +++ b/crates/tor-config/semver.md @@ -1,3 +1,6 @@ ADDED: ReconfigureError::Bug enum variant ADDED: misc::PaddingLevel ADDED: resolve_option_general +ADDED: sources::FoundConfigFiles +BREAKING: ConfigurationSources takes ConfigurationSource for files, not Paths +BREAKING: ConfigurationSources::from_cmdline wants an iterator of defaults diff --git a/crates/tor-config/src/lib.rs b/crates/tor-config/src/lib.rs index 926abd46e..ceacc9900 100644 --- a/crates/tor-config/src/lib.rs +++ b/crates/tor-config/src/lib.rs @@ -112,7 +112,7 @@ pub use mut_cfg::MutCfg; pub use paste::paste; pub use path::{CfgPath, CfgPathError}; pub use serde; -pub use sources::ConfigurationSources; +pub use sources::{ConfigurationSource, ConfigurationSources}; pub use tor_basic_utils::macro_first_nonempty; diff --git a/crates/tor-config/src/sources.rs b/crates/tor-config/src/sources.rs index 0a36364f4..d8af66b07 100644 --- a/crates/tor-config/src/sources.rs +++ b/crates/tor-config/src/sources.rs @@ -13,6 +13,17 @@ //! perhaps [`set_mistrust`](ConfigurationSources::set_mistrust), //! and finally [`load`](ConfigurationSources::load). //! The resulting [`config::Config`] can then be deserialized. +//! +//! If you want to watch for config file changes, +//! use [`ConfigurationSources::scan()`], +//! to obtain a [`FoundConfigFiles`], +//! start watching the paths returned by [`FoundConfigFiles::iter()`], +//! and then call [`FoundConfigFiles::load()`]. +//! (This ordering starts watching the files before you read them, +//! which is necessary to avoid possibly missing changes.) + +use std::ffi::OsString; +use std::{fs, io}; use crate::CmdLine; @@ -29,7 +40,7 @@ use std::path::{Path, PathBuf}; #[derive(Clone, Debug, Default)] pub struct ConfigurationSources { /// List of files to read (in order). - files: Vec<(PathBuf, MustRead)>, + files: Vec<(ConfigurationSource, MustRead)>, /// A list of command-line options to apply after parsing the files. options: Vec, /// We will check all files we read @@ -42,7 +53,8 @@ pub struct ConfigurationSources { /// aren't present. Others (like those specified on the command line) really /// need to be there. #[derive(Clone, Debug, Copy, Eq, PartialEq)] -enum MustRead { +#[allow(clippy::exhaustive_enums)] +pub enum MustRead { /// This file is okay to skip if it isn't present, TolerateAbsence, @@ -50,6 +62,85 @@ enum MustRead { MustRead, } +/// A configuration file or directory, for use by a `ConfigurationSources` +/// +/// You can make one out of a `PathBuf`, examining its syntax like `arti` does, +/// using `ConfigurationSource::from_path`. +#[derive(Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)] +#[allow(clippy::exhaustive_enums)] // Callers will need to understand this +pub enum ConfigurationSource { + /// A plain file + File(PathBuf), + + /// A directory + Dir(PathBuf), +} + +impl ConfigurationSource { + /// Interpret a path (or string) as a configuration file or directory spec + /// + /// If the path syntactically specifies a directory + /// (i.e., can be seen to be a directory without accessing the filesystem, + /// for example because it ends in a directory separator such as `/`) + /// it is treated as specifying a directory. + pub fn from_path>(p: P) -> ConfigurationSource { + use ConfigurationSource as CS; + let p = p.into(); + if is_syntactically_directory(&p) { + CS::Dir(p) + } else { + CS::File(p) + } + } + + /// Return a reference to the inner `Path` + pub fn as_path(&self) -> &Path { + self.as_ref() + } +} + +impl AsRef for ConfigurationSource { + fn as_ref(&self) -> &PathBuf { + use ConfigurationSource as CS; + match self { + CS::File(p) | CS::Dir(p) => p, + } + } +} + +/// Configuration files and directories we found in the filesystem +/// +/// Result of [`ConfigurationSources::scan`]. +/// +/// When loading configuration files and also watching for filesystem updates, +/// this type encapsulates all the actual filesystem objects that need watching. +#[derive(Debug)] +pub struct FoundConfigFiles<'srcs> { + /// The things we found + /// + /// This includes both: + /// * Files which ought to be read + /// * Directories, which may or may not contain any currently-relevant files + /// + /// The directories are retained for the purpose of watching for config changes: + /// we will want to detect files being created within them, + /// so our caller needs to discover them (via [`FoundConfigFiles::iter()`]). + files: Vec, + + /// Our parent, which contains details we need for `load` + sources: &'srcs ConfigurationSources, +} + +/// A configuration source file or directory, found or not found on the filesystem +#[derive(Debug, Clone)] +struct FoundConfigFile { + /// The path of the (putative) object + source: ConfigurationSource, + + /// Were we expecting this to definitely exist + must_read: MustRead, +} + impl ConfigurationSources { /// Create a new empty [`ConfigurationSources`]. pub fn new_empty() -> Self { @@ -60,15 +151,19 @@ impl ConfigurationSources { /// /// The caller should have parsed the program's command line, and extracted (inter alia) /// - /// * `config_files_options`: Paths of config file(s) + /// * `config_files_options`: Paths of config file(s) (or directories of `.toml` files) /// * `cmdline_toml_override_options`: Overrides ("key=value") /// - /// The caller should also provide `default_config_file`, the default location of the - /// configuration file. This is used if no file(s) are specified on the command line. + /// The caller should also provide `default_config_files`, the default locations of the + /// configuration files. This is used if no file(s) are specified on the command line. /// /// `mistrust` is used to check whether the configuration files have appropriate permissions. + /// + /// `ConfigurationSource::Dir`s + /// will be scanned for files whose name ends in `.toml`. + /// All those files (if any) will be read (in lexical order by filename). pub fn from_cmdline( - default_config_file: impl Into, + default_config_files: impl IntoIterator, config_files_options: impl IntoIterator, cmdline_toml_override_options: impl IntoIterator, ) -> Self @@ -81,12 +176,13 @@ impl ConfigurationSources { let mut any_files = false; for f in config_files_options { let f = f.into(); - cfg_sources.push_file(f); + cfg_sources.push_source(ConfigurationSource::from_path(f), MustRead::MustRead); any_files = true; } if !any_files { - let default = default_config_file.into(); - cfg_sources.push_optional_file(default); + for default in default_config_files { + cfg_sources.push_source(default, MustRead::TolerateAbsence); + } } for s in cmdline_toml_override_options { @@ -96,20 +192,14 @@ impl ConfigurationSources { cfg_sources } - /// Add `p` to the list of files that we want to read configuration from. + /// Add `src` to the list of files or directories that we want to read configuration from. /// /// Configuration files are loaded and applied in the order that they are /// added to this object. /// /// If the listed file is absent, loading the configuration won't succeed. - pub fn push_file(&mut self, p: impl Into) { - self.files.push((p.into(), MustRead::MustRead)); - } - - /// As `push_file`, but if the listed file can't be loaded, loading the - /// configuration can still succeed. - pub fn push_optional_file(&mut self, p: impl Into) { - self.files.push((p.into(), MustRead::TolerateAbsence)); + pub fn push_source(&mut self, src: ConfigurationSource, must_read: MustRead) { + self.files.push((src, must_read)); } /// Add `s` to the list of overridden options to apply to our configuration. @@ -132,63 +222,182 @@ impl ConfigurationSources { &self.mistrust } - /// Return an iterator over the files that we care about. - pub fn files(&self) -> impl Iterator { - self.files.iter().map(|(f, _)| f.as_path()) + /// Scan for files and load the configuration into a new [`config::Config`]. + /// + /// This is a convenience method for [`scan()`](Self::scan) + /// followed by [`files.load`]. + pub fn load(&self) -> Result { + let files = self.scan()?; + files.load() + } + + /// Scan for configuration source files (including scanning any directories) + pub fn scan(&self) -> Result { + let mut out = vec![]; + + for &(ref source, must_read) in &self.files { + let required = must_read == MustRead::MustRead; + + // Returns Err(error) if we shuold bail, + // or Ok(()) if we should ignore the error and skip the file. + let handle_io_error = |e: io::Error| { + if e.kind() == io::ErrorKind::NotFound && !required { + Ok(()) + } else { + Err(ConfigError::Foreign( + anyhow::anyhow!(format!( + "unable to access config path: {:?}: {}", + &source.as_path(), + e + )) + .into(), + )) + } + }; + + use ConfigurationSource as CS; + match &source { + CS::Dir(found) => { + let dir = match fs::read_dir(&found) { + Ok(y) => y, + Err(e) => { + handle_io_error(e)?; + continue; + } + }; + out.push(FoundConfigFile { + source: source.clone(), + must_read, + }); + // Rebinding `found` avoids using the directory name by mistake. + let mut entries = vec![]; + for found in dir { + // reuse map_io_err, which embeds the directory name, + // since if we have Err we don't have an entry name. + let found = match found { + Ok(y) => y, + Err(e) => { + handle_io_error(e)?; + continue; + } + }; + let leaf = found.file_name(); + let leaf: &Path = leaf.as_ref(); + match leaf.extension() { + Some(e) if e == "toml" => {} + _ => continue, + } + entries.push(found.path()); + } + entries.sort(); + out.extend(entries.into_iter().map(|path| FoundConfigFile { + source: CS::File(path), + must_read: MustRead::TolerateAbsence, + })); + } + CS::File(_) => { + out.push(FoundConfigFile { + source: source.clone(), + must_read, + }); + } + } + } + + Ok(FoundConfigFiles { + files: out, + sources: self, + }) + } +} + +impl FoundConfigFiles<'_> { + /// Iterate over the filesystem objects that the scan found + // + // This ought really to be `impl IntoIterator for &Self` but that's awkward without TAIT + pub fn iter(&self) -> impl Iterator { + self.files.iter().map(|f| &f.source) + } + + /// Add every file and commandline source to `builder`, returning a new + /// builder. + fn add_sources(self, mut builder: ConfigBuilder) -> Result { + for FoundConfigFile { source, must_read } in self.files { + use ConfigurationSource as CS; + + let required = must_read == MustRead::MustRead; + + let file = match source { + CS::File(file) => file, + CS::Dir(_) => continue, + }; + + match self + .sources + .mistrust + .verifier() + .permit_readable() + .check(&file) + { + Ok(()) => {} + Err(fs_mistrust::Error::NotFound(_)) if !required => {} + Err(e) => return Err(ConfigError::Foreign(e.into())), + } + + // Not going to use File::with_name here, since it doesn't + // quite do what we want. + let f: config::File<_, _> = file.into(); + builder = builder.add_source(f.format(config::FileFormat::Toml).required(required)); + } + + let mut cmdline = CmdLine::new(); + for opt in &self.sources.options { + cmdline.push_toml_line(opt.clone()); + } + builder = builder.add_source(cmdline); + + Ok(builder) } /// Load the configuration into a new [`config::Config`]. - pub fn load(&self) -> Result { + pub fn load(self) -> Result { let mut builder = config::Config::builder(); - builder = add_sources(builder, &self.mistrust, &self.files, &self.options)?; + builder = self.add_sources(builder)?; builder.build() } } -/// Add every file and commandline source to `builder`, returning a new -/// builder. -fn add_sources

( - mut builder: ConfigBuilder, - mistrust: &fs_mistrust::Mistrust, - files: &[(P, MustRead)], - opts: &[String], -) -> Result -where - P: AsRef, -{ - for (path, must_read) in files { - let required = must_read == &MustRead::MustRead; +/// Does it end in a slash? (Or some other way of saying this is a directory.) +fn is_syntactically_directory(p: &Path) -> bool { + use std::path::Component as PC; - match mistrust - .verifier() - .permit_readable() - .require_file() - .check(&path) - { - Ok(()) => {} - Err(fs_mistrust::Error::NotFound(_)) if !required => {} - Err(e) => return Err(ConfigError::Foreign(e.into())), + match p.components().rev().next() { + None => false, + Some(PC::Prefix(_)) | Some(PC::RootDir) | Some(PC::CurDir) | Some(PC::ParentDir) => true, + Some(PC::Normal(_)) => { + // Does it end in a slash? + let l = p.components().count(); + + // stdlib doesn't let us tell if the thing ends in a path separator. + // components() normalises, so doesn't give us an empty component + // But, if it ends in a path separator, adding a path component char will + // mean adding a component. + // This will work regardless of the path separator, on any platform where + // paths naming directories are like those for files. + // It would even work on some others, eg VMS. + let mut appended = OsString::from(p); + appended.push("a"); + let l2 = PathBuf::from(appended).components().count(); + l2 != l } - - // Not going to use File::with_name here, since it doesn't - // quite do what we want. - let f: config::File<_, _> = path.as_ref().into(); - builder = builder.add_source(f.format(config::FileFormat::Toml).required(required)); } - - let mut cmdline = CmdLine::new(); - for opt in opts { - cmdline.push_toml_line(opt.clone()); - } - builder = builder.add_source(cmdline); - - Ok(builder) } #[cfg(test)] mod test { #![allow(clippy::unwrap_used)] use super::*; + use itertools::Itertools; use tempfile::tempdir; static EX_TOML: &str = " @@ -197,17 +406,31 @@ world = \"stuff\" friends = 4242 "; + /// Make a ConfigurationSources (that doesn't include the arti defaults) + fn sources_nodefaults>( + files: &[(P, MustRead)], + opts: &[String], + ) -> ConfigurationSources { + let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone(); + let files = files + .iter() + .map(|(p, m)| (ConfigurationSource::from_path(p.as_ref()), *m)) + .collect_vec(); + let options = opts.iter().cloned().collect_vec(); + ConfigurationSources { + files, + options, + mistrust, + } + } + /// Load from a set of files and option strings, without taking /// the arti defaults into account. fn load_nodefaults>( files: &[(P, MustRead)], opts: &[String], ) -> Result { - let mistrust = fs_mistrust::Mistrust::new_dangerously_trust_everyone(); - - add_sources(config::Config::builder(), &mistrust, files, opts) - .unwrap() - .build() + sources_nodefaults(files, opts).load() } #[test] @@ -233,7 +456,47 @@ world = \"nonsense\" let c = load_nodefaults(&files, Default::default()).unwrap(); assert!(c.get_string("hello.friends").is_err()); - assert_eq!(c.get_string("hello.world").unwrap(), "nonsense".to_string()); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); + } + + #[test] + fn dir_with_some() { + let td = tempdir().unwrap(); + let cf = td.path().join("1.toml"); + let d = td.path().join("extra.d/"); + let df = d.join("2.toml"); + let xd = td.path().join("nonexistent.d/"); + std::fs::create_dir(&d).unwrap(); + std::fs::write(&cf, EX_TOML).unwrap(); + std::fs::write(&df, EX2_TOML).unwrap(); + std::fs::write(d.join("not-toml"), "SYNTAX ERROR").unwrap(); + + let files = vec![ + (cf, MustRead::MustRead), + (d, MustRead::MustRead), + (xd.clone(), MustRead::TolerateAbsence), + ]; + let c = sources_nodefaults(&files, Default::default()); + let found = c.scan().unwrap(); + + assert_eq!( + found + .iter() + .map(|p| p.as_path().strip_prefix(&td).unwrap().to_str().unwrap()) + .collect_vec(), + &["1.toml", "extra.d", "extra.d/2.toml"] + ); + + let c = found.load().unwrap(); + + assert_eq!(c.get_string("hello.friends").unwrap(), "4242"); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); + + let files = vec![(xd, MustRead::MustRead)]; + let e = load_nodefaults(&files, Default::default()) + .unwrap_err() + .to_string(); + assert!(dbg!(e).contains("nonexistent.d")); } #[test] @@ -247,20 +510,24 @@ world = \"nonsense\" let v2 = vec!["other.var=present".to_string()]; let c = load_nodefaults(&v, &v2).unwrap(); - assert_eq!(c.get_string("hello.friends").unwrap(), "4242".to_string()); - assert_eq!(c.get_string("hello.world").unwrap(), "nonsense".to_string()); - assert_eq!(c.get_string("other.var").unwrap(), "present".to_string()); + assert_eq!(c.get_string("hello.friends").unwrap(), "4242"); + assert_eq!(c.get_string("hello.world").unwrap(), "nonsense"); + assert_eq!(c.get_string("other.var").unwrap(), "present"); } #[test] fn from_cmdline() { // Try one with specified files let sources = ConfigurationSources::from_cmdline( - "/etc/loid.toml", + [ConfigurationSource::from_path("/etc/loid.toml")], ["/family/yor.toml", "/family/anya.toml"], ["decade=1960", "snack=peanuts"], ); - let files: Vec<_> = sources.files().map(|path| path.to_str().unwrap()).collect(); + let files: Vec<_> = sources + .files + .iter() + .map(|file| file.0.as_ref().to_str().unwrap()) + .collect(); assert_eq!(files, vec!["/family/yor.toml", "/family/anya.toml"]); assert_eq!(sources.files[0].1, MustRead::MustRead); assert_eq!( @@ -270,13 +537,38 @@ world = \"nonsense\" // Try once with default only. let sources = ConfigurationSources::from_cmdline( - "/etc/loid.toml", + [ConfigurationSource::from_path("/etc/loid.toml")], Vec::::new(), ["decade=1960", "snack=peanuts"], ); assert_eq!( &sources.files, - &vec![("/etc/loid.toml".into(), MustRead::TolerateAbsence)] + &vec![( + ConfigurationSource::from_path("/etc/loid.toml"), + MustRead::TolerateAbsence + )] ); } + + #[test] + fn dir_syntax() { + let chk = |tf, s: &str| assert_eq!(tf, is_syntactically_directory(s.as_ref()), "{:?}", s); + + chk(false, ""); + chk(false, "1"); + chk(false, "1/2"); + chk(false, "/1"); + chk(false, "/1/2"); + + chk(true, "/"); + chk(true, "."); + chk(true, "./"); + chk(true, ".."); + chk(true, "../"); + chk(true, "/"); + chk(true, "1/"); + chk(true, "1/2/"); + chk(true, "/1/"); + chk(true, "/1/2/"); + } }