Document parse.rs in tor-netdoc, and note things to move or fix

This commit is contained in:
Nick Mathewson 2020-05-09 10:58:44 -04:00
parent 07d6cd956a
commit 624dca4056
1 changed files with 82 additions and 4 deletions

View File

@ -1,19 +1,42 @@
//! Based on a set of rules, validate a token stream and collect the
//! tokens by type.
//!
//! See the "rules" module for definitions of rules that are used to
//! govern this process.
//!
//! # Example
//!
//! This is an internal API, so see the routerdesc.rs source for an
//! example of use.
use crate::rules::*;
use crate::tokenize::*;
use crate::{Error, Position, Result};
use std::str::FromStr;
/// Describe the rules for one section of a document.
///
/// The rules are represented as a mapping from token index to
/// rules::TokenFmt.
pub struct SectionRules<T: Keyword> {
rules: Vec<Option<TokenFmt<T>>>,
}
/// The entry or entries for a particular keyword within a document.
#[derive(Clone)]
enum TokVal<'a> {
/// No value has been found.
None,
Some([Item<'a>; 1]), // using a one-element array so we can slice it.
/// A single value has been found; we're storing it in place.
///
/// We use a one-element array here so that we can return a slice
/// of the array.
Some([Item<'a>; 1]),
/// Multiple vlaues have been found; they go in a vector.
Multi(Vec<Item<'a>>),
}
impl<'a> TokVal<'a> {
/// Return the number of Items for this value.
fn count(&self) -> usize {
match self {
TokVal::None => 0,
@ -21,6 +44,7 @@ impl<'a> TokVal<'a> {
TokVal::Multi(v) => v.len(),
}
}
/// Return the first Item for this value, or None if there wasn't one.
fn first(&self) -> Option<&Item<'a>> {
match self {
TokVal::None => None,
@ -28,12 +52,16 @@ impl<'a> TokVal<'a> {
TokVal::Multi(v) => Some(&v[0]),
}
}
/// Return the second Item for this value, or None if there wasn't one.
///
/// Used to make duplicate-token errors.
fn second(&self) -> Option<&Item<'a>> {
match self {
TokVal::Multi(v) if v.len() > 1 => Some(&v[1]),
_ => None,
}
}
/// Return the Item for this value, if there is exactly one.
fn singleton(&self) -> Option<&Item<'a>> {
match self {
TokVal::None => None,
@ -41,6 +69,7 @@ impl<'a> TokVal<'a> {
TokVal::Multi(_) => None,
}
}
/// Return all the Items for this value, as a slice.
fn as_slice(&self) -> &[Item<'a>] {
match self {
TokVal::None => &[],
@ -50,12 +79,16 @@ impl<'a> TokVal<'a> {
}
}
/// A Section is the result of sorting a document's entries by keyword.
pub struct Section<'a, T: Keyword> {
/// Map from Keyword index to TokVal
v: Vec<TokVal<'a>>,
/// Tells Rust it's okay that we are parameterizing on T.
_t: std::marker::PhantomData<T>,
}
impl<'a, T: Keyword> Section<'a, T> {
/// Make a new empty Section.
fn new() -> Self {
let n = T::n_vals();
let mut v = Vec::with_capacity(n);
@ -65,22 +98,35 @@ impl<'a, T: Keyword> Section<'a, T> {
_t: std::marker::PhantomData,
}
}
/// Helper: return the tokval for some Keyword.
fn get_tokval(&self, t: T) -> &TokVal<'a> {
let idx = t.idx();
&self.v[idx]
}
/// Return all the Items for some Keyword, as a slice.
pub fn get_slice(&self, t: T) -> &[Item<'a>] {
self.get_tokval(t).as_slice()
}
/// Return a single Item for some Keyword, if there is exactly one.
pub fn get(&self, t: T) -> Option<&Item<'a>> {
self.get_tokval(t).singleton()
}
/// Return a single Item for some Keyword, giving an error if there
/// is not exactly one.
pub fn get_required(&self, t: T) -> Result<&Item<'a>> {
self.get(t).ok_or(Error::Internal(Position::Unknown)) // XXXX
}
/// Return a proxy MaybeItem object for some keyword.
//
/// A MaybeItem is used to represent an object that might or might
/// not be there.
pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a> {
MaybeItem(self.get(t))
}
/// Parsing implementation: try to insert an `item`.
///
/// The `item` must have parsed Keyword `t`; it is allowed to repeat if
/// `may_repeat` is true.
fn add_tok(&mut self, t: T, may_repeat: bool, item: Item<'a>) -> Result<()> {
let idx = Keyword::idx(t);
if idx >= self.v.len() {
@ -104,7 +150,10 @@ impl<'a, T: Keyword> Section<'a, T> {
}
}
// note: does not check multiplicity or absence.
/// Check whether a single Item matches a TokenFmt rule, with respect
/// to its number of arguments.
///
/// TODO: Move this to rules?
fn item_matches_fmt_args<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>) -> Result<()> {
let n_args = item.n_args();
if let Some(max) = fmt.max_args {
@ -121,6 +170,10 @@ fn item_matches_fmt_args<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a
Ok(())
}
/// Check whether a single Item matches a TokenFmt rule, with respect
/// to its object's presence and type.
///
/// TODO: Move this to rules?
fn item_matches_fmt_obj<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>) -> Result<()> {
match (&fmt.obj, item.has_obj()) {
(ObjKind::NoObj, true) => Err(Error::UnexpectedObject(t.to_str(), item.pos())),
@ -128,7 +181,11 @@ fn item_matches_fmt_obj<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>
(_, _) => Ok(()),
}
}
impl<T: Keyword> SectionRules<T> {
/// Create a new SectionRules with no rules.
///
/// By default, no Keyword is allowed by this SectionRules.
pub fn new() -> Self {
let n = T::n_vals();
let mut rules = Vec::with_capacity(n);
@ -136,11 +193,20 @@ impl<T: Keyword> SectionRules<T> {
SectionRules { rules }
}
/// Add a rule to this SectionRules, based on a TokenFmtBuilder.
///
/// Requires that no rule yet exists for the provided keyword.
pub fn add(&mut self, t: TokenFmtBuilder<T>) {
let idx = t.idx();
assert!(self.rules[idx].is_none());
self.rules[idx] = Some(t.into());
}
/// Parse a stream of tokens into a Section object without (fully)
/// verifying them.
///
/// Some errors are detected early, but others only show up later
/// when we validate more carefully.
fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
where
I: Iterator<Item = Result<Item<'a>>>,
@ -162,6 +228,8 @@ impl<T: Keyword> SectionRules<T> {
Ok(())
}
/// Check whether the tokens in a section we've parsed conform to
/// these rules.
fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> {
// If there are more items in the section than we have rules for,
// they may be unexpected.
@ -174,15 +242,19 @@ impl<T: Keyword> SectionRules<T> {
}
}
// Iterate over every item, and make sure it matches the
// corresponding rule.
for (idx, (rule, t)) in self.rules.iter().zip(s.v.iter()).enumerate() {
match rule {
None => {
// We aren't supposed to have any of these.
if t.count() > 0 {
let tokname = T::idx_to_str(idx);
return Err(Error::UnexpectedToken(tokname, t.first().unwrap().pos()));
}
}
Some(rule) => {
// We're allowed to have this. Is the number right?
if t.count() == 0 && rule.required {
return Err(Error::MissingToken(T::idx_to_str(idx)));
} else if t.count() > 1 && !rule.may_repeat {
@ -191,6 +263,7 @@ impl<T: Keyword> SectionRules<T> {
t.second().unwrap().pos(),
));
}
// The number is right. Check each individual item.
for item in t.as_slice() {
let tok = T::from_idx(idx).unwrap();
item_matches_fmt_args(tok, rule, item)?;
@ -203,6 +276,7 @@ impl<T: Keyword> SectionRules<T> {
Ok(())
}
/// Parse a stream of tokens into a validated section.
pub fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
where
I: Iterator<Item = Result<Item<'a>>>,
@ -214,9 +288,13 @@ impl<T: Keyword> SectionRules<T> {
}
}
// because i can't add methods to Option<Item>
/// Represents an Item that might not be present, whose arguments we
/// want to inspect. If the Item is there, this acts like a proxy to the
/// item; otherwise, it treats the item as having no arguments.
pub struct MaybeItem<'a, 'b>(Option<&'a Item<'b>>);
// All methods here are as for Item.
impl<'a, 'b> MaybeItem<'a, 'b> {
pub fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
where
@ -224,7 +302,7 @@ impl<'a, 'b> MaybeItem<'a, 'b> {
{
match self.0 {
Some(item) => item.parse_arg(idx).map(Some),
None => Ok(None),
None => Ok(None), // XXXX is this correct?
}
}
pub fn parse_optional_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>