Document parse.rs in tor-netdoc, and note things to move or fix
This commit is contained in:
parent
07d6cd956a
commit
624dca4056
|
@ -1,19 +1,42 @@
|
|||
//! Based on a set of rules, validate a token stream and collect the
|
||||
//! tokens by type.
|
||||
//!
|
||||
//! See the "rules" module for definitions of rules that are used to
|
||||
//! govern this process.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! This is an internal API, so see the routerdesc.rs source for an
|
||||
//! example of use.
|
||||
|
||||
use crate::rules::*;
|
||||
use crate::tokenize::*;
|
||||
use crate::{Error, Position, Result};
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Describe the rules for one section of a document.
|
||||
///
|
||||
/// The rules are represented as a mapping from token index to
|
||||
/// rules::TokenFmt.
|
||||
pub struct SectionRules<T: Keyword> {
|
||||
rules: Vec<Option<TokenFmt<T>>>,
|
||||
}
|
||||
|
||||
/// The entry or entries for a particular keyword within a document.
|
||||
#[derive(Clone)]
|
||||
enum TokVal<'a> {
|
||||
/// No value has been found.
|
||||
None,
|
||||
Some([Item<'a>; 1]), // using a one-element array so we can slice it.
|
||||
/// A single value has been found; we're storing it in place.
|
||||
///
|
||||
/// We use a one-element array here so that we can return a slice
|
||||
/// of the array.
|
||||
Some([Item<'a>; 1]),
|
||||
/// Multiple vlaues have been found; they go in a vector.
|
||||
Multi(Vec<Item<'a>>),
|
||||
}
|
||||
impl<'a> TokVal<'a> {
|
||||
/// Return the number of Items for this value.
|
||||
fn count(&self) -> usize {
|
||||
match self {
|
||||
TokVal::None => 0,
|
||||
|
@ -21,6 +44,7 @@ impl<'a> TokVal<'a> {
|
|||
TokVal::Multi(v) => v.len(),
|
||||
}
|
||||
}
|
||||
/// Return the first Item for this value, or None if there wasn't one.
|
||||
fn first(&self) -> Option<&Item<'a>> {
|
||||
match self {
|
||||
TokVal::None => None,
|
||||
|
@ -28,12 +52,16 @@ impl<'a> TokVal<'a> {
|
|||
TokVal::Multi(v) => Some(&v[0]),
|
||||
}
|
||||
}
|
||||
/// Return the second Item for this value, or None if there wasn't one.
|
||||
///
|
||||
/// Used to make duplicate-token errors.
|
||||
fn second(&self) -> Option<&Item<'a>> {
|
||||
match self {
|
||||
TokVal::Multi(v) if v.len() > 1 => Some(&v[1]),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
/// Return the Item for this value, if there is exactly one.
|
||||
fn singleton(&self) -> Option<&Item<'a>> {
|
||||
match self {
|
||||
TokVal::None => None,
|
||||
|
@ -41,6 +69,7 @@ impl<'a> TokVal<'a> {
|
|||
TokVal::Multi(_) => None,
|
||||
}
|
||||
}
|
||||
/// Return all the Items for this value, as a slice.
|
||||
fn as_slice(&self) -> &[Item<'a>] {
|
||||
match self {
|
||||
TokVal::None => &[],
|
||||
|
@ -50,12 +79,16 @@ impl<'a> TokVal<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// A Section is the result of sorting a document's entries by keyword.
|
||||
pub struct Section<'a, T: Keyword> {
|
||||
/// Map from Keyword index to TokVal
|
||||
v: Vec<TokVal<'a>>,
|
||||
/// Tells Rust it's okay that we are parameterizing on T.
|
||||
_t: std::marker::PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<'a, T: Keyword> Section<'a, T> {
|
||||
/// Make a new empty Section.
|
||||
fn new() -> Self {
|
||||
let n = T::n_vals();
|
||||
let mut v = Vec::with_capacity(n);
|
||||
|
@ -65,22 +98,35 @@ impl<'a, T: Keyword> Section<'a, T> {
|
|||
_t: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
/// Helper: return the tokval for some Keyword.
|
||||
fn get_tokval(&self, t: T) -> &TokVal<'a> {
|
||||
let idx = t.idx();
|
||||
&self.v[idx]
|
||||
}
|
||||
/// Return all the Items for some Keyword, as a slice.
|
||||
pub fn get_slice(&self, t: T) -> &[Item<'a>] {
|
||||
self.get_tokval(t).as_slice()
|
||||
}
|
||||
/// Return a single Item for some Keyword, if there is exactly one.
|
||||
pub fn get(&self, t: T) -> Option<&Item<'a>> {
|
||||
self.get_tokval(t).singleton()
|
||||
}
|
||||
/// Return a single Item for some Keyword, giving an error if there
|
||||
/// is not exactly one.
|
||||
pub fn get_required(&self, t: T) -> Result<&Item<'a>> {
|
||||
self.get(t).ok_or(Error::Internal(Position::Unknown)) // XXXX
|
||||
}
|
||||
/// Return a proxy MaybeItem object for some keyword.
|
||||
//
|
||||
/// A MaybeItem is used to represent an object that might or might
|
||||
/// not be there.
|
||||
pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a> {
|
||||
MaybeItem(self.get(t))
|
||||
}
|
||||
/// Parsing implementation: try to insert an `item`.
|
||||
///
|
||||
/// The `item` must have parsed Keyword `t`; it is allowed to repeat if
|
||||
/// `may_repeat` is true.
|
||||
fn add_tok(&mut self, t: T, may_repeat: bool, item: Item<'a>) -> Result<()> {
|
||||
let idx = Keyword::idx(t);
|
||||
if idx >= self.v.len() {
|
||||
|
@ -104,7 +150,10 @@ impl<'a, T: Keyword> Section<'a, T> {
|
|||
}
|
||||
}
|
||||
|
||||
// note: does not check multiplicity or absence.
|
||||
/// Check whether a single Item matches a TokenFmt rule, with respect
|
||||
/// to its number of arguments.
|
||||
///
|
||||
/// TODO: Move this to rules?
|
||||
fn item_matches_fmt_args<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>) -> Result<()> {
|
||||
let n_args = item.n_args();
|
||||
if let Some(max) = fmt.max_args {
|
||||
|
@ -121,6 +170,10 @@ fn item_matches_fmt_args<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Check whether a single Item matches a TokenFmt rule, with respect
|
||||
/// to its object's presence and type.
|
||||
///
|
||||
/// TODO: Move this to rules?
|
||||
fn item_matches_fmt_obj<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>) -> Result<()> {
|
||||
match (&fmt.obj, item.has_obj()) {
|
||||
(ObjKind::NoObj, true) => Err(Error::UnexpectedObject(t.to_str(), item.pos())),
|
||||
|
@ -128,7 +181,11 @@ fn item_matches_fmt_obj<'a, T: Keyword>(t: T, fmt: &TokenFmt<T>, item: &Item<'a>
|
|||
(_, _) => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Keyword> SectionRules<T> {
|
||||
/// Create a new SectionRules with no rules.
|
||||
///
|
||||
/// By default, no Keyword is allowed by this SectionRules.
|
||||
pub fn new() -> Self {
|
||||
let n = T::n_vals();
|
||||
let mut rules = Vec::with_capacity(n);
|
||||
|
@ -136,11 +193,20 @@ impl<T: Keyword> SectionRules<T> {
|
|||
SectionRules { rules }
|
||||
}
|
||||
|
||||
/// Add a rule to this SectionRules, based on a TokenFmtBuilder.
|
||||
///
|
||||
/// Requires that no rule yet exists for the provided keyword.
|
||||
pub fn add(&mut self, t: TokenFmtBuilder<T>) {
|
||||
let idx = t.idx();
|
||||
assert!(self.rules[idx].is_none());
|
||||
self.rules[idx] = Some(t.into());
|
||||
}
|
||||
|
||||
/// Parse a stream of tokens into a Section object without (fully)
|
||||
/// verifying them.
|
||||
///
|
||||
/// Some errors are detected early, but others only show up later
|
||||
/// when we validate more carefully.
|
||||
fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
|
||||
where
|
||||
I: Iterator<Item = Result<Item<'a>>>,
|
||||
|
@ -162,6 +228,8 @@ impl<T: Keyword> SectionRules<T> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Check whether the tokens in a section we've parsed conform to
|
||||
/// these rules.
|
||||
fn validate<'a>(&self, s: &Section<'a, T>) -> Result<()> {
|
||||
// If there are more items in the section than we have rules for,
|
||||
// they may be unexpected.
|
||||
|
@ -174,15 +242,19 @@ impl<T: Keyword> SectionRules<T> {
|
|||
}
|
||||
}
|
||||
|
||||
// Iterate over every item, and make sure it matches the
|
||||
// corresponding rule.
|
||||
for (idx, (rule, t)) in self.rules.iter().zip(s.v.iter()).enumerate() {
|
||||
match rule {
|
||||
None => {
|
||||
// We aren't supposed to have any of these.
|
||||
if t.count() > 0 {
|
||||
let tokname = T::idx_to_str(idx);
|
||||
return Err(Error::UnexpectedToken(tokname, t.first().unwrap().pos()));
|
||||
}
|
||||
}
|
||||
Some(rule) => {
|
||||
// We're allowed to have this. Is the number right?
|
||||
if t.count() == 0 && rule.required {
|
||||
return Err(Error::MissingToken(T::idx_to_str(idx)));
|
||||
} else if t.count() > 1 && !rule.may_repeat {
|
||||
|
@ -191,6 +263,7 @@ impl<T: Keyword> SectionRules<T> {
|
|||
t.second().unwrap().pos(),
|
||||
));
|
||||
}
|
||||
// The number is right. Check each individual item.
|
||||
for item in t.as_slice() {
|
||||
let tok = T::from_idx(idx).unwrap();
|
||||
item_matches_fmt_args(tok, rule, item)?;
|
||||
|
@ -203,6 +276,7 @@ impl<T: Keyword> SectionRules<T> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Parse a stream of tokens into a validated section.
|
||||
pub fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
|
||||
where
|
||||
I: Iterator<Item = Result<Item<'a>>>,
|
||||
|
@ -214,9 +288,13 @@ impl<T: Keyword> SectionRules<T> {
|
|||
}
|
||||
}
|
||||
|
||||
// because i can't add methods to Option<Item>
|
||||
/// Represents an Item that might not be present, whose arguments we
|
||||
/// want to inspect. If the Item is there, this acts like a proxy to the
|
||||
/// item; otherwise, it treats the item as having no arguments.
|
||||
|
||||
pub struct MaybeItem<'a, 'b>(Option<&'a Item<'b>>);
|
||||
|
||||
// All methods here are as for Item.
|
||||
impl<'a, 'b> MaybeItem<'a, 'b> {
|
||||
pub fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
|
||||
where
|
||||
|
@ -224,7 +302,7 @@ impl<'a, 'b> MaybeItem<'a, 'b> {
|
|||
{
|
||||
match self.0 {
|
||||
Some(item) => item.parse_arg(idx).map(Some),
|
||||
None => Ok(None),
|
||||
None => Ok(None), // XXXX is this correct?
|
||||
}
|
||||
}
|
||||
pub fn parse_optional_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
|
||||
|
|
Loading…
Reference in New Issue