netdoc: move keyword into Item type.

This saves us from having to check specific strings in most cases.
Nick Mathewson 2020-05-19 12:01:06 -04:00
parent e3d68d6db6
commit 8468c8ec49
5 changed files with 61 additions and 51 deletions
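
The shape of the change, in brief: before this commit an Item exposed its keyword only as a raw &str, so every consumer either compared strings or re-ran the section's Keyword parser; afterwards Item is generic over the Keyword type and carries the parsed value alongside the original slice. A minimal sketch of the before/after, using a made-up MyKw enum rather than the crate's real Keyword machinery:

// Hypothetical stand-in types; not the crate's real definitions.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum MyKw {
    OnionKey,
}

/// Before: the item only knows its keyword as a string slice.
struct OldItem<'a> {
    kwd: &'a str,
}

/// After: the item carries the parsed keyword plus the original slice.
struct NewItem<'a, K> {
    kwd: K,
    kwd_str: &'a str,
}

fn main() {
    let old = OldItem { kwd: "onion-key" };
    // Every caller has to repeat a string comparison (or re-parse the string).
    assert!(old.kwd == "onion-key");

    let new = NewItem { kwd: MyKw::OnionKey, kwd_str: "onion-key" };
    // Callers can now compare enum values directly...
    assert_eq!(new.kwd, MyKw::OnionKey);
    // ...while the raw string is still there for error messages and positions.
    assert_eq!(new.kwd_str, "onion-key");
}

The remaining hunks are mostly this signature change rippling through TokVal, Section, SectionRules, TokenFmt, NetDocReader, and MaybeItem.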

View File

@ -77,7 +77,7 @@ impl Microdesc {
let start_pos = {
let first = items.peek();
let kwd = match first {
Some(Ok(tok)) => tok.get_kwd(),
Some(Ok(tok)) => tok.get_kwd_str(),
_ => return Err(Error::MissingToken("onion-key")),
};
if kwd != "onion-key" {
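
This call site in the microdescriptor parser still wants the raw string: it peeks at the first token and checks that the document really starts with onion-key, so the renamed get_kwd_str() accessor is all it needs. A rough sketch of that peek-and-check pattern over a fallible token iterator (the token and error types here are simplified stand-ins, not the crate's):

// Simplified sketch; the real code peeks a NetDocReader and reports Error::MissingToken.
fn starts_with_onion_key<'a, I>(items: &mut std::iter::Peekable<I>) -> Result<(), String>
where
    I: Iterator<Item = Result<&'a str, String>>,
{
    match items.peek() {
        // The first token parsed cleanly and has the expected keyword string.
        Some(Ok(kwd)) if *kwd == "onion-key" => Ok(()),
        // Anything else (wrong keyword, parse error, empty input) is an error.
        _ => Err("missing onion-key".to_string()),
    }
}

fn main() {
    let toks: Vec<Result<&str, String>> = vec![Ok("onion-key"), Ok("ntor-onion-key")];
    let mut it = toks.into_iter().peekable();
    assert!(starts_with_onion_key(&mut it).is_ok());
}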

View File

@ -28,18 +28,18 @@ pub struct SectionRules<T: Keyword> {
/// The entry or entries for a particular keyword within a document.
#[derive(Clone)]
enum TokVal<'a> {
enum TokVal<'a, K: Keyword> {
/// No value has been found.
None,
/// A single value has been found; we're storing it in place.
///
/// We use a one-element array here so that we can return a slice
/// of the array.
Some([Item<'a>; 1]),
Some([Item<'a, K>; 1]),
/// Multiple values have been found; they go in a vector.
Multi(Vec<Item<'a>>),
Multi(Vec<Item<'a, K>>),
}
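
The TokVal change is purely mechanical: the enum gains a K parameter so it can hold Item<'a, K>, while keeping the one-element-array trick noted in the comment, which lets a single stored item and a vector of items both be handed back as a slice. A small demonstration of that array-as-slice idea, with a plain u32 standing in for Item:

// Toy version of TokVal, with u32 standing in for Item<'a, K>.
enum Val {
    None,
    Some([u32; 1]),
    Multi(Vec<u32>),
}

impl Val {
    // Both the single-value and multi-value cases can be returned as &[u32].
    fn as_slice(&self) -> &[u32] {
        match self {
            Val::None => &[],
            Val::Some(t) => &t[..],
            Val::Multi(v) => &v[..],
        }
    }
}

fn main() {
    assert_eq!(Val::Some([7]).as_slice(), &[7u32]);
    assert_eq!(Val::Multi(vec![1, 2]).as_slice(), &[1u32, 2]);
    assert!(Val::None.as_slice().is_empty());
}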
impl<'a> TokVal<'a> {
impl<'a, K: Keyword> TokVal<'a, K> {
/// Return the number of Items for this value.
fn count(&self) -> usize {
match self {
@ -49,7 +49,7 @@ impl<'a> TokVal<'a> {
}
}
/// Return the first Item for this value, or None if there wasn't one.
fn first(&self) -> Option<&Item<'a>> {
fn first(&self) -> Option<&Item<'a, K>> {
match self {
TokVal::None => None,
TokVal::Some([t]) => Some(t),
@ -57,7 +57,7 @@ impl<'a> TokVal<'a> {
}
}
/// Return the Item for this value, if there is exactly one.
fn singleton(&self) -> Option<&Item<'a>> {
fn singleton(&self) -> Option<&Item<'a, K>> {
match self {
TokVal::None => None,
TokVal::Some([t]) => Some(t),
@ -65,7 +65,7 @@ impl<'a> TokVal<'a> {
}
}
/// Return all the Items for this value, as a slice.
fn as_slice(&self) -> &[Item<'a>] {
fn as_slice(&self) -> &[Item<'a, K>] {
match self {
TokVal::None => &[],
TokVal::Some(t) => &t[..],
@ -77,9 +77,7 @@ impl<'a> TokVal<'a> {
/// A Section is the result of sorting a document's entries by keyword.
pub struct Section<'a, T: Keyword> {
/// Map from Keyword index to TokVal
v: Vec<TokVal<'a>>,
/// Tells Rust it's okay that we are parameterizing on T.
_t: std::marker::PhantomData<T>,
v: Vec<TokVal<'a, T>>,
}
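
Section can drop its PhantomData<T> field because T now occurs in a real field, v: Vec<TokVal<'a, T>>; PhantomData is only required when a declared type parameter would otherwise go unused. A tiny illustration of that rule, with throwaway struct names:

use std::marker::PhantomData;

// T appears in no field, so Rust rejects this without a marker...
struct Tagged<T> {
    _t: PhantomData<T>,
}

// ...but once T shows up in a real field, the marker is unnecessary.
struct Holder<T> {
    v: Vec<T>,
}

fn main() {
    let _a: Tagged<u8> = Tagged { _t: PhantomData };
    let b: Holder<u8> = Holder { v: vec![1, 2, 3] };
    println!("{} items", b.v.len());
}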
impl<'a, T: Keyword> Section<'a, T> {
@ -88,22 +86,19 @@ impl<'a, T: Keyword> Section<'a, T> {
let n = T::n_vals();
let mut v = Vec::with_capacity(n);
v.resize(n, TokVal::None);
Section {
v,
_t: std::marker::PhantomData,
}
Section { v }
}
/// Helper: return the tokval for some Keyword.
fn get_tokval(&self, t: T) -> &TokVal<'a> {
fn get_tokval(&self, t: T) -> &TokVal<'a, T> {
let idx = t.idx();
&self.v[idx]
}
/// Return all the Items for some Keyword, as a slice.
pub fn get_slice(&self, t: T) -> &[Item<'a>] {
pub fn get_slice(&self, t: T) -> &[Item<'a, T>] {
self.get_tokval(t).as_slice()
}
/// Return a single Item for some Keyword, if there is exactly one.
pub fn get(&self, t: T) -> Option<&Item<'a>> {
pub fn get(&self, t: T) -> Option<&Item<'a, T>> {
self.get_tokval(t).singleton()
}
/// Return a single Item for some Keyword, giving an error if there
@ -111,20 +106,20 @@ impl<'a, T: Keyword> Section<'a, T> {
///
/// It is usually a mistake to use this function on a Keyword that is
/// not required.
pub fn get_required(&self, t: T) -> Result<&Item<'a>> {
pub fn get_required(&self, t: T) -> Result<&Item<'a, T>> {
self.get(t).ok_or_else(|| Error::MissingToken(t.to_str()))
}
/// Return a proxy MaybeItem object for some keyword.
///
/// A MaybeItem is used to represent an object that might or might
/// not be there.
pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a> {
pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> {
MaybeItem::from_option(self.get(t))
}
/// Insert an `item`.
///
/// The `item` must have parsed Keyword `t`.
fn add_tok(&mut self, t: T, item: Item<'a>) {
fn add_tok(&mut self, t: T, item: Item<'a, T>) {
let idx = Keyword::idx(t);
if idx >= self.v.len() {
self.v.resize(idx + 1, TokVal::None);
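
add_tok shows how a Section stores entries: the vector is indexed by the keyword's idx(), grown on demand, and the slot for a keyword accumulates however many items arrive for it. A rough sketch of that keyword-indexed storage scheme (the Kw enum and Slots type are invented for illustration):

// Toy keyword with a stable index per variant.
#[derive(Clone, Copy)]
enum Kw {
    Router = 0,
    Policy = 1,
}

// One slot per keyword index; each slot collects every item seen for it.
struct Slots {
    v: Vec<Vec<&'static str>>,
}

impl Slots {
    fn new() -> Self {
        Slots { v: Vec::new() }
    }
    fn add(&mut self, kw: Kw, item: &'static str) {
        let idx = kw as usize;
        if idx >= self.v.len() {
            // Grow the table so that `idx` is a valid slot.
            self.v.resize(idx + 1, Vec::new());
        }
        self.v[idx].push(item);
    }
    fn get(&self, kw: Kw) -> &[&'static str] {
        self.v.get(kw as usize).map(|v| &v[..]).unwrap_or(&[])
    }
}

fn main() {
    let mut s = Slots::new();
    s.add(Kw::Policy, "accept 80");
    s.add(Kw::Policy, "reject *:*");
    assert_eq!(s.get(Kw::Policy).len(), 2);
    assert!(s.get(Kw::Router).is_empty());
}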
@ -171,12 +166,12 @@ impl<T: Keyword> SectionRules<T> {
/// when we validate more carefully.
fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()>
where
I: Iterator<Item = Result<Item<'a>>>,
I: Iterator<Item = Result<Item<'a, T>>>,
{
for item in tokens {
let item = item?;
let tok = T::from_str(item.get_kwd());
let tok = item.get_kwd();
let tok_idx = tok.idx();
if let Some(rule) = &self.rules[tok_idx] {
// we want this token.
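
The string-to-Keyword conversion now happens once, in the tokenizer, instead of at every consumer; for parse_unverified to simply call item.get_kwd(), the conversion has to be total, with unknown strings presumably mapped to a catch-all "unrecognized" variant rather than an error. A sketch of such an infallible lookup (the variant names are invented):

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Kw {
    Router,
    OnionKey,
    // Catch-all so that from_str never has to fail.
    Unrecognized,
}

impl Kw {
    fn from_str(s: &str) -> Kw {
        match s {
            "router" => Kw::Router,
            "onion-key" => Kw::OnionKey,
            _ => Kw::Unrecognized,
        }
    }
}

fn main() {
    assert_eq!(Kw::from_str("router"), Kw::Router);
    // Unknown keywords still produce a usable value instead of an error.
    assert_eq!(Kw::from_str("totally-new-keyword"), Kw::Unrecognized);
}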
@ -233,7 +228,7 @@ impl<T: Keyword> SectionRules<T> {
/// Parse a stream of tokens into a validated section.
pub fn parse<'a, I>(&self, tokens: &mut I) -> Result<Section<'a, T>>
where
I: Iterator<Item = Result<Item<'a>>>,
I: Iterator<Item = Result<Item<'a, T>>>,
{
let mut section = Section::new();
self.parse_unverified(tokens, &mut section)?;

View File

@ -226,21 +226,22 @@ impl RouterDesc {
Section<'a, RouterKW>,
)> {
use crate::util::*;
use RouterKW::*;
let reader = crate::tokenize::NetDocReader::new(s);
// Parse everything up through the header.
let mut reader = reader.pause_at(|item| {
item.is_ok()
&& item.as_ref().unwrap().get_kwd() != "router"
&& item.as_ref().unwrap().get_kwd() != "identity-ed25519"
&& item.as_ref().unwrap().get_kwd() != ROUTER
&& item.as_ref().unwrap().get_kwd() != IDENTITY_ED25519
});
let header = ROUTER_HEADER_RULES.parse(&mut reader)?;
// Parse everything up to but not including the signature.
let mut reader = reader.new_pred(|item| {
item.is_ok() && (item.as_ref().unwrap().get_kwd() == "router-signature")
|| (item.as_ref().unwrap().get_kwd() == "router-sig-ed25519")
item.is_ok() && (item.as_ref().unwrap().get_kwd() == ROUTER_SIGNATURE)
|| (item.as_ref().unwrap().get_kwd() == ROUTER_SIG_ED25519)
});
let body = ROUTER_BODY_RULES.parse(&mut reader)?;
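
With `use RouterKW::*` in scope, the pause predicates compare the parsed keyword against enum variants rather than string literals, so a misspelled keyword becomes a compile error instead of a predicate that silently never matches. A simplified sketch of cutting a token stream at a boundary keyword this way (take_while here is just a stand-in, not the crate's pause_at/new_pred API):

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Kw {
    Router,
    Bandwidth,
    RouterSignature,
}
use Kw::*;

fn main() {
    let items = vec![Router, Bandwidth, RouterSignature];
    // Take the "body" items, stopping when the signature keyword appears.
    let body: Vec<Kw> = items
        .iter()
        .copied()
        .take_while(|kw| *kw != RouterSignature)
        .collect();
    assert_eq!(body, vec![Router, Bandwidth]);
    // With strings, "router-signature" vs "router_signature" would compile
    // fine and simply never match; with variants, the typo cannot compile.
}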
@ -481,7 +482,7 @@ impl RouterDesc {
let ipv4_policy = {
let mut pol = AddrPolicy::new();
for ruletok in body.get_slice(POLICY).iter() {
let accept = ruletok.get_kwd() == "accept";
let accept = ruletok.get_kwd_str() == "accept";
let pat: AddrPortPattern = ruletok
.args_as_str()
.parse()
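
This spot keeps the string comparison, presumably because the single POLICY keyword covers both accept and reject lines, so the enum value alone cannot distinguish the two and get_kwd_str() is the natural accessor. A toy sketch of that situation (the Item and Rule types below are invented, not the crate's):

// One enum variant standing in for both "accept" and "reject" lines.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Kw {
    Policy,
}

struct Item<'a> {
    kwd: Kw,
    kwd_str: &'a str,
    args: &'a str,
}

#[derive(Debug, PartialEq)]
struct Rule {
    accept: bool,
    pattern: String,
}

fn main() {
    let items = [
        Item { kwd: Kw::Policy, kwd_str: "accept", args: "*:80" },
        Item { kwd: Kw::Policy, kwd_str: "reject", args: "*:*" },
    ];
    let rules: Vec<Rule> = items
        .iter()
        .filter(|it| it.kwd == Kw::Policy)
        .map(|it| Rule {
            // The enum says "this is a policy line"; the string says which kind.
            accept: it.kwd_str == "accept",
            pattern: it.args.to_string(),
        })
        .collect();
    assert_eq!(rules[0], Rule { accept: true, pattern: "*:80".into() });
    assert!(!rules[1].accept);
}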

View File

@ -41,7 +41,7 @@ impl<T: Keyword> TokenFmt<T> {
}
/// Check whether a single Item matches this TokenFmt rule, with respect
/// to its number of arguments.
fn item_matches_args<'a>(&self, item: &Item<'a>) -> Result<()> {
fn item_matches_args<'a>(&self, item: &Item<'a, T>) -> Result<()> {
let n_args = item.n_args();
if let Some(max) = self.max_args {
if n_args > max {
@ -58,7 +58,7 @@ impl<T: Keyword> TokenFmt<T> {
/// Check whether a single Item matches a TokenFmt rule, with respect
/// to its object's presence and type.
fn item_matches_obj<'a>(&self, item: &Item<'a>) -> Result<()> {
fn item_matches_obj<'a>(&self, item: &Item<'a, T>) -> Result<()> {
match (&self.obj, item.has_obj()) {
(ObjKind::NoObj, true) => Err(Error::UnexpectedObject(self.kwd.to_str(), item.pos())),
(ObjKind::RequireObj, false) => {
@ -70,13 +70,13 @@ impl<T: Keyword> TokenFmt<T> {
/// Check whether a single item has the right number of arguments
/// and object.
pub fn check_item<'a>(&self, item: &Item<'a>) -> Result<()> {
pub fn check_item<'a>(&self, item: &Item<'a, T>) -> Result<()> {
self.item_matches_args(item)?;
self.item_matches_obj(item)
}
/// Check whether this kind of item may appear this many times.
pub fn check_multiplicity<'a>(&self, items: &[Item<'a>]) -> Result<()> {
pub fn check_multiplicity<'a>(&self, items: &[Item<'a, T>]) -> Result<()> {
match items.len() {
0 => {
if self.required {
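
In these checks only the Item parameter's type changes; the validation itself, bounding the number of arguments and the number of occurrences per keyword, is untouched. A condensed sketch of that style of rule checking (field names and error strings are made up, not TokenFmt's real ones):

// Minimal per-keyword rule: argument bounds plus required/repeatable flags.
struct Rule {
    min_args: Option<usize>,
    max_args: Option<usize>,
    required: bool,
    may_repeat: bool,
}

impl Rule {
    fn check_args(&self, n_args: usize) -> Result<(), String> {
        if let Some(max) = self.max_args {
            if n_args > max {
                return Err(format!("too many arguments: {} > {}", n_args, max));
            }
        }
        if let Some(min) = self.min_args {
            if n_args < min {
                return Err(format!("too few arguments: {} < {}", n_args, min));
            }
        }
        Ok(())
    }
    fn check_count(&self, n_items: usize) -> Result<(), String> {
        match n_items {
            0 if self.required => Err("missing required token".into()),
            n if n > 1 && !self.may_repeat => Err("token may not repeat".into()),
            _ => Ok(()),
        }
    }
}

fn main() {
    let rule = Rule { min_args: Some(1), max_args: Some(2), required: true, may_repeat: false };
    assert!(rule.check_args(1).is_ok());
    assert!(rule.check_args(3).is_err());
    assert!(rule.check_count(0).is_err());
    assert!(rule.check_count(1).is_ok());
}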

View File

@ -5,6 +5,7 @@
//! string into Items.
use crate::argtype::FromBytes;
use crate::keyword::Keyword;
use crate::{Error, Pos, Result};
use std::cell::{Ref, RefCell};
use std::str::FromStr;
@ -36,8 +37,9 @@ pub struct Object<'a> {
/// This is a zero-copy implementation that points to slices within a
/// containing string.
#[derive(Clone, Debug)]
pub struct Item<'a> {
kwd: &'a str,
pub struct Item<'a, K: Keyword> {
kwd: K,
kwd_str: &'a str,
args: &'a str,
/// The arguments, split by whitespace. This vector is constructed
/// as needed, using interior mutability.
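
Item's split_args field uses interior mutability so the whitespace-split argument vector can be built lazily, the first time an individual argument is requested, without the accessor methods needing &mut self. A standalone sketch of that caching pattern (simplified relative to the real field, which also cooperates with Ref and the argument accessors):

use std::cell::RefCell;

struct Args<'a> {
    args: &'a str,
    // Built on first use; later calls reuse the cached vector.
    split: RefCell<Option<Vec<&'a str>>>,
}

impl<'a> Args<'a> {
    fn new(args: &'a str) -> Self {
        Args { args, split: RefCell::new(None) }
    }
    // Note that this takes &self, not &mut self.
    fn n_args(&self) -> usize {
        let mut cache = self.split.borrow_mut();
        let v = cache.get_or_insert_with(|| self.args.split_whitespace().collect());
        v.len()
    }
}

fn main() {
    let a = Args::new("4096 5120 6144");
    assert_eq!(a.n_args(), 3);
    // The second call hits the cache instead of re-splitting.
    assert_eq!(a.n_args(), 3);
}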
@ -47,17 +49,23 @@ pub struct Item<'a> {
/// A cursor into a string that returns Items one by one.
#[derive(Clone, Debug)]
pub struct NetDocReader<'a> {
pub struct NetDocReader<'a, K: Keyword> {
/// The string we're parsing.
s: &'a str,
/// Our position within the string.
off: usize,
/// Tells Rust it's okay that we are parameterizing on K.
_k: std::marker::PhantomData<K>,
}
impl<'a> NetDocReader<'a> {
impl<'a, K: Keyword> NetDocReader<'a, K> {
/// Create a new NetDocReader to split a string into tokens.
pub fn new(s: &'a str) -> Self {
NetDocReader { s, off: 0 }
NetDocReader {
s,
off: 0,
_k: std::marker::PhantomData,
}
}
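
NetDocReader is the mirror image of Section above: K appears only in the Items it will produce, not in any stored field, so here a PhantomData<K> marker has to be added rather than removed. A small illustration of a generic producer that needs such a marker (the Producer type is invented):

use std::marker::PhantomData;

// Produces values of type K but stores none, so it needs a marker field.
struct Producer<K> {
    n: usize,
    _k: PhantomData<K>,
}

impl<K: Default> Iterator for Producer<K> {
    type Item = K;
    fn next(&mut self) -> Option<K> {
        if self.n == 0 {
            None
        } else {
            self.n -= 1;
            Some(K::default())
        }
    }
}

fn main() {
    let p: Producer<u32> = Producer { n: 3, _k: PhantomData };
    assert_eq!(p.count(), 3);
}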
/// Return the current Pos within the string.
fn get_pos(&self, pos: usize) -> Pos {
@ -170,15 +178,17 @@ impl<'a> NetDocReader<'a> {
///
/// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted.
/// Returns Err on failure.
pub fn get_item(&mut self) -> Result<Option<Item<'a>>> {
pub fn get_item(&mut self) -> Result<Option<Item<'a, K>>> {
if self.remaining() == 0 {
return Ok(None);
}
let (kwd, args) = self.get_kwdline()?;
let (kwd_str, args) = self.get_kwdline()?;
let object = self.get_object()?;
let split_args = RefCell::new(None);
let kwd = K::from_str(kwd_str);
Ok(Some(Item {
kwd,
kwd_str,
args,
split_args,
object,
@ -226,8 +236,8 @@ fn tag_keyword_ok(s: &str) -> bool {
}
/// When used as an Iterator, returns a sequence of Result<Item>.
impl<'a> Iterator for NetDocReader<'a> {
type Item = Result<Item<'a>>;
impl<'a, K: Keyword> Iterator for NetDocReader<'a, K> {
type Item = Result<Item<'a, K>>;
fn next(&mut self) -> Option<Self::Item> {
self.get_item().transpose()
}
@ -244,11 +254,15 @@ fn base64_decode_multiline(s: &str) -> std::result::Result<Vec<u8>, base64::Deco
Ok(v)
}
impl<'a> Item<'a> {
/// Return the keyword part of this item.
pub fn get_kwd(&self) -> &'a str {
impl<'a, K: Keyword> Item<'a, K> {
/// Return the parsed keyword part of this item.
pub fn get_kwd(&self) -> K {
self.kwd
}
/// Return the keyword part of this item, as a string.
pub fn get_kwd_str(&self) -> &'a str {
self.kwd_str
}
/// Return the arguments of this item, as a single string.
pub fn args_as_str(&self) -> &'a str {
self.args
@ -340,13 +354,13 @@ impl<'a> Item<'a> {
/// This position won't be useful unless it is later contextualized
/// with the containing string.
pub fn pos(&self) -> Pos {
Pos::at(self.kwd)
Pos::at(self.kwd_str)
}
/// Return the position of this Item in a string.
///
/// Returns None if this item doesn't actually belong to the string.
pub fn offset_in(&self, s: &str) -> Option<usize> {
crate::util::str_offset(s, self.kwd)
crate::util::str_offset(s, self.kwd_str)
}
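
Position reporting is one reason the raw slice stays in the struct: Pos::at and str_offset work from the &str's location inside the containing document, which the enum value cannot supply. A rough sketch of how an offset-within-parent-string helper of that kind can be written (an assumption about the approach, not the crate's actual util::str_offset):

/// If `sub` is a slice of `outer`, return its byte offset; otherwise None.
fn str_offset(outer: &str, sub: &str) -> Option<usize> {
    let outer_start = outer.as_ptr() as usize;
    let sub_start = sub.as_ptr() as usize;
    if sub_start >= outer_start && sub_start + sub.len() <= outer_start + outer.len() {
        Some(sub_start - outer_start)
    } else {
        None
    }
}

fn main() {
    let doc = "router foo 9001";
    let kwd_str = &doc[0..6];
    assert_eq!(kwd_str, "router");
    assert_eq!(str_offset(doc, kwd_str), Some(0));
    // A string with merely equal contents is not a slice *of* doc.
    let other = String::from("router");
    assert_eq!(str_offset(doc, &other), None);
}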
/// Return the position of the n'th argument of this item.
///
@ -367,7 +381,7 @@ impl<'a> Item<'a> {
let last_arg = args[args.len() - 1];
Pos::at_end_of(last_arg)
} else {
Pos::at_end_of(self.kwd)
Pos::at_end_of(self.kwd_str)
}
}
/// Return the position of the end of this object.
@ -383,11 +397,11 @@ impl<'a> Item<'a> {
/// want to inspect. If the Item is there, this acts like a proxy to the
/// item; otherwise, it treats the item as having no arguments.
pub struct MaybeItem<'a, 'b>(Option<&'a Item<'b>>);
pub struct MaybeItem<'a, 'b, K: Keyword>(Option<&'a Item<'b, K>>);
// All methods here are as for Item.
impl<'a, 'b> MaybeItem<'a, 'b> {
pub fn from_option(opt: Option<&'a Item<'b>>) -> Self {
impl<'a, 'b, K: Keyword> MaybeItem<'a, 'b, K> {
pub fn from_option(opt: Option<&'a Item<'b, K>>) -> Self {
MaybeItem(opt)
}
pub fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
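
MaybeItem simply gains the same K parameter so it can wrap an Option<&Item<'b, K>>; its role is unchanged: act as a proxy when the optional item is present and as an item with no arguments when it is absent. A stripped-down sketch of that proxy pattern (Item reduced to a bare argument list, and parse_arg simplified to return the parse error directly):

use std::str::FromStr;

// Stand-in for Item: just a list of arguments.
struct Item<'a> {
    args: Vec<&'a str>,
}

// Wraps an optional reference and answers questions with sensible defaults.
struct MaybeItem<'a, 'b>(Option<&'a Item<'b>>);

impl<'a, 'b> MaybeItem<'a, 'b> {
    fn from_option(opt: Option<&'a Item<'b>>) -> Self {
        MaybeItem(opt)
    }
    /// Parse argument `idx`, returning Ok(None) if the item or arg is absent.
    fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>, V::Err> {
        match self.0.and_then(|item| item.args.get(idx)) {
            Some(s) => s.parse().map(Some),
            None => Ok(None),
        }
    }
}

fn main() {
    let item = Item { args: vec!["9001", "0"] };
    let present = MaybeItem::from_option(Some(&item));
    let absent = MaybeItem::from_option(None);

    assert_eq!(present.parse_arg::<u16>(0).unwrap(), Some(9001));
    assert_eq!(absent.parse_arg::<u16>(0).unwrap(), None);
}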