From 8468c8ec49c809d732c73ea22f9485e84ff5dbb4 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Tue, 19 May 2020 12:01:06 -0400 Subject: [PATCH] netdoc: move keyword into Item type. This saves us from having to check specific strings in most cases. --- tor-netdoc/src/microdesc.rs | 2 +- tor-netdoc/src/parse.rs | 41 +++++++++++++---------------- tor-netdoc/src/routerdesc.rs | 11 ++++---- tor-netdoc/src/rules.rs | 8 +++--- tor-netdoc/src/tokenize.rs | 50 +++++++++++++++++++++++------------- 5 files changed, 61 insertions(+), 51 deletions(-) diff --git a/tor-netdoc/src/microdesc.rs b/tor-netdoc/src/microdesc.rs index 9ec4bbeb9..7ded712f4 100644 --- a/tor-netdoc/src/microdesc.rs +++ b/tor-netdoc/src/microdesc.rs @@ -77,7 +77,7 @@ impl Microdesc { let start_pos = { let first = items.peek(); let kwd = match first { - Some(Ok(tok)) => tok.get_kwd(), + Some(Ok(tok)) => tok.get_kwd_str(), _ => return Err(Error::MissingToken("onion-key")), }; if kwd != "onion-key" { diff --git a/tor-netdoc/src/parse.rs b/tor-netdoc/src/parse.rs index 6d1c5b3b5..9a0c35c52 100644 --- a/tor-netdoc/src/parse.rs +++ b/tor-netdoc/src/parse.rs @@ -28,18 +28,18 @@ pub struct SectionRules { /// The entry or entries for a particular keyword within a document. #[derive(Clone)] -enum TokVal<'a> { +enum TokVal<'a, K: Keyword> { /// No value has been found. None, /// A single value has been found; we're storing it in place. /// /// We use a one-element array here so that we can return a slice /// of the array. - Some([Item<'a>; 1]), + Some([Item<'a, K>; 1]), /// Multiple vlaues have been found; they go in a vector. - Multi(Vec>), + Multi(Vec>), } -impl<'a> TokVal<'a> { +impl<'a, K: Keyword> TokVal<'a, K> { /// Return the number of Items for this value. fn count(&self) -> usize { match self { @@ -49,7 +49,7 @@ impl<'a> TokVal<'a> { } } /// Return the first Item for this value, or None if there wasn't one. - fn first(&self) -> Option<&Item<'a>> { + fn first(&self) -> Option<&Item<'a, K>> { match self { TokVal::None => None, TokVal::Some([t]) => Some(t), @@ -57,7 +57,7 @@ impl<'a> TokVal<'a> { } } /// Return the Item for this value, if there is exactly one. - fn singleton(&self) -> Option<&Item<'a>> { + fn singleton(&self) -> Option<&Item<'a, K>> { match self { TokVal::None => None, TokVal::Some([t]) => Some(t), @@ -65,7 +65,7 @@ impl<'a> TokVal<'a> { } } /// Return all the Items for this value, as a slice. - fn as_slice(&self) -> &[Item<'a>] { + fn as_slice(&self) -> &[Item<'a, K>] { match self { TokVal::None => &[], TokVal::Some(t) => &t[..], @@ -77,9 +77,7 @@ impl<'a> TokVal<'a> { /// A Section is the result of sorting a document's entries by keyword. pub struct Section<'a, T: Keyword> { /// Map from Keyword index to TokVal - v: Vec>, - /// Tells Rust it's okay that we are parameterizing on T. - _t: std::marker::PhantomData, + v: Vec>, } impl<'a, T: Keyword> Section<'a, T> { @@ -88,22 +86,19 @@ impl<'a, T: Keyword> Section<'a, T> { let n = T::n_vals(); let mut v = Vec::with_capacity(n); v.resize(n, TokVal::None); - Section { - v, - _t: std::marker::PhantomData, - } + Section { v } } /// Helper: return the tokval for some Keyword. - fn get_tokval(&self, t: T) -> &TokVal<'a> { + fn get_tokval(&self, t: T) -> &TokVal<'a, T> { let idx = t.idx(); &self.v[idx] } /// Return all the Items for some Keyword, as a slice. - pub fn get_slice(&self, t: T) -> &[Item<'a>] { + pub fn get_slice(&self, t: T) -> &[Item<'a, T>] { self.get_tokval(t).as_slice() } /// Return a single Item for some Keyword, if there is exactly one. - pub fn get(&self, t: T) -> Option<&Item<'a>> { + pub fn get(&self, t: T) -> Option<&Item<'a, T>> { self.get_tokval(t).singleton() } /// Return a single Item for some Keyword, giving an error if there @@ -111,20 +106,20 @@ impl<'a, T: Keyword> Section<'a, T> { /// /// It is usually a mistake to use this function on a Keyword that is /// not required. - pub fn get_required(&self, t: T) -> Result<&Item<'a>> { + pub fn get_required(&self, t: T) -> Result<&Item<'a, T>> { self.get(t).ok_or_else(|| Error::MissingToken(t.to_str())) } /// Return a proxy MaybeItem object for some keyword. // /// A MaybeItem is used to represent an object that might or might /// not be there. - pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a> { + pub fn maybe<'b>(&'b self, t: T) -> MaybeItem<'b, 'a, T> { MaybeItem::from_option(self.get(t)) } /// Insert an `item`. /// /// The `item` must have parsed Keyword `t`. - fn add_tok(&mut self, t: T, item: Item<'a>) { + fn add_tok(&mut self, t: T, item: Item<'a, T>) { let idx = Keyword::idx(t); if idx >= self.v.len() { self.v.resize(idx + 1, TokVal::None); @@ -171,12 +166,12 @@ impl SectionRules { /// when we validate more carefully. fn parse_unverified<'a, I>(&self, tokens: &mut I, section: &mut Section<'a, T>) -> Result<()> where - I: Iterator>>, + I: Iterator>>, { for item in tokens { let item = item?; - let tok = T::from_str(item.get_kwd()); + let tok = item.get_kwd(); let tok_idx = tok.idx(); if let Some(rule) = &self.rules[tok_idx] { // we want this token. @@ -233,7 +228,7 @@ impl SectionRules { /// Parse a stream of tokens into a validated section. pub fn parse<'a, I>(&self, tokens: &mut I) -> Result> where - I: Iterator>>, + I: Iterator>>, { let mut section = Section::new(); self.parse_unverified(tokens, &mut section)?; diff --git a/tor-netdoc/src/routerdesc.rs b/tor-netdoc/src/routerdesc.rs index f5b2f2e9a..081639f73 100644 --- a/tor-netdoc/src/routerdesc.rs +++ b/tor-netdoc/src/routerdesc.rs @@ -226,21 +226,22 @@ impl RouterDesc { Section<'a, RouterKW>, )> { use crate::util::*; + use RouterKW::*; let reader = crate::tokenize::NetDocReader::new(s); // Parse everything up through the header. let mut reader = reader.pause_at(|item| { item.is_ok() - && item.as_ref().unwrap().get_kwd() != "router" - && item.as_ref().unwrap().get_kwd() != "identity-ed25519" + && item.as_ref().unwrap().get_kwd() != ROUTER + && item.as_ref().unwrap().get_kwd() != IDENTITY_ED25519 }); let header = ROUTER_HEADER_RULES.parse(&mut reader)?; // Parse everything up to but not including the signature. let mut reader = reader.new_pred(|item| { - item.is_ok() && (item.as_ref().unwrap().get_kwd() == "router-signature") - || (item.as_ref().unwrap().get_kwd() == "router-sig-ed25519") + item.is_ok() && (item.as_ref().unwrap().get_kwd() == ROUTER_SIGNATURE) + || (item.as_ref().unwrap().get_kwd() == ROUTER_SIG_ED25519) }); let body = ROUTER_BODY_RULES.parse(&mut reader)?; @@ -481,7 +482,7 @@ impl RouterDesc { let ipv4_policy = { let mut pol = AddrPolicy::new(); for ruletok in body.get_slice(POLICY).iter() { - let accept = ruletok.get_kwd() == "accept"; + let accept = ruletok.get_kwd_str() == "accept"; let pat: AddrPortPattern = ruletok .args_as_str() .parse() diff --git a/tor-netdoc/src/rules.rs b/tor-netdoc/src/rules.rs index f8c78fe5b..9b870ffe1 100644 --- a/tor-netdoc/src/rules.rs +++ b/tor-netdoc/src/rules.rs @@ -41,7 +41,7 @@ impl TokenFmt { } /// Check whether a single Item matches this TokenFmt rule, with respect /// to its number of arguments. - fn item_matches_args<'a>(&self, item: &Item<'a>) -> Result<()> { + fn item_matches_args<'a>(&self, item: &Item<'a, T>) -> Result<()> { let n_args = item.n_args(); if let Some(max) = self.max_args { if n_args > max { @@ -58,7 +58,7 @@ impl TokenFmt { /// Check whether a single Item matches a TokenFmt rule, with respect /// to its object's presence and type. - fn item_matches_obj<'a>(&self, item: &Item<'a>) -> Result<()> { + fn item_matches_obj<'a>(&self, item: &Item<'a, T>) -> Result<()> { match (&self.obj, item.has_obj()) { (ObjKind::NoObj, true) => Err(Error::UnexpectedObject(self.kwd.to_str(), item.pos())), (ObjKind::RequireObj, false) => { @@ -70,13 +70,13 @@ impl TokenFmt { /// Check whether a single item has the right number of arguments /// and object. - pub fn check_item<'a>(&self, item: &Item<'a>) -> Result<()> { + pub fn check_item<'a>(&self, item: &Item<'a, T>) -> Result<()> { self.item_matches_args(item)?; self.item_matches_obj(item) } /// Check whether this kind of item may appear this many times. - pub fn check_multiplicity<'a>(&self, items: &[Item<'a>]) -> Result<()> { + pub fn check_multiplicity<'a>(&self, items: &[Item<'a, T>]) -> Result<()> { match items.len() { 0 => { if self.required { diff --git a/tor-netdoc/src/tokenize.rs b/tor-netdoc/src/tokenize.rs index 6f495465f..c9d267227 100644 --- a/tor-netdoc/src/tokenize.rs +++ b/tor-netdoc/src/tokenize.rs @@ -5,6 +5,7 @@ //! string into Items. use crate::argtype::FromBytes; +use crate::keyword::Keyword; use crate::{Error, Pos, Result}; use std::cell::{Ref, RefCell}; use std::str::FromStr; @@ -36,8 +37,9 @@ pub struct Object<'a> { /// This is a zero-copy implementation that points to slices within a /// containing string. #[derive(Clone, Debug)] -pub struct Item<'a> { - kwd: &'a str, +pub struct Item<'a, K: Keyword> { + kwd: K, + kwd_str: &'a str, args: &'a str, /// The arguments, split by whitespace. This vector is contructed /// as needed, using interior mutability. @@ -47,17 +49,23 @@ pub struct Item<'a> { /// A cursor into a string that returns Items one by one. #[derive(Clone, Debug)] -pub struct NetDocReader<'a> { +pub struct NetDocReader<'a, K: Keyword> { /// The string we're parsing. s: &'a str, /// Our position within the string. off: usize, + /// Tells Rust it's okay that we are parameterizing on K. + _k: std::marker::PhantomData, } -impl<'a> NetDocReader<'a> { +impl<'a, K: Keyword> NetDocReader<'a, K> { /// Create a new NetDocReader to split a string into tokens. pub fn new(s: &'a str) -> Self { - NetDocReader { s, off: 0 } + NetDocReader { + s, + off: 0, + _k: std::marker::PhantomData, + } } /// Return the current Pos within the string. fn get_pos(&self, pos: usize) -> Pos { @@ -170,15 +178,17 @@ impl<'a> NetDocReader<'a> { /// /// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted. /// Returns Err on failure. - pub fn get_item(&mut self) -> Result>> { + pub fn get_item(&mut self) -> Result>> { if self.remaining() == 0 { return Ok(None); } - let (kwd, args) = self.get_kwdline()?; + let (kwd_str, args) = self.get_kwdline()?; let object = self.get_object()?; let split_args = RefCell::new(None); + let kwd = K::from_str(kwd_str); Ok(Some(Item { kwd, + kwd_str, args, split_args, object, @@ -226,8 +236,8 @@ fn tag_keyword_ok(s: &str) -> bool { } /// When used as an Iterator, returns a sequence of Result. -impl<'a> Iterator for NetDocReader<'a> { - type Item = Result>; +impl<'a, K: Keyword> Iterator for NetDocReader<'a, K> { + type Item = Result>; fn next(&mut self) -> Option { self.get_item().transpose() } @@ -244,11 +254,15 @@ fn base64_decode_multiline(s: &str) -> std::result::Result, base64::Deco Ok(v) } -impl<'a> Item<'a> { - /// Return the keyword part of this item. - pub fn get_kwd(&self) -> &'a str { +impl<'a, K: Keyword> Item<'a, K> { + /// Return the parsed keyword part of this item. + pub fn get_kwd(&self) -> K { self.kwd } + /// Return the keyword part of this item, as a string. + pub fn get_kwd_str(&self) -> &'a str { + self.kwd_str + } /// Return the arguments of this item, as a single string. pub fn args_as_str(&self) -> &'a str { self.args @@ -340,13 +354,13 @@ impl<'a> Item<'a> { /// This position won't be useful unless it is later contextualized /// with the containing string. pub fn pos(&self) -> Pos { - Pos::at(self.kwd) + Pos::at(self.kwd_str) } /// Return the position of this Item in a string. /// /// Returns None if this item doesn't actually belong to the string. pub fn offset_in(&self, s: &str) -> Option { - crate::util::str_offset(s, self.kwd) + crate::util::str_offset(s, self.kwd_str) } /// Return the position of the n'th argument of this item. /// @@ -367,7 +381,7 @@ impl<'a> Item<'a> { let last_arg = args[args.len() - 1]; Pos::at_end_of(last_arg) } else { - Pos::at_end_of(self.kwd) + Pos::at_end_of(self.kwd_str) } } /// Return the position of the end of this object. @@ -383,11 +397,11 @@ impl<'a> Item<'a> { /// want to inspect. If the Item is there, this acts like a proxy to the /// item; otherwise, it treats the item as having no arguments. -pub struct MaybeItem<'a, 'b>(Option<&'a Item<'b>>); +pub struct MaybeItem<'a, 'b, K: Keyword>(Option<&'a Item<'b, K>>); // All methods here are as for Item. -impl<'a, 'b> MaybeItem<'a, 'b> { - pub fn from_option(opt: Option<&'a Item<'b>>) -> Self { +impl<'a, 'b, K: Keyword> MaybeItem<'a, 'b, K> { + pub fn from_option(opt: Option<&'a Item<'b, K>>) -> Self { MaybeItem(opt) } pub fn parse_arg(&self, idx: usize) -> Result>