netdoc: Refactor iteration over tokens.

I want the "peekable iterator" type to be passed around a lot, and
it needs to have some way to get at the string that's used with it.
Nick Mathewson 2020-06-03 10:53:18 -04:00
parent 9742f3ac42
commit bc6d1b2228
4 changed files with 52 additions and 22 deletions
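Before the diffs, a self-contained sketch of the shape this refactor gives the reader type (illustrative names only; a whitespace word-splitter stands in for the crate's Item/Keyword tokenizer): a private iterator type does the low-level scanning, and a public wrapper owns it inside a std::iter::Peekable together with the source string, so a single &mut reference hands sub-parsers both one-token lookahead and access to the original text.

// Sketch only: ReaderBase/Reader are simplified stand-ins for
// NetDocReaderBase/NetDocReader; tokens are plain words, not Items.

/// Private tokenizer: yields whitespace-separated words.
struct ReaderBase<'a> {
    s: &'a str,
    off: usize,
}

impl<'a> Iterator for ReaderBase<'a> {
    type Item = &'a str;
    fn next(&mut self) -> Option<&'a str> {
        let rest = &self.s[self.off..];
        // Skip leading whitespace; stop if nothing is left.
        let start = rest.find(|c: char| !c.is_whitespace())?;
        let word = rest[start..].split_whitespace().next()?;
        self.off += start + word.len();
        Some(word)
    }
}

/// Public wrapper: a peekable cursor plus access to the backing string.
struct Reader<'a> {
    s: &'a str,
    tokens: std::iter::Peekable<ReaderBase<'a>>,
}

impl<'a> Reader<'a> {
    fn new(s: &'a str) -> Self {
        Reader { s, tokens: ReaderBase { s, off: 0 }.peekable() }
    }
    /// The string this reader was built from.
    fn str(&self) -> &'a str {
        self.s
    }
    /// The peekable token stream; sub-parsers can peek() without consuming.
    fn iter(&mut self) -> &mut std::iter::Peekable<ReaderBase<'a>> {
        &mut self.tokens
    }
}

fn main() {
    let mut r = Reader::new("onion-key ntor-onion-key family");
    assert_eq!(r.str(), "onion-key ntor-onion-key family");
    assert_eq!(r.iter().peek(), Some(&"onion-key")); // lookahead, no consume
    assert_eq!(r.iter().next(), Some("onion-key"));
    assert_eq!(r.iter().next(), Some("ntor-onion-key"));
}

The wrapper is the only public surface; callers never touch the base iterator except through the Peekable returned by iter(). That is exactly the split the tokenize.rs hunks below introduce.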

src/microdesc.rs

@@ -13,6 +13,7 @@ use crate::family::RelayFamily;
 use crate::keyword::Keyword;
 use crate::parse::SectionRules;
 use crate::policy::PortPolicy;
+use crate::tokenize::NetDocReader;
 use crate::util;
 use crate::{Error, Result};
 use tor_llcrypto::d;
@@ -70,9 +71,14 @@ lazy_static! {
 impl Microdesc {
     /// Parse a string into a new microdescriptor.
     pub fn parse(s: &str) -> Result<Microdesc> {
+        let mut items = crate::tokenize::NetDocReader::new(s);
+        Self::parse_from_reader(&mut items)
+    }
+
+    /// Extract a single microdescriptor from a NetDocReader.
+    fn parse_from_reader(reader: &mut NetDocReader<'_, MicrodescKW>) -> Result<Microdesc> {
         use MicrodescKW::*;
-        let mut items = crate::tokenize::NetDocReader::new(s).peekable();
+        let s = reader.str();
+        let mut items = reader.iter();
 
         // We have to start with onion-key
         let start_pos = {

src/routerdesc.rs

@@ -33,7 +33,7 @@ use crate::family::RelayFamily;
 use crate::keyword::Keyword;
 use crate::parse::{Section, SectionRules};
 use crate::policy::*;
-use crate::tokenize::ItemResult;
+use crate::tokenize::{ItemResult, NetDocReader};
 use crate::version::TorVersion;
 use crate::{Error, Result};
@@ -222,7 +222,7 @@ impl RouterDesc {
     /// Helper: tokenize `s`, and divide it into three validated sections.
     fn parse_sections<'a>(
-        s: &'a str,
+        reader: &mut NetDocReader<'a, RouterKW>,
     ) -> Result<(
         Section<'a, RouterKW>,
         Section<'a, RouterKW>,
@@ -231,11 +231,10 @@
         use crate::util::*;
         use RouterKW::*;
 
-        let reader = crate::tokenize::NetDocReader::new(s);
-
         // Parse everything up through the header.
-        let mut reader =
-            reader.pause_at(|item| item.is_ok_with_kwd_not_in(&[ROUTER, IDENTITY_ED25519]));
+        let mut reader = reader
+            .iter()
+            .pause_at(|item| item.is_ok_with_kwd_not_in(&[ROUTER, IDENTITY_ED25519]));
         let header = ROUTER_HEADER_RULES.parse(&mut reader)?;
 
         // Parse everything up to but not including the signature.
@@ -278,7 +277,8 @@
         // that parse one item at a time should be made into sub-functions.
         use RouterKW::*;
 
-        let (header, body, sig) = RouterDesc::parse_sections(s)?;
+        let mut r = NetDocReader::new(s);
+        let (header, body, sig) = RouterDesc::parse_sections(&mut r)?;
 
         let start_offset = header.get_required(ROUTER)?.offset_in(s).unwrap();

src/tokenize.rs

@@ -67,8 +67,10 @@ pub struct Item<'a, K: Keyword> {
 }
 
 /// A cursor into a string that returns Items one by one.
-#[derive(Clone, Debug)]
-pub struct NetDocReader<'a, K: Keyword> {
+///
+/// (This type isn't used directly, but is returned wrapped in a Peekable.)
+#[derive(Debug)]
+struct NetDocReaderBase<'a, K: Keyword> {
     /// The string we're parsing.
     s: &'a str,
     /// Our position within the string.
@@ -77,10 +79,10 @@ pub struct NetDocReader<'a, K: Keyword> {
     _k: std::marker::PhantomData<K>,
 }
 
-impl<'a, K: Keyword> NetDocReader<'a, K> {
+impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
     /// Create a new NetDocReader to split a string into tokens.
-    pub fn new(s: &'a str) -> Self {
-        NetDocReader {
+    fn new(s: &'a str) -> Self {
+        NetDocReaderBase {
             s,
             off: 0,
             _k: std::marker::PhantomData,
@@ -202,14 +204,14 @@ impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
         Ok(Some(Object { tag, data }))
     }
 
-    /// Read the next Item from this NetDocReader.
+    /// Read the next Item from this NetDocReaderBase.
     ///
     /// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted.
     /// Returns Err on failure.
     ///
     /// Always consumes at least one line if possible; always ends on a
     /// line boundary if one exists.
-    pub fn get_item(&mut self) -> Result<Option<Item<'a, K>>> {
+    fn get_item(&mut self) -> Result<Option<Item<'a, K>>> {
         if self.remaining() == 0 {
             return Ok(None);
         }
@@ -272,7 +274,7 @@ fn tag_keyword_ok(s: &str) -> bool {
 }
 
 /// When used as an Iterator, returns a sequence of Result<Item>.
-impl<'a, K: Keyword> Iterator for NetDocReader<'a, K> {
+impl<'a, K: Keyword> Iterator for NetDocReaderBase<'a, K> {
     type Item = Result<Item<'a, K>>;
     fn next(&mut self) -> Option<Self::Item> {
         self.get_item().transpose()
@@ -511,3 +513,30 @@ impl<'a, K: Keyword> ItemResult<K> for Result<Item<'a, K>> {
         }
     }
 }
+
+/// A peekable cursor into a string that returns Items one by one.
+#[derive(Debug)]
+pub struct NetDocReader<'a, K: Keyword> {
+    // TODO: I wish there were some way around having this string
+    // reference, since we already need one inside NetDocReaderBase.
+    s: &'a str,
+    tokens: std::iter::Peekable<NetDocReaderBase<'a, K>>,
+}
+
+impl<'a, K: Keyword> NetDocReader<'a, K> {
+    /// Construct a new NetDocReader to read tokens from `s`.
+    pub fn new(s: &'a str) -> Self {
+        NetDocReader {
+            s,
+            tokens: NetDocReaderBase::new(s).peekable(),
+        }
+    }
+    /// Return a reference to the string used for this NetDocReader.
+    pub fn str(&self) -> &'a str {
+        self.s
+    }
+    /// Return the peekable iterator over the string's tokens.
+    pub fn iter(&mut self) -> &mut std::iter::Peekable<impl Iterator<Item = Result<Item<'a, K>>>> {
+        &mut self.tokens
+    }
+}
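What the Peekable wrapper buys callers, shown with plain string tokens rather than the crate's Item type (illustrative keywords, not from the commit): one token of lookahead, so a section parser can stop at a boundary keyword without consuming the next section's first item.

fn main() {
    let mut tokens = ["router", "bandwidth", "onion-key"].into_iter().peekable();
    assert_eq!(tokens.peek(), Some(&"router")); // look without consuming
    // Consume tokens only while they belong to the current section.
    while let Some(tok) = tokens.peek() {
        if *tok == "onion-key" {
            break; // leave it for the next section's parser
        }
        let tok = tokens.next().unwrap();
        println!("current section token: {}", tok);
    }
    assert_eq!(tokens.next(), Some("onion-key")); // still available
}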

src/util.rs

@@ -1,9 +1,4 @@
 /// Helper functions and types for use in parsing
-///
-/// For now this module has a single type -- an iterator that pauses
-/// when a certain predicate is true. We use it for chunking
-/// documents into sections. If it turns out to be useful somewhere
-/// else, we should move it.
 use std::iter::Peekable;
 
 /// An iterator adaptor that pauses when a given predicate is true.
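This module holds the pause_at adaptor that routerdesc.rs chains onto reader.iter() above. A self-contained sketch of the idea (simplified; the real adaptor's predicate inspects parsed Items via helpers like is_ok_with_kwd_not_in, and the names here are illustrative): borrow the Peekable, yield items until the predicate fires, and leave the matching item unconsumed for whoever parses the next section.

use std::iter::Peekable;

/// Yields items from a borrowed Peekable until `predicate` says to stop.
struct PauseAt<'a, I: Iterator, F: FnMut(&I::Item) -> bool> {
    inner: &'a mut Peekable<I>,
    predicate: F,
}

impl<'a, I: Iterator, F: FnMut(&I::Item) -> bool> Iterator for PauseAt<'a, I, F> {
    type Item = I::Item;
    fn next(&mut self) -> Option<I::Item> {
        // Peek first; if the predicate matches, pause without consuming.
        if let Some(item) = self.inner.peek() {
            if (self.predicate)(item) {
                return None;
            }
        }
        self.inner.next()
    }
}

fn main() {
    let mut tokens = ["router", "bandwidth", "router-signature", "ed25519"]
        .into_iter()
        .peekable();
    // Collect body tokens, pausing at the start of the signature section.
    let body: Vec<&str> = PauseAt {
        inner: &mut tokens,
        predicate: |t: &&str| *t == "router-signature",
    }
    .collect();
    assert_eq!(body, ["router", "bandwidth"]);
    // The token we paused at is still there for the signature parser.
    assert_eq!(tokens.next(), Some("router-signature"));
}

Because the adaptor only borrows the underlying Peekable, the caller can run several section parsers over the same token stream in sequence, which is how parse_sections splits a router descriptor into header, body, and signature.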