netdoc: Refactor iteration over tokens.
I want the "peekable iterator" type to be passed around a lot, and it needs to have some way to get at the string that's used with it.
This commit is contained in:
parent
9742f3ac42
commit
bc6d1b2228
|
@ -13,6 +13,7 @@ use crate::family::RelayFamily;
|
|||
use crate::keyword::Keyword;
|
||||
use crate::parse::SectionRules;
|
||||
use crate::policy::PortPolicy;
|
||||
use crate::tokenize::NetDocReader;
|
||||
use crate::util;
|
||||
use crate::{Error, Result};
|
||||
use tor_llcrypto::d;
|
||||
|
@ -70,9 +71,14 @@ lazy_static! {
|
|||
impl Microdesc {
|
||||
/// Parse a string into a new microdescriptor.
///
/// Builds a `NetDocReader` over `s` and hands it to `parse_from_reader`,
/// returning that call's `Result` unchanged.
|
||||
pub fn parse(s: &str) -> Result<Microdesc> {
|
||||
let mut items = crate::tokenize::NetDocReader::new(s);
|
||||
Self::parse_from_reader(&mut items)
|
||||
}
|
||||
/// Extract a single microdescriptor from a NetDocReader.
|
||||
fn parse_from_reader(reader: &mut NetDocReader<'_, MicrodescKW>) -> Result<Microdesc> {
|
||||
use MicrodescKW::*;
|
||||
|
||||
let mut items = crate::tokenize::NetDocReader::new(s).peekable();
|
||||
let s = reader.str();
|
||||
let mut items = reader.iter();
|
||||
|
||||
// We have to start with onion-key
|
||||
let start_pos = {
|
||||
|
|
|
@ -33,7 +33,7 @@ use crate::family::RelayFamily;
|
|||
use crate::keyword::Keyword;
|
||||
use crate::parse::{Section, SectionRules};
|
||||
use crate::policy::*;
|
||||
use crate::tokenize::ItemResult;
|
||||
use crate::tokenize::{ItemResult, NetDocReader};
|
||||
use crate::version::TorVersion;
|
||||
use crate::{Error, Result};
|
||||
|
||||
|
@ -222,7 +222,7 @@ impl RouterDesc {
|
|||
|
||||
/// Helper: read tokens from `reader`, and divide them into three validated sections.
|
||||
fn parse_sections<'a>(
|
||||
s: &'a str,
|
||||
reader: &mut NetDocReader<'a, RouterKW>,
|
||||
) -> Result<(
|
||||
Section<'a, RouterKW>,
|
||||
Section<'a, RouterKW>,
|
||||
|
@ -231,11 +231,10 @@ impl RouterDesc {
|
|||
use crate::util::*;
|
||||
use RouterKW::*;
|
||||
|
||||
let reader = crate::tokenize::NetDocReader::new(s);
|
||||
|
||||
// Parse everything up through the header.
|
||||
let mut reader =
|
||||
reader.pause_at(|item| item.is_ok_with_kwd_not_in(&[ROUTER, IDENTITY_ED25519]));
|
||||
let mut reader = reader
|
||||
.iter()
|
||||
.pause_at(|item| item.is_ok_with_kwd_not_in(&[ROUTER, IDENTITY_ED25519]));
|
||||
let header = ROUTER_HEADER_RULES.parse(&mut reader)?;
|
||||
|
||||
// Parse everything up to but not including the signature.
|
||||
|
@ -278,7 +277,8 @@ impl RouterDesc {
|
|||
// that parse one item at a time should be made into sub-functions.
|
||||
use RouterKW::*;
|
||||
|
||||
let (header, body, sig) = RouterDesc::parse_sections(s)?;
|
||||
let mut r = NetDocReader::new(s);
|
||||
let (header, body, sig) = RouterDesc::parse_sections(&mut r)?;
|
||||
|
||||
let start_offset = header.get_required(ROUTER)?.offset_in(s).unwrap();
|
||||
|
||||
|
|
|
@ -67,8 +67,10 @@ pub struct Item<'a, K: Keyword> {
|
|||
}
|
||||
|
||||
/// A cursor into a string that returns Items one by one.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NetDocReader<'a, K: Keyword> {
|
||||
///
|
||||
/// (This type isn't used directly, but is returned wrapped in a Peekable.)
|
||||
#[derive(Debug)]
|
||||
struct NetDocReaderBase<'a, K: Keyword> {
|
||||
/// The string we're parsing.
|
||||
s: &'a str,
|
||||
/// Our position within the string.
|
||||
|
@ -77,10 +79,10 @@ pub struct NetDocReader<'a, K: Keyword> {
|
|||
_k: std::marker::PhantomData<K>,
|
||||
}
|
||||
|
||||
impl<'a, K: Keyword> NetDocReader<'a, K> {
|
||||
impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
|
||||
/// Create a new NetDocReaderBase to split a string into tokens.
|
||||
pub fn new(s: &'a str) -> Self {
|
||||
NetDocReader {
|
||||
fn new(s: &'a str) -> Self {
|
||||
NetDocReaderBase {
|
||||
s,
|
||||
off: 0,
|
||||
_k: std::marker::PhantomData,
|
||||
|
@ -202,14 +204,14 @@ impl<'a, K: Keyword> NetDocReader<'a, K> {
|
|||
Ok(Some(Object { tag, data }))
|
||||
}
|
||||
|
||||
/// Read the next Item from this NetDocReader.
|
||||
/// Read the next Item from this NetDocReaderBase.
|
||||
///
|
||||
/// If successful, returns Ok(Some(Item)), or Ok(None) if exhausted.
|
||||
/// Returns Err on failure.
|
||||
///
|
||||
/// Always consumes at least one line if possible; always ends on a
|
||||
/// line boundary if one exists.
|
||||
pub fn get_item(&mut self) -> Result<Option<Item<'a, K>>> {
|
||||
fn get_item(&mut self) -> Result<Option<Item<'a, K>>> {
|
||||
if self.remaining() == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
@ -272,7 +274,7 @@ fn tag_keyword_ok(s: &str) -> bool {
|
|||
}
|
||||
|
||||
/// When used as an Iterator, returns a sequence of Result<Item>.
|
||||
impl<'a, K: Keyword> Iterator for NetDocReader<'a, K> {
|
||||
impl<'a, K: Keyword> Iterator for NetDocReaderBase<'a, K> {
|
||||
type Item = Result<Item<'a, K>>;
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.get_item().transpose()
|
||||
|
@ -511,3 +513,30 @@ impl<'a, K: Keyword> ItemResult<K> for Result<Item<'a, K>> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A peekable cursor into a string that returns Items one by one.
///
/// Bundles the token iterator together with the string it was built from,
/// so that code holding the reader can get at both.
|
||||
#[derive(Debug)]
|
||||
pub struct NetDocReader<'a, K: Keyword> {
|
||||
/// The string this reader was constructed from; returned by `str()`.
// TODO: I wish there were some way around having this string
|
||||
// reference, since we already need one inside NetDocReaderBase.
|
||||
s: &'a str,
|
||||
/// Peekable iterator over the items tokenized from `s`; returned by `iter()`.
tokens: std::iter::Peekable<NetDocReaderBase<'a, K>>,
|
||||
}
|
||||
|
||||
impl<'a, K: Keyword> NetDocReader<'a, K> {
|
||||
/// Construct a new NetDocReader to read tokens from `s`.
///
/// The same `s` is both stored in the reader and used to build the
/// underlying `NetDocReaderBase`, which is then wrapped in a `Peekable`.
|
||||
pub fn new(s: &'a str) -> Self {
|
||||
NetDocReader {
|
||||
s,
|
||||
tokens: NetDocReaderBase::new(s).peekable(),
|
||||
}
|
||||
}
|
||||
/// Return a reference to the string used for this NetDocReader.
///
/// The returned slice has the full `'a` lifetime of the original input,
/// not the lifetime of the borrow of `self`.
|
||||
pub fn str(&self) -> &'a str {
|
||||
self.s
|
||||
}
|
||||
/// Return the peekable iterator over the string's tokens.
///
/// This is a mutable borrow of the reader's own iterator, not a fresh
/// one: items consumed through it are consumed from this reader.
|
||||
pub fn iter(&mut self) -> &mut std::iter::Peekable<impl Iterator<Item = Result<Item<'a, K>>>> {
|
||||
&mut self.tokens
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,4 @@
|
|||
/// Helper functions and types for use in parsing
|
||||
///
|
||||
/// For now this module has a single type -- an iterator that pauses
|
||||
/// when a certain predicate is true. We use it for chunking
|
||||
/// documents into sections. If it turns out to be useful somewhere
|
||||
/// else, we should move it.
|
||||
use std::iter::Peekable;
|
||||
|
||||
/// An iterator adaptor that pauses when a given predicate is true.
|
||||
|
|
Loading…
Reference in New Issue