netdoc: Teach tokenizer about annotations.

I'm not sure that annotations are a feature I want to keep
long-term, but for now it would be useful if this code could also read
Tor's file formats for network directories.
This commit is contained in:
Nick Mathewson 2020-05-19 14:06:19 -04:00
parent c427b26fe7
commit 0f5915915b
2 changed files with 27 additions and 11 deletions

View File

@ -27,11 +27,12 @@ macro_rules! decl_keyword {
$(#[$meta])*
$v enum $name {
$( $i , )*
UNRECOGNIZED
UNRECOGNIZED,
ANN_UNRECOGNIZED
}
impl $crate::keyword::Keyword for $name {
fn idx(self) -> usize { self as usize }
fn n_vals() -> usize { ($name::UNRECOGNIZED as usize) + 1 }
fn n_vals() -> usize { ($name::ANN_UNRECOGNIZED as usize) + 1 }
fn from_str(s : &str) -> Self {
// Note usage of phf crate to create a perfect hash over
// the possible keywords. It will be even better if someday
@ -40,14 +41,23 @@ macro_rules! decl_keyword {
const KEYWORD: phf::Map<&'static str, $name> = phf::phf_map! {
$( $( $s => $name::$i , )+ )*
};
* KEYWORD.get(s).unwrap_or(& $name::UNRECOGNIZED)
match KEYWORD.get(s) {
Some(k) => *k,
None => if s.starts_with('@') {
$name::ANN_UNRECOGNIZED
} else {
$name::UNRECOGNIZED
}
}
}
fn from_idx(i : usize) -> Option<Self> {
// Note looking up the value in a vec. This may or may
// not be faster than a case statement would be.
lazy_static::lazy_static! {
static ref VALS: Vec<$name> =
vec![ $($name::$i , )* $name::UNRECOGNIZED ];
vec![ $($name::$i , )*
$name::UNRECOGNIZED,
$name::ANN_UNRECOGNIZED ];
};
VALS.get(i).copied()
}
@ -58,7 +68,8 @@ macro_rules! decl_keyword {
// "acceptreject", which is not great.
// "accept/reject" would be better.
$( $i => concat!{ $($s),+ } , )*
UNRECOGNIZED => "<unrecognized>"
UNRECOGNIZED => "<unrecognized>",
ANN_UNRECOGNIZED => "<unrecognized annotation>"
}
}
}

View File

@ -113,17 +113,17 @@ impl<'a, K: Keyword> NetDocReader<'a, K> {
fn get_kwdline(&mut self) -> Result<(&'a str, &'a str)> {
let pos = self.off;
let line = self.get_line()?;
let line = if line.starts_with("opt ") {
&line[4..]
let (line, anno_ok) = if line.starts_with("opt ") {
(&line[4..], false)
} else {
line
(line, true)
};
let mut parts_iter = line.splitn(2, |c| c == ' ' || c == '\t');
let kwd = match parts_iter.next() {
Some(k) => k,
None => return Err(Error::MissingKeyword(self.get_pos(pos))),
};
if !keyword_ok(kwd) {
if !keyword_ok(kwd, anno_ok) {
return Err(Error::BadKeyword(self.get_pos(pos)));
}
// XXXX spec should allow unicode in args.
@ -196,8 +196,10 @@ impl<'a, K: Keyword> NetDocReader<'a, K> {
}
}
/// Return true iff 's' is a valid keyword.
fn keyword_ok(s: &str) -> bool {
/// Return true iff 's' is a valid keyword or annotation.
///
/// (Only allow annotations if `anno_ok` is true.)
fn keyword_ok(mut s: &str, anno_ok: bool) -> bool {
fn kwd_char_ok(c: char) -> bool {
match c {
'A'..='Z' => true,
@ -211,6 +213,9 @@ fn keyword_ok(s: &str) -> bool {
if s.is_empty() {
return false;
}
if anno_ok && s.starts_with('@') {
s = &s[1..];
}
// XXXX I think we should disallow initial "-"
s.chars().all(kwd_char_ok)
}