netdoc: Teach tokenizer about annotations.
I'm not sure that annotations are a feature I want to keep long-term, but for now it would be useful if this code could also read Tor's file formats for network directories.
This commit is contained in:
parent
c427b26fe7
commit
0f5915915b
|
@ -27,11 +27,12 @@ macro_rules! decl_keyword {
|
|||
$(#[$meta])*
|
||||
$v enum $name {
|
||||
$( $i , )*
|
||||
UNRECOGNIZED
|
||||
UNRECOGNIZED,
|
||||
ANN_UNRECOGNIZED
|
||||
}
|
||||
impl $crate::keyword::Keyword for $name {
|
||||
fn idx(self) -> usize { self as usize }
|
||||
fn n_vals() -> usize { ($name::UNRECOGNIZED as usize) + 1 }
|
||||
fn n_vals() -> usize { ($name::ANN_UNRECOGNIZED as usize) + 1 }
|
||||
fn from_str(s : &str) -> Self {
|
||||
// Note usage of phf crate to create a perfect hash over
|
||||
// the possible keywords. It will be even better if someday
|
||||
|
@ -40,14 +41,23 @@ macro_rules! decl_keyword {
|
|||
const KEYWORD: phf::Map<&'static str, $name> = phf::phf_map! {
|
||||
$( $( $s => $name::$i , )+ )*
|
||||
};
|
||||
* KEYWORD.get(s).unwrap_or(& $name::UNRECOGNIZED)
|
||||
match KEYWORD.get(s) {
|
||||
Some(k) => *k,
|
||||
None => if s.starts_with('@') {
|
||||
$name::ANN_UNRECOGNIZED
|
||||
} else {
|
||||
$name::UNRECOGNIZED
|
||||
}
|
||||
}
|
||||
}
|
||||
fn from_idx(i : usize) -> Option<Self> {
|
||||
// Note looking up the value in a vec. This may or may
|
||||
// not be faster than a case statement would be.
|
||||
lazy_static::lazy_static! {
|
||||
static ref VALS: Vec<$name> =
|
||||
vec![ $($name::$i , )* $name::UNRECOGNIZED ];
|
||||
vec![ $($name::$i , )*
|
||||
$name::UNRECOGNIZED,
|
||||
$name::ANN_UNRECOGNIZED ];
|
||||
};
|
||||
VALS.get(i).copied()
|
||||
}
|
||||
|
@ -58,7 +68,8 @@ macro_rules! decl_keyword {
|
|||
// "acceptreject", which is not great.
|
||||
// "accept/reject" would be better.
|
||||
$( $i => concat!{ $($s),+ } , )*
|
||||
UNRECOGNIZED => "<unrecognized>"
|
||||
UNRECOGNIZED => "<unrecognized>",
|
||||
ANN_UNRECOGNIZED => "<unrecognized annotation>"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,17 +113,17 @@ impl<'a, K: Keyword> NetDocReader<'a, K> {
|
|||
fn get_kwdline(&mut self) -> Result<(&'a str, &'a str)> {
|
||||
let pos = self.off;
|
||||
let line = self.get_line()?;
|
||||
let line = if line.starts_with("opt ") {
|
||||
&line[4..]
|
||||
let (line, anno_ok) = if line.starts_with("opt ") {
|
||||
(&line[4..], false)
|
||||
} else {
|
||||
line
|
||||
(line, true)
|
||||
};
|
||||
let mut parts_iter = line.splitn(2, |c| c == ' ' || c == '\t');
|
||||
let kwd = match parts_iter.next() {
|
||||
Some(k) => k,
|
||||
None => return Err(Error::MissingKeyword(self.get_pos(pos))),
|
||||
};
|
||||
if !keyword_ok(kwd) {
|
||||
if !keyword_ok(kwd, anno_ok) {
|
||||
return Err(Error::BadKeyword(self.get_pos(pos)));
|
||||
}
|
||||
// XXXX spec should allow unicode in args.
|
||||
|
@ -196,8 +196,10 @@ impl<'a, K: Keyword> NetDocReader<'a, K> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Return true iff 's' is a valid keyword.
|
||||
fn keyword_ok(s: &str) -> bool {
|
||||
/// Return true iff 's' is a valid keyword or annotation.
|
||||
///
|
||||
/// (Only allow annotations if `anno_ok` is true.)
|
||||
fn keyword_ok(mut s: &str, anno_ok: bool) -> bool {
|
||||
fn kwd_char_ok(c: char) -> bool {
|
||||
match c {
|
||||
'A'..='Z' => true,
|
||||
|
@ -211,6 +213,9 @@ fn keyword_ok(s: &str) -> bool {
|
|||
if s.is_empty() {
|
||||
return false;
|
||||
}
|
||||
if anno_ok && s.starts_with('@') {
|
||||
s = &s[1..];
|
||||
}
|
||||
// XXXX I think we should disallow initial "-"
|
||||
s.chars().all(kwd_char_ok)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue