feat: introduce obitaxonomy crate for hierarchical taxonomy parsing
Adds the `obitaxonomy` crate to parse and validate hierarchical taxonomy paths using a strict `taxonomy:/name@rank/...` syntax. Replaces generic string-based path matching in predicates with structured `TaxPath` and `TaxPattern` types, enforcing explicit anchor constraints and rank-aware semantics. Updates filtering documentation to clarify optional leading slashes and segment-boundary matching rules.
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
[package]
|
||||
name = "obitaxonomy"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
@@ -0,0 +1,38 @@
|
||||
use std::fmt;
|
||||
|
||||
/// Errors reported while parsing taxonomy paths, query patterns and their segments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TaxError {
    /// The stored value lacks the leading `taxonomy:/` prefix.
    MissingPrefix,
    /// Nothing follows the prefix in a stored path.
    EmptyPath,
    /// A query pattern has no segments left once its anchors are removed.
    EmptyPattern,
    /// A segment's name is empty (for instance two consecutive `/`).
    EmptySegmentName,
    /// A segment ends in `@` without naming a rank; `segment` is the raw segment text.
    EmptyRankName { segment: String },
    /// A segment holds more than one `@`; `segment` is the raw segment text.
    AmbiguousRank { segment: String },
}

impl fmt::Display for TaxError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants carrying the offending segment need formatting; the others
        // map to a fixed message that is written verbatim below.
        let fixed = match self {
            Self::EmptyRankName { segment } => {
                return write!(f, "segment has '@' with no rank name: {segment:?}");
            }
            Self::AmbiguousRank { segment } => {
                return write!(f, "segment contains more than one '@': {segment:?}");
            }
            Self::MissingPrefix => "taxonomy path must start with \"taxonomy:/\"",
            Self::EmptyPath => "taxonomy path has no segments",
            Self::EmptyPattern => "taxonomy query pattern has no segments",
            Self::EmptySegmentName => "segment has an empty name",
        };
        f.write_str(fixed)
    }
}

impl std::error::Error for TaxError {}
|
||||
@@ -0,0 +1,11 @@
|
||||
mod error;
|
||||
mod segment;
|
||||
mod segment_pattern;
|
||||
mod path;
|
||||
mod pattern;
|
||||
|
||||
pub use error::TaxError;
|
||||
pub use segment::TaxSegment;
|
||||
pub use segment_pattern::SegmentPattern;
|
||||
pub use path::{TaxPath, PREFIX};
|
||||
pub use pattern::TaxPattern;
|
||||
@@ -0,0 +1,82 @@
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::error::TaxError;
|
||||
use crate::segment::TaxSegment;
|
||||
|
||||
/// Leading marker that identifies a metadata value as a taxonomy path.
pub const PREFIX: &str = "taxonomy:/";
|
||||
|
||||
/// A rooted, `/`-separated taxonomy path with optional per-segment rank annotations.
|
||||
///
|
||||
/// Stored form: `taxonomy:/seg1@rank1/seg2/seg3@rank3`
|
||||
/// The leading `taxonomy:/` is the discriminator; the remainder is one or more
|
||||
/// `/`-separated segments, each of the form `name` or `name@rank`.
|
||||
///
|
||||
/// `@` is reserved and may not appear in segment names or rank names.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TaxPath {
|
||||
segments: Vec<TaxSegment>,
|
||||
}
|
||||
|
||||
impl TaxPath {
|
||||
pub fn parse(s: &str) -> Result<Self, TaxError> {
|
||||
let tail = s.strip_prefix(PREFIX).ok_or(TaxError::MissingPrefix)?;
|
||||
if tail.is_empty() {
|
||||
return Err(TaxError::EmptyPath);
|
||||
}
|
||||
let segments = tail.split('/')
|
||||
.map(TaxSegment::parse)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(Self { segments })
|
||||
}
|
||||
|
||||
/// True if `self` is an ancestor of — or equal to — `other`.
|
||||
///
|
||||
/// Comparison is by segment name only; rank annotations are ignored.
|
||||
/// `self` must be a prefix of `other` at segment granularity.
|
||||
pub fn is_ancestor_of(&self, other: &TaxPath) -> bool {
|
||||
self.segments.len() <= other.segments.len()
|
||||
&& self.segments.iter().zip(other.segments.iter())
|
||||
.all(|(a, b)| a.name() == b.name())
|
||||
}
|
||||
|
||||
/// Returns the name of the first segment whose rank equals `rank`, if any.
|
||||
pub fn name_at_rank(&self, rank: &str) -> Option<&str> {
|
||||
self.segments.iter()
|
||||
.find(|s| s.rank() == Some(rank))
|
||||
.map(|s| s.name())
|
||||
}
|
||||
|
||||
/// True if any segment has the given rank.
|
||||
pub fn has_rank(&self, rank: &str) -> bool {
|
||||
self.segments.iter().any(|s| s.rank() == Some(rank))
|
||||
}
|
||||
|
||||
/// True if the path contains a segment with both the given rank and name.
|
||||
pub fn matches_rank(&self, rank: &str, name: &str) -> bool {
|
||||
self.segments.iter().any(|s| s.rank() == Some(rank) && s.name() == name)
|
||||
}
|
||||
|
||||
pub fn segments(&self) -> &[TaxSegment] { &self.segments }
|
||||
pub fn depth(&self) -> usize { self.segments.len() }
|
||||
pub fn is_empty(&self) -> bool { self.segments.is_empty() }
|
||||
}
|
||||
|
||||
impl fmt::Display for TaxPath {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", PREFIX)?;
|
||||
let mut first = true;
|
||||
for seg in &self.segments {
|
||||
if !first { write!(f, "/")?; }
|
||||
write!(f, "{seg}")?;
|
||||
first = false;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for TaxPath {
|
||||
type Err = TaxError;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> { Self::parse(s) }
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
use crate::error::TaxError;
|
||||
use crate::path::TaxPath;
|
||||
use crate::segment::TaxSegment;
|
||||
use crate::segment_pattern::SegmentPattern;
|
||||
|
||||
/// A query pattern for matching against stored `TaxPath` values.
|
||||
///
|
||||
/// Syntax:
|
||||
///
|
||||
/// | Form | Semantics |
|
||||
/// |----------|-----------|
|
||||
/// | `A/B` | A then B as a contiguous sub-path, anywhere in the value |
|
||||
/// | `/A/B` | value starts with A then B (start-anchored) |
|
||||
/// | `A/B$` | value ends with A then B (end-anchored) |
|
||||
/// | `/A/B$` | value is exactly A then B (fully anchored) |
|
||||
/// | `A@x/B` | A with rank `x`, followed by B with any rank |
|
||||
///
|
||||
/// A segment pattern without `@` matches any segment with that name regardless
|
||||
/// of its stored rank.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TaxPattern {
|
||||
start_anchored: bool,
|
||||
end_anchored: bool,
|
||||
segments: Vec<SegmentPattern>,
|
||||
}
|
||||
|
||||
impl TaxPattern {
|
||||
pub fn parse(s: &str) -> Result<Self, TaxError> {
|
||||
let s = s.trim();
|
||||
|
||||
let start_anchored = s.starts_with('/');
|
||||
let s = if start_anchored { &s[1..] } else { s };
|
||||
|
||||
let end_anchored = s.ends_with('$');
|
||||
let s = if end_anchored { &s[..s.len() - 1] } else { s };
|
||||
|
||||
if s.is_empty() {
|
||||
return Err(TaxError::EmptyPattern);
|
||||
}
|
||||
|
||||
let segments = s.split('/')
|
||||
.map(SegmentPattern::parse)
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
Ok(Self { start_anchored, end_anchored, segments })
|
||||
}
|
||||
|
||||
/// True if this pattern matches `path` according to the anchor flags.
|
||||
///
|
||||
/// The pattern must match a contiguous run of segments in the path.
|
||||
/// Start/end anchors restrict where that run may begin or end.
|
||||
pub fn matches(&self, path: &TaxPath) -> bool {
|
||||
let n = self.segments.len();
|
||||
let m = path.depth();
|
||||
|
||||
if n > m { return false; }
|
||||
|
||||
let segs = path.segments();
|
||||
match (self.start_anchored, self.end_anchored) {
|
||||
(true, true) => n == m && self.window_matches(segs, 0),
|
||||
(true, false) => self.window_matches(segs, 0),
|
||||
(false, true) => self.window_matches(segs, m - n),
|
||||
(false, false) => (0..=(m - n)).any(|i| self.window_matches(segs, i)),
|
||||
}
|
||||
}
|
||||
|
||||
fn window_matches(&self, segs: &[TaxSegment], start: usize) -> bool {
|
||||
self.segments.iter()
|
||||
.zip(segs[start..start + self.segments.len()].iter())
|
||||
.all(|(pat, seg)| pat.matches(seg))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::error::TaxError;
|
||||
|
||||
/// One node of a taxonomy path, made of a name and an optional rank.
///
/// `@` is the reserved separator, so it may occur in neither `name` nor `rank`.
/// The serialised form is `name`, or `name@rank` when a rank is present.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TaxSegment {
    name: String,
    rank: Option<String>,
}
|
||||
|
||||
impl TaxSegment {
|
||||
pub fn parse(raw: &str) -> Result<Self, TaxError> {
|
||||
let parts: Vec<&str> = raw.splitn(3, '@').collect();
|
||||
|
||||
let (name_raw, rank_raw) = match parts.as_slice() {
|
||||
[name] => (*name, None),
|
||||
[name, rank] => (*name, Some(*rank)),
|
||||
_ => return Err(TaxError::AmbiguousRank { segment: raw.to_string() }),
|
||||
};
|
||||
|
||||
if name_raw.is_empty() {
|
||||
return Err(TaxError::EmptySegmentName);
|
||||
}
|
||||
|
||||
let rank = match rank_raw {
|
||||
None => None,
|
||||
Some("") => return Err(TaxError::EmptyRankName { segment: raw.to_string() }),
|
||||
Some(r) => Some(r.to_string()),
|
||||
};
|
||||
|
||||
Ok(Self { name: name_raw.to_string(), rank })
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &str { &self.name }
|
||||
pub fn rank(&self) -> Option<&str> { self.rank.as_deref() }
|
||||
}
|
||||
|
||||
impl fmt::Display for TaxSegment {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match &self.rank {
|
||||
None => write!(f, "{}", self.name),
|
||||
Some(r) => write!(f, "{}@{}", self.name, r),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
use crate::error::TaxError;
|
||||
use crate::segment::TaxSegment;
|
||||
|
||||
/// One segment of a query pattern: a mandatory name plus an optional rank filter.
///
/// With `rank` set to `None` the pattern accepts every segment bearing the
/// given name, whatever its stored rank. With `Some(r)` the name and the rank
/// must both match exactly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SegmentPattern {
    name: String,
    rank: Option<String>,
}
|
||||
|
||||
impl SegmentPattern {
|
||||
pub fn parse(raw: &str) -> Result<Self, TaxError> {
|
||||
let parts: Vec<&str> = raw.splitn(3, '@').collect();
|
||||
let (name_raw, rank_raw) = match parts.as_slice() {
|
||||
[name] => (*name, None),
|
||||
[name, rank] => (*name, Some(*rank)),
|
||||
_ => return Err(TaxError::AmbiguousRank { segment: raw.to_string() }),
|
||||
};
|
||||
if name_raw.is_empty() {
|
||||
return Err(TaxError::EmptySegmentName);
|
||||
}
|
||||
let rank = match rank_raw {
|
||||
None => None,
|
||||
Some("") => return Err(TaxError::EmptyRankName { segment: raw.to_string() }),
|
||||
Some(r) => Some(r.to_string()),
|
||||
};
|
||||
Ok(Self { name: name_raw.to_string(), rank })
|
||||
}
|
||||
|
||||
/// True if this pattern matches `seg`.
|
||||
/// Name must match exactly. If a rank is specified in the pattern, the
|
||||
/// segment's rank must match; otherwise any rank (or no rank) is accepted.
|
||||
pub fn matches(&self, seg: &TaxSegment) -> bool {
|
||||
self.name == seg.name()
|
||||
&& self.rank.as_deref().map_or(true, |r| seg.rank() == Some(r))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user