feat: add selection filter DSL parser
Implement parser for filter strings like "species:duck sex:female -tag:old". Supports AND (space), OR (|), negation (-), and quoted values. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,15 @@
|
||||
# ABOUTME: Selection system for resolving animal sets from filters.
|
||||
# ABOUTME: Provides resolver functions for animal selection contexts.
|
||||
# ABOUTME: Provides parser, AST, and resolver for animal selection contexts.
|
||||
|
||||
from animaltrack.selection.ast import FieldFilter, FilterAST
|
||||
from animaltrack.selection.parser import ParseError, parse_filter
|
||||
from animaltrack.selection.resolver import SelectionResolverError, resolve_selection
|
||||
|
||||
__all__ = [
|
||||
"FieldFilter",
|
||||
"FilterAST",
|
||||
"ParseError",
|
||||
"SelectionResolverError",
|
||||
"parse_filter",
|
||||
"resolve_selection",
|
||||
]
|
||||
|
||||
41
src/animaltrack/selection/ast.py
Normal file
41
src/animaltrack/selection/ast.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# ABOUTME: AST node classes for the selection filter DSL.
|
||||
# ABOUTME: Represents parsed filter expressions as a tree structure.
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FieldFilter:
    """One condition on a single field of the selection DSL.

    ``values`` holds the alternatives given for the field (several when the
    DSL used ``|``), and ``negated`` records a leading ``-`` on the token.

    Examples:
        - species:duck -> FieldFilter("species", ["duck"], False)
        - species:duck|goose -> FieldFilter("species", ["duck", "goose"], False)
        - -sex:male -> FieldFilter("sex", ["male"], True)
    """

    field: str
    values: list[str]
    negated: bool = False

    def __post_init__(self) -> None:
        # The generated __hash__ needs hashable fields, so store the values
        # as a tuple. frozen=True blocks normal attribute assignment, hence
        # the detour through object.__setattr__.
        frozen_values = tuple(self.values)
        object.__setattr__(self, "values", frozen_values)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class FilterAST:
    """Top-level AST node: a sequence of field filters that are ANDed together.

    When no filters are present the AST stands for "match all".
    """

    filters: list[FieldFilter]

    def __post_init__(self) -> None:
        # Store the filters as a tuple so the frozen instance stays hashable;
        # frozen=True forbids plain assignment, so use object.__setattr__.
        frozen_filters = tuple(self.filters)
        object.__setattr__(self, "filters", frozen_filters)

    def is_match_all(self) -> bool:
        """Return True if this filter matches all animals (no conditions)."""
        return not self.filters
|
||||
169
src/animaltrack/selection/parser.py
Normal file
169
src/animaltrack/selection/parser.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# ABOUTME: Parser for the selection filter DSL.
|
||||
# ABOUTME: Converts filter strings into FilterAST for query execution.
|
||||
|
||||
from collections.abc import Iterator
|
||||
|
||||
from animaltrack.selection.ast import FieldFilter, FilterAST
|
||||
|
||||
# Field names accepted on the left-hand side of a "field:value" token.
VALID_FIELDS = frozenset(
    ("location", "species", "sex", "life_stage", "identified", "tag")
)

# Field names that may also appear bare, as a flag, without a ":value" part.
FLAG_FIELDS = frozenset(("identified",))
|
||||
|
||||
|
||||
class ParseError(Exception):
    """Signals that a selection filter string is malformed and cannot be parsed."""
|
||||
|
||||
|
||||
def _tokenize(filter_str: str) -> Iterator[str]:
|
||||
"""Split filter string into tokens, respecting quoted strings.
|
||||
|
||||
Yields tokens like:
|
||||
- "species:duck"
|
||||
- "location:\"Strip 1\""
|
||||
- "-tag:sick"
|
||||
"""
|
||||
i = 0
|
||||
n = len(filter_str)
|
||||
|
||||
while i < n:
|
||||
# Skip whitespace
|
||||
while i < n and filter_str[i].isspace():
|
||||
i += 1
|
||||
if i >= n:
|
||||
break
|
||||
|
||||
# Start of a token
|
||||
token_start = i
|
||||
|
||||
# Handle negation prefix
|
||||
if filter_str[i] == "-":
|
||||
i += 1
|
||||
if i >= n:
|
||||
raise ParseError("Unexpected end after negation '-'")
|
||||
|
||||
# Read until colon or space
|
||||
while i < n and filter_str[i] not in ":\"' \t":
|
||||
i += 1
|
||||
|
||||
if i >= n or filter_str[i].isspace():
|
||||
# No colon - could be a flag field or error
|
||||
token = filter_str[token_start:i]
|
||||
yield token
|
||||
continue
|
||||
|
||||
if filter_str[i] == ":":
|
||||
i += 1 # consume colon
|
||||
|
||||
if i >= n:
|
||||
raise ParseError(f"Empty value after colon in '{filter_str[token_start:i]}'")
|
||||
|
||||
# Check for quoted value
|
||||
if i < n and filter_str[i] in "\"'":
|
||||
quote_char = filter_str[i]
|
||||
i += 1 # consume opening quote
|
||||
value_start = i
|
||||
|
||||
# Find closing quote
|
||||
while i < n and filter_str[i] != quote_char:
|
||||
i += 1
|
||||
|
||||
if i >= n:
|
||||
raise ParseError(
|
||||
f"Unclosed quote in filter starting at '{filter_str[token_start:value_start]}'"
|
||||
)
|
||||
|
||||
i += 1 # consume closing quote
|
||||
else:
|
||||
# Unquoted value - read until space
|
||||
while i < n and not filter_str[i].isspace():
|
||||
i += 1
|
||||
|
||||
token = filter_str[token_start:i]
|
||||
yield token
|
||||
else:
|
||||
# Quote without colon
|
||||
raise ParseError(f"Unexpected quote in token starting at position {token_start}")
|
||||
|
||||
|
||||
def _parse_token(token: str) -> FieldFilter:
    """Turn one raw token into a FieldFilter.

    A leading "-" marks the filter as negated. A token without a colon is
    accepted only if it names a flag field, in which case its value is "1".
    Otherwise the token is split at the first colon into field and value;
    a value wrapped in matching quotes is unwrapped, then split on "|" into
    its alternatives.

    Raises:
        ParseError: If the colon is missing on a non-flag token, the field
            is unknown, or the value (or any "|" alternative) is empty.
    """
    is_negated = token.startswith("-")
    token = token.removeprefix("-")

    field, colon, raw_value = token.partition(":")

    # No colon at all: only flag fields may be written this way.
    if not colon:
        if token not in FLAG_FIELDS:
            raise ParseError(f"Missing ':' in token '{token}' (not a flag field)")
        return FieldFilter(field=token, values=["1"], negated=is_negated)

    if field not in VALID_FIELDS:
        raise ParseError(
            f"Unknown field '{field}'. Valid fields: {', '.join(sorted(VALID_FIELDS))}"
        )

    # Drop one pair of surrounding quotes; double quotes are tried first.
    for quote in ('"', "'"):
        if raw_value.startswith(quote) and raw_value.endswith(quote):
            raw_value = raw_value[1:-1]
            break

    if not raw_value:
        raise ParseError(f"Empty value for field '{field}'")

    # "|" separates OR alternatives; none of them may be empty.
    alternatives = raw_value.split("|")
    if not all(alternatives):
        raise ParseError(f"Empty value in OR expression for field '{field}'")

    return FieldFilter(field=field, values=alternatives, negated=is_negated)
|
||||
|
||||
|
||||
def parse_filter(filter_str: str) -> FilterAST:
    """Build a FilterAST from a selection filter string.

    Args:
        filter_str: Filter string like "species:duck sex:female -tag:sick".
            Leading and trailing whitespace is ignored; an empty or
            whitespace-only string produces an AST with no filters.

    Returns:
        FilterAST holding one FieldFilter per token, in input order.

    Raises:
        ParseError: If the filter string is invalid.

    Examples:
        >>> parse_filter("species:duck")
        FilterAST(filters=(FieldFilter(field='species', values=('duck',), negated=False),))

        >>> parse_filter("species:duck|goose sex:female")
        FilterAST(filters=(
            FieldFilter(field='species', values=('duck', 'goose'), negated=False),
            FieldFilter(field='sex', values=('female',), negated=False),
        ))

        >>> parse_filter("")
        FilterAST(filters=())  # matches all
    """
    stripped = filter_str.strip()

    if stripped:
        # Tokenize the whole input before parsing any token, so tokenizer
        # errors are raised ahead of per-token errors.
        raw_tokens = list(_tokenize(stripped))
        parsed = list(map(_parse_token, raw_tokens))
    else:
        parsed = []

    return FilterAST(filters=parsed)
|
||||
Reference in New Issue
Block a user