feat: add selection filter DSL parser

Implement parser for filter strings like "species:duck sex:female -tag:old".
Supports AND (space), OR (|), negation (-), and quoted values.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 15:19:11 +00:00
parent 457f403e32
commit 6e9fd17327
5 changed files with 456 additions and 7 deletions

View File

@@ -1,9 +1,15 @@
# ABOUTME: Selection system for resolving animal sets from filters.
# ABOUTME: Provides resolver functions for animal selection contexts.
# ABOUTME: Provides parser, AST, and resolver for animal selection contexts.
from animaltrack.selection.ast import FieldFilter, FilterAST
from animaltrack.selection.parser import ParseError, parse_filter
from animaltrack.selection.resolver import SelectionResolverError, resolve_selection
# Public API of the selection package: the names exported by `from ... import *`.
# Every entry corresponds to a name imported at the top of this module.
__all__ = [
"FieldFilter",
"FilterAST",
"ParseError",
"SelectionResolverError",
"parse_filter",
"resolve_selection",
]

View File

@@ -0,0 +1,41 @@
# ABOUTME: AST node classes for the selection filter DSL.
# ABOUTME: Represents parsed filter expressions as a tree structure.
from dataclasses import dataclass
@dataclass(frozen=True)
class FieldFilter:
    """One condition on a single field of the selection DSL.

    ``values`` holds the alternatives that are OR-ed together, and
    ``negated`` records whether the condition was written with a
    leading ``-``.

    Examples:
        - species:duck       -> FieldFilter("species", ["duck"], False)
        - species:duck|goose -> FieldFilter("species", ["duck", "goose"], False)
        - -sex:male          -> FieldFilter("sex", ["male"], True)
    """

    field: str
    # Accepted as any iterable of strings; stored as a tuple (see __post_init__).
    values: list[str]
    negated: bool = False

    def __post_init__(self) -> None:
        # A frozen dataclass rejects normal attribute assignment, so the
        # normalised tuple is written through object.__setattr__. Storing a
        # tuple keeps the generated __hash__ usable.
        as_tuple = tuple(self.values)
        object.__setattr__(self, "values", as_tuple)
@dataclass(frozen=True)
class FilterAST:
"""Root AST node containing all field filters combined with AND.
An empty filters list means "match all".
"""
filters: list[FieldFilter]
def __post_init__(self) -> None:
# Convert filters to tuple for hashability since frozen=True
object.__setattr__(self, "filters", tuple(self.filters))
def is_match_all(self) -> bool:
"""Return True if this filter matches all animals."""
return len(self.filters) == 0

View File

@@ -0,0 +1,169 @@
# ABOUTME: Parser for the selection filter DSL.
# ABOUTME: Converts filter strings into FilterAST for query execution.
from collections.abc import Iterator
from animaltrack.selection.ast import FieldFilter, FilterAST
# Field names accepted on the left-hand side of a "field:value" token.
VALID_FIELDS = frozenset(
    {
        "identified",
        "life_stage",
        "location",
        "sex",
        "species",
        "tag",
    }
)
# Fields that may also be written bare, as a flag with no ":value" part.
FLAG_FIELDS = frozenset({"identified"})
class ParseError(Exception):
    """Signals that a filter string does not conform to the selection DSL."""
def _tokenize(filter_str: str) -> Iterator[str]:
"""Split filter string into tokens, respecting quoted strings.
Yields tokens like:
- "species:duck"
- "location:\"Strip 1\""
- "-tag:sick"
"""
i = 0
n = len(filter_str)
while i < n:
# Skip whitespace
while i < n and filter_str[i].isspace():
i += 1
if i >= n:
break
# Start of a token
token_start = i
# Handle negation prefix
if filter_str[i] == "-":
i += 1
if i >= n:
raise ParseError("Unexpected end after negation '-'")
# Read until colon or space
while i < n and filter_str[i] not in ":\"' \t":
i += 1
if i >= n or filter_str[i].isspace():
# No colon - could be a flag field or error
token = filter_str[token_start:i]
yield token
continue
if filter_str[i] == ":":
i += 1 # consume colon
if i >= n:
raise ParseError(f"Empty value after colon in '{filter_str[token_start:i]}'")
# Check for quoted value
if i < n and filter_str[i] in "\"'":
quote_char = filter_str[i]
i += 1 # consume opening quote
value_start = i
# Find closing quote
while i < n and filter_str[i] != quote_char:
i += 1
if i >= n:
raise ParseError(
f"Unclosed quote in filter starting at '{filter_str[token_start:value_start]}'"
)
i += 1 # consume closing quote
else:
# Unquoted value - read until space
while i < n and not filter_str[i].isspace():
i += 1
token = filter_str[token_start:i]
yield token
else:
# Quote without colon
raise ParseError(f"Unexpected quote in token starting at position {token_start}")
def _parse_token(token: str) -> FieldFilter:
    """Turn one raw token into a FieldFilter.

    Args:
        token: A token such as ``species:duck|goose``, ``-tag:sick``,
            ``location:"Strip 1"`` or a bare flag like ``identified``.

    Returns:
        The corresponding FieldFilter. A bare flag field is given the
        single value ``"1"``.

    Raises:
        ParseError: If a colon-less token is not a flag field, the field
            name is unknown, the value is empty, or any ``|``-separated
            alternative is empty.
    """
    is_negated = token.startswith("-")
    if is_negated:
        token = token[1:]

    # partition splits on the first colon; an empty separator means none was found.
    field, colon, raw_value = token.partition(":")

    if not colon:
        # Only flag fields may appear without a value.
        if token not in FLAG_FIELDS:
            raise ParseError(f"Missing ':' in token '{token}' (not a flag field)")
        return FieldFilter(field=token, values=["1"], negated=is_negated)

    if field not in VALID_FIELDS:
        raise ParseError(
            f"Unknown field '{field}'. Valid fields: {', '.join(sorted(VALID_FIELDS))}"
        )

    # Remove one pair of matching surrounding quotes, double quotes first.
    for quote in ('"', "'"):
        if raw_value.startswith(quote) and raw_value.endswith(quote):
            raw_value = raw_value[1:-1]
            break

    if not raw_value:
        raise ParseError(f"Empty value for field '{field}'")

    # "|" separates OR-ed alternatives; none of them may be empty.
    alternatives = raw_value.split("|")
    if not all(alternatives):
        raise ParseError(f"Empty value in OR expression for field '{field}'")

    return FieldFilter(field=field, values=alternatives, negated=is_negated)
def parse_filter(filter_str: str) -> FilterAST:
    """Parse a selection filter string into its AST.

    Whitespace-separated tokens are combined with AND. A blank or empty
    string produces an AST with no filters, which matches everything.

    Args:
        filter_str: Filter string like "species:duck sex:female -tag:sick"

    Returns:
        FilterAST holding one FieldFilter per token, in input order.

    Raises:
        ParseError: If the filter string is invalid.

    Examples:
        >>> parse_filter("species:duck")
        FilterAST(filters=(FieldFilter(field='species', values=('duck',), negated=False),))

        >>> parse_filter("species:duck|goose sex:female")
        FilterAST(filters=(
            FieldFilter(field='species', values=('duck', 'goose'), negated=False),
            FieldFilter(field='sex', values=('female',), negated=False),
        ))

        >>> parse_filter("")
        FilterAST(filters=())  # matches all
    """
    trimmed = filter_str.strip()
    parsed: list[FieldFilter] = []
    if trimmed:
        # Tokenize the whole string before parsing any token, so that
        # tokenizer errors are raised ahead of per-token errors.
        raw_tokens = list(_tokenize(trimmed))
        for raw in raw_tokens:
            parsed.append(_parse_token(raw))
    return FilterAST(filters=parsed)