feat: add selection filter DSL parser

Implement a parser for filter strings like "species:duck sex:female -tag:old". Supports AND (space), OR (|), negation (-), and quoted values.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 15:19:11 +00:00
parent 457f403e32
commit 6e9fd17327
5 changed files with 456 additions and 7 deletions
--- a/src/animaltrack/selection/__init__.py
+++ b/src/animaltrack/selection/__init__.py
@@ -1,9 +1,15 @@
 # ABOUTME: Selection system for resolving animal sets from filters.
-# ABOUTME: Provides resolver functions for animal selection contexts.
+# ABOUTME: Provides parser, AST, and resolver for animal selection contexts.

+from animaltrack.selection.ast import FieldFilter, FilterAST
+from animaltrack.selection.parser import ParseError, parse_filter
 from animaltrack.selection.resolver import SelectionResolverError, resolve_selection

 __all__ = [
+    "FieldFilter",
+    "FilterAST",
+    "ParseError",
    "SelectionResolverError",
+    "parse_filter",
    "resolve_selection",
 ]
--- a/src/animaltrack/selection/ast.py
+++ b/src/animaltrack/selection/ast.py
@@ -0,0 +1,41 @@
+# ABOUTME: AST node classes for the selection filter DSL.
+# ABOUTME: Represents parsed filter expressions as a tree structure.
+
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class FieldFilter:
+    """One ``field:value`` condition of a selection filter.
+
+    ``values`` holds the alternatives given for the field (more than one
+    when the term used ``|``) and ``negated`` is True when the term was
+    prefixed with ``-``. Whatever sequence is passed for ``values`` is
+    stored as a tuple.
+
+    Examples:
+        - species:duck -> FieldFilter("species", ["duck"], False)
+        - species:duck|goose -> FieldFilter("species", ["duck", "goose"], False)
+        - -sex:male -> FieldFilter("sex", ["male"], True)
+    """
+
+    field: str
+    values: list[str]
+    negated: bool = False
+
+    def __post_init__(self) -> None:
+        # The dataclass is frozen, so plain assignment is blocked; go through
+        # the base-class setter to swap the incoming sequence for a tuple,
+        # which keeps the generated __hash__ usable.
+        frozen_values = tuple(self.values)
+        super().__setattr__("values", frozen_values)
+
+
+@dataclass(frozen=True)
+class FilterAST:
+    """Top-level node of a parsed filter: field filters joined by AND.
+
+    When ``filters`` is empty the expression places no constraint, i.e. it
+    matches everything.
+    """
+
+    filters: list[FieldFilter]
+
+    def __post_init__(self) -> None:
+        # Frozen dataclass: bypass the blocked setter to store the filters as
+        # a tuple so that instances remain hashable.
+        super().__setattr__("filters", tuple(self.filters))
+
+    def is_match_all(self) -> bool:
+        """Tell whether no field filters are present (matches all animals)."""
+        return not self.filters
--- a/src/animaltrack/selection/parser.py
+++ b/src/animaltrack/selection/parser.py
@@ -0,0 +1,169 @@
+# ABOUTME: Parser for the selection filter DSL.
+# ABOUTME: Converts filter strings into FilterAST for query execution.
+
+from collections.abc import Iterator
+
+from animaltrack.selection.ast import FieldFilter, FilterAST
+
+# Field names accepted on the left-hand side of a "field:value" filter term
+VALID_FIELDS = frozenset(
+    ("location", "species", "sex", "life_stage", "identified", "tag")
+)
+
+# Fields that may also appear bare, as a flag with no ":value" part
+FLAG_FIELDS = frozenset(("identified",))
+
+
+class ParseError(Exception):
+    """Signals that a selection filter string is malformed and cannot be parsed."""
+
+
+def _tokenize(filter_str: str) -> Iterator[str]:
+    """Split filter string into tokens, respecting quoted strings.
+
+    A token is an optional "-" prefix, a field name, and optionally a colon
+    followed by either a quoted value (which may contain whitespace) or an
+    unquoted value that runs to the next whitespace character. Tokens are
+    yielded verbatim, including the "-" prefix and any surrounding quotes.
+    Any whitespace character (per ``str.isspace``) separates tokens.
+
+    Yields tokens like:
+        - "species:duck"
+        - "location:\"Strip 1\""
+        - "-tag:sick"
+
+    Raises:
+        ParseError: If the string ends right after "-" or ":", if a quoted
+            value is never closed, or if a quote appears before any colon.
+    """
+    i = 0
+    n = len(filter_str)
+
+    while i < n:
+        # Skip whitespace between tokens
+        while i < n and filter_str[i].isspace():
+            i += 1
+        if i >= n:
+            break
+
+        # Start of a token
+        token_start = i
+
+        # Handle negation prefix
+        if filter_str[i] == "-":
+            i += 1
+            if i >= n:
+                raise ParseError("Unexpected end after negation '-'")
+
+        # Read the field name: stop at a colon, a quote, or any whitespace.
+        # isspace() (rather than a fixed " \t" set) keeps this scan consistent
+        # with the separator handling above, so a newline between tokens ends
+        # the field name instead of being absorbed into it.
+        while i < n and filter_str[i] not in ":\"'" and not filter_str[i].isspace():
+            i += 1
+
+        if i >= n or filter_str[i].isspace():
+            # No colon - could be a flag field or error; the caller decides
+            token = filter_str[token_start:i]
+            yield token
+            continue
+
+        if filter_str[i] == ":":
+            i += 1  # consume colon
+
+            if i >= n:
+                raise ParseError(f"Empty value after colon in '{filter_str[token_start:i]}'")
+
+            # Check for quoted value (i < n is guaranteed by the check above)
+            if filter_str[i] in "\"'":
+                quote_char = filter_str[i]
+                i += 1  # consume opening quote
+                value_start = i
+
+                # Find closing quote of the same kind that opened the value
+                while i < n and filter_str[i] != quote_char:
+                    i += 1
+
+                if i >= n:
+                    raise ParseError(
+                        f"Unclosed quote in filter starting at '{filter_str[token_start:value_start]}'"
+                    )
+
+                i += 1  # consume closing quote
+            else:
+                # Unquoted value - read until whitespace
+                while i < n and not filter_str[i].isspace():
+                    i += 1
+
+            token = filter_str[token_start:i]
+            yield token
+        else:
+            # Quote without colon
+            raise ParseError(f"Unexpected quote in token starting at position {token_start}")
+
+
+def _parse_token(token: str) -> FieldFilter:
+    """Turn one tokenizer token into a FieldFilter.
+
+    Args:
+        token: A single token such as "species:duck|goose", "-tag:sick",
+            or a bare flag like "identified".
+
+    Returns:
+        The FieldFilter for the token; ``negated`` is set when the token
+        began with "-". A bare flag field gets the single value "1".
+
+    Raises:
+        ParseError: If a non-flag token lacks ":", the field is unknown,
+            or the value (or any "|"-separated alternative) is empty.
+    """
+    negated = token.startswith("-")
+    if negated:
+        token = token[1:]
+
+    field, colon, value_part = token.partition(":")
+
+    # No colon at all: only flag fields may appear bare
+    if not colon:
+        if token not in FLAG_FIELDS:
+            raise ParseError(f"Missing ':' in token '{token}' (not a flag field)")
+        return FieldFilter(field=token, values=["1"], negated=negated)
+
+    if field not in VALID_FIELDS:
+        raise ParseError(
+            f"Unknown field '{field}'. Valid fields: {', '.join(sorted(VALID_FIELDS))}"
+        )
+
+    # Drop one pair of matching surrounding quotes, trying double quotes first
+    for quote in ('"', "'"):
+        if value_part.startswith(quote) and value_part.endswith(quote):
+            value_part = value_part[1:-1]
+            break
+
+    if not value_part:
+        raise ParseError(f"Empty value for field '{field}'")
+
+    # "|" separates OR alternatives; none of them may be empty
+    values = value_part.split("|")
+    if not all(values):
+        raise ParseError(f"Empty value in OR expression for field '{field}'")
+
+    return FieldFilter(field=field, values=values, negated=negated)
+
+
+def parse_filter(filter_str: str) -> FilterAST:
+    """Build a FilterAST from a selection filter string.
+
+    Surrounding whitespace is ignored. A blank string produces a FilterAST
+    with no filters; otherwise the string is tokenized and every token
+    becomes one FieldFilter, in input order.
+
+    Args:
+        filter_str: Filter string like "species:duck sex:female -tag:sick"
+
+    Returns:
+        FilterAST holding one FieldFilter per token.
+
+    Raises:
+        ParseError: If the filter string is invalid.
+
+    Examples:
+        - "species:duck" ->
+          FilterAST(filters=(FieldFilter(field='species', values=('duck',), negated=False),))
+        - "species:duck|goose sex:female" -> two filters: species with values
+          ('duck', 'goose'), then sex with values ('female',)
+        - "" -> FilterAST(filters=()), which matches all
+    """
+    stripped = filter_str.strip()
+    if stripped == "":
+        return FilterAST(filters=[])
+
+    # Tokenize the whole string before parsing any token, so tokenizer
+    # errors surface ahead of per-token validation errors.
+    tokens = tuple(_tokenize(stripped))
+    return FilterAST(filters=[_parse_token(tok) for tok in tokens])