feat: add historical state resolver with point-in-time queries

Implement resolve_filter() to resolve animals matching FilterAST at ts_utc.
Uses interval tables for historical location, sex, life_stage, and tags.
Includes roster hash computation using xxhash64.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 15:31:07 +00:00
parent 6e9fd17327
commit c80d9f7fda
5 changed files with 590 additions and 18 deletions

View File

@@ -1,15 +1,24 @@
# ABOUTME: Selection system for resolving animal sets from filters.
# ABOUTME: Provides parser, AST, and resolver for animal selection contexts.
# ABOUTME: Provides parser, AST, resolver, and hash for animal selection contexts.
from animaltrack.selection.ast import FieldFilter, FilterAST
from animaltrack.selection.hash import compute_roster_hash
from animaltrack.selection.parser import ParseError, parse_filter
from animaltrack.selection.resolver import SelectionResolverError, resolve_selection
from animaltrack.selection.resolver import (
SelectionResolverError,
SelectionResult,
resolve_filter,
resolve_selection,
)
# Public names exported by this module; this list is what `import *` exposes.
__all__ = [
"FieldFilter",
"FilterAST",
"ParseError",
"SelectionResolverError",
"SelectionResult",
"compute_roster_hash",
"parse_filter",
"resolve_filter",
"resolve_selection",
]

View File

@@ -0,0 +1,25 @@
# ABOUTME: Roster hash computation using xxhash64.
# ABOUTME: Used for optimistic locking in selection context.
import xxhash
def compute_roster_hash(
    animal_ids: list[str],
    from_location_id: str | None = None,
) -> str:
    """Return the xxhash64 hex digest identifying a roster of animals.

    The IDs are sorted before hashing, so the order of ``animal_ids`` does
    not affect the result.

    Args:
        animal_ids: Animal IDs that make up the roster.
        from_location_id: Optional location ID mixed into the hash
            (used for move operations). A falsy value (``None`` or ``""``)
            leaves the hash dependent on the IDs alone.

    Returns:
        Hex string of the xxhash64 digest.
    """
    # The location, when given, is a "|"-terminated prefix in front of the
    # "|"-joined sorted IDs; the separator is kept even when there are no IDs.
    prefix = f"{from_location_id}|" if from_location_id else ""
    payload = prefix + "|".join(sorted(animal_ids))
    digest = xxhash.xxh64(payload.encode())
    return digest.hexdigest()

View File

@@ -1,21 +1,33 @@
# ABOUTME: Basic animal selection resolver for Step 4.3.
# ABOUTME: Validates resolved_ids exist and are alive.
# ABOUTME: Selection resolver for animal filtering and historical resolution.
# ABOUTME: Resolves FilterAST at point-in-time using interval tables.
from dataclasses import dataclass
from typing import Any
from animaltrack.selection.ast import FieldFilter, FilterAST
from animaltrack.selection.hash import compute_roster_hash
class SelectionResolverError(Exception):
    """Root exception type for failures raised by the selection resolver."""
@dataclass
class SelectionResult:
    """Outcome of resolving a filter at a single point in time.

    Pairs the matched animal IDs with the roster hash computed from them.
    """

    # Matched animal IDs, in sorted order.
    animal_ids: list[str]
    # Hash string derived from the matched IDs.
    roster_hash: str
def resolve_selection(
db: Any,
resolved_ids: list[str],
) -> list[str]:
"""Validate that animal IDs exist and are alive.
This is the basic resolver for Step 4.3. Full filter DSL
parsing and historical resolution are added in Phase 5.
This function validates pre-resolved IDs (backward compatibility).
For filter-based resolution, use resolve_filter().
Args:
db: Database connection.
@@ -48,3 +60,135 @@ def resolve_selection(
raise SelectionResolverError(f"Animal '{animal_id}' is not alive (status: {status})")
return resolved_ids
def resolve_filter(
    db: Any,
    filter_ast: FilterAST,
    ts_utc: int,
) -> SelectionResult:
    """Resolve the animals that match a filter as of a historical timestamp.

    An animal is a candidate when it has a location interval covering
    ``ts_utc`` and a ``status`` attribute interval with value ``alive``
    covering ``ts_utc``. Intervals are start-inclusive and end-exclusive;
    a NULL ``end_utc`` means the interval is still open. Each field filter
    in ``filter_ast`` then narrows the candidates via an ``IN`` (or, when
    negated, ``NOT IN``) subquery.

    Args:
        db: Database connection; must provide ``execute(sql, params)``
            returning an object with ``fetchall()``.
        filter_ast: Parsed filter AST whose ``filters`` are applied in order.
        ts_utc: Timestamp in ms since Unix epoch.

    Returns:
        SelectionResult with the matching animal IDs (ordered by the query)
        and the roster hash computed from them.
    """
    # Base selection: located somewhere at ts_utc AND alive at ts_utc.
    sql_parts = [
        """
        SELECT DISTINCT ali.animal_id
        FROM animal_location_intervals ali
        WHERE ali.start_utc <= ?
        AND (ali.end_utc IS NULL OR ali.end_utc > ?)
        AND EXISTS (
        SELECT 1 FROM animal_attr_intervals aai
        WHERE aai.animal_id = ali.animal_id
        AND aai.attr = 'status'
        AND aai.value = 'alive'
        AND aai.start_utc <= ?
        AND (aai.end_utc IS NULL OR aai.end_utc > ?)
        )
        """
    ]
    # Two timestamp binds for the location window, two for the status window.
    bound: list[Any] = [ts_utc] * 4

    # Each filter contributes one membership test plus its own bind values,
    # appended in the same order as the placeholders appear in the SQL.
    for item in filter_ast.filters:
        subquery, subquery_params = _build_filter_clause(item, ts_utc)
        membership = "NOT IN" if item.negated else "IN"
        sql_parts.append(f"\n AND ali.animal_id {membership} ({subquery})")
        bound.extend(subquery_params)

    sql_parts.append("\n ORDER BY ali.animal_id")

    fetched = db.execute("".join(sql_parts), bound).fetchall()
    matched_ids = [record[0] for record in fetched]
    return SelectionResult(
        animal_ids=matched_ids,
        roster_hash=compute_roster_hash(matched_ids),
    )
def _build_filter_clause(field_filter: FieldFilter, ts_utc: int) -> tuple[str, list[Any]]:
"""Build SQL subquery for a single field filter.
Args:
field_filter: The field filter to build clause for.
ts_utc: Timestamp for historical queries.
Returns:
Tuple of (SQL subquery string, list of parameters).
"""
field = field_filter.field
values = list(field_filter.values)
if field == "species":
# Species from animal_registry (current state)
placeholders = ",".join("?" * len(values))
query = f"""
SELECT animal_id FROM animal_registry
WHERE species_code IN ({placeholders})
"""
return query, values
elif field == "identified":
# Identified from animal_registry (current state)
# Values are "0" or "1" strings
placeholders = ",".join("?" * len(values))
int_values = [int(v) for v in values]
query = f"""
SELECT animal_id FROM animal_registry
WHERE identified IN ({placeholders})
"""
return query, int_values
elif field == "location":
# Location by name - join with locations table, historical
placeholders = ",".join("?" * len(values))
query = f"""
SELECT ali.animal_id
FROM animal_location_intervals ali
JOIN locations l ON ali.location_id = l.id
WHERE l.name IN ({placeholders})
AND ali.start_utc <= ?
AND (ali.end_utc IS NULL OR ali.end_utc > ?)
"""
params = values + [ts_utc, ts_utc]
return query, params
elif field in ("sex", "life_stage"):
# Historical attribute from animal_attr_intervals
placeholders = ",".join("?" * len(values))
query = f"""
SELECT animal_id FROM animal_attr_intervals
WHERE attr = ?
AND value IN ({placeholders})
AND start_utc <= ?
AND (end_utc IS NULL OR end_utc > ?)
"""
params = [field] + values + [ts_utc, ts_utc]
return query, params
elif field == "tag":
# Historical tag from animal_tag_intervals
placeholders = ",".join("?" * len(values))
query = f"""
SELECT animal_id FROM animal_tag_intervals
WHERE tag IN ({placeholders})
AND start_utc <= ?
AND (end_utc IS NULL OR end_utc > ?)
"""
params = values + [ts_utc, ts_utc]
return query, params
else:
# Unknown field - should not happen if parser validates
msg = f"Unknown filter field: {field}"
raise SelectionResolverError(msg)