"""Eval configuration — YAML fixture loader and dataclasses.

Fixtures come in two families:

1. **Agent fixtures** — test the batch agent pipeline. Three modes
   controlled by ``mode``:

   ``step1`` — classification prompt only.
   ``step2`` — processing prompt only.
   ``full`` — both steps in sequence.

2. **Journey fixtures** — test the prompt-template builder conversation
   (unchanged).
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal

import yaml

logger = logging.getLogger(__name__)

EvalMode = Literal["step1", "step2", "full"]


def _load_yaml_mapping(path: Path) -> dict[str, Any]:
    """Parse *path* as YAML and return its top-level mapping.

    Raises:
        ValueError: if the document is empty or its top level is not a
            mapping (fixtures are always key/value documents).
    """
    raw = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(raw, dict):
        raise ValueError(
            f"expected a YAML mapping at top level, got {type(raw).__name__}"
        )
    return raw


def _value_or(raw: dict[str, Any], key: str, default: Any) -> Any:
    """Return ``raw[key]``, or *default* when the key is missing or null.

    ``dict.get(key, default)`` only covers the *missing* case; a YAML key
    written with no value (``projects_list:``) parses to ``None`` and would
    otherwise leak into fields typed as list/str/dict. Explicit empty values
    (``[]``, ``""``) are preserved as written.
    """
    value = raw.get(key)
    return default if value is None else value


def _resolve_fixtures_dir(fixtures_dir: Path | None) -> Path:
    """Return *fixtures_dir*, defaulting to ``fixtures/`` next to this module."""
    if fixtures_dir is None:
        return Path(__file__).parent / "fixtures"
    return fixtures_dir


@dataclass
class ExpectedRecord:
    """A single expected extraction result.

    Only the fields specified are checked — unspecified fields are ignored.
    """

    table: str  # tasks | notes | timelines | projects
    fields: dict[str, Any]  # field_name → expected_value


@dataclass
class ExpectedClassification:
    """Expected output of step-1 classification for one file."""

    file: str  # relative path to the sample file
    project_id: str  # expected matched project id, or "new"
    domains: list[str]  # expected domain list
    new_project_name: str | None = None


@dataclass
class EvalFixture:
    """A complete test scenario loaded from YAML.

    ``mode`` determines which pipeline steps are exercised:

    - **step1**: only ``_classify_file``
    - **step2**: only the processing LLM + tool loop
    - **full**: both steps in sequence (``run_local_agent``)
    """

    name: str
    description: str
    mode: EvalMode
    directory: str  # relative path to sample files
    data_types: list[str]
    file_extensions: list[str]
    models: list[str]  # if empty, use CLI default
    fixture_path: Path = field(default_factory=lambda: Path("."))

    # ── Step-1 inputs (classification) ───────────────────────────
    domain_definitions: str = ""
    projects_list: list[dict[str, Any]] = field(default_factory=list)
    custom_step1_prompt: str = ""

    # ── Step-2 inputs (processing) ───────────────────────────────
    existing_context: str = ""
    project_context: str = ""
    custom_prompt_section: str = ""

    # ── Seed records for mock executor ───────────────────────────
    seed_records: dict[str, list[dict]] = field(default_factory=dict)

    # ── Expected outputs ─────────────────────────────────────────
    expected_classification: list[ExpectedClassification] = field(default_factory=list)
    expected: list[ExpectedRecord] = field(default_factory=list)

    @property
    def fixture_dir(self) -> Path:
        """Path to the sample files directory.

        Computed as ``fixture_path.parent / directory``; it is absolute only
        when ``fixture_path`` itself is absolute.
        """
        return self.fixture_path.parent / self.directory

    @classmethod
    def from_yaml(cls, path: Path) -> "EvalFixture":
        """Load a fixture from a YAML file.

        Raises:
            ValueError: if the file is empty or not a YAML mapping.
            KeyError: if a required key (``name``, or ``file`` /
                ``project_id`` of an expected classification) is missing.
        """
        return cls._from_raw(_load_yaml_mapping(path), path)

    @classmethod
    def _from_raw(cls, raw: dict[str, Any], path: Path) -> "EvalFixture":
        """Build a fixture from an already-parsed YAML mapping."""
        # Expected records (step2/full): ``{table: [ {field: value}, ... ]}``.
        # A table key with a null value contributes no records.
        expected = [
            ExpectedRecord(table=table, fields=rec)
            for table, records in (raw.get("expected") or {}).items()
            for rec in (records or [])
        ]

        # Expected classification (step1/full).
        expected_classification = [
            ExpectedClassification(
                file=item["file"],
                project_id=item["project_id"],
                domains=_value_or(item, "domains", []),
                new_project_name=item.get("new_project_name"),
            )
            for item in (raw.get("expected_classification") or [])
        ]

        return cls(
            name=raw["name"],
            description=_value_or(raw, "description", ""),
            mode=_value_or(raw, "mode", "full"),
            directory=_value_or(raw, "directory", "sample_files"),
            data_types=_value_or(raw, "data_types", ["tasks"]),
            file_extensions=_value_or(raw, "file_extensions", []),
            models=_value_or(raw, "models", []),
            fixture_path=path,
            # Step-1 inputs
            domain_definitions=_value_or(raw, "domain_definitions", ""),
            projects_list=_value_or(raw, "projects_list", []),
            custom_step1_prompt=_value_or(raw, "custom_step1_prompt", ""),
            # Step-2 inputs
            existing_context=_value_or(raw, "existing_context", ""),
            project_context=_value_or(raw, "project_context", ""),
            custom_prompt_section=_value_or(raw, "custom_prompt_section", ""),
            # Shared
            seed_records=_value_or(raw, "seed_records", {}),
            expected_classification=expected_classification,
            expected=expected,
        )


def discover_fixtures(fixtures_dir: Path | None = None) -> list[EvalFixture]:
    """Find and load all agent YAML fixtures in the fixtures directory.

    Files whose top-level ``type`` is ``"journey"`` are skipped. A file that
    fails to load is logged and skipped, so one bad fixture does not hide the
    rest. Results follow the sorted order of the ``*.yaml`` paths.
    """
    fixtures_dir = _resolve_fixtures_dir(fixtures_dir)
    fixtures: list[EvalFixture] = []

    if not fixtures_dir.is_dir():
        logger.warning("eval: fixtures directory not found: %s", fixtures_dir)
        return fixtures

    for yaml_path in sorted(fixtures_dir.glob("*.yaml")):
        try:
            # Parse once and reuse the mapping for both the type check and
            # the fixture construction.
            raw = _load_yaml_mapping(yaml_path)
            if raw.get("type") == "journey":
                continue  # Skip journey fixtures
            fixture = EvalFixture._from_raw(raw, yaml_path)
        except Exception as exc:
            logger.error("eval: failed to load fixture %s: %s", yaml_path.name, exc)
        else:
            fixtures.append(fixture)
            logger.info("eval: loaded fixture %s from %s", fixture.name, yaml_path.name)

    return fixtures


# ── Journey fixtures ─────────────────────────────────────────────────────


@dataclass
class JourneyFixture:
    """A journey test scenario — tests the prompt_template builder conversation."""

    name: str
    description: str
    directory: str  # relative path to sample files
    data_types: list[str]
    expected_template_criteria: list[str]  # what the template should contain/satisfy
    # for automated journey runs (unused in interactive mode)
    user_messages: list[str] = field(default_factory=list)
    models: list[str] = field(default_factory=list)
    fixture_path: Path = field(default_factory=lambda: Path("."))

    @property
    def fixture_dir(self) -> Path:
        """Path to the sample files directory.

        Computed as ``fixture_path.parent / directory``; it is absolute only
        when ``fixture_path`` itself is absolute.
        """
        return self.fixture_path.parent / self.directory

    @classmethod
    def from_yaml(cls, path: Path) -> "JourneyFixture":
        """Load a journey fixture from a YAML file.

        Raises:
            ValueError: if the file is empty or not a YAML mapping.
            KeyError: if the required ``name`` key is missing.
        """
        return cls._from_raw(_load_yaml_mapping(path), path)

    @classmethod
    def _from_raw(cls, raw: dict[str, Any], path: Path) -> "JourneyFixture":
        """Build a journey fixture from an already-parsed YAML mapping."""
        return cls(
            name=raw["name"],
            description=_value_or(raw, "description", ""),
            directory=_value_or(raw, "directory", "sample_files"),
            data_types=_value_or(raw, "data_types", ["tasks"]),
            user_messages=_value_or(raw, "user_messages", []),
            expected_template_criteria=_value_or(raw, "expected_template_criteria", []),
            models=_value_or(raw, "models", []),
            fixture_path=path,
        )


def discover_journey_fixtures(fixtures_dir: Path | None = None) -> list[JourneyFixture]:
    """Find and load all journey YAML fixtures in the fixtures directory.

    Only files whose top-level ``type`` is ``"journey"`` are loaded. A file
    that fails to load is logged and skipped. Results follow the sorted order
    of the ``*.yaml`` paths.
    """
    fixtures_dir = _resolve_fixtures_dir(fixtures_dir)
    fixtures: list[JourneyFixture] = []

    if not fixtures_dir.is_dir():
        logger.warning("eval: fixtures directory not found: %s", fixtures_dir)
        return fixtures

    for yaml_path in sorted(fixtures_dir.glob("*.yaml")):
        try:
            raw = _load_yaml_mapping(yaml_path)
            if raw.get("type") != "journey":
                continue
            fixture = JourneyFixture._from_raw(raw, yaml_path)
        except Exception as exc:
            logger.error("eval: failed to load journey fixture %s: %s", yaml_path.name, exc)
        else:
            fixtures.append(fixture)
            logger.info("eval: loaded journey fixture %s from %s", fixture.name, yaml_path.name)

    return fixtures