"""Eval configuration — YAML fixture loader and dataclasses. A *fixture* is a YAML file that defines a complete test scenario: .. code-block:: yaml name: freelance-invoices description: Extract tasks and notes from invoice PDFs (text layer) directory: sample_files/invoices # relative to fixture dir data_types: [tasks, notes] file_extensions: [txt, md] # Preseeded records the agent "sees" as existing data seed_records: projects: - id: proj-1 name: "Website Redesign" status: active tasks: [] # Prompt variations to test (at least one required) prompt_variants: baseline: | Extract action items as tasks and meeting summaries as notes. Set priority based on urgency keywords. detailed: | Extract action items as tasks. Map "URGENT" to high priority, "ASAP" to medium. Summaries become notes with full content. # Expected extractions — what the agent SHOULD produce expected: tasks: - title: "Send revised invoice to client" priority: high status: todo - title: "Update project timeline" priority: medium notes: - title: "Meeting summary - March kickoff" # Optional: models to test (overrides CLI --models) models: [] A *journey fixture* tests the prompt-template builder conversation: .. code-block:: yaml type: journey name: journey-invoices description: Test journey builds a good template for invoices directory: sample_files/invoices data_types: [tasks, notes] # Simulated user responses for multi-turn conversation user_messages: - "I want to extract action items and meeting summaries" - "Yes, map URGENTE to high priority" - "That looks good, generate the template" # Criteria the generated prompt_template should satisfy expected_template_criteria: - "mentions tasks and notes as target entities" - "includes priority mapping rules" - "references isAiSuggested=1" - "does not mention projectId" models: [] """ from __future__ import annotations import logging from dataclasses import dataclass, field from pathlib import Path from typing import Any import yaml logger = logging.getLogger(__name__) @dataclass class ExpectedRecord: """A single expected extraction result. Only the fields specified are checked — unspecified fields are ignored. """ table: str # tasks | notes | timelines | projects fields: dict[str, Any] # field_name → expected_value @dataclass class EvalFixture: """A complete test scenario loaded from YAML.""" name: str description: str directory: str # relative path to sample files data_types: list[str] file_extensions: list[str] seed_records: dict[str, list[dict]] prompt_variants: dict[str, str] # variant_name → prompt_template expected: list[ExpectedRecord] models: list[str] # if empty, use CLI default fixture_path: Path = field(default_factory=lambda: Path(".")) @property def fixture_dir(self) -> Path: """Absolute path to the sample files directory.""" return self.fixture_path.parent / self.directory @classmethod def from_yaml(cls, path: Path) -> "EvalFixture": """Load a fixture from a YAML file.""" raw = yaml.safe_load(path.read_text(encoding="utf-8")) expected: list[ExpectedRecord] = [] for table, records in (raw.get("expected") or {}).items(): for rec in records: expected.append(ExpectedRecord(table=table, fields=rec)) return cls( name=raw["name"], description=raw.get("description", ""), directory=raw.get("directory", "sample_files"), data_types=raw.get("data_types", ["tasks"]), file_extensions=raw.get("file_extensions", []), seed_records=raw.get("seed_records", {}), prompt_variants=raw.get("prompt_variants", {"default": ""}), expected=expected, models=raw.get("models", []), fixture_path=path, ) def discover_fixtures(fixtures_dir: Path | None = None) -> list[EvalFixture]: """Find and load all YAML fixtures in the fixtures directory.""" if fixtures_dir is None: fixtures_dir = Path(__file__).parent / "fixtures" fixtures: list[EvalFixture] = [] if not fixtures_dir.is_dir(): logger.warning("eval: fixtures directory not found: %s", fixtures_dir) return fixtures for yaml_path in sorted(fixtures_dir.glob("*.yaml")): try: raw = yaml.safe_load(yaml_path.read_text(encoding="utf-8")) if raw.get("type") == "journey": continue # Skip journey fixtures fixtures.append(EvalFixture.from_yaml(yaml_path)) logger.info("eval: loaded fixture %s from %s", fixtures[-1].name, yaml_path.name) except Exception as exc: logger.error("eval: failed to load fixture %s: %s", yaml_path.name, exc) return fixtures # ── Journey fixtures ───────────────────────────────────────────────────── @dataclass class JourneyFixture: """A journey test scenario — tests the prompt_template builder conversation.""" name: str description: str directory: str # relative path to sample files data_types: list[str] user_messages: list[str] # simulated user responses expected_template_criteria: list[str] # what the template should contain/satisfy models: list[str] fixture_path: Path = field(default_factory=lambda: Path(".")) @property def fixture_dir(self) -> Path: """Absolute path to the sample files directory.""" return self.fixture_path.parent / self.directory @classmethod def from_yaml(cls, path: Path) -> "JourneyFixture": """Load a journey fixture from a YAML file.""" raw = yaml.safe_load(path.read_text(encoding="utf-8")) return cls( name=raw["name"], description=raw.get("description", ""), directory=raw.get("directory", "sample_files"), data_types=raw.get("data_types", ["tasks"]), user_messages=raw.get("user_messages", []), expected_template_criteria=raw.get("expected_template_criteria", []), models=raw.get("models", []), fixture_path=path, ) def discover_journey_fixtures(fixtures_dir: Path | None = None) -> list[JourneyFixture]: """Find and load all journey YAML fixtures in the fixtures directory.""" if fixtures_dir is None: fixtures_dir = Path(__file__).parent / "fixtures" fixtures: list[JourneyFixture] = [] if not fixtures_dir.is_dir(): logger.warning("eval: fixtures directory not found: %s", fixtures_dir) return fixtures for yaml_path in sorted(fixtures_dir.glob("*.yaml")): try: raw = yaml.safe_load(yaml_path.read_text(encoding="utf-8")) if raw.get("type") != "journey": continue fixtures.append(JourneyFixture.from_yaml(yaml_path)) logger.info("eval: loaded journey fixture %s from %s", fixtures[-1].name, yaml_path.name) except Exception as exc: logger.error("eval: failed to load journey fixture %s: %s", yaml_path.name, exc) return fixtures