- eval/mock_executor.py: intercepts execute_on_client, serves fixture files from disk, records all mutations (insert/update/delete) - eval/config.py: YAML fixture loader with prompt variants, expected results, seed records, model overrides - eval/scorer.py: FieldMatchScorer (fuzzy title match, per-field accuracy, precision/recall/F1) + LLMJudgeScorer (semantic eval) - eval/langfuse_eval.py: sync fixtures to Langfuse datasets, create dataset runs, post scores, link traces to runs - eval/runner.py: orchestrates fixture → mock → agent pipeline → scoring → Langfuse reporting - eval/cli.py: CLI (python -m eval run/list/sync) with --models, --variants, --fixture, --no-judge flags - eval/fixtures/: example Italian freelance scenario with 3 prompt variants (baseline, detailed_italian, minimal)
209 lines
8.2 KiB
Python
209 lines
8.2 KiB
Python
"""Mock executor — intercepts execute_on_client for offline E2E testing.

Patches ``app.ws_context.execute_on_client`` so agent pipeline runs don't
require a live Electron client or Redis. Instead:

- **Filesystem actions** (list_directory, read_file_content, get_file_metadata)
  are served from local fixture files on disk.
- **Read actions** (select, get) return preseeded records from an in-memory
  store provided by the test fixture.
- **Write actions** (insert, update, delete) are captured as *mutations* and
  stored for later comparison against expected results.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import time
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
|
|
@dataclass
class Mutation:
    """A single recorded write operation.

    Attributes
    ----------
    action : str
        Kind of write: ``"insert"``, ``"update"`` or ``"delete"``.
    table : str
        Name of the table the write targeted.
    data : dict[str, Any]
        Payload recorded for the write.
    timestamp : float
        Time the mutation was recorded; defaults to ``time.time()`` at
        construction.
    """

    action: str  # insert | update | delete
    table: str
    data: dict[str, Any]
    timestamp: float = field(default_factory=time.time)
|
|
|
|
|
|
@dataclass
class MockExecutor:
    """In-memory executor that replaces Redis-based tool round-trip.

    Parameters
    ----------
    fixture_dir : Path
        Directory containing sample files for filesystem tool calls.
        Filesystem actions never read outside this directory.
    seed_records : dict[str, list[dict]]
        Pre-existing records per table, e.g. ``{"tasks": [...], "projects": [...]}``.
        The executor returns these for ``select`` / ``get`` actions and auto-updates
        them on ``insert`` / ``update`` / ``delete`` so subsequent selects reflect changes.
        The executor works on its own per-record copy, so the mapping passed
        in by the caller is not modified by writes.
    mutations : list[Mutation]
        Log of every write handled so far, in call order.
    """

    fixture_dir: Path
    seed_records: dict[str, list[dict]] = field(default_factory=dict)
    mutations: list[Mutation] = field(default_factory=list)
    _id_counter: int = field(default=1000, repr=False)

    def __post_init__(self) -> None:
        """Normalise ``fixture_dir`` and detach ``seed_records`` from the caller."""
        self.fixture_dir = Path(self.fixture_dir)
        # Writes edit the store in place; copying each record keeps a fixture
        # that is shared between several executors from accumulating changes.
        self.seed_records = {
            table: [dict(row) for row in rows]
            for table, rows in (self.seed_records or {}).items()
        }

    # ── Public API ───────────────────────────────────────────────────

    def reset(self) -> None:
        """Clear recorded mutations.

        ``seed_records`` is left as it currently is (including any changes
        applied by earlier writes) and the id counter is not rewound.
        """
        self.mutations.clear()

    def get_mutations(
        self, *, table: str | None = None, action: str | None = None
    ) -> list[Mutation]:
        """Return recorded mutations, optionally filtered by table and/or action.

        A new list is always returned, so a later ``reset()`` does not empty
        a result the caller is still holding.
        """
        selected = list(self.mutations)
        if table is not None:
            selected = [m for m in selected if m.table == table]
        if action is not None:
            selected = [m for m in selected if m.action == action]
        return selected

    def created_records(self, table: str) -> list[dict]:
        """Return data dicts of all inserts into *table*."""
        return [m.data for m in self.mutations if m.table == table and m.action == "insert"]

    def updated_records(self, table: str) -> list[dict]:
        """Return data dicts of all updates to *table*."""
        return [m.data for m in self.mutations if m.table == table and m.action == "update"]

    # ── Context manager for patching ──────────────────────────────

    def patch(self):
        """Return a patcher that swaps in this executor for ``execute_on_client``.

        The returned object comes from ``unittest.mock.patch`` and is used
        with a plain ``with`` statement (not ``async with``). The replacement
        is an ``AsyncMock``, so awaiting the patched function runs
        ``_handle``.
        """
        # NOTE(review): only the attribute on ``app.ws_context`` is replaced;
        # modules that bound the function with ``from ... import`` beforehand
        # presumably keep the original — confirm against the call sites.
        return patch(
            "app.ws_context.execute_on_client",
            new=AsyncMock(side_effect=self._handle),
        )

    # ── Internal dispatch ─────────────────────────────────────────

    async def _handle(
        self,
        action: str,
        table: str | None = None,
        data: dict[str, Any] | None = None,
        filters: dict[str, Any] | None = None,
        vector: list[float] | None = None,
        limit: int | None = None,
    ) -> dict[str, Any]:
        """Route one tool call to the matching handler.

        ``vector`` is accepted for signature compatibility and ignored.
        Unrecognised actions return ``{"error": ...}`` instead of raising.
        """
        payload = data or {}
        target = table or ""

        # Filesystem
        if action == "list_directory":
            return self._list_directory(payload)
        if action == "read_file_content":
            return self._read_file(payload)
        if action == "get_file_metadata":
            return self._get_file_metadata(payload)

        # CRUD
        if action == "select":
            return self._select(target, filters, limit)
        if action == "get":
            return self._get(target, payload)
        if action == "insert":
            return self._insert(target, payload)
        if action == "update":
            return self._update(target, payload)
        if action == "delete":
            return self._delete(target, payload)

        # Vector (no-op for eval)
        if action in ("vector_upsert", "vector_search"):
            return {"rows": []}

        return {"error": f"Unknown action: {action}"}

    # ── Filesystem handlers ───────────────────────────────────────

    def _resolve(self, rel_path: str) -> Path | None:
        """Map a client-style path onto ``fixture_dir``.

        Leading slashes/backslashes are dropped so "absolute" client paths
        are treated as relative to the fixture root. Returns ``None`` when
        the resolved location falls outside the fixture root (for example
        via ``..``) or the path cannot be resolved at all.
        """
        try:
            root = self.fixture_dir.resolve()
            candidate = (root / rel_path.lstrip("/\\")).resolve()
            candidate.relative_to(root)
        except (ValueError, OSError):
            return None
        return candidate

    def _list_directory(self, data: dict) -> dict:
        """List the direct children of ``data["path"]``, sorted by path.

        Returns ``{"entries": []}`` when the path is not a directory inside
        the fixture root.
        """
        rel_path = str(data.get("path") or "")
        abs_path = self._resolve(rel_path)
        if abs_path is None or not abs_path.is_dir():
            return {"entries": []}
        # Return paths relative to fixture_dir but with the original prefix
        prefix = rel_path.rstrip("/\\")
        entries: list[dict] = [
            {
                "name": child.name,
                "path": prefix + "/" + child.name,
                "type": "directory" if child.is_dir() else "file",
            }
            for child in sorted(abs_path.iterdir())
        ]
        return {"entries": entries}

    def _read_file(self, data: dict) -> dict:
        """Return the text of ``data["path"]`` decoded as UTF-8.

        Undecodable bytes are replaced rather than raising. A missing file,
        or one outside the fixture root, yields empty content plus an
        ``error`` entry.
        """
        rel_path = str(data.get("path") or "")
        abs_path = self._resolve(rel_path)
        if abs_path is None or not abs_path.is_file():
            return {"content": "", "error": f"File not found: {rel_path}"}
        return {"content": abs_path.read_text(encoding="utf-8", errors="replace")}

    def _get_file_metadata(self, data: dict) -> dict:
        """Return size, timestamps (milliseconds) and kind for ``data["path"]``."""
        rel_path = str(data.get("path") or "")
        abs_path = self._resolve(rel_path)
        if abs_path is None or not abs_path.exists():
            return {"error": f"Not found: {rel_path}"}
        stat = abs_path.stat()
        # st_ctime is the inode-change time on Unix; use the real creation
        # time on platforms that expose it.
        created = getattr(stat, "st_birthtime", stat.st_ctime)
        return {
            "path": rel_path,
            "size": stat.st_size,
            "modifiedAt": int(stat.st_mtime * 1000),
            "createdAt": int(created * 1000),
            "isDirectory": abs_path.is_dir(),
        }

    # ── CRUD handlers ─────────────────────────────────────────────

    def _select(self, table: str, filters: dict | None, limit: int | None = None) -> dict:
        """Return the rows of *table* that equal every non-``None`` filter value.

        A positive integer *limit* truncates the result; ``None``, zero,
        negative or non-integer values mean "no limit".
        """
        rows = list(self.seed_records.get(table, []))
        if filters:
            # Filters whose value is None are treated as "not specified".
            active = {k: v for k, v in filters.items() if v is not None}
            rows = [r for r in rows if all(r.get(k) == v for k, v in active.items())]
        if isinstance(limit, int) and limit > 0:
            rows = rows[:limit]
        return {"rows": rows}

    def _get(self, table: str, data: dict) -> dict:
        """Return the first row of *table* whose ``id`` equals ``data["id"]``."""
        record_id = data.get("id", "")
        for row in self.seed_records.get(table, []):
            if row.get("id") == record_id:
                return {"row": row}
        return {"row": None}

    def _insert(self, table: str, data: dict) -> dict:
        """Store *data* under a freshly generated string id and log the insert.

        Any ``id`` supplied in *data* is overwritten by the generated one.
        """
        self._id_counter += 1
        record = {**data, "id": str(self._id_counter)}
        # Add to seed so subsequent selects can find it
        self.seed_records.setdefault(table, []).append(record)
        # NOTE(review): the logged dict is the same object as the stored row,
        # so later updates to the row also show up in created_records().
        # Kept as-is because scoring may rely on it — confirm.
        self.mutations.append(Mutation(action="insert", table=table, data=record))
        return {"row": record}

    def _update(self, table: str, data: dict) -> dict:
        """Merge *data* into the row with the same ``id`` and log the update.

        ``None`` and empty-string values are skipped, so they never clear an
        existing field. When no row matches, the update is still logged and
        *data* is echoed back.
        """
        record_id = data.get("id", "")
        for row in self.seed_records.get(table, []):
            if row.get("id") == record_id:
                row.update({k: v for k, v in data.items() if v is not None and v != ""})
                self.mutations.append(Mutation(action="update", table=table, data=dict(row)))
                return {"row": row}
        # Record not found — still log the mutation (as a snapshot, matching
        # the found-row branch above).
        self.mutations.append(Mutation(action="update", table=table, data=dict(data)))
        return {"row": data}

    def _delete(self, table: str, data: dict) -> dict:
        """Drop rows of *table* whose ``id`` equals ``data["id"]`` and log the delete.

        Always reports ``{"deleted": True}``, whether or not a row matched.
        """
        record_id = data.get("id", "")
        if table in self.seed_records:
            self.seed_records[table] = [
                row for row in self.seed_records[table] if row.get("id") != record_id
            ]
        self.mutations.append(Mutation(action="delete", table=table, data={"id": record_id}))
        return {"deleted": True}
|