Step 3 (prerequisite):
- app/schemas.py: add ContentTypeConfig + AgentConfig Pydantic models
- app/models.py: add agent_config (JSON, nullable) to LocalAgentConfig
- alembic migration a3b9c0d1e2f3: ADD COLUMN agent_config

Step 2 (runner refactor):
- Remove _classify_file() and _BATCH_FILE_CLASSIFIER_PROMPT (LLM classification step)
- Add Phase A: detect_content_type + preprocess (zero LLM, per file)
- Add _UNIFIED_PROCESSING_PROMPT (hot-swappable via Langfuse "unified_processing")
- Add helper functions: _format_projects, _format_metadata, _get_extraction_rules, _get_no_match_behavior
- Single LLM call per file with tools (classify + extract + create)
- Fix items_created: count create_* tool calls via _tool_calls_out param
- test_agent_runner_v2.py: 10 cases (2.1-2.10) with Langfuse eval scoring

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
588 lines
21 KiB
Python
588 lines
21 KiB
Python
"""Tests for Local Agent V2 runner (Step 2).
|
||
|
||
Covers the unified per-file flow:
|
||
Phase A — detect + preprocess (Python, zero LLM)
|
||
Phase B — single LLM call with tools (classify + extract + create)
|
||
|
||
Test cases:
|
||
2.1 Happy path: email with action → create_task called
|
||
2.2 Happy path: email informative → create_note called
|
||
2.3 Happy path: email with date → create_timeline called
|
||
2.4 Project matching via filename → correct project_id used
|
||
2.5 Project matching via content → correct project_id used
|
||
2.6 No project match + global rule → no create_* called
|
||
2.7 Deduplication → update_task, not create_task
|
||
2.8 items_created count (unit) → items_created == N create_* calls
|
||
2.9 Device offline (unit) → status=error
|
||
2.10 Empty file (unit) → items_processed=0, status=success
|
||
|
||
Run:
|
||
pytest tests/test_agent_runner_v2.py -v
|
||
pytest tests/test_agent_runner_v2.py -v -k "2_9 or 2_10 or 2_8" # unit only
|
||
pytest tests/test_agent_runner_v2.py -v -k "eval" # LLM evals only
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import uuid
|
||
from datetime import datetime, timezone
|
||
from typing import Any
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
from app.core.agent_runner import (
|
||
_format_metadata,
|
||
_format_projects,
|
||
_get_extraction_rules,
|
||
_get_no_match_behavior,
|
||
_is_overdue,
|
||
run_local_agent,
|
||
)
|
||
from app.core.device_manager import DeviceConnectionManager
|
||
from app.core.langfuse_client import get_langfuse, get_prompt_or_fallback
|
||
from app.models import AgentRunLog, LocalAgentConfig
|
||
from tests.conftest import TEST_USER_IDS
|
||
|
||
# ── Constants ─────────────────────────────────────────────────────────────
|
||
|
||
# User that owns every config, run log and device registration in this module.
_USER_ID = TEST_USER_IDS["power"]

# Agent configuration shared by all tests: a single HTML-email content type,
# one global rule and the three entity kinds the agent may produce.
_AGENT_CONFIG = {
    "content_types": [
        {
            "id": "email_html",
            "label": "Email HTML",
            "detection_hint": "HTML file with From/To/Subject headers",
            "preprocessing": "email_html",
            # Adjacent literals are concatenated into one prompt string.
            "extraction_prompt": (
                "If the email contains a direct action request or task assignment → create a task. "
                "If the email contains informational content, updates, or FYI → create a note. "
                "If the email mentions a specific date for a meeting or deadline → create a timeline entry."
            ),
        }
    ],
    "global_rules": [
        # Italian, kept verbatim: "If the file cannot be traced back to any
        # project, do not create any entity."
        "Se il file non è riconducibile a nessun progetto, non creare alcuna entità."
    ],
    "data_types": ["tasks", "notes", "timelines"],
}

# Project rows served by the fake executor for ``select`` on ``projects``.
_PROJECT_ALPHA = {"id": "proj-alpha", "name": "Project Alpha", "status": "active"}
_PROJECT_BETA = {"id": "proj-beta", "name": "Project Beta", "status": "active"}
|
||
|
||
# ── Sample email content ──────────────────────────────────────────────────
|
||
|
||
# Email asking for a concrete action on Project Alpha (used for the task,
# project-matching and deduplication cases).
_ACTION_EMAIL = """\
<html><head></head><body>
<p><b>From:</b> boss@company.com</p>
<p><b>To:</b> dev@company.com</p>
<p><b>Subject:</b> Fix the login bug</p>
<p><b>Date:</b> 2026-04-07</p>
<p>Hi,<br>Please fix the login bug in Project Alpha by Friday. High priority!</p>
</body></html>
"""

# Purely informational email about Project Alpha (used for the note case).
_INFO_EMAIL = """\
<html><head></head><body>
<p><b>From:</b> pm@company.com</p>
<p><b>To:</b> team@company.com</p>
<p><b>Subject:</b> FYI: New policy for Project Alpha</p>
<p>Just a heads-up that starting next week all code reviews must be done
within 24 hours for Project Alpha. No action needed from you now.</p>
</body></html>
"""

# Email announcing a dated meeting for Project Alpha (used for the timeline case).
_DATE_EMAIL = """\
<html><head></head><body>
<p><b>From:</b> pm@company.com</p>
<p><b>Subject:</b> Project Alpha kick-off meeting</p>
<p>The kick-off meeting for Project Alpha is scheduled for 2026-04-15 at 10:00.</p>
</body></html>
"""

# Newsletter that mentions no project at all (used for the no-match case).
_NO_PROJECT_EMAIL = """\
<html><head></head><body>
<p><b>From:</b> newsletter@ads.com</p>
<p><b>Subject:</b> Weekly newsletter</p>
<p>Check out our latest deals on electronics!</p>
</body></html>
"""

# Pre-existing task whose title equals the subject of ``_ACTION_EMAIL``;
# served by the fake executor in the deduplication case.
_EXISTING_TASK = {
    "id": "task-existing",
    "title": "Fix the login bug",
    "status": "todo",
    "priority": "medium",
}
|
||
|
||
|
||
# ── Test helpers ──────────────────────────────────────────────────────────
|
||
|
||
|
||
def _make_config(
    agent_config: dict | None = None,
    directory: str = "/emails",
    device_id: str = "dev-001",
) -> LocalAgentConfig:
    """Build an enabled ``LocalAgentConfig`` owned by the test user.

    Args:
        agent_config: Agent configuration to attach. Any falsy value
            (``None`` or an empty dict) selects the shared ``_AGENT_CONFIG``.
        directory: The single entry placed in ``directory_paths``.
        device_id: Device the agent is bound to.

    Returns:
        A config with a fresh random id that has never run
        (``last_run_at`` is ``None``).
    """
    effective_agent_config = agent_config if agent_config else _AGENT_CONFIG
    config_id = str(uuid.uuid4())
    return LocalAgentConfig(
        id=config_id,
        user_id=_USER_ID,
        device_id=device_id,
        name="Test V2 Agent",
        directory_paths=[directory],
        data_types=["tasks", "notes", "timelines"],
        prompt_template="",
        agent_config=effective_agent_config,
        file_extensions=[".html", ".eml"],
        schedule_cron="0 */6 * * *",
        enabled=True,
        last_run_at=None,
    )
|
||
|
||
|
||
def _make_run_log(agent_id: str) -> AgentRunLog:
    """Build an ``AgentRunLog`` in the ``running`` state for ``agent_id``.

    The log gets a fresh random id, belongs to the test user, is typed as a
    ``local`` agent run and is stamped with the current UTC time.
    """
    started = datetime.now(timezone.utc)
    log_id = str(uuid.uuid4())
    return AgentRunLog(
        id=log_id,
        agent_id=agent_id,
        agent_type="local",
        user_id=_USER_ID,
        status="running",
        started_at=started,
    )
|
||
|
||
|
||
def _make_manager(
    online: bool = True,
    device_id: str = "dev-001",
) -> DeviceConnectionManager:
    """Return a ``DeviceConnectionManager``, optionally with one device registered.

    Args:
        online: When true, register a mocked websocket for ``device_id`` under
            the test user. When false, nothing is registered.
        device_id: Device identifier to register. The default matches the
            default ``device_id`` of ``_make_config``; pass the same value to
            both helpers when a test uses a different device.

    Returns:
        The newly created manager.
    """
    manager = DeviceConnectionManager()
    if online:
        # Only ``send_text`` is made awaitable; the rest of the socket is a
        # plain ``MagicMock``.
        socket = MagicMock()
        socket.send_text = AsyncMock()
        manager.register(_USER_ID, device_id, socket)
    return manager
|
||
|
||
|
||
def _make_executor(
    file_path: str,
    file_content: str,
    projects: list[dict] | None = None,
    existing_tasks: list[dict] | None = None,
    existing_notes: list[dict] | None = None,
    existing_timelines: list[dict] | None = None,
) -> tuple[Any, list[dict]]:
    """Return ``(async_executor, captured_calls)``.

    The executor is a fake for ``execute_on_client`` payloads. It answers
    directory listing, file metadata, file reading, ``select`` on the known
    tables, ``insert`` and ``update``; any other action yields ``{}``.

    Args:
        file_path: Path of the single file reported by ``list_directory``.
        file_content: Text returned by ``read_file_content``.
        projects: Rows returned for ``select`` on ``projects``. ``None``
            selects the two default projects; an explicit empty list is
            honoured and yields no projects.
        existing_tasks: Rows returned for ``select`` on ``tasks``.
        existing_notes: Rows returned for ``select`` on ``notes``.
        existing_timelines: Rows returned for ``select`` on ``timelines``.

    Returns:
        The coroutine function and the list it appends one
        ``{"action", "table", "data"}`` record to per invocation.
    """
    calls: list[dict] = []
    # Only None means "use the defaults", so a test can model an empty workspace.
    if projects is None:
        _projects = [_PROJECT_ALPHA, _PROJECT_BETA]
    else:
        _projects = projects

    async def _executor(payload: dict) -> dict:
        action = payload.get("action", "")
        table = payload.get("table", "")
        data = payload.get("data") or {}
        # Record every payload before dispatching so tests can inspect it.
        calls.append({"action": action, "table": table, "data": data})

        if action == "list_directory":
            # The requested path is ignored: the fake always exposes one file.
            return {"entries": [{"type": "file", "path": file_path}]}

        if action == "get_file_metadata":
            return {"modifiedAt": None}

        if action == "read_file_content":
            return {"content": file_content}

        if action == "select":
            rows_by_table = {
                "projects": _projects,
                "tasks": existing_tasks or [],
                "notes": existing_notes or [],
                "timelines": existing_timelines or [],
            }
            # Unknown tables behave like empty ones.
            return {"rows": rows_by_table.get(table, [])}

        if action == "insert":
            # Echo the inserted data with a generated id; an "id" already
            # present in the data takes precedence.
            return {"row": {"id": str(uuid.uuid4()), **data}}

        if action == "update":
            return {"success": True}

        return {}

    return _executor, calls
|
||
|
||
|
||
# ── Unit: helper functions ────────────────────────────────────────────────
|
||
|
||
|
||
def test_format_projects_empty():
    """An empty project list renders a "(no projects…" placeholder."""
    rendered = _format_projects([])
    assert "(no projects" in rendered
|
||
|
||
|
||
def test_format_projects_with_data():
    """A formatted project exposes both its id and its name."""
    rendered = _format_projects([_PROJECT_ALPHA])
    for expected in ("proj-alpha", "Project Alpha"):
        assert expected in rendered
|
||
|
||
|
||
def test_format_metadata_empty():
    """Empty metadata formats to the empty string."""
    rendered = _format_metadata({})
    assert rendered == ""
|
||
|
||
|
||
def test_format_metadata_email():
    """Subject and sender of email metadata appear in the formatted text."""
    email_meta = {"subject": "Fix bug", "from": "boss@co.com", "date": "2026-04-07"}
    rendered = _format_metadata(email_meta)
    for expected in ("Fix bug", "boss@co.com"):
        assert expected in rendered
|
||
|
||
|
||
def test_get_extraction_rules_match():
    """A configured content type yields rules that mention tasks."""
    extraction_rules = _get_extraction_rules(_AGENT_CONFIG, "email_html")
    lowered = extraction_rules.lower()
    assert "task" in lowered
|
||
|
||
|
||
def test_get_extraction_rules_fallback():
    """A content type absent from the config yields rules containing "extract"."""
    extraction_rules = _get_extraction_rules(_AGENT_CONFIG, "plain_text")
    lowered = extraction_rules.lower()
    assert "extract" in lowered
|
||
|
||
|
||
def test_get_no_match_behavior_from_global_rules():
    """With the shared config, the no-match behavior is non-empty."""
    no_match = _get_no_match_behavior(_AGENT_CONFIG)
    # The global rule says "non creare alcuna entità" (create no entity);
    # only truthiness of the result is checked here.
    assert no_match
|
||
|
||
|
||
def test_get_no_match_behavior_default():
    """With an empty config, the default no-match behavior mentions "project"."""
    no_match = _get_no_match_behavior({})
    lowered = no_match.lower()
    assert "project" in lowered
|
||
|
||
|
||
# ── Unit: 2.9 — device offline ───────────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_2_9_device_offline():
    """2.9 No device online → the run is finalized with status=error.

    The manager is created without any registered device and
    ``_finalize_run`` is replaced by an ``AsyncMock`` whose last call is
    inspected.
    """
    config = _make_config()
    run_log = _make_run_log(config.id)
    offline_manager = _make_manager(online=False)

    with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
        await run_local_agent(_USER_ID, config, run_log, offline_manager)

    # Fail with a readable message instead of a TypeError from unpacking None.
    assert mock_fin.call_args is not None, "_finalize_run was never called"
    _, kwargs = mock_fin.call_args
    assert kwargs["status"] == "error"
    # ``or []`` keeps the check well-defined if ``errors`` is passed as None.
    reported_errors = kwargs.get("errors") or []
    assert any("not connected" in message for message in reported_errors)
|
||
|
||
|
||
# ── Unit: 2.10 — empty file ──────────────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_2_10_empty_file():
    """2.10 File with empty content → items_processed=0, items_created=0, success.

    The fake executor serves one file whose content is the empty string;
    ``_make_agent_executor`` is patched to return it and ``_finalize_run`` is
    replaced by an ``AsyncMock`` whose last call is inspected.
    """
    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, _calls = _make_executor(
        file_path="/emails/empty.html",
        file_content="",  # empty
        projects=[_PROJECT_ALPHA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
            await run_local_agent(_USER_ID, config, run_log, manager)

    # Fail with a readable message instead of a TypeError from unpacking None.
    assert mock_fin.call_args is not None, "_finalize_run was never called"
    _, kwargs = mock_fin.call_args
    assert kwargs["items_processed"] == 0
    assert kwargs["status"] == "success"
    assert kwargs["items_created"] == 0
|
||
|
||
|
||
# ── Unit: 2.8 — items_created count ─────────────────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
async def test_2_8_items_created_count():
    """2.8 items_created == number of create_* tool calls per run.

    ``_run_agent_with_tools`` is replaced by a stub that reports three tool
    calls, two of which are ``create_*``; the keyword arguments passed to the
    mocked ``_finalize_run`` are then checked.
    """
    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, _calls = _make_executor(
        file_path="/emails/action.html",
        file_content=_ACTION_EMAIL,
        projects=[_PROJECT_ALPHA],
    )

    # Simulate the LLM calling create_task, create_note and update_task once each.
    async def mock_run_agent(*, _tool_calls_out=None, **kw) -> str:
        if _tool_calls_out is not None:
            _tool_calls_out.extend(["create_task", "create_note", "update_task"])
        return "Done."

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
         patch("app.core.agent_runner._run_agent_with_tools", side_effect=mock_run_agent), \
         patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
        await run_local_agent(_USER_ID, config, run_log, manager)

    # Fail with a readable message instead of a TypeError from unpacking None.
    assert mock_fin.call_args is not None, "_finalize_run was never called"
    _, kwargs = mock_fin.call_args
    # Only create_task + create_note count (not update_task).
    assert kwargs["items_created"] == 2
    assert kwargs["items_processed"] == 1
|
||
|
||
|
||
# ── Eval: 2.1–2.7 (real LLM + Langfuse scoring) ──────────────────────────
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_1_email_to_task():
    """2.1 Action email → at least one insert into ``tasks``.

    Score name: ``runner.email_to_task``. ``_run_agent_with_tools`` is not
    patched here; only the client executor and ``_finalize_run`` are replaced.
    The score is reported to Langfuse when a client and trace are available.
    """
    lf = get_langfuse()
    trace = lf.trace(
        name="eval-runner-2.1-email-to-task",
        metadata={"step": "2"},
    ) if lf else None

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/ProjectAlpha_action.html",
        file_content=_ACTION_EMAIL,
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
         patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
        await run_local_agent(_USER_ID, config, run_log, manager)

    # Fail with a readable message instead of a TypeError from unpacking None.
    assert mock_fin.call_args is not None, "_finalize_run was never called"
    _, kwargs = mock_fin.call_args
    task_creates = [
        call for call in calls
        if call["action"] == "insert" and call["table"] == "tasks"
    ]
    score = 1.0 if task_creates else 0.0

    if lf and trace:
        lf.score(
            trace_id=trace.id,
            name="runner.email_to_task",
            value=score,
            comment=f"task_creates={len(task_creates)} items_created={kwargs.get('items_created')}",
        )
        lf.flush()

    assert score == 1.0, f"Expected at least 1 task created, got {len(task_creates)}"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_2_email_to_note():
    """2.2 Informational email → at least one insert into ``notes``.

    Score name: ``runner.email_to_note``. The score is reported to Langfuse
    when a client and trace are available.
    """
    lf = get_langfuse()
    trace = None
    if lf:
        trace = lf.trace(name="eval-runner-2.2-email-to-note", metadata={"step": "2"})

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/ProjectAlpha_info.html",
        file_content=_INFO_EMAIL,
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
            await run_local_agent(_USER_ID, config, run_log, manager)

    # Every insert the runner sent to the fake client for the notes table.
    note_creates = [
        call for call in calls
        if call["action"] == "insert" and call["table"] == "notes"
    ]
    score = 1.0 if note_creates else 0.0

    if lf and trace:
        lf.score(
            trace_id=trace.id,
            name="runner.email_to_note",
            value=score,
            comment=f"note_creates={len(note_creates)}",
        )
        lf.flush()

    assert score == 1.0, f"Expected at least 1 note created, got {len(note_creates)}"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_3_email_to_timeline():
    """2.3 Email with an event date → at least one insert into ``timelines``.

    Score name: ``runner.email_to_timeline``. The score is reported to
    Langfuse when a client and trace are available.
    """
    lf = get_langfuse()
    trace = None
    if lf:
        trace = lf.trace(name="eval-runner-2.3-email-to-timeline", metadata={"step": "2"})

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/ProjectAlpha_kickoff.html",
        file_content=_DATE_EMAIL,
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
            await run_local_agent(_USER_ID, config, run_log, manager)

    # Every insert the runner sent to the fake client for the timelines table.
    tl_creates = [
        call for call in calls
        if call["action"] == "insert" and call["table"] == "timelines"
    ]
    score = 1.0 if tl_creates else 0.0

    if lf and trace:
        lf.score(
            trace_id=trace.id,
            name="runner.email_to_timeline",
            value=score,
            comment=f"timeline_creates={len(tl_creates)}",
        )
        lf.flush()

    assert score == 1.0, f"Expected at least 1 timeline created, got {len(tl_creates)}"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_4_project_matching_filename():
    """2.4 Filename contains 'ProjectAlpha' → some insert carries proj-alpha.

    Score name: ``runner.project_filename``. The score is reported to
    Langfuse when a client and trace are available.
    """
    lf = get_langfuse()
    trace = None
    if lf:
        trace = lf.trace(name="eval-runner-2.4-project-filename", metadata={"step": "2"})

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/ProjectAlpha_report.html",
        file_content=_ACTION_EMAIL,
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
            await run_local_agent(_USER_ID, config, run_log, manager)

    # Passes as soon as any insert, on any table, has projectId == proj-alpha.
    correct_project = any(
        call.get("data", {}).get("projectId") == "proj-alpha"
        for call in calls
        if call["action"] == "insert"
    )
    score = 1.0 if correct_project else 0.0

    if lf and trace:
        lf.score(trace_id=trace.id, name="runner.project_filename", value=score)
        lf.flush()

    assert score == 1.0, "Expected inserts to use proj-alpha based on filename"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_5_project_matching_content():
    """2.5 Email body mentions 'Project Alpha' → some insert carries proj-alpha.

    Score name: ``runner.project_content``. The score is reported to
    Langfuse when a client and trace are available.
    """
    lf = get_langfuse()
    trace = None
    if lf:
        trace = lf.trace(name="eval-runner-2.5-project-content", metadata={"step": "2"})

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/email_001.html",  # generic filename, no project hint
        file_content=_ACTION_EMAIL,  # body mentions "Project Alpha"
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
            await run_local_agent(_USER_ID, config, run_log, manager)

    # Passes as soon as any insert, on any table, has projectId == proj-alpha.
    correct_project = any(
        call.get("data", {}).get("projectId") == "proj-alpha"
        for call in calls
        if call["action"] == "insert"
    )
    score = 1.0 if correct_project else 0.0

    if lf and trace:
        lf.score(trace_id=trace.id, name="runner.project_content", value=score)
        lf.flush()

    assert score == 1.0, "Expected inserts to use proj-alpha based on email body content"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_6_no_project_match_global_rule():
    """2.6 Newsletter + global rule 'no project = no entities' → no inserts.

    Score name: ``runner.no_project``. The score is reported to Langfuse
    when a client and trace are available.
    """
    lf = get_langfuse()
    trace = lf.trace(name="eval-runner-2.6-no-project", metadata={"step": "2"}) if lf else None

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/newsletter.html",
        file_content=_NO_PROJECT_EMAIL,
        projects=[_PROJECT_ALPHA, _PROJECT_BETA],
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor), \
         patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock) as mock_fin:
        await run_local_agent(_USER_ID, config, run_log, manager)

    # The run must have been finalized; the finalize arguments themselves are
    # not inspected by this case.
    assert mock_fin.call_args is not None, "_finalize_run was never called"
    inserts = [call for call in calls if call["action"] == "insert"]
    score = 1.0 if not inserts else 0.0

    if lf and trace:
        lf.score(
            trace_id=trace.id,
            name="runner.no_project",
            value=score,
            comment=f"inserts={len(inserts)}",
        )
        lf.flush()

    assert score == 1.0, f"Expected 0 inserts for unmatched newsletter, got {len(inserts)}"
|
||
|
||
|
||
@pytest.mark.asyncio
@pytest.mark.eval
async def test_2_7_deduplication():
    """2.7 Existing task with the same title → update preferred over create.

    Score name: ``runner.dedup``. The fake executor already serves
    ``_EXISTING_TASK`` for ``select`` on ``tasks``. The score is reported to
    Langfuse when a client and trace are available.
    """
    lf = get_langfuse()
    trace = None
    if lf:
        trace = lf.trace(name="eval-runner-2.7-dedup", metadata={"step": "2"})

    config = _make_config()
    run_log = _make_run_log(config.id)
    manager = _make_manager()

    executor, calls = _make_executor(
        file_path="/emails/ProjectAlpha_followup.html",
        file_content=_ACTION_EMAIL,  # "Fix the login bug" — already exists
        projects=[_PROJECT_ALPHA],
        existing_tasks=[_EXISTING_TASK],  # task already exists
    )

    with patch("app.core.agent_runner._make_agent_executor", return_value=executor):
        with patch("app.core.agent_runner._finalize_run", new_callable=AsyncMock):
            await run_local_agent(_USER_ID, config, run_log, manager)

    task_calls = [call for call in calls if call.get("table") == "tasks"]
    task_creates = [call for call in task_calls if call["action"] == "insert"]
    task_updates = [call for call in task_calls if call["action"] == "update"]
    # Prefer update over create: passes when nothing was created OR at least
    # one update happened.
    # NOTE(review): a run that both creates a duplicate and updates the
    # existing task still scores 1.0 — confirm this leniency is intended.
    deduplicated = not task_creates or bool(task_updates)
    score = 1.0 if deduplicated else 0.0

    if lf and trace:
        lf.score(
            trace_id=trace.id,
            name="runner.dedup",
            value=score,
            comment=f"creates={len(task_creates)} updates={len(task_updates)}",
        )
        lf.flush()

    assert score == 1.0, (
        f"Expected deduplication: creates={len(task_creates)}, updates={len(task_updates)}"
    )
|