Files
api/tests/test_run_contextual.py
Roberto d63fd5f3b9 fix(contextual): narrow tool palette + forbid legacy read tools
Smoke trace 0b46841484ba7d024ed9f8d5ac8b1df0 showed the agent
defaulting to list_projects + get_project for a 'summarize
project Nexus' query, returning a shallow row without aiSummary
or tasks/notes. The legacy read tools were exposed via
*PROJECT_TOOLS / *TASK_TOOLS spreading.

Now _contextual_tools exposes exactly:
- get_page_details (sole read; supports per-entity + list views)
- create_task, update_task
- create_note
- create_timeline

Prompt rule 2 explicitly forbids the legacy reads, and the test
asserts they are excluded from the palette.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 18:23:55 +02:00

86 lines
3.0 KiB
Python

"""Tests for run_contextual_stream.
These tests monkeypatch _run_single_agent_stream (the actual internal runner)
rather than the plan's fictional _run_agent_loop, matching the real
deep_agent.py architecture.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from app.schemas.contextual import ContextualScope
@pytest.mark.asyncio
async def test_run_contextual_stream_includes_scope_block(monkeypatch):
"""run_contextual_stream must inject the scope block into the system prompt
and include get_page_details in the tool list while excluding note-edit tools."""
import app.core.deep_agent as deep_agent
captured = {}
async def fake_stream(
*,
user_id,
system_prompt,
message,
context,
agent_name="agent",
tools=None,
conversation_history=None,
**kwargs,
):
captured["sys"] = system_prompt
captured["tool_names"] = [getattr(t, "name", str(t)) for t in (tools or [])]
captured["agent_name"] = agent_name
# Async generator that yields nothing — still satisfies the protocol.
if False:
yield # pragma: no cover
monkeypatch.setattr(deep_agent, "_run_single_agent_stream", fake_stream)
scope = ContextualScope(
page="project",
entity_type="project",
entity_id="p1",
entity_name="Acme",
counts={"tasks": 1, "notes": 0, "milestones": 0},
)
context = {
"conversation_history": [],
"_debug": {"session_id": "s1"},
}
results = []
async for item in deep_agent.run_contextual_stream(
user_id="user1",
message="hi",
context=context,
scope=scope,
):
results.append(item)
assert "Acme" in captured["sys"], "scope block must appear in system prompt"
assert "Current view" in captured["sys"], "section header must be present"
names = captured["tool_names"]
assert "get_page_details" in names, "get_page_details tool must be included"
# Entity-create tools: at least one of these must be present.
assert any(n in names for n in ("create_task", "create_note", "update_task")), (
"at least one entity-create tool must be present"
)
assert "create_timeline" in names, "create_timeline tool must be included"
# Note edit tools must NOT be exposed.
assert "propose_note_edit" not in names, "propose_note_edit must be excluded"
# Legacy read tools must be excluded — they return shallow snapshots and
# cause the agent to under-answer (see trace 0b46841484ba7d024ed9f8d5ac8b1df0).
assert "list_projects" not in names, "list_projects must be excluded (legacy read)"
assert "get_project" not in names, "get_project must be excluded (legacy read)"
assert "list_tasks" not in names, "list_tasks must be excluded (legacy read)"
assert "get_task" not in names, "get_task must be excluded (legacy read)"
assert "list_notes" not in names, "list_notes must be excluded (legacy read)"
assert "get_note" not in names, "get_note must be excluded (legacy read)"