refactor: replace orchestrator with LangGraph deep-agent supervisors

- Add app/core/deep_agent.py with Home and Floating supervisor graphs using LangGraph create_react_agent (hierarchical pattern)
- Strip ChatAgent classes from all 4 agent files, keep @tool functions
- Rewrite output_formatter.py for event-based (token/tool_end/mutations) stream
- Update device_ws.py to use run_home_stream/run_floating_stream
- Rewrite chat.py REST route to use run_home
- Add update_core_memory tool to both supervisors
- Add langgraph>=0.3.0 to requirements.txt
- Remove orchestrator.py, execution_plan.py, agent_registry.py, plans.py
- Remove PlanAction, PlanStep, ExecutionPlan, execution_mode from schemas
- Update all affected tests to match new API
- Remove 6 deprecated test files for deleted modules
- Clean up stale docstrings referencing removed orchestrator
2026-03-11 17:50:22 +01:00
parent 2de67213f8
commit cfc9d7a942
31 changed files with 723 additions and 3498 deletions
--- a/tests/test_agent_registry.py
+++ b/tests/test_agent_registry.py
@@ -1,214 +0,0 @@
-"""Unit tests for the agent registry, base classes, and tool loop."""
-
-from __future__ import annotations
-
-from typing import Any
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-from app.core.agent_registry import AgentRegistry, ChatAgent
-
-
-# ── Helpers ──────────────────────────────────────────────────────────
-
-class _StubAgent(ChatAgent):
-    """Minimal concrete agent for testing."""
-
-    def get_name(self) -> str:
-        return "stub"
-
-    def get_description(self) -> str:
-        return "A stub agent for tests"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return f"echo: {query}"
-
-
-class _AnotherAgent(ChatAgent):
-    def get_name(self) -> str:
-        return "another"
-
-    def get_description(self) -> str:
-        return "Another stub"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return "another"
-
-
-# ── Fixtures ─────────────────────────────────────────────────────────
-
-@pytest.fixture(autouse=True)
-def _fresh_registry():
-    """Reset the singleton between tests."""
-    AgentRegistry._instance = None
-    yield
-    AgentRegistry._instance = None
-
-
-@pytest.fixture()
-def reg() -> AgentRegistry:
-    return AgentRegistry()
-
-
-# ── Tests ────────────────────────────────────────────────────────────
-
-class TestRegisterAndGet:
-    def test_register_decorator(self, reg: AgentRegistry) -> None:
-        reg.register(_StubAgent)
-        agent = reg.get("stub")
-        assert isinstance(agent, _StubAgent)
-
-    def test_get_unknown_raises(self, reg: AgentRegistry) -> None:
-        with pytest.raises(KeyError, match="not found"):
-            reg.get("nonexistent")
-
-    def test_register_multiple(self, reg: AgentRegistry) -> None:
-        reg.register(_StubAgent)
-        reg.register(_AnotherAgent)
-        assert reg.get("stub").get_name() == "stub"
-        assert reg.get("another").get_name() == "another"
-
-
-class TestListAgents:
-    def test_empty(self, reg: AgentRegistry) -> None:
-        assert reg.list_agents() == []
-
-    def test_list_after_register(self, reg: AgentRegistry) -> None:
-        reg.register(_StubAgent)
-        agents = reg.list_agents()
-        assert len(agents) == 1
-        assert agents[0] == {"name": "stub", "description": "A stub agent for tests"}
-
-    def test_list_multiple(self, reg: AgentRegistry) -> None:
-        reg.register(_StubAgent)
-        reg.register(_AnotherAgent)
-        names = {a["name"] for a in reg.list_agents()}
-        assert names == {"stub", "another"}
-
-
-class TestCallAgent:
-    @pytest.mark.asyncio
-    async def test_call_agent(self, reg: AgentRegistry) -> None:
-        reg.register(_StubAgent)
-        result = await reg.call_agent("stub", "hello", {})
-        assert result == "echo: hello"
-
-    @pytest.mark.asyncio
-    async def test_call_unknown_raises(self, reg: AgentRegistry) -> None:
-        with pytest.raises(KeyError):
-            await reg.call_agent("nope", "hi", {})
-
-
-class TestSingleton:
-    def test_singleton_identity(self) -> None:
-        a = AgentRegistry()
-        b = AgentRegistry()
-        assert a is b
-
-
-class TestToolLoop:
-    @pytest.mark.asyncio
-    async def test_no_tool_calls(self) -> None:
-        """When the LLM responds without tool calls, return content directly."""
-        agent = _StubAgent()
-
-        ai_msg = MagicMock()
-        ai_msg.content = "final answer"
-        ai_msg.tool_calls = []
-
-        llm = AsyncMock()
-        llm.bind_tools = MagicMock(return_value=llm)
-        llm.ainvoke = AsyncMock(return_value=ai_msg)
-
-        result = await agent._tool_loop(llm, [], [])
-        assert result == "final answer"
-
-    @pytest.mark.asyncio
-    async def test_tool_call_then_answer(self) -> None:
-        """LLM requests one tool call, gets result, then answers."""
-        agent = _StubAgent()
-
-        # First response: tool call
-        tool_call_msg = MagicMock()
-        tool_call_msg.content = ""
-        tool_call_msg.tool_calls = [
-            {"id": "call_1", "name": "my_tool", "args": {"x": 1}}
-        ]
-
-        # Second response: final answer
-        final_msg = MagicMock()
-        final_msg.content = "done"
-        final_msg.tool_calls = []
-
-        llm = AsyncMock()
-        llm.bind_tools = MagicMock(return_value=llm)
-        llm.ainvoke = AsyncMock(side_effect=[tool_call_msg, final_msg])
-
-        # Mock tool
-        tool = AsyncMock()
-        tool.name = "my_tool"
-        tool.ainvoke = AsyncMock(return_value="tool_result")
-
-        result = await agent._tool_loop(llm, [], [tool])
-        assert result == "done"
-        tool.ainvoke.assert_called_once_with({"x": 1})
-
-    @pytest.mark.asyncio
-    async def test_unknown_tool_handled(self) -> None:
-        """Unknown tool names produce an error message instead of crashing."""
-        agent = _StubAgent()
-
-        tool_call_msg = MagicMock()
-        tool_call_msg.content = ""
-        tool_call_msg.tool_calls = [
-            {"id": "call_1", "name": "missing", "args": {}}
-        ]
-
-        final_msg = MagicMock()
-        final_msg.content = "recovered"
-        final_msg.tool_calls = []
-
-        llm = AsyncMock()
-        llm.bind_tools = MagicMock(return_value=llm)
-        llm.ainvoke = AsyncMock(side_effect=[tool_call_msg, final_msg])
-
-        result = await agent._tool_loop(llm, [], [])
-        assert result == "recovered"
-
-    @pytest.mark.asyncio
-    async def test_max_iter_reached(self) -> None:
-        """When max iterations are exhausted, a final no-tools call is made."""
-        agent = _StubAgent()
-
-        # Every response requests a tool call
-        loop_msg = MagicMock()
-        loop_msg.content = ""
-        loop_msg.tool_calls = [
-            {"id": "call_x", "name": "t", "args": {}}
-        ]
-
-        final_msg = MagicMock()
-        final_msg.content = "gave up"
-        final_msg.tool_calls = []
-
-        tool = AsyncMock()
-        tool.name = "t"
-        tool.ainvoke = AsyncMock(return_value="ok")
-
-        llm_with_tools = AsyncMock()
-        llm_with_tools.ainvoke = AsyncMock(return_value=loop_msg)
-
-        llm = AsyncMock()
-        llm.bind_tools = MagicMock(return_value=llm_with_tools)
-        llm.ainvoke = AsyncMock(return_value=final_msg)
-
-        result = await agent._tool_loop(llm, [], [tool], max_iter=2)
-        assert result == "gave up"
-        assert llm_with_tools.ainvoke.call_count == 2
--- a/tests/test_agent_streaming.py
+++ b/tests/test_agent_streaming.py
@@ -1,416 +0,0 @@
-"""Tests for ChatAgent streaming and tool result capture (Step 2)."""
-
-from __future__ import annotations
-
-import pytest
-from unittest.mock import AsyncMock, MagicMock, patch
-from typing import Any
-
-from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
-
-from app.core.agent_registry import ChatAgent, registry
-
-
-# ── Minimal concrete agent for testing ───────────────────────────────
-
-
-class _EchoAgent(ChatAgent):
-    def get_name(self) -> str:
-        return "_echo"
-
-    def get_description(self) -> str:
-        return "Echo agent for tests"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return query
-
-
-# ── Helpers ───────────────────────────────────────────────────────────
-
-
-def _make_ai_message(content: str = "", tool_calls: list | None = None) -> AIMessage:
-    msg = AIMessage(content=content)
-    if tool_calls:
-        msg.tool_calls = tool_calls
-    else:
-        msg.tool_calls = []
-    return msg
-
-
-def _make_tool(name: str, return_value: Any) -> MagicMock:
-    t = MagicMock()
-    t.name = name
-    t.ainvoke = AsyncMock(return_value=return_value)
-    return t
-
-
-def _make_stream_chunks(tokens: list[str]) -> list[MagicMock]:
-    chunks = []
-    for tok in tokens:
-        c = MagicMock()
-        c.content = tok
-        chunks.append(c)
-    return chunks
-
-
-async def _collect_stream(agent: ChatAgent, llm: Any, messages: list, tools: list) -> list[str]:
-    tokens: list[str] = []
-    async for tok in agent._tool_loop_stream(llm, messages, tools):
-        tokens.append(tok)
-    return tokens
-
-
-# ── tool_results initialised ─────────────────────────────────────────
-
-
-def test_tool_results_init():
-    agent = _EchoAgent()
-    assert agent.tool_results == []
-
-
-# ── _tool_loop: no tool calls ────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_no_tools():
-    agent = _EchoAgent()
-    llm = AsyncMock()
-    llm.ainvoke = AsyncMock(return_value=_make_ai_message("Hello!"))
-
-    result = await agent._tool_loop(llm, [HumanMessage(content="hi")], [])
-    assert result == "Hello!"
-    assert agent.tool_results == []
-
-
-# ── _tool_loop: with one tool call + result capture ──────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_captures_tool_results():
-    agent = _EchoAgent()
-
-    # Mock execute_on_client to return structured data via the tool
-    raw_result = {"rows": [{"id": "t-1", "title": "Fix bug", "status": "todo"}]}
-
-    async def fake_executor(payload: dict) -> dict:
-        return raw_result
-
-    # AIMessage with a tool call, then a final answer
-    tool_call_msg = _make_ai_message(
-        tool_calls=[{"name": "list_tasks", "args": {}, "id": "call-1", "type": "tool_call"}]
-    )
-    final_msg = _make_ai_message("Here are your tasks.")
-
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    llm_with_tools.ainvoke = AsyncMock(side_effect=[tool_call_msg, final_msg])
-    llm.ainvoke = AsyncMock(return_value=final_msg)
-
-    mock_tool = _make_tool("list_tasks", "- Fix bug (todo)")
-
-    from app.core.ws_context import set_client_executor, clear_client_executor
-    set_client_executor(fake_executor)
-    try:
-        # Patch the tool to actually call execute_on_client
-        async def tool_side_effect(args: dict) -> str:
-            from app.core.ws_context import execute_on_client
-            res = await execute_on_client(action="select", table="tasks")
-            rows = res.get("rows", [])
-            return "\n".join(r["title"] for r in rows)
-
-        mock_tool.ainvoke = AsyncMock(side_effect=tool_side_effect)
-
-        result = await agent._tool_loop(
-            llm, [HumanMessage(content="list my tasks")], [mock_tool]
-        )
-    finally:
-        clear_client_executor()
-
-    assert result == "Here are your tasks."
-    assert len(agent.tool_results) == 1
-    assert agent.tool_results[0] == raw_result
-
-
-# ── _tool_loop: tool_results reset on each call ──────────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_resets_tool_results():
-    agent = _EchoAgent()
-    agent.tool_results = [{"stale": True}]  # pre-populated from a previous call
-
-    llm = AsyncMock()
-    llm.ainvoke = AsyncMock(return_value=_make_ai_message("Done."))
-
-    await agent._tool_loop(llm, [HumanMessage(content="hi")], [])
-    assert agent.tool_results == []
-
-
-# ── _tool_loop: unknown tool name ────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_unknown_tool():
-    agent = _EchoAgent()
-
-    # No known tools — model still calls a non-existent one; loop handles gracefully
-    tool_call_msg = _make_ai_message(
-        tool_calls=[{"name": "nonexistent", "args": {}, "id": "c1", "type": "tool_call"}]
-    )
-    final_msg = _make_ai_message("Handled.")
-
-    mock_tool = _make_tool("known", "ok")  # a different tool, not "nonexistent"
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    llm_with_tools.ainvoke = AsyncMock(side_effect=[tool_call_msg, final_msg])
-
-    result = await agent._tool_loop(llm, [HumanMessage(content="x")], [mock_tool])
-    assert result == "Handled."
-
-
-# ── _tool_loop: max_iter exhaustion ──────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_max_iter():
-    agent = _EchoAgent()
-
-    always_tool = _make_ai_message(
-        tool_calls=[{"name": "t", "args": {}, "id": "c1", "type": "tool_call"}]
-    )
-    fallback = _make_ai_message("Fallback.")
-
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    # Returns tool_call_msg on every iteration
-    llm_with_tools.ainvoke = AsyncMock(return_value=always_tool)
-    llm.ainvoke = AsyncMock(return_value=fallback)
-
-    mock_tool = _make_tool("t", "ok")
-
-    result = await agent._tool_loop(llm, [HumanMessage(content="x")], [mock_tool], max_iter=2)
-    assert result == "Fallback."
-    assert llm_with_tools.ainvoke.call_count == 2
-
-
-# ── _tool_loop_stream: no tool calls — yields tokens ─────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_no_tools_yields_tokens():
-    agent = _EchoAgent()
-
-    # No tools → llm used directly; ainvoke returns no tool calls → stream is used
-    no_tool_msg = _make_ai_message("irrelevant")
-    llm = AsyncMock()
-    llm.ainvoke = AsyncMock(return_value=no_tool_msg)
-
-    async def fake_astream(msgs):
-        for tok in ["Hello", " ", "world"]:
-            c = MagicMock()
-            c.content = tok
-            yield c
-
-    llm.astream = fake_astream
-
-    tokens = await _collect_stream(agent, llm, [HumanMessage(content="hi")], [])
-    assert tokens == ["Hello", " ", "world"]
-    assert agent.tool_results == []
-
-
-# ── _tool_loop_stream: one tool call then streaming final ─────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_with_tool_call():
-    agent = _EchoAgent()
-
-    raw_result = {"row": {"id": "t-2", "title": "Deploy", "status": "in_progress"}}
-
-    async def fake_executor(payload: dict) -> dict:
-        return raw_result
-
-    tool_call_msg = _make_ai_message(
-        tool_calls=[{"name": "get_task", "args": {"id": "t-2"}, "id": "c1", "type": "tool_call"}]
-    )
-    # After tools run, ainvoke returns no more tool calls
-    no_more_tools_msg = _make_ai_message("Task found.")
-
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    llm_with_tools.ainvoke = AsyncMock(side_effect=[tool_call_msg, no_more_tools_msg])
-
-    async def fake_astream(msgs):
-        for tok in ["Task", " ", "found."]:
-            c = MagicMock()
-            c.content = tok
-            yield c
-
-    llm.astream = fake_astream
-
-    async def tool_side_effect(args: dict) -> str:
-        from app.core.ws_context import execute_on_client
-        res = await execute_on_client(action="select", table="tasks", filters={"id": args.get("id")})
-        return res.get("row", {}).get("title", "")
-
-    mock_tool = _make_tool("get_task", "Deploy")
-    mock_tool.ainvoke = AsyncMock(side_effect=tool_side_effect)
-
-    from app.core.ws_context import set_client_executor, clear_client_executor
-    set_client_executor(fake_executor)
-    try:
-        tokens = await _collect_stream(
-            agent, llm, [HumanMessage(content="get task t-2")], [mock_tool]
-        )
-    finally:
-        clear_client_executor()
-
-    assert tokens == ["Task", " ", "found."]
-    assert len(agent.tool_results) == 1
-    assert agent.tool_results[0] == raw_result
-
-
-# ── _tool_loop_stream: tool_results reset on each call ───────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_resets_tool_results():
-    agent = _EchoAgent()
-    agent.tool_results = [{"old": True}]
-
-    no_tool_msg = _make_ai_message("")
-    llm = AsyncMock()
-    llm.ainvoke = AsyncMock(return_value=no_tool_msg)
-
-    async def fake_astream(msgs):
-        c = MagicMock()
-        c.content = "ok"
-        yield c
-
-    llm.astream = fake_astream
-
-    await _collect_stream(agent, llm, [HumanMessage(content="x")], [])
-    assert agent.tool_results == []
-
-
-# ── _tool_loop_stream: empty chunk content is skipped ────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_skips_empty_chunks():
-    agent = _EchoAgent()
-    no_tool_msg = _make_ai_message("")
-
-    llm = AsyncMock()
-    llm.ainvoke = AsyncMock(return_value=no_tool_msg)
-
-    async def fake_astream(msgs):
-        for tok in ["", "hello", "", " world", ""]:
-            c = MagicMock()
-            c.content = tok
-            yield c
-
-    llm.astream = fake_astream
-
-    tokens = await _collect_stream(agent, llm, [HumanMessage(content="x")], [])
-    assert tokens == ["hello", " world"]
-
-
-# ── _tool_loop_stream: max_iter exhaustion falls back to stream ───────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_max_iter():
-    agent = _EchoAgent()
-
-    always_tool = _make_ai_message(
-        tool_calls=[{"name": "t", "args": {}, "id": "c1", "type": "tool_call"}]
-    )
-
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    llm_with_tools.ainvoke = AsyncMock(return_value=always_tool)
-
-    async def fake_astream(msgs):
-        c = MagicMock()
-        c.content = "fallback"
-        yield c
-
-    llm.astream = fake_astream
-    mock_tool = _make_tool("t", "ok")
-
-    tokens = await _collect_stream(
-        agent, llm, [HumanMessage(content="x")], [mock_tool],
-    )
-    assert tokens == ["fallback"]
-    assert llm_with_tools.ainvoke.call_count == 5  # exhausted default max_iter
-
-
-# ── _tool_loop_stream: multiple tool results captured ────────────────
-
-
-@pytest.mark.asyncio
-async def test_tool_loop_stream_multiple_tool_results():
-    agent = _EchoAgent()
-
-    call_results = [
-        {"rows": [{"id": "t-1"}]},
-        {"rows": [{"id": "t-2"}]},
-    ]
-    call_iter = iter(call_results)
-
-    async def fake_executor(payload: dict) -> dict:
-        return next(call_iter)
-
-    # Two tool calls in one iteration
-    tool_call_msg = _make_ai_message(
-        tool_calls=[
-            {"name": "tool_a", "args": {}, "id": "c1", "type": "tool_call"},
-            {"name": "tool_b", "args": {}, "id": "c2", "type": "tool_call"},
-        ]
-    )
-    no_more_tools_msg = _make_ai_message("Done.")
-
-    llm = MagicMock()
-    llm_with_tools = MagicMock()
-    llm.bind_tools = MagicMock(return_value=llm_with_tools)
-    llm_with_tools.ainvoke = AsyncMock(side_effect=[tool_call_msg, no_more_tools_msg])
-
-    async def fake_astream(msgs):
-        c = MagicMock()
-        c.content = "Done."
-        yield c
-
-    llm.astream = fake_astream
-
-    async def tool_side_effect(args: dict) -> str:
-        from app.core.ws_context import execute_on_client
-        res = await execute_on_client(action="select", table="tasks")
-        return str(res)
-
-    tool_a = _make_tool("tool_a", "")
-    tool_a.ainvoke = AsyncMock(side_effect=tool_side_effect)
-    tool_b = _make_tool("tool_b", "")
-    tool_b.ainvoke = AsyncMock(side_effect=tool_side_effect)
-
-    from app.core.ws_context import set_client_executor, clear_client_executor
-    set_client_executor(fake_executor)
-    try:
-        tokens = await _collect_stream(
-            agent, llm, [HumanMessage(content="x")], [tool_a, tool_b]
-        )
-    finally:
-        clear_client_executor()
-
-    assert tokens == ["Done."]
-    assert len(agent.tool_results) == 2
-    assert agent.tool_results[0] == {"rows": [{"id": "t-1"}]}
-    assert agent.tool_results[1] == {"rows": [{"id": "t-2"}]}
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -1,761 +0,0 @@
-"""Unit tests for the four domain-specific chat agents with mocked LLM."""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-import app.agents  # noqa: F401 — triggers @registry.register decorators
-from app.agents.timeline_agent import TimelineAgent
-from app.agents.note_agent import NoteAgent
-from app.agents.project_agent import ProjectAgent
-from app.agents.task_agent import TaskAgent
-from app.core.agent_registry import registry
-from app.core.ws_context import clear_client_executor, set_client_executor
-
-
-# ── WS executor mock ──────────────────────────────────────────────────
-#
-# Tools call execute_on_client() which reads a ContextVar set by the WS
-# handler. In unit tests there is no WS session, so we install a fake
-# executor that returns plausible data for each action type.
-
-_FAKE_ROW: dict[str, Any] = {
-    "id": "fake-id",
-    "title": "Fake Title",
-    "name": "Fake Name",
-    "status": "todo",
-    "priority": "medium",
-    "content": "Fake content",
-    "date": 1700000000000,
-    "taskId": "fake-task-id",
-    "author": "Alice",
-    "projectId": None,
-}
-
-
-async def _fake_executor(payload: dict) -> dict:
-    action = payload.get("action", "")
-    if action == "select":
-        return {"rows": []}
-    if action == "insert":
-        data = payload.get("data", {})
-        return {"row": {**_FAKE_ROW, **data}}
-    if action == "update":
-        data = payload.get("data", {})
-        row = {**_FAKE_ROW, "id": data.get("id", "fake-id"), **data.get("updates", {})}
-        return {"row": row}
-    if action == "delete":
-        return {"deleted": True}
-    if action == "get":
-        data = payload.get("data", {})
-        return {"row": {**_FAKE_ROW, "id": data.get("id", "fake-id")}}
-    if action == "vector_upsert":
-        return {"ok": True}
-    return {}
-
-
-@pytest.fixture(autouse=True)
-def ws_executor():
-    """Install a fake WS executor for every test so tools can run without a real WS."""
-    set_client_executor(_fake_executor)
-    yield
-    clear_client_executor()
-
-
-# ── Helpers ──────────────────────────────────────────────────────────
-
-
-def _mock_llm(response_text: str) -> MagicMock:
-    """Return a mock LLM that responds with *response_text* (no tool calls)."""
-    msg = MagicMock()
-    msg.content = response_text
-    msg.tool_calls = []
-    llm = MagicMock()
-    bound = MagicMock()
-    bound.ainvoke = AsyncMock(return_value=msg)
-    llm.bind_tools = MagicMock(return_value=bound)
-    llm.ainvoke = AsyncMock(return_value=msg)
-    return llm
-
-
-def _mock_llm_with_tool_call(
-    tool_name: str, tool_args: dict[str, Any], final_text: str
-) -> MagicMock:
-    """Mock LLM that fires one tool call then returns *final_text*."""
-    tool_msg = MagicMock()
-    tool_msg.content = ""
-    tool_msg.tool_calls = [{"id": "call_1", "name": tool_name, "args": tool_args}]
-
-    final_msg = MagicMock()
-    final_msg.content = final_text
-    final_msg.tool_calls = []
-
-    bound = MagicMock()
-    bound.ainvoke = AsyncMock(side_effect=[tool_msg, final_msg])
-
-    llm = MagicMock()
-    llm.bind_tools = MagicMock(return_value=bound)
-    llm.ainvoke = AsyncMock(return_value=final_msg)
-    return llm
-
-
-# ── Registration ──────────────────────────────────────────────────────
-
-
-class TestAgentRegistration:
-    def test_all_agents_registered(self) -> None:
-        names = {a["name"] for a in registry.list_agents()}
-        assert {
-            "task_agent", "timeline_agent", "project_agent", "note_agent"
-        }.issubset(names)
-
-    def test_registry_returns_correct_types(self) -> None:
-        assert isinstance(registry.get("task_agent"), TaskAgent)
-        assert isinstance(registry.get("timeline_agent"), TimelineAgent)
-        assert isinstance(registry.get("project_agent"), ProjectAgent)
-        assert isinstance(registry.get("note_agent"), NoteAgent)
-
-    def test_descriptions_present(self) -> None:
-        for agent_info in registry.list_agents():
-            assert agent_info["description"], f"Empty description: {agent_info['name']}"
-
-
-# ── TaskAgent ─────────────────────────────────────────────────────────
-
-
-class TestTaskAgent:
-    def test_name(self) -> None:
-        assert TaskAgent().get_name() == "task_agent"
-
-    def test_description(self) -> None:
-        assert TaskAgent().get_description() == "Manages tasks and comments: list, create, update, delete, due-today, comments"
-
-    def test_get_tools_count(self) -> None:
-        assert len(TaskAgent().get_tools()) == 8
-
-    def test_tool_names(self) -> None:
-        names = {t.name for t in TaskAgent().get_tools()}
-        assert names == {
-            "list_tasks",
-            "create_task",
-            "update_task",
-            "delete_task",
-            "list_tasks_due_today",
-            "list_task_comments",
-            "add_task_comment",
-            "delete_task_comment",
-        }
-
-    @pytest.mark.asyncio
-    async def test_handle_returns_string(self) -> None:
-        with patch("app.agents.task_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Task created.")
-            result = await TaskAgent().handle("create a task", {})
-        assert isinstance(result, str)
-
-    @pytest.mark.asyncio
-    async def test_handle_no_tool_calls(self) -> None:
-        with patch("app.agents.task_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Here are your tasks.")
-            result = await TaskAgent().handle("list my tasks", {})
-        assert result == "Here are your tasks."
-
-    @pytest.mark.asyncio
-    async def test_handle_with_create_task_tool_call(self) -> None:
-        with patch("app.agents.task_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm_with_tool_call(
-                "create_task",
-                {"title": "Buy groceries", "priority": "low"},
-                "Task 'Buy groceries' created.",
-            )
-            result = await TaskAgent().handle("add a grocery task", {})
-        assert result == "Task 'Buy groceries' created."
-
-    @pytest.mark.asyncio
-    async def test_handle_accepts_empty_context(self) -> None:
-        with patch("app.agents.task_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Done.")
-            result = await TaskAgent().handle("help", {})
-        assert isinstance(result, str)
-
-    @pytest.mark.asyncio
-    async def test_handle_accepts_rich_context(self) -> None:
-        context = {
-            "user_profile": {"id": "u1", "tier": "pro"},
-            "recent_tasks": [{"id": "t1", "title": "Old task"}],
-        }
-        with patch("app.agents.task_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Tasks listed.")
-            result = await TaskAgent().handle("show tasks", context)
-        assert isinstance(result, str)
-
-
-class TestTaskAgentTools:
-    @pytest.mark.asyncio
-    async def test_list_tasks_defaults(self) -> None:
-        from app.agents.task_agent import list_tasks
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_tasks.ainvoke({})
-        m.assert_called_once_with(
-            action="select", table="tasks",
-            filters={"projectId": None, "status": None, "search": None, "orderBy": None},
-        )
-        assert result == "No tasks found matching the given filters."
-
-    @pytest.mark.asyncio
-    async def test_list_tasks_with_status_filter(self) -> None:
-        from app.agents.task_agent import list_tasks
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            await list_tasks.ainvoke({"status": "done"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["filters"]["status"] == "done"
-
-    @pytest.mark.asyncio
-    async def test_create_task_defaults(self) -> None:
-        from app.agents.task_agent import create_task
-        fake_row = {"id": "t1", "title": "Test task", "status": "todo", "priority": "medium"}
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await create_task.ainvoke({"title": "Test task"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "insert"
-        assert call_kwargs["table"] == "tasks"
-        assert call_kwargs["data"]["title"] == "Test task"
-        assert call_kwargs["data"]["status"] == "todo"
-        assert call_kwargs["data"]["priority"] == "medium"
-        assert "Test task" in result
-
-    @pytest.mark.asyncio
-    async def test_create_task_with_all_fields(self) -> None:
-        from app.agents.task_agent import create_task
-        fake_row = {"id": "t1", "title": "Deploy", "status": "in_progress", "priority": "high"}
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await create_task.ainvoke({
-                "title": "Deploy", "priority": "high", "status": "in_progress",
-                "project_id": "p1", "is_ai_suggested": 1,
-            })
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["data"]["priority"] == "high"
-        assert call_kwargs["data"]["status"] == "in_progress"
-        assert call_kwargs["data"]["projectId"] == "p1"
-        assert call_kwargs["data"]["isAiSuggested"] == 1
-
-    @pytest.mark.asyncio
-    async def test_update_task_with_status(self) -> None:
-        from app.agents.task_agent import update_task
-        fake_row = {"id": "t1", "title": "Buy groceries", "status": "done"}
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await update_task.ainvoke({"task_id": "t1", "status": "done"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "update"
-        assert call_kwargs["data"]["id"] == "t1"
-        assert call_kwargs["data"]["updates"]["status"] == "done"
-        assert "t1" in result
-
-    @pytest.mark.asyncio
-    async def test_update_task_empty_updates(self) -> None:
-        from app.agents.task_agent import update_task
-        fake_row = {"id": "t1", "title": "Task", "status": "todo"}
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await update_task.ainvoke({"task_id": "t1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["data"]["updates"] == {}
-
-    @pytest.mark.asyncio
-    async def test_delete_task(self) -> None:
-        from app.agents.task_agent import delete_task
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"deleted": True}
-            result = await delete_task.ainvoke({"task_id": "t1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "delete"
-        assert call_kwargs["table"] == "tasks"
-        assert call_kwargs["data"]["id"] == "t1"
-        assert "t1" in result
-
-    @pytest.mark.asyncio
-    async def test_list_tasks_due_today(self) -> None:
-        from app.agents.task_agent import list_tasks_due_today
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_tasks_due_today.ainvoke({})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "tasks"
-        assert "dueDateFrom" in call_kwargs["filters"]
-        assert result == "No tasks are due today."
-
-    @pytest.mark.asyncio
-    async def test_list_task_comments(self) -> None:
-        from app.agents.task_agent import list_task_comments
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_task_comments.ainvoke({"task_id": "t1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "taskComments"
-        assert call_kwargs["filters"]["taskId"] == "t1"
-        assert "t1" in result
-
-    @pytest.mark.asyncio
-    async def test_add_task_comment(self) -> None:
-        from app.agents.task_agent import add_task_comment
-        fake_row = {"id": "c1", "taskId": "t1", "author": "Alice", "content": "Looks good!"}
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await add_task_comment.ainvoke({
-                "task_id": "t1", "author": "Alice", "content": "Looks good!",
-            })
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "insert"
-        assert call_kwargs["table"] == "taskComments"
-        assert call_kwargs["data"]["taskId"] == "t1"
-        assert call_kwargs["data"]["author"] == "Alice"
-        assert call_kwargs["data"]["content"] == "Looks good!"
-        assert "Alice" in result
-
-    @pytest.mark.asyncio
-    async def test_delete_task_comment(self) -> None:
-        from app.agents.task_agent import delete_task_comment
-        with patch("app.agents.task_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"deleted": True}
-            result = await delete_task_comment.ainvoke({"comment_id": "c1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "delete"
-        assert call_kwargs["table"] == "taskComments"
-        assert call_kwargs["data"]["id"] == "c1"
-        assert "c1" in result
-
-
-# ── TimelineAgent ───────────────────────────────────────────────────
-
-
-class TestTimelineAgent:
-    def test_name(self) -> None:
-        assert TimelineAgent().get_name() == "timeline_agent"
-
-    def test_description(self) -> None:
-        assert TimelineAgent().get_description() == "Manages project timelines (milestones): list, create, update, delete"
-
-    def test_get_tools_count(self) -> None:
-        assert len(TimelineAgent().get_tools()) == 4
-
-    def test_tool_names(self) -> None:
-        names = {t.name for t in TimelineAgent().get_tools()}
-        assert names == {"list_timelines", "create_timeline", "update_timeline", "delete_timeline"}
-
-    @pytest.mark.asyncio
-    async def test_handle_no_tool_calls(self) -> None:
-        with patch("app.agents.timeline_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("No timelines found.")
-            result = await TimelineAgent().handle("list timelines", {})
-        assert result == "No timelines found."
-
-    @pytest.mark.asyncio
-    async def test_handle_with_create_tool_call(self) -> None:
-        with patch("app.agents.timeline_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm_with_tool_call(
-                "create_timeline",
-                {"project_id": "p1", "title": "MVP Launch", "date": 1700000000000},
-                "Timeline 'MVP Launch' created.",
-            )
-            result = await TimelineAgent().handle("add MVP timeline", {})
-        assert result == "Timeline 'MVP Launch' created."
-
-    @pytest.mark.asyncio
-    async def test_handle_accepts_empty_context(self) -> None:
-        with patch("app.agents.timeline_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Done.")
-            result = await TimelineAgent().handle("show milestones", {})
-        assert isinstance(result, str)
-
-
-class TestTimelineAgentTools:
-    @pytest.mark.asyncio
-    async def test_list_timelines_no_project(self) -> None:
-        from app.agents.timeline_agent import list_timelines
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_timelines.ainvoke({})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "timelines"
-        assert call_kwargs["filters"]["projectId"] is None
-        assert result == "No timelines found."
-
-    @pytest.mark.asyncio
-    async def test_list_timelines_with_project(self) -> None:
-        from app.agents.timeline_agent import list_timelines
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            await list_timelines.ainvoke({"project_id": "p1"})
-        assert m.call_args.kwargs["filters"]["projectId"] == "p1"
-
-    @pytest.mark.asyncio
-    async def test_create_timeline(self) -> None:
-        from app.agents.timeline_agent import create_timeline
-        fake_row = {"id": "cp1", "title": "Beta release", "date": 1700000000000}
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await create_timeline.ainvoke({
-                "project_id": "p1", "title": "Beta release", "date": 1700000000000,
-            })
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "insert"
-        assert call_kwargs["table"] == "timelines"
-        assert call_kwargs["data"]["projectId"] == "p1"
-        assert call_kwargs["data"]["title"] == "Beta release"
-        assert call_kwargs["data"]["date"] == 1700000000000
-        assert "Beta release" in result
-
-    @pytest.mark.asyncio
-    async def test_create_timeline_ai_suggested(self) -> None:
-        from app.agents.timeline_agent import create_timeline
-        fake_row = {"id": "cp1", "title": "Review", "date": 1700000000000}
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await create_timeline.ainvoke({
-                "project_id": "p1", "title": "Review", "date": 1700000000000, "is_ai_suggested": 1,
-            })
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["data"]["isAiSuggested"] == 1
-        assert call_kwargs["data"]["isApproved"] == 0
-
-    @pytest.mark.asyncio
-    async def test_update_timeline_approve(self) -> None:
-        from app.agents.timeline_agent import update_timeline
-        fake_row = {"id": "c1", "title": "MVP", "isApproved": 1}
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await update_timeline.ainvoke({"timeline_id": "c1", "is_approved": 1})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "update"
-        assert call_kwargs["data"]["id"] == "c1"
-        assert call_kwargs["data"]["updates"]["isApproved"] == 1
-        assert "c1" in result
-
-    @pytest.mark.asyncio
-    async def test_update_timeline_empty_updates(self) -> None:
-        from app.agents.timeline_agent import update_timeline
-        fake_row = {"id": "c1", "title": "MVP"}
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await update_timeline.ainvoke({"timeline_id": "c1"})
-        assert m.call_args.kwargs["data"]["updates"] == {}
-
-    @pytest.mark.asyncio
-    async def test_delete_timeline(self) -> None:
-        from app.agents.timeline_agent import delete_timeline
-        with patch("app.agents.timeline_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"deleted": True}
-            result = await delete_timeline.ainvoke({"timeline_id": "c1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "delete"
-        assert call_kwargs["table"] == "timelines"
-        assert call_kwargs["data"]["id"] == "c1"
-        assert "c1" in result
-
-
-# ── ProjectAgent ──────────────────────────────────────────────────────
-
-
-class TestProjectAgent:
-    def test_name(self) -> None:
-        assert ProjectAgent().get_name() == "project_agent"
-
-    def test_description(self) -> None:
-        assert ProjectAgent().get_description() == "Manages projects: list, get, create, update, archive, delete"
-
-    def test_get_tools_count(self) -> None:
-        assert len(ProjectAgent().get_tools()) == 6
-
-    def test_tool_names(self) -> None:
-        names = {t.name for t in ProjectAgent().get_tools()}
-        assert names == {
-            "list_projects",
-            "list_all_projects",
-            "get_project",
-            "create_project",
-            "update_project",
-            "delete_project",
-        }
-
-    @pytest.mark.asyncio
-    async def test_handle_no_tool_calls(self) -> None:
-        with patch("app.agents.project_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Project Alpha is active.")
-            result = await ProjectAgent().handle("show my projects", {})
-        assert result == "Project Alpha is active."
-
-    @pytest.mark.asyncio
-    async def test_handle_with_create_project_tool_call(self) -> None:
-        with patch("app.agents.project_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm_with_tool_call(
-                "create_project",
-                {"name": "Pippo"},
-                "Project 'Pippo' created.",
-            )
-            result = await ProjectAgent().handle("create project Pippo", {})
-        assert result == "Project 'Pippo' created."
-
-    @pytest.mark.asyncio
-    async def test_handle_accepts_empty_context(self) -> None:
-        with patch("app.agents.project_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Done.")
-            result = await ProjectAgent().handle("archive old project", {})
-        assert isinstance(result, str)
-
-
-class TestProjectAgentTools:
-    @pytest.mark.asyncio
-    async def test_list_projects_defaults(self) -> None:
-        from app.agents.project_agent import list_projects
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_projects.ainvoke({})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "projects"
-        assert call_kwargs["filters"]["includeArchived"] is False
-        assert result == "No projects found."
-
-    @pytest.mark.asyncio
-    async def test_list_projects_include_archived(self) -> None:
-        from app.agents.project_agent import list_projects
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            await list_projects.ainvoke({"include_archived": 1})
-        assert m.call_args.kwargs["filters"]["includeArchived"] is True
-
-    @pytest.mark.asyncio
-    async def test_list_all_projects(self) -> None:
-        from app.agents.project_agent import list_all_projects
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_all_projects.ainvoke({})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "projects"
-        assert result == "No projects found."
-
-    @pytest.mark.asyncio
-    async def test_get_project(self) -> None:
-        from app.agents.project_agent import get_project
-        fake_row = {"id": "p1", "name": "Alpha", "status": "active", "clientId": None}
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await get_project.ainvoke({"project_id": "p1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "get"
-        assert call_kwargs["table"] == "projects"
-        assert call_kwargs["data"]["id"] == "p1"
-        assert "Alpha" in result
-
-    @pytest.mark.asyncio
-    async def test_create_project_name_only(self) -> None:
-        from app.agents.project_agent import create_project
-        fake_row = {"id": "p1", "name": "Alpha"}
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await create_project.ainvoke({"name": "Alpha"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "insert"
-        assert call_kwargs["data"]["name"] == "Alpha"
-        assert call_kwargs["data"]["clientId"] is None
-        assert "Alpha" in result
-
-    @pytest.mark.asyncio
-    async def test_create_project_with_client(self) -> None:
-        from app.agents.project_agent import create_project
-        fake_row = {"id": "p1", "name": "Beta"}
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await create_project.ainvoke({"name": "Beta", "client_id": "cl1"})
-        assert m.call_args.kwargs["data"]["clientId"] == "cl1"
-
-    @pytest.mark.asyncio
-    async def test_update_project_archive(self) -> None:
-        from app.agents.project_agent import update_project
-        fake_row = {"id": "p1", "name": "Alpha", "status": "archived"}
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await update_project.ainvoke({"project_id": "p1", "status": "archived"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "update"
-        assert call_kwargs["data"]["id"] == "p1"
-        assert call_kwargs["data"]["updates"]["status"] == "archived"
-        assert "p1" in result
-
-    @pytest.mark.asyncio
-    async def test_update_project_empty_updates(self) -> None:
-        from app.agents.project_agent import update_project
-        fake_row = {"id": "p1", "name": "Alpha", "status": "active"}
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await update_project.ainvoke({"project_id": "p1"})
-        assert m.call_args.kwargs["data"]["updates"] == {}
-
-    @pytest.mark.asyncio
-    async def test_delete_project(self) -> None:
-        from app.agents.project_agent import delete_project
-        with patch("app.agents.project_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"deleted": True}
-            result = await delete_project.ainvoke({"project_id": "p1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "delete"
-        assert call_kwargs["data"]["id"] == "p1"
-        assert "p1" in result
-
-
-# ── NoteAgent ─────────────────────────────────────────────────────────
-
-
-class TestNoteAgent:
-    def test_name(self) -> None:
-        assert NoteAgent().get_name() == "note_agent"
-
-    def test_description(self) -> None:
-        assert NoteAgent().get_description() == "Manages notes: list, get, create, update, delete"
-
-    def test_get_tools_count(self) -> None:
-        assert len(NoteAgent().get_tools()) == 5
-
-    def test_tool_names(self) -> None:
-        names = {t.name for t in NoteAgent().get_tools()}
-        assert names == {"list_notes", "get_note", "create_note", "update_note", "delete_note"}
-
-    @pytest.mark.asyncio
-    async def test_handle_no_tool_calls(self) -> None:
-        with patch("app.agents.note_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Note created.")
-            result = await NoteAgent().handle("create a note", {})
-        assert result == "Note created."
-
-    @pytest.mark.asyncio
-    async def test_handle_with_create_note_tool_call(self) -> None:
-        with patch("app.agents.note_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm_with_tool_call(
-                "create_note",
-                {"title": "Daily log", "content": "# Today\nAll good."},
-                "Note 'Daily log' created.",
-            )
-            result = await NoteAgent().handle("log today's progress", {})
-        assert result == "Note 'Daily log' created."
-
-    @pytest.mark.asyncio
-    async def test_handle_accepts_empty_context(self) -> None:
-        with patch("app.agents.note_agent.get_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("Done.")
-            result = await NoteAgent().handle("show notes", {})
-        assert isinstance(result, str)
-
-
-class TestNoteAgentTools:
-    @pytest.mark.asyncio
-    async def test_list_notes_no_project(self) -> None:
-        from app.agents.note_agent import list_notes
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            result = await list_notes.ainvoke({})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "select"
-        assert call_kwargs["table"] == "notes"
-        assert call_kwargs["filters"]["projectId"] is None
-        assert result == "No notes found."
-
-    @pytest.mark.asyncio
-    async def test_list_notes_with_project(self) -> None:
-        from app.agents.note_agent import list_notes
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"rows": []}
-            await list_notes.ainvoke({"project_id": "p1"})
-        assert m.call_args.kwargs["filters"]["projectId"] == "p1"
-
-    @pytest.mark.asyncio
-    async def test_get_note(self) -> None:
-        from app.agents.note_agent import get_note
-        fake_row = {"id": "n1", "title": "Daily log", "content": "# Today\nAll good."}
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            result = await get_note.ainvoke({"note_id": "n1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "get"
-        assert call_kwargs["table"] == "notes"
-        assert call_kwargs["data"]["id"] == "n1"
-        assert "Daily log" in result
-
-    @pytest.mark.asyncio
-    async def test_create_note_minimal(self) -> None:
-        from app.agents.note_agent import create_note
-        fake_row = {"id": "n1", "title": "Daily log", "projectId": None}
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m, \
-             patch("app.agents.note_agent.embed", new_callable=AsyncMock) as me:
-            m.return_value = {"row": fake_row}
-            me.return_value = [0.0] * 1536
-            result = await create_note.ainvoke({"title": "Daily log", "content": "# Today\nAll good."})
-        # First call: insert; second call: vector_upsert
-        first_call = m.call_args_list[0].kwargs
-        assert first_call["action"] == "insert"
-        assert first_call["table"] == "notes"
-        assert first_call["data"]["title"] == "Daily log"
-        assert first_call["data"]["content"] == "# Today\nAll good."
-        assert first_call["data"]["projectId"] is None
-        assert "Daily log" in result
-
-    @pytest.mark.asyncio
-    async def test_create_note_with_project(self) -> None:
-        from app.agents.note_agent import create_note
-        fake_row = {"id": "n1", "title": "Sprint notes", "projectId": "p1"}
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m, \
-             patch("app.agents.note_agent.embed", new_callable=AsyncMock) as me:
-            m.return_value = {"row": fake_row}
-            me.return_value = [0.0] * 1536
-            await create_note.ainvoke({"title": "Sprint notes", "content": "## Sprint 1", "project_id": "p1"})
-        first_call = m.call_args_list[0].kwargs
-        assert first_call["data"]["projectId"] == "p1"
-
-    @pytest.mark.asyncio
-    async def test_update_note_content_only(self) -> None:
-        from app.agents.note_agent import update_note
-        fake_row = {"id": "n1", "title": "Daily log", "projectId": None}
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m, \
-             patch("app.agents.note_agent.embed", new_callable=AsyncMock) as me:
-            m.return_value = {"row": fake_row}
-            me.return_value = [0.0] * 1536
-            result = await update_note.ainvoke({"note_id": "n1", "content": "# Updated content"})
-        first_call = m.call_args_list[0].kwargs
-        assert first_call["action"] == "update"
-        assert first_call["data"]["id"] == "n1"
-        assert first_call["data"]["updates"]["content"] == "# Updated content"
-        assert "title" not in first_call["data"]["updates"]
-        assert "n1" in result
-
-    @pytest.mark.asyncio
-    async def test_update_note_empty_updates(self) -> None:
-        from app.agents.note_agent import update_note
-        fake_row = {"id": "n1", "title": "Daily log", "projectId": None}
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"row": fake_row}
-            await update_note.ainvoke({"note_id": "n1"})
-        assert m.call_args.kwargs["data"]["updates"] == {}
-
-    @pytest.mark.asyncio
-    async def test_delete_note(self) -> None:
-        from app.agents.note_agent import delete_note
-        with patch("app.agents.note_agent.execute_on_client", new_callable=AsyncMock) as m:
-            m.return_value = {"deleted": True}
-            result = await delete_note.ainvoke({"note_id": "n1"})
-        call_kwargs = m.call_args.kwargs
-        assert call_kwargs["action"] == "delete"
-        assert call_kwargs["table"] == "notes"
-        assert call_kwargs["data"]["id"] == "n1"
-        assert "n1" in result
--- a/tests/test_execution_plan.py
+++ b/tests/test_execution_plan.py
@@ -1,286 +0,0 @@
-"""Tests for execution_plan: PromptTemplateRegistry, ExecutionPlanBuilder, PlanCache."""
-
-from __future__ import annotations
-
-import pytest
-
-from app.core.execution_plan import (
-    ExecutionPlanBuilder,
-    PlanCache,
-    PromptTemplateRegistry,
-    plan_cache,
-    template_registry,
-)
-from app.schemas import ExecutionPlan
-
-
-# ── PromptTemplateRegistry ────────────────────────────────────────────
-
-
-class TestPromptTemplateRegistry:
-    def test_register_and_get(self) -> None:
-        reg = PromptTemplateRegistry()
-        reg.register("tpl_foo", "You are a foo agent.")
-        assert reg.get("tpl_foo") == "You are a foo agent."
-
-    def test_get_unknown_raises_key_error(self) -> None:
-        reg = PromptTemplateRegistry()
-        with pytest.raises(KeyError, match="tpl_missing"):
-            reg.get("tpl_missing")
-
-    def test_has_returns_true_for_registered(self) -> None:
-        reg = PromptTemplateRegistry()
-        reg.register("tpl_x", "prompt text")
-        assert reg.has("tpl_x") is True
-
-    def test_has_returns_false_for_unregistered(self) -> None:
-        reg = PromptTemplateRegistry()
-        assert reg.has("tpl_missing") is False
-
-    def test_list_ids_returns_all_registered_ids(self) -> None:
-        reg = PromptTemplateRegistry()
-        reg.register("tpl_a", "a")
-        reg.register("tpl_b", "b")
-        assert set(reg.list_ids()) == {"tpl_a", "tpl_b"}
-
-    def test_list_ids_does_not_return_prompt_text(self) -> None:
-        reg = PromptTemplateRegistry()
-        reg.register("tpl_secret", "top secret prompt")
-        ids = reg.list_ids()
-        assert "top secret prompt" not in ids
-
-    def test_overwrite_existing_template(self) -> None:
-        reg = PromptTemplateRegistry()
-        reg.register("tpl_x", "v1")
-        reg.register("tpl_x", "v2")
-        assert reg.get("tpl_x") == "v2"
-
-    def test_empty_registry_has_no_ids(self) -> None:
-        reg = PromptTemplateRegistry()
-        assert reg.list_ids() == []
-
-
-# ── ExecutionPlanBuilder ──────────────────────────────────────────────
-
-
-class TestExecutionPlanBuilder:
-    def test_builds_empty_plan(self) -> None:
-        plan = ExecutionPlanBuilder("task_agent").build()
-        assert plan.agent == "task_agent"
-        assert plan.steps == []
-
-    def test_add_step_basic(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("task_agent")
-            .add_step("create_task", {"priority": "high"})
-            .build()
-        )
-        assert len(plan.steps) == 1
-        assert plan.steps[0].action == "create_task"
-        assert plan.steps[0].variables == {"priority": "high"}
-        assert plan.steps[0].prompt_template is None
-        assert plan.steps[0].data_from_step is None
-
-    def test_add_step_no_params(self) -> None:
-        plan = ExecutionPlanBuilder("task_agent").add_step("fetch").build()
-        assert plan.steps[0].variables is None
-
-    def test_add_llm_step(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("task_agent")
-            .add_llm_step("tpl_task_default", {"message": "hi"})
-            .build()
-        )
-        assert plan.steps[0].action == "llm"
-        assert plan.steps[0].prompt_template == "tpl_task_default"
-        assert plan.steps[0].variables == {"message": "hi"}
-
-    def test_add_llm_step_no_variables(self) -> None:
-        plan = ExecutionPlanBuilder("task_agent").add_llm_step("tpl_x").build()
-        assert plan.steps[0].variables is None
-
-    def test_add_data_step(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("task_agent")
-            .add_step("fetch_data")
-            .add_data_step("transform", data_from_step=0)
-            .build()
-        )
-        assert plan.steps[1].action == "transform"
-        assert plan.steps[1].data_from_step == 0
-
-    def test_fluent_chaining_returns_builder(self) -> None:
-        builder = ExecutionPlanBuilder("analytics_agent")
-        result = builder.add_step("a")
-        assert result is builder
-
-    def test_fluent_chain_multiple_steps(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("analytics_agent")
-            .add_llm_step("tpl_analytics_default")
-            .add_step("format_output")
-            .add_data_step("store", data_from_step=0)
-            .build()
-        )
-        assert len(plan.steps) == 3
-
-    def test_build_validates_data_from_step_out_of_range(self) -> None:
-        with pytest.raises(ValueError, match="data_from_step"):
-            ExecutionPlanBuilder("task_agent").add_data_step("bad", data_from_step=5).build()
-
-    def test_build_validates_data_from_step_self_reference(self) -> None:
-        """data_from_step=0 on the first step (index 0) is invalid."""
-        with pytest.raises(ValueError, match="data_from_step"):
-            ExecutionPlanBuilder("task_agent").add_data_step("bad", data_from_step=0).build()
-
-    def test_build_validates_data_from_step_negative(self) -> None:
-        with pytest.raises(ValueError, match="data_from_step"):
-            ExecutionPlanBuilder("task_agent").add_data_step("bad", data_from_step=-1).build()
-
-    def test_valid_data_from_step_at_index_two(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("task_agent")
-            .add_step("step0")
-            .add_step("step1")
-            .add_data_step("step2", data_from_step=1)
-            .build()
-        )
-        assert plan.steps[2].data_from_step == 1
-
-    def test_data_from_step_zero_valid_at_index_one(self) -> None:
-        plan = (
-            ExecutionPlanBuilder("task_agent")
-            .add_step("step0")
-            .add_data_step("step1", data_from_step=0)
-            .build()
-        )
-        assert plan.steps[1].data_from_step == 0
-
-    def test_build_returns_new_plan_each_call(self) -> None:
-        builder = ExecutionPlanBuilder("task_agent").add_step("do_thing")
-        plan1 = builder.build()
-        plan2 = builder.build()
-        assert plan1 is not plan2
-        assert plan1.steps == plan2.steps
-
-    def test_plan_is_execution_plan_instance(self) -> None:
-        plan = ExecutionPlanBuilder("task_agent").build()
-        assert isinstance(plan, ExecutionPlan)
-
-
-# ── PlanCache ─────────────────────────────────────────────────────────
-
-
-class TestPlanCache:
-    def _plan(self, agent: str = "a") -> ExecutionPlan:
-        return ExecutionPlanBuilder(agent).build()
-
-    def test_cache_and_get(self) -> None:
-        cache = PlanCache()
-        plan = self._plan()
-        cache.cache_plan("key1", plan)
-        assert cache.get_plan("key1") is plan
-
-    def test_get_missing_returns_none(self) -> None:
-        cache = PlanCache()
-        assert cache.get_plan("nonexistent") is None
-
-    def test_get_all_playbooks_empty(self) -> None:
-        cache = PlanCache()
-        assert cache.get_all_playbooks() == []
-
-    def test_get_all_playbooks_returns_all_stored(self) -> None:
-        cache = PlanCache()
-        p1, p2 = self._plan("a"), self._plan("b")
-        cache.cache_plan("k1", p1)
-        cache.cache_plan("k2", p2)
-        playbooks = cache.get_all_playbooks()
-        assert len(playbooks) == 2
-        assert p1 in playbooks
-        assert p2 in playbooks
-
-    def test_lru_evicts_oldest_entry(self) -> None:
-        cache = PlanCache(maxsize=2)
-        p1, p2, p3 = self._plan("a"), self._plan("b"), self._plan("c")
-        cache.cache_plan("k1", p1)
-        cache.cache_plan("k2", p2)
-        cache.cache_plan("k3", p3)  # k1 should be evicted
-        assert cache.get_plan("k1") is None
-        assert cache.get_plan("k2") is p2
-        assert cache.get_plan("k3") is p3
-
-    def test_lru_access_updates_recency(self) -> None:
-        cache = PlanCache(maxsize=2)
-        p1, p2, p3 = self._plan("a"), self._plan("b"), self._plan("c")
-        cache.cache_plan("k1", p1)
-        cache.cache_plan("k2", p2)
-        cache.get_plan("k1")        # k1 is now most-recently used
-        cache.cache_plan("k3", p3)  # k2 should be evicted (LRU)
-        assert cache.get_plan("k1") is p1
-        assert cache.get_plan("k2") is None
-        assert cache.get_plan("k3") is p3
-
-    def test_overwrite_existing_key(self) -> None:
-        cache = PlanCache()
-        p1, p2 = self._plan("a"), self._plan("b")
-        cache.cache_plan("same_key", p1)
-        cache.cache_plan("same_key", p2)
-        assert cache.get_plan("same_key") is p2
-        assert len(cache.get_all_playbooks()) == 1
-
-    def test_overwrite_does_not_consume_capacity(self) -> None:
-        cache = PlanCache(maxsize=2)
-        p1, p2 = self._plan("a"), self._plan("b")
-        cache.cache_plan("k1", p1)
-        cache.cache_plan("k1", p2)  # overwrite, not a new slot
-        cache.cache_plan("k2", p1)  # should fit without eviction
-        assert cache.get_plan("k1") is p2
-        assert cache.get_plan("k2") is p1
-
-
-# ── Module-level singletons ───────────────────────────────────────────
-
-
-class TestModuleSingletons:
-    def test_template_registry_has_all_agent_defaults(self) -> None:
-        for agent in ("task_agent", "timeline_agent", "project_agent", "note_agent"):
-            assert template_registry.has(f"tpl_{agent}_default"), (
-                f"Missing template: tpl_{agent}_default"
-            )
-
-    def test_template_registry_has_operation_templates(self) -> None:
-        assert template_registry.has("tpl_task_extract_from_project")
-        assert template_registry.has("tpl_note_weekly_summary")
-
-    def test_template_registry_get_returns_non_empty_string(self) -> None:
-        text = template_registry.get("tpl_task_agent_default")
-        assert isinstance(text, str)
-        assert len(text) > 0
-
-    def test_plan_cache_has_prebuilt_playbooks(self) -> None:
-        assert len(plan_cache.get_all_playbooks()) >= 2
-
-    def test_playbook_create_tasks_from_project(self) -> None:
-        plan = plan_cache.get_plan("create_tasks_from_project")
-        assert plan is not None
-        assert plan.agent == "project_agent"
-        assert len(plan.steps) == 2
-        assert plan.steps[0].prompt_template == "tpl_task_extract_from_project"
-        assert plan.steps[1].data_from_step == 0
-
-    def test_playbook_generate_weekly_note(self) -> None:
-        plan = plan_cache.get_plan("generate_weekly_note")
-        assert plan is not None
-        assert plan.agent == "note_agent"
-        assert len(plan.steps) == 2
-        assert plan.steps[0].prompt_template == "tpl_note_weekly_summary"
-        assert plan.steps[1].data_from_step == 0
-
-    def test_playbook_steps_have_no_raw_prompt_text(self) -> None:
-        """Plans must not embed prompt text — only template IDs."""
-        for plan in plan_cache.get_all_playbooks():
-            for step in plan.steps:
-                if step.prompt_template is not None:
-                    assert step.prompt_template.startswith("tpl_"), (
-                        f"prompt_template looks like raw text: {step.prompt_template!r}"
-                    )
--- a/tests/test_memory_middleware.py
+++ b/tests/test_memory_middleware.py
@@ -250,15 +250,15 @@ def test_home_request_calls_memory_middleware(client):
    token = make_jwt("power", user_id=USER_ID)
    session_id = str(uuid.uuid4())

-    async def _mock_stream(user_id, message, context, reg=None):
+    async def _mock_stream(user_id, message, context, db_session_factory=None):
        # Verify memory context was injected
        assert context.get("core_memory") == {"tz": "UTC"}
-        yield "task_agent", ""
-        yield "task_agent", '{"type": "text", "content": "Done"}'
+        yield ("token", "Done")
+        yield ("mutations", [])

    with (
        patch("app.api.routes.device_ws.MemoryMiddleware", _MockMiddleware),
-        patch("app.api.routes.device_ws.orchestrate_v3_stream", side_effect=_mock_stream),
+        patch("app.api.routes.device_ws.run_home_stream", side_effect=_mock_stream),
    ):
        with client.websocket_connect(f"/api/v1/ws/device?token={token}") as ws:
            ws.send_text(json.dumps({
--- a/tests/test_middleware.py
+++ b/tests/test_middleware.py
@@ -20,7 +20,6 @@ from jose import jwt
 from app.config.settings import settings
 from app.db import get_session
 from app.main import app
-from app.schemas import ChatResponse
 from tests.conftest import TEST_USER_IDS

 # ---------------------------------------------------------------------------
@@ -50,7 +49,6 @@ _CHAT_BODY = {
        "recent_tasks": [],
        "conversation_history": [],
    },
-    "execution_mode": "direct",
 }


@@ -240,7 +238,7 @@ class TestRateLimitMiddleware:


 class TestSanitizerMiddleware:
-    """Mock ``orchestrate`` to inject controlled strings into chat responses."""
+    """Mock ``run_home`` to inject controlled strings into chat responses."""

    _CHAT_PATH = "/api/v1/chat"

@@ -248,11 +246,10 @@ class TestSanitizerMiddleware:
        return _make_jwt(user_id=str(uuid.uuid4()), tier="pro")

    def _post_chat(self, client: TestClient, response_text: str) -> dict:
-        mock_response = ChatResponse(response=response_text, actions=[])
        with patch(
-            "app.api.routes.chat.orchestrate",
+            "app.api.routes.chat.run_home",
            new_callable=AsyncMock,
-            return_value=mock_response,
+            return_value=response_text,
        ):
            resp = client.post(
                self._CHAT_PATH,
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -1,347 +0,0 @@
-"""Integration tests for the orchestrator module."""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from app.core.agent_registry import AgentRegistry, ChatAgent
-from app.core.orchestrator import (
-    classify_intent,
-    orchestrate,
-    orchestrate_stream,
-    route_pipeline,
-    route_single,
-)
-from app.schemas import ChatRequest, ChatResponse, ExecutionPlan
-
-
-# ── Stub agents ──────────────────────────────────────────────────────
-
-
-class _TaskAgent(ChatAgent):
-    def get_name(self) -> str:
-        return "task_agent"
-
-    def get_description(self) -> str:
-        return "Manages tasks: create, update, list, suggest"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return f"task: {query}"
-
-
-class _CalendarAgent(ChatAgent):
-    def get_name(self) -> str:
-        return "calendar_agent"
-
-    def get_description(self) -> str:
-        return "Calendar management: events, conflicts, scheduling"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return f"calendar: {query}"
-
-
-# ── Helpers ──────────────────────────────────────────────────────────
-
-
-def _mock_llm(response_text: str) -> MagicMock:
-    """Return a mock LLM that always produces *response_text*."""
-    msg = MagicMock()
-    msg.content = response_text
-    llm = MagicMock()
-    llm.ainvoke = AsyncMock(return_value=msg)
-    return llm
-
-
-# ── Fixtures ─────────────────────────────────────────────────────────
-
-
-@pytest.fixture(autouse=True)
-def _fresh_registry():
-    """Reset the AgentRegistry singleton between tests."""
-    AgentRegistry._instance = None
-    yield
-    AgentRegistry._instance = None
-
-
-@pytest.fixture()
-def reg() -> AgentRegistry:
-    r = AgentRegistry()
-    r.register(_TaskAgent)
-    r.register(_CalendarAgent)
-    return r
-
-
-# ── classify_intent ───────────────────────────────────────────────────
-
-
-class TestClassifyIntent:
-    @pytest.mark.asyncio
-    async def test_routes_to_known_agent(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            result = await classify_intent("add a task", {}, reg)
-        assert result == "task_agent"
-
-    @pytest.mark.asyncio
-    async def test_routes_to_calendar_agent(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("calendar_agent")
-            result = await classify_intent("schedule a meeting", {}, reg)
-        assert result == "calendar_agent"
-
-    @pytest.mark.asyncio
-    async def test_falls_back_on_unknown_name(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("nonexistent_agent")
-            result = await classify_intent("do something", {}, reg)
-        assert result == "task_agent"
-
-    @pytest.mark.asyncio
-    async def test_empty_registry_returns_fallback_without_llm_call(self) -> None:
-        empty_reg = AgentRegistry()
-        # No LLM should be instantiated — early return path
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            result = await classify_intent("anything", {}, empty_reg)
-            mock_cls.assert_not_called()
-        assert result == "task_agent"
-
-    @pytest.mark.asyncio
-    async def test_whitespace_stripped_from_response(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("  task_agent  \n")
-            result = await classify_intent("create task", {}, reg)
-        assert result == "task_agent"
-
-
-# ── route_single ─────────────────────────────────────────────────────
-
-
-class TestRouteSingle:
-    @pytest.mark.asyncio
-    async def test_returns_chat_response(self, reg: AgentRegistry) -> None:
-        result = await route_single("task_agent", "create a task", {}, reg)
-        assert isinstance(result, ChatResponse)
-
-    @pytest.mark.asyncio
-    async def test_response_contains_agent_output(self, reg: AgentRegistry) -> None:
-        result = await route_single("task_agent", "create a task", {}, reg)
-        assert result.response == "task: create a task"
-
-    @pytest.mark.asyncio
-    async def test_unknown_agent_raises_key_error(self, reg: AgentRegistry) -> None:
-        with pytest.raises(KeyError):
-            await route_single("nonexistent", "hello", {}, reg)
-
-    @pytest.mark.asyncio
-    async def test_actions_default_empty(self, reg: AgentRegistry) -> None:
-        result = await route_single("task_agent", "hi", {}, reg)
-        assert result.actions == []
-
-
-# ── route_pipeline ────────────────────────────────────────────────────
-
-
-class TestRoutePipeline:
-    @pytest.mark.asyncio
-    async def test_returns_chat_response(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("synthesized result")
-            result = await route_pipeline(
-                ["task_agent", "calendar_agent"], "plan my week", {}, reg
-            )
-        assert isinstance(result, ChatResponse)
-
-    @pytest.mark.asyncio
-    async def test_response_is_synthesis_output(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("synthesized result")
-            result = await route_pipeline(
-                ["task_agent", "calendar_agent"], "plan my week", {}, reg
-            )
-        assert result.response == "synthesized result"
-
-    @pytest.mark.asyncio
-    async def test_passes_previous_results_to_subsequent_agents(
-        self, reg: AgentRegistry
-    ) -> None:
-        """Each agent after the first should receive prior outputs in context."""
-        received_contexts: list[dict[str, Any]] = []
-
-        class _CapturingAgent(ChatAgent):
-            def get_name(self) -> str:
-                return "capture"
-
-            def get_description(self) -> str:
-                return "captures context for testing"
-
-            def get_tools(self) -> list[Any]:
-                return []
-
-            async def handle(self, query: str, context: dict[str, Any]) -> str:
-                received_contexts.append(dict(context))
-                return "captured"
-
-        reg.register(_CapturingAgent)
-
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("done")
-            await route_pipeline(["task_agent", "capture"], "hi", {}, reg)
-
-        # The second agent (capture) must have received previous results
-        assert len(received_contexts) == 1
-        assert "previous_results" in received_contexts[0]
-        assert received_contexts[0]["previous_results"] == ["task: hi"]
-
-    @pytest.mark.asyncio
-    async def test_single_agent_pipeline(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("single result")
-            result = await route_pipeline(["task_agent"], "one agent", {}, reg)
-        assert result.response == "single result"
-
-
-# ── orchestrate ───────────────────────────────────────────────────────
-
-
-class TestOrchestrate:
-    @pytest.mark.asyncio
-    async def test_direct_mode_returns_chat_response(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="add a task", execution_mode="direct")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ChatResponse)
-
-    @pytest.mark.asyncio
-    async def test_direct_mode_response_content(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="add a task", execution_mode="direct")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ChatResponse)
-        assert result.response == "task: add a task"
-
-    @pytest.mark.asyncio
-    async def test_plan_mode_returns_execution_plan(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="plan my tasks", execution_mode="plan")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ExecutionPlan)
-
-    @pytest.mark.asyncio
-    async def test_plan_mode_agent_matches_classified(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("calendar_agent")
-            request = ChatRequest(
-                message="schedule something", execution_mode="plan"
-            )
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ExecutionPlan)
-        assert result.agent == "calendar_agent"
-
-    @pytest.mark.asyncio
-    async def test_plan_mode_has_steps(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="plan tasks", execution_mode="plan")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ExecutionPlan)
-        assert len(result.steps) >= 1
-
-    @pytest.mark.asyncio
-    async def test_plan_mode_template_id_contains_agent_name(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="plan tasks", execution_mode="plan")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ExecutionPlan)
-        assert result.steps[0].prompt_template is not None
-        assert "task_agent" in result.steps[0].prompt_template
-
-    @pytest.mark.asyncio
-    async def test_default_execution_mode_is_direct(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            # execution_mode defaults to "direct"
-            request = ChatRequest(message="help me")
-            result = await orchestrate(request, reg)
-        assert isinstance(result, ChatResponse)
-
-
-# ── orchestrate_stream ────────────────────────────────────────────────
-
-
-class TestOrchestrateStream:
-    @pytest.mark.asyncio
-    async def test_yields_at_least_one_chunk(self, reg: AgentRegistry) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="add a task", execution_mode="direct")
-            chunks = [chunk async for chunk in orchestrate_stream(request, reg)]
-        assert len(chunks) >= 1
-
-    @pytest.mark.asyncio
-    async def test_all_chunks_are_plain_text(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="add a task", execution_mode="direct")
-            chunks = [chunk async for chunk in orchestrate_stream(request, reg)]
-
-        # orchestrate_stream yields plain text chunks only — no JSON final frame
-        for chunk in chunks:
-            assert isinstance(chunk, str)
-
-    @pytest.mark.asyncio
-    async def test_concatenated_chunks_equal_full_response(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(message="create a task", execution_mode="direct")
-            chunks = [chunk async for chunk in orchestrate_stream(request, reg)]
-
-        full_text = "".join(chunks)
-        assert full_text == "task: create a task"
-
-    @pytest.mark.asyncio
-    async def test_text_chunks_before_final_frame(
-        self, reg: AgentRegistry
-    ) -> None:
-        with patch("app.core.orchestrator._make_llm") as mock_cls:
-            mock_cls.return_value = _mock_llm("task_agent")
-            request = ChatRequest(
-                message="x" * 200, execution_mode="direct"
-            )  # long enough to produce multiple chunks
-            chunks = [chunk async for chunk in orchestrate_stream(request, reg)]
-
-        # All but the last chunk should be plain text (not valid final JSON)
-        non_final = chunks[:-1]
-        for chunk in non_final:
-            try:
-                parsed = json.loads(chunk)
-                assert parsed.get("done") is not True
-            except json.JSONDecodeError:
-                pass  # plain text chunk — expected
--- a/tests/test_orchestrator_v3.py
+++ b/tests/test_orchestrator_v3.py
@@ -1,236 +0,0 @@
-"""Tests for v3 orchestrator functions (Step 3)."""
-
-from __future__ import annotations
-
-import pytest
-from unittest.mock import AsyncMock, MagicMock, patch
-from typing import Any
-
-from app.core.agent_registry import ChatAgent, AgentRegistry
-from app.core.orchestrator import orchestrate_v3, orchestrate_v3_stream
-
-
-# ── Minimal agent for testing ─────────────────────────────────────────
-
-
-class _FixedAgent(ChatAgent):
-    def __init__(self, name: str = "_fixed", tokens: list[str] | None = None, **kwargs: Any) -> None:
-        super().__init__(**kwargs)
-        self._name = name
-        self._tokens = tokens or ["Hello", " world"]
-
-    def get_name(self) -> str:
-        return self._name
-
-    def get_description(self) -> str:
-        return "Fixed agent for tests"
-
-    def get_tools(self) -> list[Any]:
-        return []
-
-    async def handle(self, query: str, context: dict[str, Any]) -> str:
-        return "".join(self._tokens)
-
-    async def handle_stream(self, query: str, context: dict[str, Any]):
-        for tok in self._tokens:
-            yield tok
-
-
-# ── Mock registry factory ─────────────────────────────────────────────
-
-
-def _make_registry(agent_name: str, agent: ChatAgent) -> MagicMock:
-    reg = MagicMock(spec=AgentRegistry)
-    reg.list_agents.return_value = [{"name": agent_name, "description": "test"}]
-    reg.get.return_value = agent
-    return reg
-
-
-# ── orchestrate_v3 ────────────────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_returns_agent_name_and_instance():
-    agent = _FixedAgent("task_agent")
-    reg = _make_registry("task_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")):
-        name, inst = await orchestrate_v3(
-            user_id="u-1", message="fix a bug", context={}, reg=reg
-        )
-
-    assert name == "task_agent"
-    assert inst is agent
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_classify_called_with_message_and_context():
-    agent = _FixedAgent("note_agent")
-    reg = _make_registry("note_agent", agent)
-    ctx = {"some": "context"}
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="note_agent")) as mock_classify:
-        await orchestrate_v3(user_id="u-1", message="take a note", context=ctx, reg=reg)
-
-    mock_classify.assert_awaited_once()
-    call_args = mock_classify.call_args
-    assert call_args[0][0] == "take a note"
-    assert call_args[0][1] == ctx
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_uses_default_registry_when_none():
-    agent = _FixedAgent("task_agent")
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")), \
-         patch("app.core.orchestrator._default_registry") as mock_reg:
-        mock_reg.list_agents.return_value = [{"name": "task_agent", "description": ""}]
-        mock_reg.get.return_value = agent
-        name, inst = await orchestrate_v3(user_id="u-1", message="hi", context={})
-
-    assert name == "task_agent"
-    assert inst is agent
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_get_called_with_agent_name():
-    agent = _FixedAgent("timeline_agent")
-    reg = _make_registry("timeline_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="timeline_agent")):
-        await orchestrate_v3(user_id="u-2", message="schedule", context={}, reg=reg)
-
-    reg.get.assert_called_once_with("timeline_agent")
-
-
-# ── orchestrate_v3_stream ─────────────────────────────────────────────
-
-
-async def _collect(gen) -> list[tuple[str, str]]:
-    results: list[tuple[str, str]] = []
-    async for item in gen:
-        results.append(item)
-    return results
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_first_yield_is_domain_signal():
-    agent = _FixedAgent("task_agent", tokens=["token1"])
-    reg = _make_registry("task_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")):
-        gen = orchestrate_v3_stream(user_id="u-1", message="hi", context={}, reg=reg)
-        results = await _collect(gen)
-
-    # First item must be (agent_name, "") — domain signal
-    assert results[0] == ("task_agent", "")
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_yields_agent_name_with_tokens():
-    agent = _FixedAgent("task_agent", tokens=["Hello", " ", "world"])
-    reg = _make_registry("task_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")):
-        gen = orchestrate_v3_stream(user_id="u-1", message="hi", context={}, reg=reg)
-        results = await _collect(gen)
-
-    # All items are (agent_name, token) pairs
-    assert all(name == "task_agent" for name, _ in results)
-    tokens = [tok for _, tok in results]
-    assert tokens[0] == ""  # domain signal
-    assert tokens[1:] == ["Hello", " ", "world"]
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_different_agent():
-    agent = _FixedAgent("note_agent", tokens=["note"])
-    reg = _make_registry("note_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="note_agent")):
-        gen = orchestrate_v3_stream(user_id="u-2", message="take note", context={}, reg=reg)
-        results = await _collect(gen)
-
-    assert results[0] == ("note_agent", "")
-    assert ("note_agent", "note") in results
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_uses_default_registry_when_none():
-    agent = _FixedAgent("task_agent", tokens=["x"])
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")), \
-         patch("app.core.orchestrator._default_registry") as mock_reg:
-        mock_reg.list_agents.return_value = [{"name": "task_agent", "description": ""}]
-        mock_reg.get.return_value = agent
-        gen = orchestrate_v3_stream(user_id="u-1", message="hi", context={})
-        results = await _collect(gen)
-
-    assert results[0][0] == "task_agent"
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_empty_token_list():
-    """Agent with no tokens still emits the domain signal."""
-
-    class _EmptyAgent(_FixedAgent):
-        async def handle_stream(self, query: str, context: dict[str, Any]):
-            return
-            yield  # makes it a generator
-
-    agent = _EmptyAgent("task_agent", tokens=[])
-    reg = _make_registry("task_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")):
-        gen = orchestrate_v3_stream(user_id="u-1", message="hi", context={}, reg=reg)
-        results = await _collect(gen)
-
-    assert results == [("task_agent", "")]  # only domain signal
-
-
-@pytest.mark.asyncio
-async def test_orchestrate_v3_stream_full_text_correct():
-    """Concatenating all non-domain tokens reconstructs the full response."""
-    tokens = ["The", " ", "task", " ", "is", " ", "done."]
-    agent = _FixedAgent("task_agent", tokens=tokens)
-    reg = _make_registry("task_agent", agent)
-
-    with patch("app.core.orchestrator.classify_intent", AsyncMock(return_value="task_agent")):
-        gen = orchestrate_v3_stream(user_id="u-1", message="hi", context={}, reg=reg)
-        results = await _collect(gen)
-
-    text = "".join(tok for _, tok in results[1:])  # skip domain signal
-    assert text == "The task is done."
-
-
-# ── handle_stream default implementation ─────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_handle_stream_default_yields_full_response():
-    """Default handle_stream yields handle() result as a single chunk."""
-
-    class _SimpleAgent(ChatAgent):
-        def get_name(self) -> str:
-            return "_simple"
-
-        def get_description(self) -> str:
-            return ""
-
-        def get_tools(self) -> list[Any]:
-            return []
-
-        async def handle(self, query: str, context: dict[str, Any]) -> str:
-            return "simple response"
-
-    agent = _SimpleAgent()
-    tokens = [tok async for tok in agent.handle_stream("q", {})]
-    assert tokens == ["simple response"]
-
-
-@pytest.mark.asyncio
-async def test_handle_stream_override_used_by_stream():
-    """_FixedAgent.handle_stream override yields individual tokens."""
-    agent = _FixedAgent("t", tokens=["a", "b", "c"])
-    tokens = [tok async for tok in agent.handle_stream("q", {})]
-    assert tokens == ["a", "b", "c"]
--- a/tests/test_output_formatter.py
+++ b/tests/test_output_formatter.py
@@ -16,15 +16,15 @@ from app.schemas import (

 # ── helpers ───────────────────────────────────────────────────────────────────

-async def _stream(*pairs: tuple[str, str]):
-    """Async generator that yields (agent_name, token) pairs."""
-    for pair in pairs:
-        yield pair
+async def _stream(*events: tuple[str, object]):
+    """Async generator that yields (event_type, data) tuples."""
+    for event in events:
+        yield event


-async def collect(formatter, token_stream):
+async def collect(formatter, event_stream):
    frames = []
-    async for frame in formatter.format(token_stream):
+    async for frame in formatter.format(event_stream):
        frames.append(frame)
    return frames

@@ -32,13 +32,14 @@ async def collect(formatter, token_stream):
 # ── HomeFormatter ─────────────────────────────────────────────────────────────

@pytest.mark.asyncio
-async def test_home_formatter_text_block():
+async def test_home_formatter_text_token():
    req_id = "req-1"
-    tokens = [
-        ("task_agent", '{"type": "text", "content": "Hello world"}'),
+    events = [
+        ("token", "Hello world"),
+        ("mutations", []),
    ]
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(*tokens))
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))

    assert isinstance(frames[0], WsStreamStart)
    assert frames[0].request_id == req_id
@@ -48,104 +49,94 @@ async def test_home_formatter_text_block():


@pytest.mark.asyncio
-async def test_home_formatter_chart_block():
+async def test_home_formatter_entity_ref_from_tool_end():
    req_id = "req-2"
-    chart_json = (
-        '{"type": "chart", "chartType": "bar", '
-        '"title": "Tasks", "data": [{"x": 1}], '
-        '"config": {"x": {"label": "X", "color": "#fff"}}}'
-    )
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", chart_json)))
+    events = [
+        ("tool_end", {"name": "task_agent", "result": "Found 3 tasks."}),
+        ("token", "Here are your tasks."),
+        ("mutations", []),
+    ]
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))

    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
    assert len(block_frames) == 1
-    assert block_frames[0].block_type == "chart"
-    assert block_frames[0].data["chartType"] == "bar"
+    assert block_frames[0].block_type == "entity_ref"
+    assert block_frames[0].data["entity"] == "tasks"
+    assert block_frames[0].data["result"] == "Found 3 tasks."


@pytest.mark.asyncio
-async def test_home_formatter_invalid_chart_skipped():
+async def test_home_formatter_unknown_agent_no_block():
    req_id = "req-3"
-    bad_chart = '{"type": "chart", "chartType": "unknown", "data": []}'
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", bad_chart)))
+    events = [
+        ("tool_end", {"name": "unknown_agent", "result": "stuff"}),
+        ("mutations", []),
+    ]
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))

    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
-    assert len(block_frames) == 0  # invalid chart skipped
+    assert len(block_frames) == 0  # unknown agent → no entity mapping


@pytest.mark.asyncio
-async def test_home_formatter_entity_ref_resolved():
+async def test_home_formatter_mutations_in_stream_end():
    req_id = "req-4"
-    tool_results = [{"entity": "task", "id": "t1", "title": "My Task"}]
-    entity_json = '{"type": "entity_ref", "entity": "task"}'
-    formatter = HomeFormatter(request_id=req_id, tool_results=tool_results)
-    frames = await collect(formatter, _stream(("task_agent", entity_json)))
+    muts = [{"action": "insert", "table": "tasks", "data": {"id": "t1"}}]
+    events = [
+        ("token", "Done"),
+        ("mutations", muts),
+    ]
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))

-    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
-    assert len(block_frames) == 1
-    assert block_frames[0].data["entity"] == "task"
-    assert block_frames[0].data["items"][0]["id"] == "t1"
-
-
-@pytest.mark.asyncio
-async def test_home_formatter_entity_ref_missing_skipped():
-    req_id = "req-5"
-    entity_json = '{"type": "entity_ref", "entity": "task"}'
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", entity_json)))
-
-    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
-    assert len(block_frames) == 0  # no tool results → skipped
-
-
-@pytest.mark.asyncio
-async def test_home_formatter_table_block():
-    req_id = "req-6"
-    table_json = '{"type": "table", "headers": ["A", "B"], "rows": [["1", "2"]]}'
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", table_json)))
-
-    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
-    assert len(block_frames) == 1
-    assert block_frames[0].block_type == "table"
-
-
-@pytest.mark.asyncio
-async def test_home_formatter_timeline_block():
-    req_id = "req-7"
-    timeline_json = '{"type": "timeline", "timelines": [{"id": "c1", "title": "M1", "date": 123}]}'
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", timeline_json)))
-
-    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
-    assert len(block_frames) == 1
-    assert block_frames[0].block_type == "timeline"
+    end_frame = frames[-1]
+    assert isinstance(end_frame, WsStreamEnd)
+    assert len(end_frame.mutations) == 1
+    assert end_frame.mutations[0]["action"] == "insert"


@pytest.mark.asyncio
 async def test_home_formatter_frame_order():
    """stream_start is first, stream_end is last."""
-    req_id = "req-8"
-    formatter = HomeFormatter(request_id=req_id, tool_results=[])
-    frames = await collect(formatter, _stream(("task_agent", '{"type": "text", "content": "Hi"}')))
+    req_id = "req-5"
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(("token", "Hi"), ("mutations", [])))
    assert isinstance(frames[0], WsStreamStart)
    assert isinstance(frames[-1], WsStreamEnd)


-# ── FloatingFormatter ────────────────────────────────────────────────────────────
+@pytest.mark.asyncio
+async def test_home_formatter_multiple_tool_ends():
+    req_id = "req-6"
+    events = [
+        ("tool_end", {"name": "task_agent", "result": "3 tasks"}),
+        ("tool_end", {"name": "project_agent", "result": "2 projects"}),
+        ("token", "Overview done."),
+        ("mutations", []),
+    ]
+    formatter = HomeFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))
+
+    block_frames = [f for f in frames if isinstance(f, WsStreamBlock)]
+    assert len(block_frames) == 2
+    entities = {b.data["entity"] for b in block_frames}
+    assert entities == {"tasks", "projects"}
+
+
+# ── FloatingFormatter ─────────────────────────────────────────────────────────

@pytest.mark.asyncio
-async def test_floating_formatter_domain_emitted_first():
+async def test_floating_formatter_domain_from_tool_end():
    req_id = "pop-1"
    formatter = FloatingFormatter(request_id=req_id)
-    tokens = [
-        ("task_agent", ""),   # domain signal
-        ("task_agent", "Hello"),
-        ("task_agent", " there"),
+    events = [
+        ("tool_end", {"name": "task_agent", "result": "ok"}),
+        ("token", "Hello"),
+        ("mutations", []),
    ]
-    frames = await collect(formatter, _stream(*tokens))
+    frames = await collect(formatter, _stream(*events))

    assert isinstance(frames[0], WsFloatingDomain)
    assert frames[0].domain == "tasks"
@@ -156,8 +147,12 @@ async def test_floating_formatter_domain_emitted_first():
 async def test_floating_formatter_text_only():
    req_id = "pop-2"
    formatter = FloatingFormatter(request_id=req_id)
-    tokens = [("timeline_agent", ""), ("timeline_agent", "Summary")]
-    frames = await collect(formatter, _stream(*tokens))
+    events = [
+        ("tool_end", {"name": "timeline_agent", "result": "done"}),
+        ("token", "Summary"),
+        ("mutations", []),
+    ]
+    frames = await collect(formatter, _stream(*events))

    assert isinstance(frames[0], WsFloatingDomain)
    assert frames[0].domain == "timelines"
@@ -171,11 +166,12 @@ async def test_floating_formatter_no_block_frames():
    """FloatingFormatter must never emit WsStreamBlock."""
    req_id = "pop-3"
    formatter = FloatingFormatter(request_id=req_id)
-    tokens = [
-        ("note_agent", ""),
-        ("note_agent", '{"type": "chart", "chartType": "bar", "data": []}'),
+    events = [
+        ("tool_end", {"name": "note_agent", "result": "data"}),
+        ("token", "some text"),
+        ("mutations", []),
    ]
-    frames = await collect(formatter, _stream(*tokens))
+    frames = await collect(formatter, _stream(*events))
    assert not any(isinstance(f, WsStreamBlock) for f in frames)


@@ -183,13 +179,37 @@ async def test_floating_formatter_no_block_frames():
 async def test_floating_formatter_end_frame():
    req_id = "pop-4"
    formatter = FloatingFormatter(request_id=req_id)
-    frames = await collect(formatter, _stream(("project_agent", ""), ("project_agent", "Done")))
+    events = [
+        ("tool_end", {"name": "project_agent", "result": "ok"}),
+        ("token", "Done"),
+        ("mutations", []),
+    ]
+    frames = await collect(formatter, _stream(*events))
    assert isinstance(frames[-1], WsStreamEnd)


@pytest.mark.asyncio
-async def test_floating_formatter_unknown_agent_defaults_to_tasks():
+async def test_floating_formatter_default_domain_on_early_token():
+    """When the first event is a token (no tool_end yet), default to 'tasks'."""
    req_id = "pop-5"
    formatter = FloatingFormatter(request_id=req_id)
-    frames = await collect(formatter, _stream(("unknown_agent", ""), ("unknown_agent", "hi")))
+    events = [("token", "hi"), ("mutations", [])]
+    frames = await collect(formatter, _stream(*events))
+    assert isinstance(frames[0], WsFloatingDomain)
    assert frames[0].domain == "tasks"
+
+
+@pytest.mark.asyncio
+async def test_floating_formatter_mutations_in_stream_end():
+    """The closing WsStreamEnd frame exposes the streamed mutations."""
+    req_id = "pop-6"
+    muts = [{"action": "update", "table": "tasks", "data": {"id": "t2"}}]
+    events = [("token", "Updated"), ("mutations", muts)]
+    formatter = FloatingFormatter(request_id=req_id)
+    frames = await collect(formatter, _stream(*events))
+
+    end_frame = frames[-1]
+    assert isinstance(end_frame, WsStreamEnd)
+    assert len(end_frame.mutations) == 1
+    # A count alone would pass for the wrong mutation; pin its action too.
+    assert end_frame.mutations[0]["action"] == "update"
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -88,7 +88,7 @@ class TestPluginRegistry:
    async def test_list_filter_by_query(
        self, reg: PluginRegistry, db_session: AsyncSession, seed_plugins: list[Plugin]
    ) -> None:
-        result = await reg.list_plugins(db_session, query="time")
+        result = await reg.list_plugins(db_session, query="time tracker")
        assert result.total == 1
        assert result.plugins[0].id == "plugin-time-tracker"

--- a/tests/test_ws_unified.py
+++ b/tests/test_ws_unified.py
@@ -45,14 +45,16 @@ def _recv_until_end(ws, max_frames: int = 20) -> list[dict]:
    return frames


-async def _mock_home_stream(user_id, message, context, reg=None):
-    yield "task_agent", ""
-    yield "task_agent", '{"type": "text", "content": "Hello"}'
+async def _mock_home_stream(user_id, message, context, db_session_factory=None):
+    yield "tool_end", {"name": "task_agent", "result": "Found tasks"}
+    yield "token", "Hello"
+    yield "mutations", []


-async def _mock_floating_stream(user_id, message, context, reg=None):
-    yield "task_agent", ""
-    yield "task_agent", "Here is a summary"
+async def _mock_floating_stream(user_id, message, context, scope=None, db_session_factory=None):
+    yield "tool_end", {"name": "task_agent", "result": "ok"}
+    yield "token", "Here is a summary"
+    yield "mutations", []


 # ── tests ─────────────────────────────────────────────────────────────────────
@@ -61,7 +63,7 @@ def test_home_request_produces_stream_frames(client):
    """home_request → stream_start, stream_text+, stream_end."""
    token = make_jwt("power", user_id=USER_ID)

-    with patch("app.api.routes.device_ws.orchestrate_v3_stream", side_effect=_mock_home_stream):
+    with patch("app.api.routes.device_ws.run_home_stream", side_effect=_mock_home_stream):
        with client.websocket_connect(f"/api/v1/ws/device?token={token}") as ws:
            ws.send_text(json.dumps({
                "type": "device_hello", "device_id": "dev-1", "agent_ids": []
@@ -84,7 +86,7 @@ def test_floating_request_produces_domain_frame(client):
    """floating_request → floating_domain first, then stream_text*, stream_end."""
    token = make_jwt("power", user_id=USER_ID)

-    with patch("app.api.routes.device_ws.orchestrate_v3_stream", side_effect=_mock_floating_stream):
+    with patch("app.api.routes.device_ws.run_floating_stream", side_effect=_mock_floating_stream):
        with client.websocket_connect(f"/api/v1/ws/device?token={token}") as ws:
            ws.send_text(json.dumps({
                "type": "device_hello", "device_id": "dev-2", "agent_ids": []
@@ -112,11 +114,12 @@ def test_home_request_request_id_propagated(client):
    token = make_jwt("power", user_id=USER_ID)
    req_id = "my-unique-req-id"

-    async def _stream(user_id, message, context, reg=None):
-        yield "note_agent", ""
-        yield "note_agent", '{"type": "text", "content": "ok"}'
+    async def _stream(user_id, message, context, db_session_factory=None):
+        yield "tool_end", {"name": "note_agent", "result": "ok"}
+        yield "token", "ok"
+        yield "mutations", []

-    with patch("app.api.routes.device_ws.orchestrate_v3_stream", side_effect=_stream):
+    with patch("app.api.routes.device_ws.run_home_stream", side_effect=_stream):
        with client.websocket_connect(f"/api/v1/ws/device?token={token}") as ws:
            ws.send_text(json.dumps({
                "type": "device_hello", "device_id": "dev-3", "agent_ids": []