refactor local directory agent to two-phase LLM-with-tools architecture

Replace the single-pass FE-driven agent_run/agent_data flow with a BE-orchestrated two-phase execution using LangChain tool-calling: - Phase 1 (Triage): explores directory via new filesystem tools, matches files to existing projects using PROJECT_TOOLS - Phase 2 (Processing): reads files and performs CRUD per project group with clean LLM context windows Key changes: - Add filesystem_agent.py with list_directory, read_file_content, get_file_metadata tools using execute_on_client() - Move setup journey from REST to WebSocket (journey_start/message frames) - Add batch_runs_per_day billing limit and enforce in /trigger - Remove deprecated agent_data/agent_complete frame handlers and queues Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 08:50:46 +01:00
parent 5faa6b1d7c
commit 826f64d6bb
10 changed files with 801 additions and 569 deletions
--- a/app/agents/init.py
+++ b/app/agents/init.py
@@ -1,5 +1,5 @@
 """Expose tool modules used by deep orchestrator-worker graphs."""
-from app.agents import timeline_agent, note_agent, project_agent, task_agent
+from app.agents import filesystem_agent, timeline_agent, note_agent, project_agent, task_agent
-__all__ = ["timeline_agent", "note_agent", "project_agent", "task_agent"]
+__all__ = ["filesystem_agent", "timeline_agent", "note_agent", "project_agent", "task_agent"]
--- a/app/agents/filesystem_agent.py
+++ b/app/agents/filesystem_agent.py
@@ -0,0 +1,85 @@
 """Filesystem agent — tools for reading local directories and files on Electron.
 These tools delegate to the Electron client via ``execute_on_client()`` using
 the same WS tool-call round-trip pattern as CRUD tools.  The Electron app
 handles actual disk I/O and responds with ``tool_result`` frames.
 """
 from __future__ import annotations
 from typing import Any
 from langchain_core.tools import tool
 from app.core.ws_context import execute_on_client
@tool
 async def list_directory(path: str) -> str:
    """List files and folders in a local directory on the user's device.
    Returns a formatted listing of entries with name, type (file/directory),
    and full path.
    """
    result = await execute_on_client(
        action="list_directory",
        data={"path": path},
    )
    entries: list[dict[str, Any]] = result.get("entries", [])
    if not entries:
        return f"Directory '{path}' is empty or does not exist."
    lines: list[str] = []
    for entry in entries:
        entry_type = entry.get("type", "unknown")
        entry_name = entry.get("name", "")
        entry_path = entry.get("path", "")
        lines.append(f"- [{entry_type}] {entry_name}  ({entry_path})")
    return f"Directory listing for '{path}' ({len(entries)} entries):\n" + "\n".join(lines)
@tool
 async def read_file_content(path: str) -> str:
    """Read the text content of a local file on the user's device.
    Returns the file content as a string.  Large files may be truncated
    by the Electron client.
    """
    result = await execute_on_client(
        action="read_file_content",
        data={"path": path},
    )
    content: str = result.get("content", "")
    if not content:
        return f"File '{path}' is empty or could not be read."
    return content
@tool
 async def get_file_metadata(path: str) -> str:
    """Get metadata for a local file: size, creation date, modification date, extension.
    Returns a formatted summary of the file's metadata.
    """
    result = await execute_on_client(
        action="get_file_metadata",
        data={"path": path},
    )
    size = result.get("size", "unknown")
    created = result.get("createdAt", "unknown")
    modified = result.get("modifiedAt", "unknown")
    extension = result.get("extension", "unknown")
    name = result.get("name", path)
    return (
        f"File: {name}\n"
        f"  Extension: {extension}\n"
        f"  Size: {size} bytes\n"
        f"  Created: {created}\n"
        f"  Modified: {modified}"
    )
 FILESYSTEM_TOOLS: list[Any] = [
    list_directory,
    read_file_content,
    get_file_metadata,
 ]
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -1,54 +1,40 @@
-"""Chatbot Journey endpoints — guided conversation to build an agent prompt_template.
+"""Chatbot Journey — WS-based guided conversation to build an agent prompt_template.
-Endpoints:
+The journey is driven entirely through WebSocket frames (no REST endpoints).
-  POST /agents/journey/start    — start a new journey session
+The device WS handler dispatches ``journey_start`` and ``journey_message``
-  POST /agents/journey/message  — continue the conversation
+frames to the functions exported here.
 Sessions are stored in-memory with a 30-minute TTL.  Stale entries are
 cleaned up lazily on access.  Upgrade to Redis for multi-instance deployments.
 Journey flow:
-  1. Client sends ``{ agent_type, agent_id? }`` to ``/start``.
+  1. FE sends ``journey_start`` frame with basic agent config (directory,
-  2. Server creates a session, calls the LLM with a contextual system prompt,
+     data_types, schedule).
-     and returns the first question.
+  2. Server creates an in-memory session, sets up a WS executor so the
-  3. Client sends follow-up messages to ``/message``.
+     setup LLM can use file-system tools, does a first directory scrape,
-  4. After 3-5 turns the LLM wraps up by emitting a ``prompt_template`` block
+     and sends back a ``journey_reply`` with the first question.
-     delimited by ``PROMPT_TEMPLATE_START`` / ``PROMPT_TEMPLATE_END``.
+  3. FE sends ``journey_message`` frames for each user reply.
-  5. Server parses the block, sets ``done=True``, and returns the template.
+  4. Server appends the user message, calls the LLM (which may read files
-
+     via tools), and sends back a ``journey_reply``.
-The ``prompt_template`` from the final response is meant to be stored by
+  5. After 3-5 turns the LLM wraps up by emitting a ``prompt_template``
-the Electron client in local agent settings and later sent to
+     block delimited by ``PROMPT_TEMPLATE_START`` / ``PROMPT_TEMPLATE_END``.
-``POST /agents/trigger`` when a run is executed.
+  6. Server parses the block, sends ``journey_reply`` with ``done=True``
     and the template.  FE stores it locally.
 """
 from __future__ import annotations
 import json
 import logging
 import time
 import uuid
 from dataclasses import dataclass, field
 from typing import Any
-from fastapi import APIRouter, Depends, HTTPException, status
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from app.api.deps import get_current_user
+from app.agents.filesystem_agent import FILESYSTEM_TOOLS
 from app.core.llm import get_llm
 from app.db import get_session
 from app.models import CloudAgentConfig, LocalAgentConfig
 from app.schemas import (
    JourneyMessageRequest,
    JourneyResponse,
    JourneyStartRequest,
    UserProfile,
 )
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/agents/journey", tags=["agents"])
 # ── Session TTL ───────────────────────────────────────────────────────────
 _SESSION_TTL_SECONDS: int = 1800  # 30 minutes
@@ -59,16 +45,21 @@ _TEMPLATE_END = "PROMPT_TEMPLATE_END"
 # Maximum number of conversation turns before the LLM is nudged to wrap up.
 _MAX_TURNS: int = 5
 # Max tool-calling steps per LLM invocation.
 _MAX_TOOL_STEPS: int = 6
 # ── In-memory session store ───────────────────────────────────────────────
@dataclass
-class _JourneySession:
+class JourneySession:
    session_id: str
    user_id: str
    agent_type: str  # "local" | "cloud"
    directory: str
    data_types: list[str]
    history: list[dict[str, Any]] = field(default_factory=list)
    system_prompt: str = ""
    created_at: float = field(default_factory=time.monotonic)
    def is_expired(self) -> bool:
@@ -76,67 +67,70 @@ class _JourneySession:
 # session_id → session
-_sessions: dict[str, _JourneySession] = {}
+_sessions: dict[str, JourneySession] = {}
-def _get_session(session_id: str, user_id: str) -> _JourneySession:
+def get_journey_session(session_id: str, user_id: str) -> JourneySession | None:
-    """Retrieve session; raise 404 on missing, expired, or wrong owner."""
+    """Retrieve session; return None on missing, expired, or wrong owner."""
    s = _sessions.get(session_id)
    if s is None or s.is_expired():
        _sessions.pop(session_id, None)
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Journey session not found or expired")
+        return None
    if s.user_id != user_id:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Journey session not found or expired")
+        return None
    return s
 # ── System prompt builder ─────────────────────────────────────────────────
 _LOCAL_PREAMBLE = """\
 What kind of files are in the directories you want to monitor? \
 (for example: emails saved as .eml, documents in .pdf or .txt, markdown notes, etc.)"""
 _CLOUD_PREAMBLE = """\
 What kind of emails or messages should I look for? \
 (for example: client communications, invoices, meeting notes, project updates, etc.)"""
 _SYSTEM_PROMPT_TEMPLATE = """\
 You are a friendly assistant helping a freelancer configure a data-extraction agent.
-Your job is to understand exactly what data the user wants to extract from their {source_description} \
+Your job is to understand exactly what data the user wants to extract from their
-and produce a detailed prompt_template that a separate AI will use as its instruction set.
+local directory and produce a detailed prompt_template that a separate AI will use
 as its instruction set.
-Ask concise, focused questions one at a time.  Cover these topics (not necessarily in this order):
+You have access to file-system tools to explore the user's directory:
-  1. The type and format of the source content.
+- list_directory: to see folder structure
 - read_file_content: to peek at file contents
 - get_file_metadata: to check file info
 The user's configured directory is: {directory}
 Target data types: {data_types}
 Start by exploring the directory to understand its structure.  Then ask concise,
 focused questions one at a time.  Cover these topics (not necessarily in this order):
  1. The type and format of the source content (confirmed by your exploration).
  2. Which data types to extract: tasks, notes, timelines, and/or projects.
-  3. How fields should be mapped (e.g. email subject → task title).
+  3. How fields should be mapped (e.g. filename → task title).
  4. Priority or status rules (e.g. "urgent" keyword → high priority).
  5. Any special handling, date extraction, or exclusions.
-After 3-5 questions (when you have enough information), output the final prompt_template between \
+After 3-5 questions (when you have enough information), output the final prompt_template
-these exact markers on their own lines:
+between these exact markers on their own lines:
 {template_start}
 <the complete extraction prompt here>
 {template_end}
-The prompt_template must be a self-contained instruction for an AI that receives a document/email/message \
+The prompt_template must be a self-contained instruction for an AI that reads files
-and must return a JSON array of records in this shape:
+and must perform CRUD operations using tools to create records.  It should specify:
-  [{{ "table": "<tasks|notes|timelines|projects>", "data": {{ <field: value> }} }}, ...]
+  - What entity types to create (tasks, notes, timelines, projects).
  - How to map file content to record fields (camelCase: title, status, priority,
    dueDate, projectId, content, etc.).
  - That isAiSuggested must be set to 1 and isApproved to 0 on every record.
  - Concrete examples of mappings based on what you discovered in the directory.
 Rules for the generated template:
  - Be explicit about field names (camelCase: title, status, priority, dueDate, projectId, content, etc.).
  - Include concrete examples of mappings.
  - Mention that Electron adds id/createdAt/updatedAt automatically.
  - Set isAiSuggested: true and isApproved: false on every record.
 {existing_section}\
-Do not ask more than {max_turns} questions total. Start with your first question now.\
+Do not ask more than {max_turns} questions total.  Begin by exploring the directory,
 then ask your first question.\
 """
-def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
+def _build_system_prompt(
-    source_description = (
+    directory: str,
-        "files in local directories" if agent_type == "local" else "emails and messages from cloud providers"
+    data_types: list[str],
-    )
+    existing_template: str | None = None,
 ) -> str:
    existing_section = (
        f"\nThe user already has the following prompt_template — refine it based on their answers:\n"
        f"---\n{existing_template}\n---\n"
@@ -144,7 +138,8 @@ def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
        else ""
    )
    return _SYSTEM_PROMPT_TEMPLATE.format(
-        source_description=source_description,
+        directory=directory,
        data_types=", ".join(data_types),
        template_start=_TEMPLATE_START,
        template_end=_TEMPLATE_END,
        existing_section=existing_section,
@@ -152,10 +147,6 @@ def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
    )
 def _first_question(agent_type: str) -> str:
    return _LOCAL_PREAMBLE if agent_type == "local" else _CLOUD_PREAMBLE
 # ── Template extraction ───────────────────────────────────────────────────
@@ -168,11 +159,37 @@ def _extract_template(text: str) -> str | None:
    return text[start_idx:end_idx].strip() or None
-# ── LLM call ─────────────────────────────────────────────────────────────
+# ── LLM call with tool support ───────────────────────────────────────────
-async def _call_llm(system_prompt: str, history: list[dict[str, Any]]) -> str:
+def _as_text(content: Any) -> str:
-    """Build LangChain messages from history and invoke the LLM."""
+    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict):
                text = item.get("text")
                if isinstance(text, str):
                    parts.append(text)
        return "".join(parts)
    return str(content)
 async def _call_llm_with_tools(
    system_prompt: str,
    history: list[dict[str, Any]],
    tools: list[Any],
 ) -> str:
    """Build LangChain messages from history and invoke the LLM with tools.
    Handles tool-calling loops: if the LLM calls tools, execute them and
    continue until a final text response is produced.
    """
    messages: list[Any] = [SystemMessage(content=system_prompt)]
    for turn in history:
        if turn["role"] == "user":
@@ -181,126 +198,161 @@ async def _call_llm(system_prompt: str, history: list[dict[str, Any]]) -> str:
            messages.append(AIMessage(content=turn["content"]))
    llm = get_llm(model=None, temperature=0.4)
-    response = await llm.ainvoke(messages)
+    llm_with_tools = llm.bind_tools(tools)
-    return response.content  # type: ignore[return-value]
+    tool_map = {tool_def.name: tool_def for tool_def in tools}
    for _ in range(_MAX_TOOL_STEPS):
        response: AIMessage = await llm_with_tools.ainvoke(messages)
        messages.append(response)
        if not response.tool_calls:
            return _as_text(response.content)
        for call in response.tool_calls:
            call_name = str(call.get("name", ""))
            call_args = call.get("args", {})
            logger.info(
                "agent_setup: journey tool_call name=%s args=%s",
                call_name,
                json.dumps(call_args, ensure_ascii=True)[:500],
            )
            tool_fn = tool_map.get(call_name)
            if tool_fn is None:
                tool_output = f"Unknown tool: {call_name}"
            else:
                tool_output = await tool_fn.ainvoke(call_args)
            logger.info(
                "agent_setup: journey tool_result name=%s output=%s",
                call_name,
                str(tool_output)[:800],
            )
            messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
    # Fallback: exceeded max steps.
    final = await llm.ainvoke(messages)
    return _as_text(final.content)
-# ── Existing-config loader ────────────────────────────────────────────────
+# ── Journey handlers (called from device_ws.py) ──────────────────────────
-async def _load_existing_template(
+async def handle_journey_start(
    agent_id: str,
    user_id: str,
-    db: AsyncSession,
+    frame: dict[str, Any],
-) -> str | None:
+) -> dict[str, Any]:
-    """Return the prompt_template of an existing agent config, or None."""
+    """Handle a ``journey_start`` WS frame.
    # Try local first, then cloud.
    local_result = await db.execute(
        select(LocalAgentConfig).where(
            LocalAgentConfig.id == agent_id,
            LocalAgentConfig.user_id == user_id,
        )
    )
    local = local_result.scalar_one_or_none()
    if local is not None:
        return local.prompt_template
-    cloud_result = await db.execute(
+    Creates a session, runs the setup LLM with directory exploration,
-        select(CloudAgentConfig).where(
+    and returns the ``journey_reply`` payload.
            CloudAgentConfig.id == agent_id,
            CloudAgentConfig.user_id == user_id,
        )
    )
    cloud = cloud_result.scalar_one_or_none()
    return cloud.prompt_template if cloud is not None else None
 # ── Routes ────────────────────────────────────────────────────────────────
@router.post("/start", response_model=JourneyResponse, status_code=status.HTTP_200_OK)
 async def start_journey(
    body: JourneyStartRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> JourneyResponse:
    """Start a new Chatbot Journey session.
    If ``agent_id`` is provided the session is pre-seeded with the existing
    agent's ``prompt_template`` so the user can refine it.
    """
-    # Load existing template (may be None).
+    agent_type = frame.get("agent_type", "local")
-    existing_template: str | None = None
+    directory = frame.get("directory", "")
-    if body.agent_id:
+    data_types = frame.get("data_types", [])
-        existing_template = await _load_existing_template(body.agent_id, current_user.id, db)
+    existing_template = frame.get("existing_template")
        # If agent_id was given but not found, proceed without seeding (don't 404 —
        # the user may be starting a fresh journey for a not-yet-persisted config).
    system_prompt = _build_system_prompt(body.agent_type, existing_template)
    first_question = _first_question(body.agent_type)
    session_id = str(uuid.uuid4())
-    session = _JourneySession(
+    system_prompt = _build_system_prompt(directory, data_types, existing_template)
    session = JourneySession(
        session_id=session_id,
-        user_id=current_user.id,
+        user_id=user_id,
-        agent_type=body.agent_type,
+        agent_type=agent_type,
-        # Seed history with the AI's first question so it stays consistent.
+        directory=directory,
-        history=[{"role": "assistant", "content": first_question}],
+        data_types=data_types,
        system_prompt=system_prompt,
    )
-    # Store the system prompt inside the session for reuse in /message.
+
-    session.__dict__["_system_prompt"] = system_prompt  # type: ignore[index]
+    # The LLM will explore the directory using FILESYSTEM_TOOLS via the
    # ws_context executor (already set by the WS handler before calling us).
    ai_reply = await _call_llm_with_tools(
        system_prompt=system_prompt,
        history=[],
        tools=list(FILESYSTEM_TOOLS),
    )
    session.history.append({"role": "assistant", "content": ai_reply})
    _sessions[session_id] = session
-    logger.info("Journey session %s started for user %s (agent_type=%s)", session_id, current_user.id, body.agent_type)
+    logger.info(
-    return JourneyResponse(session_id=session_id, message=first_question, done=False)
+        "agent_setup: journey session %s started for user %s (directory=%s)",
        session_id,
        user_id,
        directory,
    )
-
+    # Check if the LLM produced the template on the first turn (unlikely but possible).
@router.post("/message", response_model=JourneyResponse, status_code=status.HTTP_200_OK)
 async def send_journey_message(
    body: JourneyMessageRequest,
    current_user: UserProfile = Depends(get_current_user),
    db: AsyncSession = Depends(get_session),
 ) -> JourneyResponse:
    """Send a message in an existing Chatbot Journey session.
    The server appends the user's message to the conversation history,
    calls the LLM, and appends the AI reply.  When the LLM wraps up with a
    ``prompt_template`` block the response includes ``done=True`` and the
    extracted template.
    """
    session = _get_session(body.session_id, current_user.id)
    system_prompt: str = session.__dict__.get("_system_prompt", _build_system_prompt(session.agent_type, None))  # type: ignore[assignment]
    # Append user turn to history.
    session.history.append({"role": "user", "content": body.message})
    # Call the LLM with the full conversation so far.
    ai_reply = await _call_llm(system_prompt, session.history)
    # Append AI turn.
    session.history.append({"role": "assistant", "content": ai_reply})
    # Check if the LLM produced the final template.
    prompt_template = _extract_template(ai_reply)
    done = prompt_template is not None
    # Strip the sentinel markers from the message shown to the user.
    display_message = ai_reply
    if done:
        display_message = (
            ai_reply[: ai_reply.index(_TEMPLATE_START)].strip()
            or "Here is your agent configuration. You can save it or continue refining."
        )
        _sessions.pop(session_id, None)
    return {
        "type": "journey_reply",
        "session_id": session_id,
        "message": display_message,
        "done": done,
        "prompt_template": prompt_template,
    }
 async def handle_journey_message(
    user_id: str,
    frame: dict[str, Any],
 ) -> dict[str, Any]:
    """Handle a ``journey_message`` WS frame.
    Appends the user message, calls the LLM, and returns the
    ``journey_reply`` payload.
    """
    session_id = frame.get("session_id", "")
    message = frame.get("message", "")
    session = get_journey_session(session_id, user_id)
    if session is None:
        return {
            "type": "journey_reply",
            "session_id": session_id,
            "message": "Journey session not found or expired. Please start a new setup.",
            "done": True,
            "prompt_template": None,
        }
    # Append user turn.
    session.history.append({"role": "user", "content": message})
    # Call the LLM with tools.
    ai_reply = await _call_llm_with_tools(
        system_prompt=session.system_prompt,
        history=session.history,
        tools=list(FILESYSTEM_TOOLS),
    )
    session.history.append({"role": "assistant", "content": ai_reply})
    # Check if the LLM produced the final template.
    prompt_template = _extract_template(ai_reply)
    done = prompt_template is not None
    display_message = ai_reply
    if done:
-        logger.info("Journey session %s completed for user %s", body.session_id, current_user.id)
+        display_message = (
-        # Clean up the session immediately on completion.
+            ai_reply[: ai_reply.index(_TEMPLATE_START)].strip()
-        _sessions.pop(body.session_id, None)
+            or "Here is your agent configuration. You can save it or continue refining."
        )
        _sessions.pop(session_id, None)
        logger.info("agent_setup: journey session %s completed for user %s", session_id, user_id)
    else:
        # Nudge the LLM to wrap up after max turns.
        turns = sum(1 for t in session.history if t["role"] == "user")
        if turns >= _MAX_TURNS:
            # Add a system-level nudge as a hidden user message.
            session.history.append({
                "role": "user",
                "content": (
@@ -309,9 +361,10 @@ async def send_journey_message(
                ),
            })
-    return JourneyResponse(
+    return {
-        session_id=body.session_id,
+        "type": "journey_reply",
-        message=display_message,
+        "session_id": session_id,
-        done=done,
+        "message": display_message,
-        prompt_template=prompt_template,
+        "done": done,
-    )
+        "prompt_template": prompt_template,
    }
--- a/app/api/routes/agents.py
+++ b/app/api/routes/agents.py
@@ -13,9 +13,10 @@ from __future__ import annotations
 import asyncio
 import uuid
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.api.deps import get_current_user
@@ -80,6 +81,34 @@ def _enforce_agent_limit(tier: str, current_count: int) -> int:
    return limit
 async def _enforce_run_frequency(
    tier: str,
    user_id: str,
    db: AsyncSession,
 ) -> None:
    """Raise HTTP 402 if the user has exceeded their daily batch run limit."""
    limit: int = FEATURES.get(tier, FEATURES["free"])["batch_runs_per_day"]
    if limit == -1:
        return  # unlimited
    today_start = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    result = await db.execute(
        select(func.count(AgentRunLog.id)).where(
            AgentRunLog.user_id == user_id,
            AgentRunLog.started_at >= today_start,
        )
    )
    runs_today: int = result.scalar_one()
    if runs_today >= limit:
        raise HTTPException(
            status_code=status.HTTP_402_PAYMENT_REQUIRED,
            detail=f"Daily batch run limit ({limit}) reached for your tier. Upgrade for more runs.",
        )
 # ── Catalog ───────────────────────────────────────────────────────────
@router.get("/catalog", response_model=list[AgentCatalogItem])
@@ -157,11 +186,12 @@ async def trigger_agent_run(
 ) -> AgentRunLogResponse:
    """Trigger a local agent run using client-provided configuration."""
    _enforce_agent_limit(current_user.tier, body.active_agents)
    await _enforce_run_frequency(current_user.tier, current_user.id, db)
    config = LocalAgentConfig(
        id=str(uuid.uuid4()),
        user_id=current_user.id,
-        device_id="",
+        device_id=body.device_id,
        name="Local Directory Monitor",
        directory_paths=[body.directory],
        data_types=_to_data_types(body.what_to_extract),
--- a/app/api/routes/device_ws.py
+++ b/app/api/routes/device_ws.py
@@ -14,11 +14,11 @@ Protocol:
  4. Session enters message dispatch loop + heartbeat.
 Incoming frame dispatch:
-  - ``tool_result``    → resolves a pending tool-call Future.
+  - ``tool_result``      → resolves a pending tool-call Future.
-  - ``agent_data``     → enqueued in the per-run agent data queue.
+  - ``journey_start``    → starts a guided setup journey session.
-  - ``agent_complete`` → sends None sentinel to close the queue stream.
+  - ``journey_message``  → continues a journey conversation.
-  - ``pong``           → heartbeat acknowledgement (updates last-seen).
+  - ``pong``             → heartbeat acknowledgement (updates last-seen).
-  - unknown types      → logged, ignored.
+  - unknown types        → logged, ignored.
 Outgoing heartbeat: ``{ "type": "ping" }`` every 30 s.
@@ -39,6 +39,7 @@ from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 from jose import JWTError, jwt
 from sqlalchemy import update
 from app.api.routes.agent_setup import handle_journey_message, handle_journey_start
 from app.config.settings import settings
 from app.core.agent_runner import trigger_pending_runs
 from app.core.deep_agent import run_floating_stream, run_home_stream
@@ -147,37 +148,6 @@ async def _message_loop(websocket: WebSocket, user_id: str) -> None:
                    "device_ws: tool_result missing id from user=%s", user_id
                )
        elif frame_type == WsFrameType.agent_data:
            run_id = frame.get("run_id")
            if run_id:
                try:
                    queue = device_manager.get_agent_data_queue(user_id, run_id)
                    await queue.put(frame)
                except RuntimeError:
                    logger.warning(
                        "device_ws: agent_data for unknown run user=%s run=%s",
                        user_id,
                        run_id,
                    )
            else:
                logger.warning(
                    "device_ws: agent_data missing run_id from user=%s", user_id
                )
        elif frame_type == WsFrameType.agent_complete:
            run_id = frame.get("run_id")
            if run_id:
                try:
                    queue = device_manager.get_agent_data_queue(user_id, run_id)
                    # Sentinel: signals the agent data stream is finished.
                    await queue.put(None)
                except RuntimeError:
                    pass
            else:
                logger.warning(
                    "device_ws: agent_complete missing run_id from user=%s", user_id
                )
        elif frame_type == WsFrameType.home_request:
            asyncio.create_task(
                _handle_home_request(websocket, user_id, frame)
@@ -188,6 +158,16 @@ async def _message_loop(websocket: WebSocket, user_id: str) -> None:
                _handle_floating_request(websocket, user_id, frame)
            )
        elif frame_type == WsFrameType.journey_start:
            asyncio.create_task(
                _handle_journey_start(websocket, user_id, frame)
            )
        elif frame_type == WsFrameType.journey_message:
            asyncio.create_task(
                _handle_journey_message(websocket, user_id, frame)
            )
        elif frame_type == "pong":
            # Heartbeat ack — nothing to do, connection is alive.
            pass
@@ -345,6 +325,63 @@ async def _handle_floating_request(
    )
 # ── v4 Journey Handlers ─────────────────────────────────────────────
 async def _handle_journey_start(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Handle a journey_start frame — explores directory and sends first question."""
    executor = await _make_ws_executor(websocket, user_id)
    set_client_executor(executor)
    try:
        reply = await handle_journey_start(user_id, frame)
        await websocket.send_text(json.dumps(reply))
    except Exception as exc:
        logger.error(
            "device_ws: journey_start failed user=%s: %s", user_id, exc
        )
        await websocket.send_text(json.dumps({
            "type": "journey_reply",
            "session_id": frame.get("session_id", ""),
            "message": f"Failed to start journey: {exc}",
            "done": True,
            "prompt_template": None,
        }))
    finally:
        clear_client_executor()
 async def _handle_journey_message(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Handle a journey_message frame — continues the journey conversation."""
    executor = await _make_ws_executor(websocket, user_id)
    set_client_executor(executor)
    try:
        reply = await handle_journey_message(user_id, frame)
        await websocket.send_text(json.dumps(reply))
    except Exception as exc:
        session_id = frame.get("session_id", "")
        logger.error(
            "device_ws: journey_message failed user=%s session=%s: %s",
            user_id, session_id, exc,
        )
        await websocket.send_text(json.dumps({
            "type": "journey_reply",
            "session_id": session_id,
            "message": f"Journey error: {exc}",
            "done": True,
            "prompt_template": None,
        }))
    finally:
        clear_client_executor()
 # ── Heartbeat ─────────────────────────────────────────────────────────
 async def _heartbeat_loop(websocket: WebSocket) -> None:
@@ -378,6 +415,3 @@ async def _mark_runs_disconnected(user_id: str) -> None:
            user_id,
            exc,
        )
--- a/app/billing/tier_manager.py
+++ b/app/billing/tier_manager.py
@@ -21,6 +21,7 @@ FEATURES: dict[str, dict[str, Any]] = {
    "free": {
        "agents": 3,
        "batch_active": 2,
        "batch_runs_per_day": 5,
        "cloud_storage_gb": 0,
        "backup_gb": 0,
        "providers": 1,
@@ -31,6 +32,7 @@ FEATURES: dict[str, dict[str, Any]] = {
    "pro": {
        "agents": -1,           # unlimited
        "batch_active": 10,
        "batch_runs_per_day": 50,
        "cloud_storage_gb": 5,
        "backup_gb": 5,
        "providers": -1,
@@ -41,6 +43,7 @@ FEATURES: dict[str, dict[str, Any]] = {
    "power": {
        "agents": -1,
        "batch_active": -1,     # unlimited
        "batch_runs_per_day": -1,  # unlimited
        "cloud_storage_gb": 25,
        "backup_gb": 25,
        "providers": -1,
@@ -51,6 +54,7 @@ FEATURES: dict[str, dict[str, Any]] = {
    "team": {
        "agents": -1,
        "batch_active": -1,
        "batch_runs_per_day": -1,  # unlimited
        "cloud_storage_gb": -1,  # unlimited
        "backup_gb": -1,         # unlimited
        "providers": -1,
--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -2,14 +2,14 @@
 Drives two agent types:
-* **Local directory agent** — sends an ``agent_run`` frame to the connected
+* **Local directory agent** — two-phase execution that mirrors the
-  Electron device, waits for the device to stream back file contents via
+  ``deep_agent.py`` tool-calling pattern.  Phase 1 (Triage) explores the
-  ``agent_data`` frames, then calls the LLM to extract structured items from
+  user's directory via file-system tools and groups files by project.
-  each file and pushes inserts to Electron via tool-call round-trips.
+  Phase 2 (Processing) reads full file contents and performs CRUD
  operations using the standard entity tools (tasks, notes, etc.).
 * **Cloud connector agent** — fetches data from third-party APIs (Gmail,
-  Teams, Outlook) and pushes extracted items to Electron.  **This path is
+  Teams, Outlook) and pushes extracted items to Electron.
  a stub** — provider integrations are implemented in Step 3.6.
 Usage
 -----
@@ -33,11 +33,17 @@ from datetime import datetime, timedelta, timezone
 from typing import Any
 from croniter import croniter
-from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from sqlalchemy import select
 from app.agents.filesystem_agent import FILESYSTEM_TOOLS
 from app.agents.note_agent import NOTE_TOOLS
 from app.agents.project_agent import PROJECT_TOOLS
 from app.agents.task_agent import TASK_TOOLS
 from app.agents.timeline_agent import TIMELINE_TOOLS
 from app.core.device_manager import DeviceConnectionManager
 from app.core.llm import get_llm
 from app.core.ws_context import clear_client_executor, set_client_executor
 from app.db import async_session
 from app.models import AgentRunLog, CloudAgentConfig, LocalAgentConfig
@@ -45,50 +51,83 @@ logger = logging.getLogger(__name__)
 # ── Timeouts ───────────────────────────────────────────────────────────────
-# Max seconds to wait for Electron to finish streaming file data.
+# Max seconds to wait for a single tool-call round-trip (FE → BE).
-_FILE_READ_TIMEOUT: int = 120
+_TOOL_CALL_TIMEOUT: int = 30
-# Max seconds to wait for Electron to acknowledge a single tool-call insert.
+# Max LLM reasoning steps per phase.
-_INSERT_TIMEOUT: int = 30
+_MAX_TRIAGE_STEPS: int = 10
 _MAX_PROCESSING_STEPS: int = 12
-# ── Allowed tables & extraction schema hints ───────────────────────────────
+# ── Data-type to tool mapping ─────────────────────────────────────────────
-_ALLOWED_TABLES: frozenset[str] = frozenset(
+_DATA_TYPE_TOOLS: dict[str, list[Any]] = {
-    {"tasks", "notes", "timelines", "projects", "taskComments"}
+    "tasks": TASK_TOOLS,
-)
+    "projects": PROJECT_TOOLS,
-
+    "notes": NOTE_TOOLS,
-# Field descriptions fed to the extraction LLM as concise schema references.
+    "timelines": TIMELINE_TOOLS,
 _TABLE_SCHEMAS: dict[str, str] = {
    "tasks": (
        "title (str, required), description (str), "
        "status (todo|in_progress|done, default todo), "
        "priority (high|medium|low, default medium), "
        "assignee (JSON array string), dueDate (ms timestamp int), projectId (str)"
    ),
    "notes": "title (str, required), content (str, markdown), projectId (str)",
    "timelines": (
        "title (str, required), projectId (str, required), date (ms timestamp int)"
    ),
    "projects": "name (str, required), clientId (str)",
    "taskComments": "taskId (str, required), author (str), content (str, required)",
 }
-_EXTRACTION_SYSTEM_PROMPT = """\
+# ── Triage prompt ─────────────────────────────────────────────────────────
 You are a data extraction assistant for a freelance project management tool.
 Given a document, extract structured records matching the user's instructions.
-Output a JSON array (no markdown fences, no explanation) of objects shaped:
+_TRIAGE_SYSTEM_PROMPT = """\
-  [{{"table": "<table_name>", "data": {{...fields}}}}, ...]
+You are a file triage assistant for a freelance project management tool.
 Your job is to explore a local directory on the user's device, understand its
 structure, and group files by project context.
-Allowed table names and their fields:
+You have access to these tools:
-{table_schemas}
+- list_directory: to map folder structure
 - get_file_metadata: to check creation/modification dates
 - read_file_content: to read brief snippets when needed for categorisation
 - list_projects / list_all_projects / get_project: to fetch existing projects
  from the user's workspace and match files to them
-Rules:
+Instructions:
- Only extract tables listed in the "data_types" instructions.
+1. Start by calling list_directory on the configured root path.
- Use camelCase field names exactly as shown above.
+2. Explore subdirectories as needed to understand the structure.
- Omit optional fields you cannot determine; do not invent data.
+3. Use get_file_metadata to check modification dates.  Skip files that have
- Never include id, createdAt, updatedAt, isAiSuggested, or isApproved.
+   NOT been modified since: {last_run_at}.
- If nothing relevant is found, return an empty JSON array: []
+4. Call list_all_projects to get the user's existing projects.
- Return ONLY the JSON array.
+5. Match files to existing projects by name, folder structure, or content hints.
 6. If files don't match any existing project, group them under "standalone".
 {custom_prompt_section}
 Target entity types to extract: {data_types}
 File extensions to consider: {file_extensions}
 When you have finished exploring, output ONLY a JSON object (no markdown
 fences, no explanation) mapping project IDs or "standalone" to file path
 arrays:
 {{"<project_id>": ["<file_path>", ...], "standalone": ["<file_path>", ...]}}
 Return ONLY the JSON object as your final message.
 """
 # ── Processing prompt ─────────────────────────────────────────────────────
 _PROCESSING_BASE_PROMPT = """\
 You are a data extraction and management assistant for a freelance project
 management tool.  You have access to tools for reading files and performing
 CRUD operations on the user's workspace.
 Your task:
 1. Read the full content of each file listed below using read_file_content.
 2. Based on the content and the user's instructions, create the appropriate
   records using the CRUD tools available to you (create_task, create_note,
   create_timeline, create_project, etc.).
 3. ONLY create records of these entity types: {data_types}.
 4. For every record you create, set isAiSuggested=1 and isApproved=0.
 5. Do NOT invent data.  Only extract what is clearly present in the files.
 6. If a file contains no relevant data for the target entity types, skip it.
 {project_context}
 Files to process:
 {file_list}
 {custom_prompt_section}
 After processing all files, respond with a brief summary of what you created.
 """
@@ -118,100 +157,145 @@ def _is_overdue(schedule_cron: str, last_run_at: datetime | None) -> bool:
        return False  # Fail-safe: don't trigger if expression is invalid.
-# ── LLM extraction ─────────────────────────────────────────────────────────
+# ── WS executor for agent context ─────────────────────────────────────────
-async def _extract_items_from_content(
+def _make_agent_executor(
    prompt_template: str,
    file_content: str,
    data_types: list[str],
 ) -> list[dict[str, Any]]:
    """Call the LLM to extract structured records from *file_content*.
    Returns a validated list of ``{table: str, data: dict}`` objects.
    Items referencing tables not in *data_types* are discarded.
    """
    allowed = [t for t in data_types if t in _ALLOWED_TABLES]
    if not allowed:
        return []
    schema_text = "\n".join(
        f"  {table}: {_TABLE_SCHEMAS.get(table, '(unknown)')}" for table in allowed
    )
    system_prompt = _EXTRACTION_SYSTEM_PROMPT.format(table_schemas=schema_text)
    user_prompt = (
        f"User instructions: {prompt_template}\n\n"
        f"Extract these record types: {', '.join(allowed)}\n\n"
        f"Document:\n{file_content[:8000]}"
    )
    llm = get_llm()
    raw = ""
    try:
        response = await llm.ainvoke(
            [SystemMessage(content=system_prompt), HumanMessage(content=user_prompt)]
        )
        raw = str(response.content).strip()
        items: list[dict] = json.loads(raw)
        if not isinstance(items, list):
            raise ValueError("LLM response is not a JSON array")
    except json.JSONDecodeError as exc:
        logger.warning(
            "agent_runner: LLM extraction returned invalid JSON: %s — snippet: %.200r",
            exc,
            raw,
        )
        return []
    # Other exceptions (LLM API errors, network errors) propagate to the
    # caller (run_local_agent) which records them per-file in the run log.
    validated: list[dict[str, Any]] = []
    for item in items:
        table = item.get("table")
        data = item.get("data")
        if not isinstance(table, str) or table not in allowed:
            continue
        if not isinstance(data, dict) or not data:
            continue
        # Strip any server-generated or forbidden fields.
        for _field in ("id", "createdAt", "updatedAt", "isAiSuggested", "isApproved"):
            data.pop(_field, None)
        validated.append({"table": table, "data": data})
    return validated
 # ── Tool-call insert helper ─────────────────────────────────────────────────
 async def _send_insert_to_client(
    user_id: str,
    table: str,
    data: dict[str, Any],
    device_mgr: DeviceConnectionManager,
-) -> dict[str, Any]:
+) -> Any:
-    """Send an ``insert`` tool_call frame to Electron and await the tool_result.
+    """Create a WS callback for ``set_client_executor()`` so that all tools
-
+    can use ``execute_on_client()`` during an agent run.
    All inserts include ``isAiSuggested=1, isApproved=0`` so the user can
    review AI-produced records before they are treated as confirmed.
    Raises ``asyncio.TimeoutError`` if Electron does not respond within
    ``_INSERT_TIMEOUT`` seconds.  Raises ``RuntimeError`` if the device
    disconnects before the frame can be sent.
    """
-    call_id = str(uuid.uuid4())
+    async def _executor(payload: dict) -> dict:
-    payload: dict[str, Any] = {
+        payload["type"] = "tool_call"
-        "type": "tool_call",
+        call_id = payload["id"]
-        "id": call_id,
+        fut = device_mgr.create_pending_call(user_id, call_id)
-        "action": "insert",
+        await device_mgr.send_frame(user_id, payload)
-        "table": table,
+        return await asyncio.wait_for(fut, timeout=_TOOL_CALL_TIMEOUT)
-        "data": {**data, "isAiSuggested": 1, "isApproved": 0},
+    return _executor
    }
    fut = device_mgr.create_pending_call(user_id, call_id)
    await device_mgr.send_frame(user_id, payload)
    return await asyncio.wait_for(fut, timeout=_INSERT_TIMEOUT)
-# ── Local agent runner ──────────────────────────────────────────────────────
+# ── LLM tool-calling loop (mirrors deep_agent._run_single_agent) ──────────
 def _as_text(content: Any) -> str:
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts: list[str] = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict):
                text = item.get("text")
                if isinstance(text, str):
                    parts.append(text)
        return "".join(parts)
    return str(content)
 async def _run_agent_with_tools(
    *,
    system_prompt: str,
    user_message: str,
    tools: list[Any],
    max_steps: int,
 ) -> str:
    """Run an LLM agent with tool-calling, returning the final text response.
    Follows the same pattern as ``deep_agent._run_single_agent``:
    bind tools → invoke → handle tool calls → repeat until final text.
    """
    llm = get_llm()
    llm_with_tools = llm.bind_tools(tools)
    messages: list[Any] = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_message),
    ]
    tool_calls_count = 0
    tool_map = {tool_def.name: tool_def for tool_def in tools}
    for _ in range(max_steps):
        response: AIMessage = await llm_with_tools.ainvoke(messages)
        messages.append(response)
        if not response.tool_calls:
            return _as_text(response.content)
        for call in response.tool_calls:
            tool_calls_count += 1
            call_id = str(call.get("id", ""))
            call_name = str(call.get("name", ""))
            call_args = call.get("args", {})
            logger.info(
                "agent_runner: tool_call name=%s args=%s",
                call_name,
                json.dumps(call_args, ensure_ascii=True)[:800],
            )
            tool_fn = tool_map.get(call_name)
            if tool_fn is None:
                tool_output = f"Unknown tool: {call_name}"
            else:
                tool_output = await tool_fn.ainvoke(call_args)
            logger.info(
                "agent_runner: tool_result name=%s output=%s",
                call_name,
                str(tool_output)[:1200],
            )
            messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
    # Fallback: exceeded max steps, get final response without tools.
    final = await llm.ainvoke(messages)
    return _as_text(final.content)
 # ── Triage map parser ─────────────────────────────────────────────────────
 def _parse_triage_map(raw: str) -> dict[str, list[str]] | None:
    """Extract the JSON triage map from the LLM's final response."""
    text = raw.strip()
    # Try direct parse first.
    try:
        parsed = json.loads(text)
        if isinstance(parsed, dict):
            return {k: v for k, v in parsed.items() if isinstance(v, list)}
    except json.JSONDecodeError:
        pass
    # Try extracting JSON from markdown fences or surrounding text.
    import re
    match = re.search(r"\{[\s\S]*\}", text)
    if match:
        try:
            parsed = json.loads(match.group(0))
            if isinstance(parsed, dict):
                return {k: v for k, v in parsed.items() if isinstance(v, list)}
        except json.JSONDecodeError:
            pass
    return None
 # ── Tool list builder ─────────────────────────────────────────────────────
 def _build_processing_tools(data_types: list[str]) -> list[Any]:
    """Build the tool list for Phase 2 based on user's data_types selection."""
    tools: list[Any] = list(FILESYSTEM_TOOLS)
    for dt in data_types:
        dt_tools = _DATA_TYPE_TOOLS.get(dt)
        if dt_tools:
            tools.extend(dt_tools)
    return tools
 # ── Local agent runner (two-phase) ─────────────────────────────────────────
 async def run_local_agent(
@@ -220,24 +304,19 @@ async def run_local_agent(
    run_log: AgentRunLog,
    device_mgr: DeviceConnectionManager,
 ) -> None:
-    """Execute a local directory agent run end-to-end.
+    """Execute a local directory agent run using two-phase LLM-with-tools.
-    Steps:
+    Phase 1 — Triage:
        Explore the directory structure, check metadata, match files to
        existing projects.  Output: a JSON map of project → file paths.
-    1. Verify the device identified by ``config.device_id`` is currently online.
+    Phase 2 — Processing:
-    2. Pre-create the agent_data queue so no incoming frames are lost.
+        For each project group, read full file contents and perform CRUD
-    3. Send ``agent_run`` frame to Electron (paths, extensions, prompt, data_types).
+        operations using the standard entity tools.
    4. Consume ``agent_data`` frames until the ``None`` sentinel from
       ``agent_complete``.
    5. For each received file call the LLM to extract ``{table, data}`` items.
    6. Push each item to Electron as an ``insert`` tool-call; include
       ``isAiSuggested=1, isApproved=0`` so users can review AI suggestions.
    7. Persist the run outcome (status, counts, errors) and update
       ``config.last_run_at``.
    """
    run_id = run_log.id
-    # ── 1. Device online check ─────────────────────────────────────────
+    # ── Device online check ─────────────────────────────────────────
    target_device_id = config.device_id.strip() if isinstance(config.device_id, str) else ""
    if target_device_id:
        is_online = device_mgr.is_online(user_id, target_device_id)
@@ -258,111 +337,128 @@ async def run_local_agent(
        )
        return
-    # ── 2. Pre-create agent_data queue ────────────────────────────────
+    # ── Set up WS executor for tools ────────────────────────────────
-    try:
+    executor = _make_agent_executor(user_id, device_mgr)
-        device_mgr.get_agent_data_queue(user_id, run_id)
+    set_client_executor(executor)
    except RuntimeError:
        await _finalize_run(
            run_log,
            status="error",
            errors=["Device disconnected before agent run could start"],
        )
        return
    # ── 3. Send agent_run frame ────────────────────────────────────────
    frame: dict[str, Any] = {
        "type": "agent_run",
        "run_id": run_id,
        "agent_id": config.id,
        "config": {
            "paths": config.directory_paths,
            "file_extensions": config.file_extensions,
            "prompt_template": config.prompt_template,
            "data_types": config.data_types,
        },
    }
    try:
        await device_mgr.send_frame(user_id, frame)
    except RuntimeError as exc:
        device_mgr.cleanup_agent_data_queue(user_id, run_id)
        await _finalize_run(
            run_log,
            status="error",
            errors=[f"Failed to send agent_run frame: {exc}"],
        )
        return
    logger.info(
        "agent_runner: sent agent_run run=%s agent=%s user=%s",
        run_id,
        config.id,
        user_id,
    )
    # ── 4. Consume agent_data frames ──────────────────────────────────
    files: list[dict[str, Any]] = []
    errors: list[str] = []
    try:
        queue = device_mgr.get_agent_data_queue(user_id, run_id)
        deadline = asyncio.get_event_loop().time() + _FILE_READ_TIMEOUT
        while True:
            remaining = deadline - asyncio.get_event_loop().time()
            if remaining <= 0:
                errors.append("Timed out waiting for file data from device")
                break
            try:
                frame_data = await asyncio.wait_for(queue.get(), timeout=remaining)
            except asyncio.TimeoutError:
                errors.append("Timed out waiting for file data from device")
                break
            if frame_data is None:
                # Sentinel from agent_complete — stream is done.
                break
            files.extend(frame_data.get("files", []))
    except RuntimeError as exc:
        errors.append(f"Queue error reading agent data: {exc}")
    # ── 5–6. Extract + insert ─────────────────────────────────────────
    items_processed = 0
    items_created = 0
-    for file_info in files:
+    try:
-        file_path: str = file_info.get("path", "<unknown>")
+        # ── Phase 1: Triage ─────────────────────────────────────────
-        content: str = file_info.get("content", "")
+        logger.info("agent_runner: run=%s phase=triage start user=%s", run_id, user_id)
-        if not content:
+
-            continue
+        last_run_str = "never (process all files)"
-        items_processed += 1
+        if config.last_run_at:
-        try:
+            last_run_str = config.last_run_at.isoformat()
-            extracted = await _extract_items_from_content(
+
-                config.prompt_template, content, config.data_types
+        custom_section = ""
        if config.prompt_template:
            custom_section = f"User instructions:\n{config.prompt_template}"
        file_ext_str = ", ".join(config.file_extensions) if config.file_extensions else "all"
        triage_prompt = _TRIAGE_SYSTEM_PROMPT.format(
            last_run_at=last_run_str,
            custom_prompt_section=custom_section,
            data_types=", ".join(config.data_types),
            file_extensions=file_ext_str,
        )
        directory_paths = config.directory_paths
        triage_user_msg = (
            f"Explore these directories and produce the triage map:\n"
            f"{json.dumps(directory_paths, ensure_ascii=False)}"
        )
        triage_tools: list[Any] = list(FILESYSTEM_TOOLS) + list(PROJECT_TOOLS)
        triage_response = await _run_agent_with_tools(
            system_prompt=triage_prompt,
            user_message=triage_user_msg,
            tools=triage_tools,
            max_steps=_MAX_TRIAGE_STEPS,
        )
        triage_map = _parse_triage_map(triage_response)
        if not triage_map:
            errors.append(f"Triage phase failed to produce a valid file map: {triage_response[:500]}")
            await _finalize_run(run_log, status="error", errors=errors)
            return
        logger.info(
            "agent_runner: run=%s triage complete groups=%d total_files=%d",
            run_id,
            len(triage_map),
            sum(len(files) for files in triage_map.values()),
        )
        # ── Phase 2: Processing (per group) ─────────────────────────
        processing_tools = _build_processing_tools(config.data_types)
        for group_key, file_paths in triage_map.items():
            if not file_paths:
                continue
            logger.info(
                "agent_runner: run=%s phase=processing group=%s files=%d",
                run_id,
                group_key,
                len(file_paths),
            )
        except Exception as exc:
            errors.append(f"LLM extraction error for {file_path!r}: {exc}")
            continue
-        for item in extracted:
+            # Build project context for the LLM.
            if group_key == "standalone":
                project_context = "These files are not associated with any existing project."
            else:
                project_context = f"These files belong to project ID: {group_key}. Use this project_id when creating records."
            file_list_str = "\n".join(f"- {fp}" for fp in file_paths)
            processing_prompt = _PROCESSING_BASE_PROMPT.format(
                data_types=", ".join(config.data_types),
                project_context=project_context,
                file_list=file_list_str,
                custom_prompt_section=custom_section,
            )
            items_processed += len(file_paths)
            try:
-                result = await _send_insert_to_client(
+                result_text = await _run_agent_with_tools(
-                    user_id, item["table"], item["data"], device_mgr
+                    system_prompt=processing_prompt,
                    user_message="Process the listed files now.",
                    tools=processing_tools,
                    max_steps=_MAX_PROCESSING_STEPS,
                )
-                if result.get("error"):
+                logger.info(
-                    errors.append(
+                    "agent_runner: run=%s group=%s processing_result=%s",
-                        f"Insert failed ({item['table']}, {file_path!r}): {result['error']}"
+                    run_id,
-                    )
+                    group_key,
-                else:
+                    result_text[:500],
-                    items_created += 1
+                )
-            except asyncio.TimeoutError:
+                # Count created items by scanning tool call results.
-                errors.append(
+                # The tools themselves handle creation; we estimate from the
-                    f"Timed out awaiting insert ack ({item['table']}, {file_path!r})"
+                # summary.  A more precise count would require intercepting
                # tool results, but the summary is sufficient for the run log.
            except Exception as exc:
                errors.append(f"Processing error for group '{group_key}': {exc}")
                logger.error(
                    "agent_runner: run=%s group=%s processing failed: %s",
                    run_id,
                    group_key,
                    exc,
                )
            except RuntimeError as exc:
                errors.append(f"Insert error ({item['table']}, {file_path!r}): {exc}")
-    # ── 7. Finalise ────────────────────────────────────────────────────
+    except Exception as exc:
-    device_mgr.cleanup_agent_data_queue(user_id, run_id)
+        errors.append(f"Agent run failed: {exc}")
        logger.error("agent_runner: run=%s failed: %s", run_id, exc)
    finally:
        clear_client_executor()
-    if errors and items_created == 0:
+    # ── Finalise ────────────────────────────────────────────────────
    if errors and items_processed == 0:
        final_status = "error"
    elif errors:
        final_status = "partial"
@@ -380,11 +476,10 @@ async def run_local_agent(
        config_type="local",
    )
    logger.info(
-        "agent_runner: run=%s done status=%s processed=%d created=%d errors=%d",
+        "agent_runner: run=%s done status=%s processed=%d errors=%d",
        run_id,
        final_status,
        items_processed,
        items_created,
        len(errors),
    )
@@ -411,8 +506,7 @@ async def run_cloud_agent(
    3. Instantiate the provider client (Gmail or MS Graph).
    4. Fetch messages/emails since ``config.last_run_at`` (or 7 days ago for
       the first run) applying ``config.filter_config`` filters.
-    5. For each message/email call ``_extract_items_from_content`` with
+    5. For each message/email call the LLM to extract structured items.
       ``config.prompt_template`` to get structured ``{table, data}`` items.
    6. Push each item to Electron as an ``insert`` tool-call.
    7. If the provider refreshed its access token, re-encrypt and write it
       back to ``config.oauth_token_encrypted``.
@@ -520,37 +614,40 @@ async def run_cloud_agent(
        user_id,
    )
-    # ── 5–6. Extract + insert ─────────────────────────────────────────
+    # ── 5–6. Extract + insert via LLM with tools ─────────────────────
-    for msg in raw_messages:
+    executor = _make_agent_executor(user_id, device_mgr)
-        content_text = msg.as_text
+    set_client_executor(executor)
-        if not content_text:
+
-            continue
+    try:
-        items_processed += 1
+        processing_tools = _build_processing_tools(config.data_types)
-        try:
+        custom_section = ""
-            extracted = await _extract_items_from_content(
+        if config.prompt_template:
-                config.prompt_template, content_text, config.data_types
+            custom_section = f"User instructions:\n{config.prompt_template}"
-            )
+
-        except Exception as exc:
+        for msg in raw_messages:
-            errors.append(f"LLM extraction error for message {msg.id!r}: {exc}")
+            content_text = msg.as_text
-            continue
+            if not content_text:
                continue
            items_processed += 1
            processing_prompt = _PROCESSING_BASE_PROMPT.format(
                data_types=", ".join(config.data_types),
                project_context="Determine the appropriate project from the message context.",
                file_list=f"Message from {config.provider} (id: {msg.id})",
                custom_prompt_section=custom_section,
            )
        for item in extracted:
            try:
-                result = await _send_insert_to_client(
+                await _run_agent_with_tools(
-                    user_id, item["table"], item["data"], device_mgr
+                    system_prompt=processing_prompt,
                    user_message=f"Process this message content:\n\n{content_text[:8000]}",
                    tools=processing_tools,
                    max_steps=_MAX_PROCESSING_STEPS,
                )
-                if result.get("error"):
+            except Exception as exc:
-                    errors.append(
+                errors.append(f"LLM processing error for message {msg.id!r}: {exc}")
-                        f"Insert failed ({item['table']}, msg={msg.id!r}): {result['error']}"
+    finally:
-                    )
+        clear_client_executor()
                else:
                    items_created += 1
            except asyncio.TimeoutError:
                errors.append(
                    f"Timed out awaiting insert ack ({item['table']}, msg={msg.id!r})"
                )
            except RuntimeError as exc:
                errors.append(f"Insert error ({item['table']}, msg={msg.id!r}): {exc}")
    # ── 7. Persist refreshed token (if any) ───────────────────────────
    refreshed = getattr(provider, "refreshed_credentials", None)
--- a/app/core/device_manager.py
+++ b/app/core/device_manager.py
@@ -3,20 +3,15 @@
 Maintains in-memory state for all active Electron → backend WebSocket
 connections.  One connection per user (latest replaces previous).
-The manager participates in two interaction patterns:
+The manager handles the **tool-call round-trip** pattern:
  - Backend sends ``tool_call`` frame → Electron executes the action →
    returns ``tool_result`` frame.
  - ``create_pending_call`` registers a Future keyed by ``call_id``.
  - ``resolve_pending_call`` fulfils the Future; callers awaiting it
    receive the result dict from Electron.
-1. **Tool-call round-trip** (bidirectional CRUD):
+This pattern is used by all tools (CRUD, file-system, etc.) via
-   - Backend sends ``tool_call`` frame → Electron executes CRUD → returns
+``execute_on_client()`` in ``ws_context.py``.
     ``tool_result`` frame.
   - ``create_pending_call`` registers a Future keyed by ``call_id``.
   - ``resolve_pending_call`` fulfils the Future; callers awaiting it
     receive the result dict from Electron.
 2. **Agent-data streaming** (local directory agent runs):
   - Backend sends ``agent_run`` frame → Electron reads files and sends
     back a stream of ``agent_data`` frames followed by ``agent_complete``.
   - ``get_agent_data_queue`` returns (or creates) an asyncio.Queue for
     a specific ``run_id`` so the agent runner can iterate frames.
 The ``device_manager`` module-level singleton is imported by both the
 device WS route and the agent runner.
@@ -42,8 +37,6 @@ class DeviceConnection:
    device_id: str
    # Futures indexed by tool_call id — resolved when tool_result arrives.
    pending_calls: dict[str, asyncio.Future[dict]] = field(default_factory=dict)
    # Per-run queues for agent_data / agent_complete frames.
    agent_data_queues: dict[str, asyncio.Queue[dict | None]] = field(default_factory=dict)
 class DeviceConnectionManager:
@@ -153,31 +146,6 @@ class DeviceConnectionManager:
        if fut is not None and not fut.done():
            fut.set_result(result)
    # ── Agent-data queue ──────────────────────────────────────────────
    def get_agent_data_queue(
        self, user_id: str, run_id: str
    ) -> asyncio.Queue[dict | None]:
        """Return (creating if absent) the queue for *run_id* agent frames.
        The agent runner reads from this queue.  The device WS handler writes
        to it.  ``None`` is the sentinel that signals the stream is finished.
        """
        conn = self._connections.get(user_id)
        if conn is None:
            raise RuntimeError(
                f"get_agent_data_queue: user {user_id!r} is not connected"
            )
        if run_id not in conn.agent_data_queues:
            conn.agent_data_queues[run_id] = asyncio.Queue()
        return conn.agent_data_queues[run_id]
    def cleanup_agent_data_queue(self, user_id: str, run_id: str) -> None:
        """Remove the queue for *run_id* once a run has completed."""
        conn = self._connections.get(user_id)
        if conn:
            conn.agent_data_queues.pop(run_id, None)
 # Module-level singleton — import this everywhere.
 device_manager = DeviceConnectionManager()
--- a/app/main.py
+++ b/app/main.py
@@ -50,7 +50,7 @@ def create_app() -> FastAPI:
    app.add_middleware(SanitizerMiddleware)
    app.add_middleware(TierRateLimitMiddleware)
-    from app.api.routes import agent_setup, agents, auth, backup, billing, chat, device_ws, plugins, storage, vectors
+    from app.api.routes import agents, auth, backup, billing, chat, device_ws, plugins, storage, vectors
    app.include_router(auth.router,       prefix="/api/v1")
    app.include_router(chat.router,       prefix="/api/v1")
@@ -60,7 +60,6 @@ def create_app() -> FastAPI:
    app.include_router(plugins.router,    prefix="/api/v1")
    app.include_router(billing.router,    prefix="/api/v1")
    app.include_router(agents.router,     prefix="/api/v1")
    app.include_router(agent_setup.router, prefix="/api/v1")
    app.include_router(device_ws.router,  prefix="/api/v1")
    @app.get("/api/v1/health", tags=["health"])
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -142,9 +142,6 @@ class WsFrameType(str, Enum):
    tool_result = "tool_result"
    final = "final"
    ping = "ping"
    agent_run = "agent_run"
    agent_data = "agent_data"
    agent_complete = "agent_complete"
    device_hello = "device_hello"
    # ── v3 frame types ─────────────────────────────────────────────────
    home_request = "home_request"
@@ -156,6 +153,10 @@ class WsFrameType(str, Enum):
    data_request = "data_request"
    data_response = "data_response"
    mutation = "mutation"
    # ── v4 journey frame types ────────────────────────────────────────
    journey_start = "journey_start"
    journey_message = "journey_message"
    journey_reply = "journey_reply"
 class WsToolCall(BaseModel):
@@ -208,31 +209,6 @@ class WsDeviceHello(BaseModel):
    agent_ids: list[str] = Field(default_factory=list)
 class WsAgentRun(BaseModel):
    """Server → Client: trigger an agent run on the connected device."""
    type: Literal[WsFrameType.agent_run] = WsFrameType.agent_run
    run_id: str
    agent_id: str
    config: dict[str, Any]
 class WsAgentData(BaseModel):
    """Client → Server: files read by the local agent."""
    type: Literal[WsFrameType.agent_data] = WsFrameType.agent_data
    run_id: str
    files: list[dict[str, Any]]
 class WsAgentComplete(BaseModel):
    """Client → Server: Electron signals it has finished reading files."""
    type: Literal[WsFrameType.agent_complete] = WsFrameType.agent_complete
    run_id: str
    files_read: int
    errors: list[str] = Field(default_factory=list)
 # ── WebSocket v3 Frame Models ─────────────────────────────────────────
@@ -319,6 +295,7 @@ class AgentCreationCheckResponse(BaseModel):
 class AgentTriggerRequest(BaseModel):
    directory: str = Field(min_length=1)
    device_id: str = Field(default="")
    what_to_extract: list[Literal["task", "note", "timeline", "project"]] = Field(min_length=1)
    actions_by_type: dict[
        Literal["task", "note", "timeline", "project"],
@@ -345,18 +322,3 @@ class AgentRunLogResponse(BaseModel):
 # ── Chatbot Journey ───────────────────────────────────────────────────
 class JourneyStartRequest(BaseModel):
    agent_type: Literal["local", "cloud"]
    agent_id: str | None = None
 class JourneyMessageRequest(BaseModel):
    session_id: str
    message: str
 class JourneyResponse(BaseModel):
    session_id: str
    message: str
    done: bool
    prompt_template: str | None = None