refactor local directory agent to two-phase LLM-with-tools architecture

Replace the single-pass FE-driven agent_run/agent_data flow with a BE-orchestrated two-phase execution using LangChain tool-calling: - Phase 1 (Triage): explores directory via new filesystem tools, matches files to existing projects using PROJECT_TOOLS - Phase 2 (Processing): reads files and performs CRUD per project group with clean LLM context windows Key changes: - Add filesystem_agent.py with list_directory, read_file_content, get_file_metadata tools using execute_on_client() - Move setup journey from REST to WebSocket (journey_start/message frames) - Add batch_runs_per_day billing limit and enforce in /trigger - Remove deprecated agent_data/agent_complete frame handlers and queues Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 08:50:46 +01:00
parent 5faa6b1d7c
commit 826f64d6bb
10 changed files with 801 additions and 569 deletions
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -1,54 +1,40 @@
-"""Chatbot Journey endpoints — guided conversation to build an agent prompt_template.
+"""Chatbot Journey — WS-based guided conversation to build an agent prompt_template.

-Endpoints:
-  POST /agents/journey/start    — start a new journey session
-  POST /agents/journey/message  — continue the conversation
-
-Sessions are stored in-memory with a 30-minute TTL.  Stale entries are
-cleaned up lazily on access.  Upgrade to Redis for multi-instance deployments.
+The journey is driven entirely through WebSocket frames (no REST endpoints).
+The device WS handler dispatches ``journey_start`` and ``journey_message``
+frames to the functions exported here.

 Journey flow:
-  1. Client sends ``{ agent_type, agent_id? }`` to ``/start``.
-  2. Server creates a session, calls the LLM with a contextual system prompt,
-     and returns the first question.
-  3. Client sends follow-up messages to ``/message``.
-  4. After 3-5 turns the LLM wraps up by emitting a ``prompt_template`` block
-     delimited by ``PROMPT_TEMPLATE_START`` / ``PROMPT_TEMPLATE_END``.
-  5. Server parses the block, sets ``done=True``, and returns the template.
-
-The ``prompt_template`` from the final response is meant to be stored by
-the Electron client in local agent settings and later sent to
-``POST /agents/trigger`` when a run is executed.
+  1. FE sends ``journey_start`` frame with basic agent config (directory,
+     data_types, schedule).
+  2. Server creates an in-memory session, sets up a WS executor so the
+     setup LLM can use file-system tools, does a first directory scrape,
+     and sends back a ``journey_reply`` with the first question.
+  3. FE sends ``journey_message`` frames for each user reply.
+  4. Server appends the user message, calls the LLM (which may read files
+     via tools), and sends back a ``journey_reply``.
+  5. After 3-5 turns the LLM wraps up by emitting a ``prompt_template``
+     block delimited by ``PROMPT_TEMPLATE_START`` / ``PROMPT_TEMPLATE_END``.
+  6. Server parses the block, sends ``journey_reply`` with ``done=True``
+     and the template.  FE stores it locally.
 """

 from __future__ import annotations

+import json
 import logging
 import time
 import uuid
 from dataclasses import dataclass, field
 from typing import Any

-from fastapi import APIRouter, Depends, HTTPException, status
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

-from app.api.deps import get_current_user
+from app.agents.filesystem_agent import FILESYSTEM_TOOLS
 from app.core.llm import get_llm
-from app.db import get_session
-from app.models import CloudAgentConfig, LocalAgentConfig
-from app.schemas import (
-    JourneyMessageRequest,
-    JourneyResponse,
-    JourneyStartRequest,
-    UserProfile,
-)

 logger = logging.getLogger(__name__)

-router = APIRouter(prefix="/agents/journey", tags=["agents"])
-
 # ── Session TTL ───────────────────────────────────────────────────────────

 _SESSION_TTL_SECONDS: int = 1800  # 30 minutes
@@ -59,16 +45,21 @@ _TEMPLATE_END = "PROMPT_TEMPLATE_END"

 # Maximum number of conversation turns before the LLM is nudged to wrap up.
 _MAX_TURNS: int = 5
+# Max tool-calling steps per LLM invocation.
+_MAX_TOOL_STEPS: int = 6

 # ── In-memory session store ───────────────────────────────────────────────


@dataclass
-class _JourneySession:
+class JourneySession:
    session_id: str
    user_id: str
    agent_type: str  # "local" | "cloud"
+    directory: str
+    data_types: list[str]
    history: list[dict[str, Any]] = field(default_factory=list)
+    system_prompt: str = ""
    created_at: float = field(default_factory=time.monotonic)

    def is_expired(self) -> bool:
@@ -76,67 +67,70 @@ class _JourneySession:


 # session_id → session
-_sessions: dict[str, _JourneySession] = {}
+_sessions: dict[str, JourneySession] = {}


-def _get_session(session_id: str, user_id: str) -> _JourneySession:
-    """Retrieve session; raise 404 on missing, expired, or wrong owner."""
+def get_journey_session(session_id: str, user_id: str) -> JourneySession | None:
+    """Retrieve session; return None on missing, expired, or wrong owner."""
    s = _sessions.get(session_id)
    if s is None or s.is_expired():
        _sessions.pop(session_id, None)
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Journey session not found or expired")
+        return None
    if s.user_id != user_id:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Journey session not found or expired")
+        return None
    return s


 # ── System prompt builder ─────────────────────────────────────────────────

-_LOCAL_PREAMBLE = """\
-What kind of files are in the directories you want to monitor? \
-(for example: emails saved as .eml, documents in .pdf or .txt, markdown notes, etc.)"""
-
-_CLOUD_PREAMBLE = """\
-What kind of emails or messages should I look for? \
-(for example: client communications, invoices, meeting notes, project updates, etc.)"""
-
 _SYSTEM_PROMPT_TEMPLATE = """\
 You are a friendly assistant helping a freelancer configure a data-extraction agent.
-Your job is to understand exactly what data the user wants to extract from their {source_description} \
-and produce a detailed prompt_template that a separate AI will use as its instruction set.
+Your job is to understand exactly what data the user wants to extract from their
+local directory and produce a detailed prompt_template that a separate AI will use
+as its instruction set.

-Ask concise, focused questions one at a time.  Cover these topics (not necessarily in this order):
-  1. The type and format of the source content.
+You have access to file-system tools to explore the user's directory:
+- list_directory: to see folder structure
+- read_file_content: to peek at file contents
+- get_file_metadata: to check file info
+
+The user's configured directory is: {directory}
+Target data types: {data_types}
+
+Start by exploring the directory to understand its structure.  Then ask concise,
+focused questions one at a time.  Cover these topics (not necessarily in this order):
+  1. The type and format of the source content (confirmed by your exploration).
  2. Which data types to extract: tasks, notes, timelines, and/or projects.
-  3. How fields should be mapped (e.g. email subject → task title).
+  3. How fields should be mapped (e.g. filename → task title).
  4. Priority or status rules (e.g. "urgent" keyword → high priority).
  5. Any special handling, date extraction, or exclusions.

-After 3-5 questions (when you have enough information), output the final prompt_template between \
-these exact markers on their own lines:
+After 3-5 questions (when you have enough information), output the final prompt_template
+between these exact markers on their own lines:

 {template_start}
 <the complete extraction prompt here>
 {template_end}

-The prompt_template must be a self-contained instruction for an AI that receives a document/email/message \
-and must return a JSON array of records in this shape:
-  [{{ "table": "<tasks|notes|timelines|projects>", "data": {{ <field: value> }} }}, ...]
+The prompt_template must be a self-contained instruction for an AI that reads files
+and must perform CRUD operations using tools to create records.  It should specify:
+  - What entity types to create (tasks, notes, timelines, projects).
+  - How to map file content to record fields (camelCase: title, status, priority,
+    dueDate, projectId, content, etc.).
+  - That isAiSuggested must be set to 1 and isApproved to 0 on every record.
+  - Concrete examples of mappings based on what you discovered in the directory.

-Rules for the generated template:
-  - Be explicit about field names (camelCase: title, status, priority, dueDate, projectId, content, etc.).
-  - Include concrete examples of mappings.
-  - Mention that Electron adds id/createdAt/updatedAt automatically.
-  - Set isAiSuggested: true and isApproved: false on every record.
 {existing_section}\
-Do not ask more than {max_turns} questions total. Start with your first question now.\
+Do not ask more than {max_turns} questions total.  Begin by exploring the directory,
+then ask your first question.\
 """


-def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
-    source_description = (
-        "files in local directories" if agent_type == "local" else "emails and messages from cloud providers"
-    )
+def _build_system_prompt(
+    directory: str,
+    data_types: list[str],
+    existing_template: str | None = None,
+) -> str:
    existing_section = (
        f"\nThe user already has the following prompt_template — refine it based on their answers:\n"
        f"---\n{existing_template}\n---\n"
@@ -144,7 +138,8 @@ def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
        else ""
    )
    return _SYSTEM_PROMPT_TEMPLATE.format(
-        source_description=source_description,
+        directory=directory,
+        data_types=", ".join(data_types),
        template_start=_TEMPLATE_START,
        template_end=_TEMPLATE_END,
        existing_section=existing_section,
@@ -152,10 +147,6 @@ def _build_system_prompt(agent_type: str, existing_template: str | None) -> str:
    )


-def _first_question(agent_type: str) -> str:
-    return _LOCAL_PREAMBLE if agent_type == "local" else _CLOUD_PREAMBLE
-
-
 # ── Template extraction ───────────────────────────────────────────────────


@@ -168,11 +159,37 @@ def _extract_template(text: str) -> str | None:
    return text[start_idx:end_idx].strip() or None


-# ── LLM call ─────────────────────────────────────────────────────────────
+# ── LLM call with tool support ───────────────────────────────────────────


-async def _call_llm(system_prompt: str, history: list[dict[str, Any]]) -> str:
-    """Build LangChain messages from history and invoke the LLM."""
+def _as_text(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "".join(parts)
+    return str(content)
+
+
+async def _call_llm_with_tools(
+    system_prompt: str,
+    history: list[dict[str, Any]],
+    tools: list[Any],
+) -> str:
+    """Build LangChain messages from history and invoke the LLM with tools.
+
+    Handles tool-calling loops: if the LLM calls tools, execute them and
+    continue until a final text response is produced.
+    """
    messages: list[Any] = [SystemMessage(content=system_prompt)]
    for turn in history:
        if turn["role"] == "user":
@@ -181,126 +198,161 @@ async def _call_llm(system_prompt: str, history: list[dict[str, Any]]) -> str:
            messages.append(AIMessage(content=turn["content"]))

    llm = get_llm(model=None, temperature=0.4)
-    response = await llm.ainvoke(messages)
-    return response.content  # type: ignore[return-value]
+    llm_with_tools = llm.bind_tools(tools)
+    tool_map = {tool_def.name: tool_def for tool_def in tools}
+
+    for _ in range(_MAX_TOOL_STEPS):
+        response: AIMessage = await llm_with_tools.ainvoke(messages)
+        messages.append(response)
+
+        if not response.tool_calls:
+            return _as_text(response.content)
+
+        for call in response.tool_calls:
+            call_name = str(call.get("name", ""))
+            call_args = call.get("args", {})
+            logger.info(
+                "agent_setup: journey tool_call name=%s args=%s",
+                call_name,
+                json.dumps(call_args, ensure_ascii=True)[:500],
+            )
+
+            tool_fn = tool_map.get(call_name)
+            if tool_fn is None:
+                tool_output = f"Unknown tool: {call_name}"
+            else:
+                tool_output = await tool_fn.ainvoke(call_args)
+
+            logger.info(
+                "agent_setup: journey tool_result name=%s output=%s",
+                call_name,
+                str(tool_output)[:800],
+            )
+            messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
+
+    # Fallback: exceeded max steps.
+    final = await llm.ainvoke(messages)
+    return _as_text(final.content)


-# ── Existing-config loader ────────────────────────────────────────────────
+# ── Journey handlers (called from device_ws.py) ──────────────────────────


-async def _load_existing_template(
-    agent_id: str,
+async def handle_journey_start(
    user_id: str,
-    db: AsyncSession,
-) -> str | None:
-    """Return the prompt_template of an existing agent config, or None."""
-    # Try local first, then cloud.
-    local_result = await db.execute(
-        select(LocalAgentConfig).where(
-            LocalAgentConfig.id == agent_id,
-            LocalAgentConfig.user_id == user_id,
-        )
-    )
-    local = local_result.scalar_one_or_none()
-    if local is not None:
-        return local.prompt_template
+    frame: dict[str, Any],
+) -> dict[str, Any]:
+    """Handle a ``journey_start`` WS frame.

-    cloud_result = await db.execute(
-        select(CloudAgentConfig).where(
-            CloudAgentConfig.id == agent_id,
-            CloudAgentConfig.user_id == user_id,
-        )
-    )
-    cloud = cloud_result.scalar_one_or_none()
-    return cloud.prompt_template if cloud is not None else None
-
-
-# ── Routes ────────────────────────────────────────────────────────────────
-
-
-@router.post("/start", response_model=JourneyResponse, status_code=status.HTTP_200_OK)
-async def start_journey(
-    body: JourneyStartRequest,
-    current_user: UserProfile = Depends(get_current_user),
-    db: AsyncSession = Depends(get_session),
-) -> JourneyResponse:
-    """Start a new Chatbot Journey session.
-
-    If ``agent_id`` is provided the session is pre-seeded with the existing
-    agent's ``prompt_template`` so the user can refine it.
+    Creates a session, runs the setup LLM with directory exploration,
+    and returns the ``journey_reply`` payload.
    """
-    # Load existing template (may be None).
-    existing_template: str | None = None
-    if body.agent_id:
-        existing_template = await _load_existing_template(body.agent_id, current_user.id, db)
-        # If agent_id was given but not found, proceed without seeding (don't 404 —
-        # the user may be starting a fresh journey for a not-yet-persisted config).
-
-    system_prompt = _build_system_prompt(body.agent_type, existing_template)
-    first_question = _first_question(body.agent_type)
+    agent_type = frame.get("agent_type", "local")
+    directory = frame.get("directory", "")
+    data_types = frame.get("data_types", [])
+    existing_template = frame.get("existing_template")

    session_id = str(uuid.uuid4())
-    session = _JourneySession(
+    system_prompt = _build_system_prompt(directory, data_types, existing_template)
+
+    session = JourneySession(
        session_id=session_id,
-        user_id=current_user.id,
-        agent_type=body.agent_type,
-        # Seed history with the AI's first question so it stays consistent.
-        history=[{"role": "assistant", "content": first_question}],
+        user_id=user_id,
+        agent_type=agent_type,
+        directory=directory,
+        data_types=data_types,
+        system_prompt=system_prompt,
    )
-    # Store the system prompt inside the session for reuse in /message.
-    session.__dict__["_system_prompt"] = system_prompt  # type: ignore[index]
+
+    # The LLM will explore the directory using FILESYSTEM_TOOLS via the
+    # ws_context executor (already set by the WS handler before calling us).
+    ai_reply = await _call_llm_with_tools(
+        system_prompt=system_prompt,
+        history=[],
+        tools=list(FILESYSTEM_TOOLS),
+    )
+
+    session.history.append({"role": "assistant", "content": ai_reply})
    _sessions[session_id] = session

-    logger.info("Journey session %s started for user %s (agent_type=%s)", session_id, current_user.id, body.agent_type)
-    return JourneyResponse(session_id=session_id, message=first_question, done=False)
+    logger.info(
+        "agent_setup: journey session %s started for user %s (directory=%s)",
+        session_id,
+        user_id,
+        directory,
+    )

-
-@router.post("/message", response_model=JourneyResponse, status_code=status.HTTP_200_OK)
-async def send_journey_message(
-    body: JourneyMessageRequest,
-    current_user: UserProfile = Depends(get_current_user),
-    db: AsyncSession = Depends(get_session),
-) -> JourneyResponse:
-    """Send a message in an existing Chatbot Journey session.
-
-    The server appends the user's message to the conversation history,
-    calls the LLM, and appends the AI reply.  When the LLM wraps up with a
-    ``prompt_template`` block the response includes ``done=True`` and the
-    extracted template.
-    """
-    session = _get_session(body.session_id, current_user.id)
-    system_prompt: str = session.__dict__.get("_system_prompt", _build_system_prompt(session.agent_type, None))  # type: ignore[assignment]
-
-    # Append user turn to history.
-    session.history.append({"role": "user", "content": body.message})
-
-    # Call the LLM with the full conversation so far.
-    ai_reply = await _call_llm(system_prompt, session.history)
-
-    # Append AI turn.
-    session.history.append({"role": "assistant", "content": ai_reply})
-
-    # Check if the LLM produced the final template.
+    # Check if the LLM produced the template on the first turn (unlikely but possible).
    prompt_template = _extract_template(ai_reply)
    done = prompt_template is not None

-    # Strip the sentinel markers from the message shown to the user.
    display_message = ai_reply
    if done:
        display_message = (
            ai_reply[: ai_reply.index(_TEMPLATE_START)].strip()
            or "Here is your agent configuration. You can save it or continue refining."
        )
+        _sessions.pop(session_id, None)

+    return {
+        "type": "journey_reply",
+        "session_id": session_id,
+        "message": display_message,
+        "done": done,
+        "prompt_template": prompt_template,
+    }
+
+
+async def handle_journey_message(
+    user_id: str,
+    frame: dict[str, Any],
+) -> dict[str, Any]:
+    """Handle a ``journey_message`` WS frame.
+
+    Appends the user message, calls the LLM, and returns the
+    ``journey_reply`` payload.
+    """
+    session_id = frame.get("session_id", "")
+    message = frame.get("message", "")
+
+    session = get_journey_session(session_id, user_id)
+    if session is None:
+        return {
+            "type": "journey_reply",
+            "session_id": session_id,
+            "message": "Journey session not found or expired. Please start a new setup.",
+            "done": True,
+            "prompt_template": None,
+        }
+
+    # Append user turn.
+    session.history.append({"role": "user", "content": message})
+
+    # Call the LLM with tools.
+    ai_reply = await _call_llm_with_tools(
+        system_prompt=session.system_prompt,
+        history=session.history,
+        tools=list(FILESYSTEM_TOOLS),
+    )
+
+    session.history.append({"role": "assistant", "content": ai_reply})
+
+    # Check if the LLM produced the final template.
+    prompt_template = _extract_template(ai_reply)
+    done = prompt_template is not None
+
+    display_message = ai_reply
    if done:
-        logger.info("Journey session %s completed for user %s", body.session_id, current_user.id)
-        # Clean up the session immediately on completion.
-        _sessions.pop(body.session_id, None)
+        display_message = (
+            ai_reply[: ai_reply.index(_TEMPLATE_START)].strip()
+            or "Here is your agent configuration. You can save it or continue refining."
+        )
+        _sessions.pop(session_id, None)
+        logger.info("agent_setup: journey session %s completed for user %s", session_id, user_id)
    else:
        # Nudge the LLM to wrap up after max turns.
        turns = sum(1 for t in session.history if t["role"] == "user")
        if turns >= _MAX_TURNS:
-            # Add a system-level nudge as a hidden user message.
            session.history.append({
                "role": "user",
                "content": (
@@ -309,9 +361,10 @@ async def send_journey_message(
                ),
            })

-    return JourneyResponse(
-        session_id=body.session_id,
-        message=display_message,
-        done=done,
-        prompt_template=prompt_template,
-    )
+    return {
+        "type": "journey_reply",
+        "session_id": session_id,
+        "message": display_message,
+        "done": done,
+        "prompt_template": prompt_template,
+    }