testing journey agent creation

2026-04-09 00:40:16 +02:00
parent 41db3a7089
commit 7253f6fe72
6 changed files with 133 additions and 70 deletions
--- a/app/agents/filesystem_agent.py
+++ b/app/agents/filesystem_agent.py
@@ -7,12 +7,31 @@ handles actual disk I/O and responds with ``tool_result`` frames.

 from __future__ import annotations

+import os
+import re
+from pathlib import Path
 from typing import Any

 from langchain_core.tools import tool

 from app.core.ws_context import execute_on_client

+# Max characters returned by read_file_content in journey (exploration) tools.
+# The journey only needs to understand file structure, not full content.
+# The cap is applied to the text before the truncation marker is appended, so
+# a truncated result is slightly longer than this value.
+_JOURNEY_READ_MAX_CHARS: int = 4000
+
+
+def _resolve_path(path: str, base: str) -> str:
+    """Resolve *path* against *base* when *path* is relative.
+
+    The LLM often passes ``"."`` meaning "the configured directory".
+    Without this, Electron resolves ``"."`` relative to its own CWD instead
+    of the user's chosen directory.
+
+    The resulting path is interpreted on the user's device, whose OS may
+    differ from the one running this code.  Absoluteness is therefore tested
+    with both POSIX and Windows rules instead of only the host's
+    (``os.path.isabs``); otherwise a Windows path such as ``C:\\Users\\me``
+    would be treated as relative on a POSIX host and glued onto *base*.
+
+    Args:
+        path: Path supplied by the LLM; absolute or relative.
+        base: Directory that relative paths are resolved against.
+
+    Returns:
+        *path* unchanged when it is absolute under either convention,
+        otherwise *base* joined with *path*.
+    """
+    # Local imports keep this fix self-contained; both modules are stdlib and
+    # importable on every platform.
+    import ntpath
+    import posixpath
+
+    if posixpath.isabs(path) or ntpath.isabs(path):
+        return path
+    return str(Path(base) / path)
+

@tool
 async def list_directory(path: str) -> str:
@@ -83,3 +102,93 @@ FILESYSTEM_TOOLS: list[Any] = [
    read_file_content,
    get_file_metadata,
 ]
+
+
+def make_directory_tools(base_directory: str) -> list[Any]:
+    """Return filesystem tools that resolve relative paths against *base_directory*.
+
+    Use this instead of ``FILESYSTEM_TOOLS`` whenever you know the user's target
+    directory upfront (e.g., journey setup sessions).  Relative paths like ``"."``
+    from the LLM are resolved to the correct absolute path before being sent to
+    the Electron client, preventing it from falling back to its own CWD.
+
+    Args:
+        base_directory: Directory that relative tool paths are resolved against.
+
+    Returns:
+        Three freshly created tools bound to *base_directory*, in the order
+        ``list_directory``, ``read_file_content``, ``get_file_metadata``.
+
+    Note:
+        The docstrings of the nested ``@tool`` functions are presumably what
+        the LLM receives as tool descriptions (confirm against the LangChain
+        version in use), so treat them as runtime text rather than comments.
+    """
+
+    def _compact_for_journey(raw: str) -> str:
+        """Strip HTML noise and truncate for journey exploration.
+
+        The journey LLM only needs to understand file structure (headers,
+        first paragraphs).  Full CSS/style blocks are pure noise that eat
+        up context window budget.
+
+        Removes ``<style>`` and ``<script>`` elements and HTML comments, then
+        cuts the remainder to ``_JOURNEY_READ_MAX_CHARS`` characters and
+        appends a truncation marker when anything was cut.
+        """
+        text = re.sub(r"<style[^>]*>.*?</style>", "", raw, flags=re.DOTALL | re.IGNORECASE)
+        text = re.sub(r"<script[^>]*>.*?</script>", "", text, flags=re.DOTALL | re.IGNORECASE)
+        text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
+        if len(text) > _JOURNEY_READ_MAX_CHARS:
+            text = text[:_JOURNEY_READ_MAX_CHARS] + "\n[…truncated for exploration]"
+        return text
+
+    @tool
+    async def list_directory(path: str) -> str:  # noqa: F811
+        """List files and folders in a local directory on the user's device.
+
+        Returns a formatted listing of entries with name, type (file/directory),
+        and full path.
+        """
+        resolved = _resolve_path(path, base_directory)
+        result = await execute_on_client(
+            action="list_directory",
+            data={"path": resolved},
+        )
+        entries: list[dict[str, Any]] = result.get("entries", [])
+        if not entries:
+            return f"Directory '{resolved}' is empty or does not exist."
+        # One bullet per entry; missing fields fall back to placeholders so a
+        # partial client response still renders.
+        lines: list[str] = []
+        for entry in entries:
+            entry_type = entry.get("type", "unknown")
+            entry_name = entry.get("name", "")
+            entry_path = entry.get("path", "")
+            lines.append(f"- [{entry_type}] {entry_name}  ({entry_path})")
+        return f"Directory listing for '{resolved}' ({len(entries)} entries):\n" + "\n".join(lines)
+
+    @tool
+    async def read_file_content(path: str) -> str:  # noqa: F811
+        """Read the text content of a local file on the user's device.
+
+        Returns the file content as a string.  Large files may be truncated
+        by the Electron client.
+        """
+        resolved = _resolve_path(path, base_directory)
+        result = await execute_on_client(
+            action="read_file_content",
+            data={"path": resolved},
+        )
+        content: str = result.get("content", "")
+        if not content:
+            return f"File '{resolved}' is empty or could not be read."
+        compacted = _compact_for_journey(content)
+        # A file made up solely of <style>/<script>/comment blocks (or only
+        # whitespace) compacts to nothing; give the LLM an explicit message
+        # instead of a blank tool result it cannot act on.
+        if not compacted.strip():
+            return f"File '{resolved}' has no text content left after removing style, script and comment blocks."
+        return compacted
+
+    @tool
+    async def get_file_metadata(path: str) -> str:  # noqa: F811
+        """Get metadata for a local file: size, creation date, modification date, extension.
+
+        Returns a formatted summary of the file's metadata.
+        """
+        resolved = _resolve_path(path, base_directory)
+        result = await execute_on_client(
+            action="get_file_metadata",
+            data={"path": resolved},
+        )
+        # Every field is optional in the client response; absent ones are
+        # reported as "unknown" and the name falls back to the resolved path.
+        size = result.get("size", "unknown")
+        created = result.get("createdAt", "unknown")
+        modified = result.get("modifiedAt", "unknown")
+        extension = result.get("extension", "unknown")
+        name = result.get("name", resolved)
+        return (
+            f"File: {name}\n"
+            f"  Extension: {extension}\n"
+            f"  Size: {size} bytes\n"
+            f"  Created: {created}\n"
+            f"  Modified: {modified}"
+        )
+
+    return [list_directory, read_file_content, get_file_metadata]
--- a/app/agents/note_agent.py
+++ b/app/agents/note_agent.py
@@ -18,21 +18,6 @@ _UUID_RE = re.compile(
 def _is_uuid(value: str) -> bool:
    return bool(_UUID_RE.match(value))

-NOTE_SYSTEM_PROMPT = (
-    "You are a note-taking assistant. You help users create, retrieve, update,\n"
-    "and delete Markdown notes in their workspace.\n\n"
-    "Rules:\n"
-    "  - content is always Markdown; preserve formatting when updating\n"
-    "  - project_id is optional; link a note to a project when mentioned\n"
-    "  - When updating, call get_note first if you need to read existing content\n"
-    "    before appending or replacing sections\n"
-    "  - list_notes without project_id returns all notes; scope with project_id\n"
-    "    when the user is working within a specific project\n"
-    "  - project_id must be a UUID; if you only know a project name, do not pass it as project_id\n"
-    "  - Do not fabricate note content — reflect what the user provides or what\n"
-    "    is already in the note (retrieved via get_note)."
-)
-

@tool
 async def list_notes(project_id: str = "") -> str:
--- a/app/agents/project_agent.py
+++ b/app/agents/project_agent.py
@@ -8,22 +8,6 @@ from langchain_core.tools import tool

 from app.core.ws_context import execute_on_client

-PROJECT_SYSTEM_PROMPT = (
-    "You are a project management assistant. You help users create, find,\n"
-    "update, and archive projects in their workspace.\n\n"
-    "Rules:\n"
-    "  - status must be one of: active, archived\n"
-    "  - client_id is optional; link to a client only when explicitly mentioned\n"
-    "  - ai_summary is populated only when the user asks for a project summary;\n"
-    "    derive it from context data — do not fabricate content\n"
-    "  - Use list_projects for scoped queries; list_all_projects only when the\n"
-    "    user wants a complete cross-client view including archived projects\n"
-    "  - get_project requires a project UUID; resolve the ID first by calling\n"
-    "    list_projects if you only have a project name\n"
-    "  - Prefer archiving (update_project status=archived) over deletion;\n"
-    "    only call delete_project when the user explicitly confirms deletion."
-)
-

@tool
 async def list_projects(
--- a/app/agents/task_agent.py
+++ b/app/agents/task_agent.py
@@ -18,23 +18,6 @@ _UUID_RE = re.compile(
 def _is_uuid(value: str) -> bool:
    return bool(_UUID_RE.match(value))

-TASK_SYSTEM_PROMPT = (
-    "You are a task management assistant for a project workspace.\n"
-    "You create, update, list, and track tasks and their comments.\n\n"
-    "Rules:\n"
-    "  - status must be one of: todo, in_progress, done\n"
-    "  - priority must be one of: high, medium, low\n"
-    "  - due_date is a Unix timestamp in milliseconds; convert human dates\n"
-    "  - assignees is a JSON-encoded array of strings (e.g. '[\"Alice\",\"Bob\"]')\n"
-    "  - project_id is optional; link to a project when the user mentions one\n"
-    "  - is_ai_suggested: 1 only when proactively proposing a task the user\n"
-    "    did not explicitly request; 0 otherwise\n"
-    "  - is_ai_suggested: 1 only when proactively proposing a task the user did not explicitly request; 0 otherwise\n"
-    "  - Use list_tasks_due_today for 'what's due today' queries\n"
-    "  - For update_task, use -1 for integer fields you do not want to change\n"
-    "  - Always confirm the action in plain, user-friendly language."
-)
-

 # ── Task tools ────────────────────────────────────────────────────────

--- a/app/agents/timeline_agent.py
+++ b/app/agents/timeline_agent.py
@@ -17,20 +17,6 @@ _UUID_RE = re.compile(
 def _is_uuid(value: str) -> bool:
    return bool(_UUID_RE.match(value))

-TIMELINE_SYSTEM_PROMPT = (
-    "You are a project timeline assistant. Timelines are milestone dates that\n"
-    "track progress on a project — they are not calendar events.\n\n"
-    "Rules:\n"
-    "  - project_id is REQUIRED for every create; confirm with the user if unknown\n"
-    "  - For listing, project_id must be a UUID; never pass plain names as project_id\n"
-    "  - date is a Unix timestamp in milliseconds; convert human-readable dates\n"
-    "  - is_ai_suggested: 1 when proactively proposing a timeline, 0 otherwise\n"
-    "  - is_ai_suggested: 1 when proactively proposing a timeline, 0 otherwise\n"
-    "  - For update_timeline, use -1 for integer fields you do not want to change\n"
-    "  - Listing without a project_id returns all timelines across projects\n"
-    "  - Always echo the title and formatted date in your confirmation."
-)
-

@tool
 async def list_timelines(project_id: str = "") -> str:
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -31,7 +31,7 @@ from typing import Any

 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

-from app.agents.filesystem_agent import FILESYSTEM_TOOLS
+from app.agents.filesystem_agent import make_directory_tools
 from app.config.settings import settings
 from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
 from app.core.llm import get_llm
@@ -273,7 +273,7 @@ async def _call_llm_with_tools(
    _span = _span_ctx.__enter__() if _span_ctx else None

    try:
-        for _ in range(_MAX_TOOL_STEPS):
+        for step in range(_MAX_TOOL_STEPS):
            _gen_ctx = (
                lf.start_as_current_observation(
                    as_type="generation",
@@ -290,12 +290,24 @@ async def _call_llm_with_tools(
                _gen.update(output=_as_text(response.content), usage=extract_usage(response))
                _gen_ctx.__exit__(None, None, None)

+            resp_text = _as_text(response.content)
+
+            # Guard against empty responses (e.g. model returned finish_reason
+            # 'error' which LiteLLM maps to 'stop' with empty content).
+            if not response.tool_calls and not resp_text.strip():
+                logger.warning(
+                    "agent_setup: journey LLM returned empty response at step %d — retrying",
+                    step,
+                )
+                # Drop the empty AIMessage so we don't pollute history, and retry.
+                continue
+
            messages.append(response)

            if not response.tool_calls:
                if _span:
-                    _span.update(output=_as_text(response.content))
-                return _as_text(response.content)
+                    _span.update(output=resp_text)
+                return resp_text

            for call in response.tool_calls:
                call_name = str(call.get("name", ""))
@@ -324,7 +336,10 @@ async def _call_llm_with_tools(
        final_text = _as_text(final.content)
        if _span:
            _span.update(output=final_text)
-        return final_text
+        return final_text or (
+            "Sorry, I had trouble processing the files. "
+            "Could you try again? If the issue persists, the files might be too large for me to analyse."
+        )
    finally:
        if _span_ctx:
            _span_ctx.__exit__(None, None, None)
@@ -372,7 +387,7 @@ async def handle_journey_start(
    ai_reply = await _call_llm_with_tools(
        system_prompt=system_prompt,
        history=seed_history,
-        tools=list(FILESYSTEM_TOOLS),
+        tools=make_directory_tools(directory),
        user_id=user_id,
        session_id=session_id,
        langfuse_prompt=langfuse_prompt,
@@ -436,10 +451,11 @@ async def handle_journey_message(
    session.history.append({"role": "user", "content": message})

    # Call the LLM with tools.
+    session_tools = make_directory_tools(session.directory)
    ai_reply = await _call_llm_with_tools(
        system_prompt=session.system_prompt,
        history=session.history,
-        tools=list(FILESYSTEM_TOOLS),
+        tools=session_tools,
        user_id=session.user_id,
        session_id=session_id,
        langfuse_prompt=session.langfuse_prompt,
@@ -464,7 +480,7 @@ async def handle_journey_message(
            nudge_reply = await _call_llm_with_tools(
                system_prompt=session.system_prompt,
                history=session.history,
-                tools=list(FILESYSTEM_TOOLS),
+                tools=session_tools,
                user_id=session.user_id,
                session_id=session_id,
                langfuse_prompt=session.langfuse_prompt,