feat: enhance agent configuration and model management with per-agent overrides

2026-04-10 08:45:14 +02:00
9 changed files with 106 additions and 22 deletions
--- a/.env.example
+++ b/.env.example
@@ -13,10 +13,45 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30
 # ── LLM ───────────────────────────────────────────────────────────────────────
 # LiteLLM model identifiers — change to swap providers without code changes.
 # Examples: gpt-4o, anthropic/claude-sonnet-4-20250514, gemini/gemini-pro, ollama/llama3
+#
+# API keys — only the key(s) matching your chosen provider(s) are required.
+# The correct key is picked automatically from the model prefix (e.g.
+# "anthropic/..." → ANTHROPIC_API_KEY, "gemini/..." → GOOGLE_API_KEY).
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
 GOOGLE_API_KEY=
+CEREBRAS_API_KEY=
+
+# Default model used by any agent that does not have a specific override below.
 LLM_MODEL=gpt-5-mini
+LLM_EMBED_MODEL=text-embedding-3-small
+
+# GitHub Copilot — leave empty to use the LiteLLM default token directory.
+# In Docker, point this to a named-volume path so tokens survive restarts.
+# GITHUB_COPILOT_TOKEN_DIR=
+
+# ── Per-agent model overrides ─────────────────────────────────────────────────
+# Leave a value empty to fall back to LLM_MODEL.
+# Each agent resolves its API key from the model prefix automatically.
+#
+# Intent classifier — routes user messages to the right domain agent.
+# A small/fast model (e.g. gpt-4o-mini) is usually sufficient here.
+LLM_MODEL_CLASSIFIER=
+
+# Home-agent — handles chat from the home screen (all tools available).
+LLM_MODEL_HOME_AGENT=
+
+# Floating-agent — handles contextual chat triggered from a task/project/note.
+LLM_MODEL_FLOATING_AGENT=
+
+# Unified-processor — processes local directory files (local agent runner).
+LLM_MODEL_UNIFIED_PROCESSOR=
+
+# Cloud-processor — fetches and processes data from cloud connectors.
+LLM_MODEL_CLOUD_PROCESSOR=
+
+# Setup-agent — guided journey to build an AgentConfig via WebSocket chat.
+LLM_MODEL_SETUP_AGENT=

 # ── Stripe (leave empty to stub billing) ──────────────────────────────────────
 STRIPE_SECRET_KEY=
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -32,9 +32,8 @@ from typing import Any
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

 from app.agents.filesystem_agent import make_directory_tools
-from app.config.settings import settings
 from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
-from app.core.llm import get_llm
+from app.core.llm import get_agent_llm, model_for_agent
 from app.schemas import AgentConfig

 logger = logging.getLogger(__name__)
@@ -257,7 +256,7 @@ async def _call_llm_with_tools(
        else:
            messages.append(AIMessage(content=turn["content"]))

-    llm = get_llm(model=None, temperature=0.4)
+    llm = get_agent_llm("setup", temperature=0.4)
    llm_with_tools = llm.bind_tools(tools)
    tool_map = {tool_def.name: tool_def for tool_def in tools}

@@ -278,7 +277,7 @@ async def _call_llm_with_tools(
                lf.start_as_current_observation(
                    as_type="generation",
                    name="journey-setup-llm",
-                    model=settings.LLM_MODEL,
+                    model=model_for_agent("setup"),
                    prompt=langfuse_prompt,
                    input=messages,
                )
--- a/app/api/routes/agents.py
+++ b/app/api/routes/agents.py
@@ -177,6 +177,12 @@ async def trigger_agent_run(
    _enforce_agent_limit(current_user.tier, body.active_agents)
    await _enforce_run_frequency(current_user.tier, current_user.id, db)

+    last_run_dt = (
+        datetime.fromtimestamp(body.last_run_at / 1000, tz=timezone.utc)
+        if body.last_run_at
+        else None
+    )
+
    config = LocalAgentConfig(
        id=str(uuid.uuid4()),
        user_id=current_user.id,
@@ -184,10 +190,12 @@ async def trigger_agent_run(
        name="Local Directory Monitor",
        directory_paths=[body.directory],
        data_types=_to_data_types(body.what_to_extract),
-        prompt_template=body.custom_agent_prompt,
+        prompt_template=body.custom_agent_prompt or "",
+        agent_config=body.agent_config,
        file_extensions=[],
        schedule_cron=body.batch_interval,
        enabled=True,
+        last_run_at=last_run_dt,
    )

    # Use the FE's stable agent_id if provided, fall back to the ephemeral config id.
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -20,6 +20,14 @@ class Settings(BaseSettings):
    LLM_MODEL: str = "gpt-4o"
    LLM_EMBED_MODEL: str = "text-embedding-3-small"

+    # Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
+    LLM_MODEL_CLASSIFIER: str = ""        # _infer_floating_domain (intent routing)
+    LLM_MODEL_HOME_AGENT: str = ""        # home-agent (run_single_agent / stream)
+    LLM_MODEL_FLOATING_AGENT: str = ""    # floating-agent (contextual chat)
+    LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
+    LLM_MODEL_CLOUD_PROCESSOR: str = ""   # cloud-processor (agent_runner)
+    LLM_MODEL_SETUP_AGENT: str = ""       # agent-setup journey
+
    # GitHub Copilot OAuth token storage directory.
    # Leave empty to use the LiteLLM default (~/.config/litellm/github_copilot).
    # In Docker, set this to a path backed by a named volume so tokens survive restarts.
--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -43,10 +43,9 @@ from app.agents.note_agent import NOTE_TOOLS
 from app.agents.project_agent import PROJECT_TOOLS
 from app.agents.task_agent import TASK_TOOLS
 from app.agents.timeline_agent import TIMELINE_TOOLS
-from app.config.settings import settings
 from app.core.device_manager import DeviceConnectionManager
 from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
-from app.core.llm import get_llm
+from app.core.llm import get_agent_llm, model_for_agent
 from app.core.preprocessors import detect_content_type, preprocess
 from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor
 from app.db import async_session
@@ -74,13 +73,13 @@ _MAX_PROCESSING_STEPS: int = 12
 _MAX_SCAN_DEPTH: int = 5

 # ── Data-type to tool mapping ─────────────────────────────────────────────
-# NOTE: "projects" is intentionally excluded — project creation/assignment is
-# handled in code by the runner, never delegated to the Step 2 LLM.

 _DATA_TYPE_TOOLS: dict[str, list[Any]] = {
    "tasks": TASK_TOOLS,
    "notes": NOTE_TOOLS,
    "timelines": TIMELINE_TOOLS,
+    "timelineEvents": TIMELINE_TOOLS,
+    "projects": PROJECT_TOOLS,
 }

 # ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ──
@@ -238,7 +237,7 @@ async def _run_agent_with_tools(
    run is appended to it (used by the caller to count ``create_*`` calls).
    """
    lf = get_langfuse()
-    llm = get_llm()
+    llm = get_agent_llm(agent_name)
    llm_with_tools = llm.bind_tools(tools)
    messages: list[Any] = [
        SystemMessage(content=system_prompt),
@@ -264,7 +263,7 @@ async def _run_agent_with_tools(
                lf.start_as_current_observation(
                    as_type="generation",
                    name=f"{agent_name}-llm",
-                    model=settings.LLM_MODEL,
+                    model=model_for_agent(agent_name),
                    prompt=langfuse_prompt,
                    input=messages,
                )
@@ -696,6 +695,12 @@ async def run_local_agent(
                )
                items_created += file_created

+                # Refresh project list when a project was created so
+                # subsequent files see it in the prompt context.
+                if "create_project" in file_tool_calls:
+                    projects = await _fetch_projects()
+                    projects_block = _format_projects(projects)
+
                logger.info(
                    "agent_runner: run=%s file=%r created=%d result=%s",
                    run_id, file_path, file_created, result_text[:200],
--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
@@ -17,8 +17,7 @@ from app.agents.project_agent import PROJECT_TOOLS
 from app.agents.task_agent import TASK_TOOLS
 from app.agents.timeline_agent import TIMELINE_TOOLS
 from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
-from app.core.llm import get_llm
-from app.config.settings import settings
+from app.core.llm import get_agent_llm, model_for_agent
 from app.core.memory_middleware import MemoryMiddleware
 from app.core.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector
 from app.db import async_session
@@ -537,7 +536,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
    }

    try:
-        llm = get_llm()
+        llm = get_agent_llm("classifier")
        classifier_messages = [
            SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT),
            HumanMessage(
@@ -555,7 +554,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
            with lf.start_as_current_observation(
                as_type="generation",
                name="floating-classifier",
-                model=settings.LLM_MODEL,
+                model=model_for_agent("classifier"),
                prompt=classifier_prompt_obj,
                input=classifier_messages,
            ) as gen:
@@ -592,7 +591,7 @@ async def _run_single_agent(
 ) -> str:
    trace_id = _trace_id_from_context(context)
    lf = get_langfuse()
-    llm = get_llm()
+    llm = get_agent_llm(agent_name)
    tools = _all_tools_for_user(user_id, trace_id)
    model_context = _context_for_model(context)
    logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id)
@@ -628,7 +627,7 @@ async def _run_single_agent(
                lf.start_as_current_observation(
                    as_type="generation",
                    name=f"{agent_name}-llm",
-                    model=settings.LLM_MODEL,
+                    model=model_for_agent(agent_name),
                    prompt=langfuse_prompt,
                    input=messages,
                )
@@ -715,7 +714,7 @@ async def _run_single_agent_stream(
 ) -> AsyncGenerator[tuple[str, Any], None]:
    trace_id = _trace_id_from_context(context)
    lf = get_langfuse()
-    llm = get_llm()
+    llm = get_agent_llm(agent_name)
    tools = _all_tools_for_user(user_id, trace_id)
    model_context = _context_for_model(context)
    logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id)
@@ -753,7 +752,7 @@ async def _run_single_agent_stream(
                lf.start_as_current_observation(
                    as_type="generation",
                    name=f"{agent_name}-llm",
-                    model=settings.LLM_MODEL,
+                    model=model_for_agent(agent_name),
                    prompt=langfuse_prompt,
                    input=messages,
                )
--- a/app/core/llm.py
+++ b/app/core/llm.py
@@ -19,6 +19,7 @@ from __future__ import annotations

 import os
 import warnings
+from collections.abc import Callable

 from openai import AsyncOpenAI
 import litellm
@@ -95,6 +96,35 @@ def get_llm(
    )


+_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
+    "classifier":          lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
+    "home-agent":          lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
+    "floating-agent":      lambda: settings.LLM_MODEL_FLOATING_AGENT or settings.LLM_MODEL,
+    "unified-processor":   lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
+    "cloud-processor":     lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
+    "setup":               lambda: settings.LLM_MODEL_SETUP_AGENT or settings.LLM_MODEL,
+}
+
+
+def model_for_agent(agent_name: str) -> str:
+    """Return the model for *agent_name*: its override if set, else ``settings.LLM_MODEL``."""
+    return _AGENT_MODEL_SETTINGS.get(agent_name, lambda: settings.LLM_MODEL)()
+
+
+def get_agent_llm(
+    agent_name: str,
+    *,
+    temperature: float = 0,
+) -> ChatOpenAI | ChatLiteLLM:
+    """Return an LLM configured for *agent_name*, respecting per-agent overrides.
+
+    Falls back to ``settings.LLM_MODEL`` for unknown agent names or when the
+    per-agent override setting is empty, however it was supplied.
+    """
+    model = model_for_agent(agent_name)
+    return get_llm(model=model, temperature=temperature)
+
+
 async def embed(text: str) -> list[float]:
    """Return an embedding vector for *text*.

--- a/app/schemas.py
+++ b/app/schemas.py
@@ -236,10 +236,11 @@ class AgentTriggerRequest(BaseModel):
    device_id: str = Field(default="")
    agent_id: str | None = None  # FE stable agent ID (electron-store UUID)
    what_to_extract: list[str] = Field(min_length=1)
-    actions_by_type: dict[str, list[str]] | None = None
    batch_interval: str = Field(min_length=1)
-    custom_agent_prompt: str = Field(min_length=1)
+    custom_agent_prompt: str | None = None
+    agent_config: dict | None = None
    active_agents: int = Field(ge=0, default=0)
+    last_run_at: int | None = None  # epoch ms from FE — enables incremental scanning


 # ── Agent Run Log ─────────────────────────────────────────────────────
--- a/tests/test_agent_runner.py
+++ b/tests/test_agent_runner.py
@@ -791,7 +791,6 @@ async def test_trigger_run_local_agent_creates_run_log(client, db_session):
            json={
                "directory": "/home/user/docs",
                "what_to_extract": ["task", "note"],
-                "actions_by_type": {"task": ["add", "update"], "note": ["add"]},
                "batch_interval": "0 */6 * * *",
                "custom_agent_prompt": "Extract tasks and notes.",
                "active_agents": 0,