1 Commit

Author SHA1 Message Date
Roberto Musso
3cf067faea feat: enhance agent configuration and model management with per-agent overrides 2026-04-10 08:45:14 +02:00
9 changed files with 106 additions and 22 deletions

View File

@@ -13,10 +13,45 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30
# ── LLM ───────────────────────────────────────────────────────────────────────
# LiteLLM model identifiers — change to swap providers without code changes.
# Examples: gpt-4o, anthropic/claude-sonnet-4-20250514, gemini/gemini-pro, ollama/llama3
#
# API keys — only the key(s) matching your chosen provider(s) are required.
# The correct key is picked automatically from the model prefix (e.g.
# "anthropic/..." → ANTHROPIC_API_KEY, "gemini/..." → GOOGLE_API_KEY).
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GOOGLE_API_KEY=
CEREBRAS_API_KEY=
# Default model used by any agent that does not have a specific override below.
LLM_MODEL=gpt-5-mini
LLM_EMBED_MODEL=text-embedding-3-small
# GitHub Copilot — leave empty to use the LiteLLM default token directory.
# In Docker, point this to a named-volume path so tokens survive restarts.
# GITHUB_COPILOT_TOKEN_DIR=
# ── Per-agent model overrides ─────────────────────────────────────────────────
# Leave a value empty to fall back to LLM_MODEL.
# Each agent resolves its API key from the model prefix automatically.
#
# Intent classifier — routes user messages to the right domain agent.
# A small/fast model (e.g. gpt-4o-mini) is usually sufficient here.
LLM_MODEL_CLASSIFIER=
# Home-agent — handles chat from the home screen (all tools available).
LLM_MODEL_HOME_AGENT=
# Floating-agent — handles contextual chat triggered from a task/project/note.
LLM_MODEL_FLOATING_AGENT=
# Unified-processor — processes local directory files (local agent runner).
LLM_MODEL_UNIFIED_PROCESSOR=
# Cloud-processor — fetches and processes data from cloud connectors.
LLM_MODEL_CLOUD_PROCESSOR=
# Setup-agent — guided journey to build an AgentConfig via WebSocket chat.
LLM_MODEL_SETUP_AGENT=
# ── Stripe (leave empty to stub billing) ──────────────────────────────────────
STRIPE_SECRET_KEY=

View File

@@ -32,9 +32,8 @@ from typing import Any
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from app.agents.filesystem_agent import make_directory_tools
from app.config.settings import settings
from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
from app.core.llm import get_llm
from app.core.llm import get_agent_llm, model_for_agent
from app.schemas import AgentConfig
logger = logging.getLogger(__name__)
@@ -257,7 +256,7 @@ async def _call_llm_with_tools(
else:
messages.append(AIMessage(content=turn["content"]))
llm = get_llm(model=None, temperature=0.4)
llm = get_agent_llm("setup", temperature=0.4)
llm_with_tools = llm.bind_tools(tools)
tool_map = {tool_def.name: tool_def for tool_def in tools}
@@ -278,7 +277,7 @@ async def _call_llm_with_tools(
lf.start_as_current_observation(
as_type="generation",
name="journey-setup-llm",
model=settings.LLM_MODEL,
model=model_for_agent("setup"),
prompt=langfuse_prompt,
input=messages,
)

View File

@@ -177,6 +177,12 @@ async def trigger_agent_run(
_enforce_agent_limit(current_user.tier, body.active_agents)
await _enforce_run_frequency(current_user.tier, current_user.id, db)
last_run_dt = (
datetime.fromtimestamp(body.last_run_at / 1000, tz=timezone.utc)
if body.last_run_at
else None
)
config = LocalAgentConfig(
id=str(uuid.uuid4()),
user_id=current_user.id,
@@ -184,10 +190,12 @@ async def trigger_agent_run(
name="Local Directory Monitor",
directory_paths=[body.directory],
data_types=_to_data_types(body.what_to_extract),
prompt_template=body.custom_agent_prompt,
prompt_template=body.custom_agent_prompt or "",
agent_config=body.agent_config,
file_extensions=[],
schedule_cron=body.batch_interval,
enabled=True,
last_run_at=last_run_dt,
)
# Use the FE's stable agent_id if provided, fall back to the ephemeral config id.

View File

@@ -20,6 +20,14 @@ class Settings(BaseSettings):
LLM_MODEL: str = "gpt-4o"
LLM_EMBED_MODEL: str = "text-embedding-3-small"
# Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
LLM_MODEL_CLASSIFIER: str = "" # _infer_floating_domain (intent routing)
LLM_MODEL_HOME_AGENT: str = "" # home-agent (run_single_agent / stream)
LLM_MODEL_FLOATING_AGENT: str = "" # floating-agent (contextual chat)
LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
LLM_MODEL_CLOUD_PROCESSOR: str = "" # cloud-processor (agent_runner)
LLM_MODEL_SETUP_AGENT: str = "" # agent-setup journey
# GitHub Copilot OAuth token storage directory.
# Leave empty to use the LiteLLM default (~/.config/litellm/github_copilot).
# In Docker, set this to a path backed by a named volume so tokens survive restarts.

View File

@@ -43,10 +43,9 @@ from app.agents.note_agent import NOTE_TOOLS
from app.agents.project_agent import PROJECT_TOOLS
from app.agents.task_agent import TASK_TOOLS
from app.agents.timeline_agent import TIMELINE_TOOLS
from app.config.settings import settings
from app.core.device_manager import DeviceConnectionManager
from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
from app.core.llm import get_llm
from app.core.llm import get_agent_llm, model_for_agent
from app.core.preprocessors import detect_content_type, preprocess
from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor
from app.db import async_session
@@ -74,13 +73,13 @@ _MAX_PROCESSING_STEPS: int = 12
_MAX_SCAN_DEPTH: int = 5
# ── Data-type to tool mapping ─────────────────────────────────────────────
# NOTE: "projects" is intentionally excluded — project creation/assignment is
# handled in code by the runner, never delegated to the Step 2 LLM.
_DATA_TYPE_TOOLS: dict[str, list[Any]] = {
"tasks": TASK_TOOLS,
"notes": NOTE_TOOLS,
"timelines": TIMELINE_TOOLS,
"timelineEvents": TIMELINE_TOOLS,
"projects": PROJECT_TOOLS,
}
# ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ──
@@ -238,7 +237,7 @@ async def _run_agent_with_tools(
run is appended to it (used by the caller to count ``create_*`` calls).
"""
lf = get_langfuse()
llm = get_llm()
llm = get_agent_llm(agent_name)
llm_with_tools = llm.bind_tools(tools)
messages: list[Any] = [
SystemMessage(content=system_prompt),
@@ -264,7 +263,7 @@ async def _run_agent_with_tools(
lf.start_as_current_observation(
as_type="generation",
name=f"{agent_name}-llm",
model=settings.LLM_MODEL,
model=model_for_agent(agent_name),
prompt=langfuse_prompt,
input=messages,
)
@@ -696,6 +695,12 @@ async def run_local_agent(
)
items_created += file_created
# Refresh project list when a project was created so
# subsequent files see it in the prompt context.
if "create_project" in file_tool_calls:
projects = await _fetch_projects()
projects_block = _format_projects(projects)
logger.info(
"agent_runner: run=%s file=%r created=%d result=%s",
run_id, file_path, file_created, result_text[:200],

View File

@@ -17,8 +17,7 @@ from app.agents.project_agent import PROJECT_TOOLS
from app.agents.task_agent import TASK_TOOLS
from app.agents.timeline_agent import TIMELINE_TOOLS
from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
from app.core.llm import get_llm
from app.config.settings import settings
from app.core.llm import get_agent_llm, model_for_agent
from app.core.memory_middleware import MemoryMiddleware
from app.core.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector
from app.db import async_session
@@ -537,7 +536,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
}
try:
llm = get_llm()
llm = get_agent_llm("classifier")
classifier_messages = [
SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT),
HumanMessage(
@@ -555,7 +554,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
with lf.start_as_current_observation(
as_type="generation",
name="floating-classifier",
model=settings.LLM_MODEL,
model=model_for_agent("classifier"),
prompt=classifier_prompt_obj,
input=classifier_messages,
) as gen:
@@ -592,7 +591,7 @@ async def _run_single_agent(
) -> str:
trace_id = _trace_id_from_context(context)
lf = get_langfuse()
llm = get_llm()
llm = get_agent_llm(agent_name)
tools = _all_tools_for_user(user_id, trace_id)
model_context = _context_for_model(context)
logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id)
@@ -628,7 +627,7 @@ async def _run_single_agent(
lf.start_as_current_observation(
as_type="generation",
name=f"{agent_name}-llm",
model=settings.LLM_MODEL,
model=model_for_agent(agent_name),
prompt=langfuse_prompt,
input=messages,
)
@@ -715,7 +714,7 @@ async def _run_single_agent_stream(
) -> AsyncGenerator[tuple[str, Any], None]:
trace_id = _trace_id_from_context(context)
lf = get_langfuse()
llm = get_llm()
llm = get_agent_llm(agent_name)
tools = _all_tools_for_user(user_id, trace_id)
model_context = _context_for_model(context)
logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id)
@@ -753,7 +752,7 @@ async def _run_single_agent_stream(
lf.start_as_current_observation(
as_type="generation",
name=f"{agent_name}-llm",
model=settings.LLM_MODEL,
model=model_for_agent(agent_name),
prompt=langfuse_prompt,
input=messages,
)

View File

@@ -19,6 +19,7 @@ from __future__ import annotations
import os
import warnings
from collections.abc import Callable
from openai import AsyncOpenAI
import litellm
@@ -95,6 +96,35 @@ def get_llm(
)
def _model_resolver(override_attr: str) -> Callable[[], str]:
    """Build a zero-argument resolver for one per-agent override setting.

    The returned callable reads ``settings`` when it is invoked (not when the
    table is built) and yields the override stored under *override_attr* if
    it is non-empty, otherwise ``settings.LLM_MODEL``.
    """

    def _resolve() -> str:
        return getattr(settings, override_attr) or settings.LLM_MODEL

    return _resolve


# Agent name -> callable returning the model identifier for that agent.
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    agent: _model_resolver(attr)
    for agent, attr in (
        ("classifier", "LLM_MODEL_CLASSIFIER"),
        ("home-agent", "LLM_MODEL_HOME_AGENT"),
        ("floating-agent", "LLM_MODEL_FLOATING_AGENT"),
        ("unified-processor", "LLM_MODEL_UNIFIED_PROCESSOR"),
        ("cloud-processor", "LLM_MODEL_CLOUD_PROCESSOR"),
        ("setup", "LLM_MODEL_SETUP_AGENT"),
    )
}
def model_for_agent(agent_name: str) -> str:
    """Resolve the model identifier configured for *agent_name*.

    Used where only the model string is needed (e.g. Langfuse tracking).
    Agent names missing from ``_AGENT_MODEL_SETTINGS`` resolve to
    ``settings.LLM_MODEL``.
    """
    resolver = _AGENT_MODEL_SETTINGS.get(agent_name)
    if resolver is None:
        # Unknown agent: use the global default model.
        return settings.LLM_MODEL
    return resolver()
def get_agent_llm(
    agent_name: str,
    *,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Build the LLM client that *agent_name* should use.

    The model is resolved through ``model_for_agent``, so a per-agent
    override takes effect when one is set; unknown agent names and empty
    overrides end up on ``settings.LLM_MODEL``. *temperature* is forwarded
    unchanged to ``get_llm``.
    """
    return get_llm(model=model_for_agent(agent_name), temperature=temperature)
async def embed(text: str) -> list[float]:
"""Return an embedding vector for *text*.

View File

@@ -236,10 +236,11 @@ class AgentTriggerRequest(BaseModel):
device_id: str = Field(default="")
agent_id: str | None = None # FE stable agent ID (electron-store UUID)
what_to_extract: list[str] = Field(min_length=1)
actions_by_type: dict[str, list[str]] | None = None
batch_interval: str = Field(min_length=1)
custom_agent_prompt: str = Field(min_length=1)
custom_agent_prompt: str | None = None
agent_config: dict | None = None
active_agents: int = Field(ge=0, default=0)
last_run_at: int | None = None # epoch ms from FE — enables incremental scanning
# ── Agent Run Log ─────────────────────────────────────────────────────

View File

@@ -791,7 +791,6 @@ async def test_trigger_run_local_agent_creates_run_log(client, db_session):
json={
"directory": "/home/user/docs",
"what_to_extract": ["task", "note"],
"actions_by_type": {"task": ["add", "update"], "note": ["add"]},
"batch_interval": "0 */6 * * *",
"custom_agent_prompt": "Extract tasks and notes.",
"active_agents": 0,