feat: enhance agent configuration and model management with per-agent overrides
This commit is contained in:
35
.env.example
35
.env.example
@@ -13,10 +13,45 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30
|
||||
# ── LLM ───────────────────────────────────────────────────────────────────────
|
||||
# LiteLLM model identifiers — change to swap providers without code changes.
|
||||
# Examples: gpt-4o, anthropic/claude-sonnet-4-20250514, gemini/gemini-pro, ollama/llama3
|
||||
#
|
||||
# API keys — only the key(s) matching your chosen provider(s) are required.
|
||||
# The correct key is picked automatically from the model prefix (e.g.
|
||||
# "anthropic/..." → ANTHROPIC_API_KEY, "gemini/..." → GOOGLE_API_KEY).
|
||||
OPENAI_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
GOOGLE_API_KEY=
|
||||
CEREBRAS_API_KEY=
|
||||
|
||||
# Default model used by any agent that does not have a specific override below.
|
||||
LLM_MODEL=gpt-5-mini
|
||||
LLM_EMBED_MODEL=text-embedding-3-small
|
||||
|
||||
# GitHub Copilot — leave empty to use the LiteLLM default token directory.
|
||||
# In Docker, point this to a named-volume path so tokens survive restarts.
|
||||
# GITHUB_COPILOT_TOKEN_DIR=
|
||||
|
||||
# ── Per-agent model overrides ─────────────────────────────────────────────────
|
||||
# Leave a value empty to fall back to LLM_MODEL.
|
||||
# Each agent resolves its API key from the model prefix automatically.
|
||||
#
|
||||
# Intent classifier — routes user messages to the right domain agent.
|
||||
# A small/fast model (e.g. gpt-4o-mini) is usually sufficient here.
|
||||
LLM_MODEL_CLASSIFIER=
|
||||
|
||||
# Home-agent — handles chat from the home screen (all tools available).
|
||||
LLM_MODEL_HOME_AGENT=
|
||||
|
||||
# Floating-agent — handles contextual chat triggered from a task/project/note.
|
||||
LLM_MODEL_FLOATING_AGENT=
|
||||
|
||||
# Unified-processor — processes local directory files (local agent runner).
|
||||
LLM_MODEL_UNIFIED_PROCESSOR=
|
||||
|
||||
# Cloud-processor — fetches and processes data from cloud connectors.
|
||||
LLM_MODEL_CLOUD_PROCESSOR=
|
||||
|
||||
# Setup-agent — guided journey to build an AgentConfig via WebSocket chat.
|
||||
LLM_MODEL_SETUP_AGENT=
|
||||
|
||||
# ── Stripe (leave empty to stub billing) ──────────────────────────────────────
|
||||
STRIPE_SECRET_KEY=
|
||||
|
||||
@@ -32,9 +32,8 @@ from typing import Any
|
||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
|
||||
|
||||
from app.agents.filesystem_agent import make_directory_tools
|
||||
from app.config.settings import settings
|
||||
from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
|
||||
from app.core.llm import get_llm
|
||||
from app.core.llm import get_agent_llm, model_for_agent
|
||||
from app.schemas import AgentConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -257,7 +256,7 @@ async def _call_llm_with_tools(
|
||||
else:
|
||||
messages.append(AIMessage(content=turn["content"]))
|
||||
|
||||
llm = get_llm(model=None, temperature=0.4)
|
||||
llm = get_agent_llm("setup", temperature=0.4)
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
tool_map = {tool_def.name: tool_def for tool_def in tools}
|
||||
|
||||
@@ -278,7 +277,7 @@ async def _call_llm_with_tools(
|
||||
lf.start_as_current_observation(
|
||||
as_type="generation",
|
||||
name="journey-setup-llm",
|
||||
model=settings.LLM_MODEL,
|
||||
model=model_for_agent("setup"),
|
||||
prompt=langfuse_prompt,
|
||||
input=messages,
|
||||
)
|
||||
|
||||
@@ -177,6 +177,12 @@ async def trigger_agent_run(
|
||||
_enforce_agent_limit(current_user.tier, body.active_agents)
|
||||
await _enforce_run_frequency(current_user.tier, current_user.id, db)
|
||||
|
||||
last_run_dt = (
|
||||
datetime.fromtimestamp(body.last_run_at / 1000, tz=timezone.utc)
|
||||
if body.last_run_at
|
||||
else None
|
||||
)
|
||||
|
||||
config = LocalAgentConfig(
|
||||
id=str(uuid.uuid4()),
|
||||
user_id=current_user.id,
|
||||
@@ -184,10 +190,12 @@ async def trigger_agent_run(
|
||||
name="Local Directory Monitor",
|
||||
directory_paths=[body.directory],
|
||||
data_types=_to_data_types(body.what_to_extract),
|
||||
prompt_template=body.custom_agent_prompt,
|
||||
prompt_template=body.custom_agent_prompt or "",
|
||||
agent_config=body.agent_config,
|
||||
file_extensions=[],
|
||||
schedule_cron=body.batch_interval,
|
||||
enabled=True,
|
||||
last_run_at=last_run_dt,
|
||||
)
|
||||
|
||||
# Use the FE's stable agent_id if provided, fall back to the ephemeral config id.
|
||||
|
||||
@@ -20,6 +20,14 @@ class Settings(BaseSettings):
|
||||
LLM_MODEL: str = "gpt-4o"
|
||||
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
||||
|
||||
# Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
|
||||
LLM_MODEL_CLASSIFIER: str = "" # _infer_floating_domain (intent routing)
|
||||
LLM_MODEL_HOME_AGENT: str = "" # home-agent (run_single_agent / stream)
|
||||
LLM_MODEL_FLOATING_AGENT: str = "" # floating-agent (contextual chat)
|
||||
LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
|
||||
LLM_MODEL_CLOUD_PROCESSOR: str = "" # cloud-processor (agent_runner)
|
||||
LLM_MODEL_SETUP_AGENT: str = "" # agent-setup journey
|
||||
|
||||
# GitHub Copilot OAuth token storage directory.
|
||||
# Leave empty to use the LiteLLM default (~/.config/litellm/github_copilot).
|
||||
# In Docker, set this to a path backed by a named volume so tokens survive restarts.
|
||||
|
||||
@@ -43,10 +43,9 @@ from app.agents.note_agent import NOTE_TOOLS
|
||||
from app.agents.project_agent import PROJECT_TOOLS
|
||||
from app.agents.task_agent import TASK_TOOLS
|
||||
from app.agents.timeline_agent import TIMELINE_TOOLS
|
||||
from app.config.settings import settings
|
||||
from app.core.device_manager import DeviceConnectionManager
|
||||
from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback
|
||||
from app.core.llm import get_llm
|
||||
from app.core.llm import get_agent_llm, model_for_agent
|
||||
from app.core.preprocessors import detect_content_type, preprocess
|
||||
from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor
|
||||
from app.db import async_session
|
||||
@@ -74,13 +73,13 @@ _MAX_PROCESSING_STEPS: int = 12
|
||||
_MAX_SCAN_DEPTH: int = 5
|
||||
|
||||
# ── Data-type to tool mapping ─────────────────────────────────────────────
|
||||
# NOTE: "projects" is included below, so project tools are part of this mapping;
# the runner re-fetches its project list after a "create_project" tool call.
|
||||
|
||||
# Lookup from a data-type key to the tool list registered for it.
_DATA_TYPE_TOOLS: dict[str, list[Any]] = {
    "tasks": TASK_TOOLS,
    "notes": NOTE_TOOLS,
    # Both timeline spellings resolve to the same tool list.
    "timelines": TIMELINE_TOOLS,
    "timelineEvents": TIMELINE_TOOLS,
    "projects": PROJECT_TOOLS,
}
|
||||
|
||||
# ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ──
|
||||
@@ -238,7 +237,7 @@ async def _run_agent_with_tools(
|
||||
run is appended to it (used by the caller to count ``create_*`` calls).
|
||||
"""
|
||||
lf = get_langfuse()
|
||||
llm = get_llm()
|
||||
llm = get_agent_llm(agent_name)
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
messages: list[Any] = [
|
||||
SystemMessage(content=system_prompt),
|
||||
@@ -264,7 +263,7 @@ async def _run_agent_with_tools(
|
||||
lf.start_as_current_observation(
|
||||
as_type="generation",
|
||||
name=f"{agent_name}-llm",
|
||||
model=settings.LLM_MODEL,
|
||||
model=model_for_agent(agent_name),
|
||||
prompt=langfuse_prompt,
|
||||
input=messages,
|
||||
)
|
||||
@@ -696,6 +695,12 @@ async def run_local_agent(
|
||||
)
|
||||
items_created += file_created
|
||||
|
||||
# Refresh project list when a project was created so
|
||||
# subsequent files see it in the prompt context.
|
||||
if "create_project" in file_tool_calls:
|
||||
projects = await _fetch_projects()
|
||||
projects_block = _format_projects(projects)
|
||||
|
||||
logger.info(
|
||||
"agent_runner: run=%s file=%r created=%d result=%s",
|
||||
run_id, file_path, file_created, result_text[:200],
|
||||
|
||||
@@ -17,8 +17,7 @@ from app.agents.project_agent import PROJECT_TOOLS
|
||||
from app.agents.task_agent import TASK_TOOLS
|
||||
from app.agents.timeline_agent import TIMELINE_TOOLS
|
||||
from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
|
||||
from app.core.llm import get_llm
|
||||
from app.config.settings import settings
|
||||
from app.core.llm import get_agent_llm, model_for_agent
|
||||
from app.core.memory_middleware import MemoryMiddleware
|
||||
from app.core.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector
|
||||
from app.db import async_session
|
||||
@@ -537,7 +536,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
|
||||
}
|
||||
|
||||
try:
|
||||
llm = get_llm()
|
||||
llm = get_agent_llm("classifier")
|
||||
classifier_messages = [
|
||||
SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT),
|
||||
HumanMessage(
|
||||
@@ -555,7 +554,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
|
||||
with lf.start_as_current_observation(
|
||||
as_type="generation",
|
||||
name="floating-classifier",
|
||||
model=settings.LLM_MODEL,
|
||||
model=model_for_agent("classifier"),
|
||||
prompt=classifier_prompt_obj,
|
||||
input=classifier_messages,
|
||||
) as gen:
|
||||
@@ -592,7 +591,7 @@ async def _run_single_agent(
|
||||
) -> str:
|
||||
trace_id = _trace_id_from_context(context)
|
||||
lf = get_langfuse()
|
||||
llm = get_llm()
|
||||
llm = get_agent_llm(agent_name)
|
||||
tools = _all_tools_for_user(user_id, trace_id)
|
||||
model_context = _context_for_model(context)
|
||||
logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id)
|
||||
@@ -628,7 +627,7 @@ async def _run_single_agent(
|
||||
lf.start_as_current_observation(
|
||||
as_type="generation",
|
||||
name=f"{agent_name}-llm",
|
||||
model=settings.LLM_MODEL,
|
||||
model=model_for_agent(agent_name),
|
||||
prompt=langfuse_prompt,
|
||||
input=messages,
|
||||
)
|
||||
@@ -715,7 +714,7 @@ async def _run_single_agent_stream(
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
trace_id = _trace_id_from_context(context)
|
||||
lf = get_langfuse()
|
||||
llm = get_llm()
|
||||
llm = get_agent_llm(agent_name)
|
||||
tools = _all_tools_for_user(user_id, trace_id)
|
||||
model_context = _context_for_model(context)
|
||||
logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id)
|
||||
@@ -753,7 +752,7 @@ async def _run_single_agent_stream(
|
||||
lf.start_as_current_observation(
|
||||
as_type="generation",
|
||||
name=f"{agent_name}-llm",
|
||||
model=settings.LLM_MODEL,
|
||||
model=model_for_agent(agent_name),
|
||||
prompt=langfuse_prompt,
|
||||
input=messages,
|
||||
)
|
||||
|
||||
@@ -19,6 +19,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import warnings
|
||||
from collections.abc import Callable
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
import litellm
|
||||
@@ -95,6 +96,35 @@ def get_llm(
|
||||
)
|
||||
|
||||
|
||||
def _override_or_default(setting_name: str) -> Callable[[], str]:
    """Build a lazy resolver: the named override if non-empty, else ``LLM_MODEL``."""

    def _resolve() -> str:
        # Read settings at call time, not at import time, so the value that
        # is current when an agent starts is the one that gets used.
        return getattr(settings, setting_name) or settings.LLM_MODEL

    return _resolve


# Agent name -> zero-argument callable returning that agent's model string.
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    "classifier": _override_or_default("LLM_MODEL_CLASSIFIER"),
    "home-agent": _override_or_default("LLM_MODEL_HOME_AGENT"),
    "floating-agent": _override_or_default("LLM_MODEL_FLOATING_AGENT"),
    "unified-processor": _override_or_default("LLM_MODEL_UNIFIED_PROCESSOR"),
    "cloud-processor": _override_or_default("LLM_MODEL_CLOUD_PROCESSOR"),
    "setup": _override_or_default("LLM_MODEL_SETUP_AGENT"),
}
|
||||
|
||||
|
||||
def model_for_agent(agent_name: str) -> str:
    """Resolve the model identifier configured for *agent_name*.

    The result is what ``get_agent_llm`` passes to ``get_llm`` and is also
    suitable as the ``model`` label on Langfuse generations. Agent names that
    are not registered in ``_AGENT_MODEL_SETTINGS`` resolve to
    ``settings.LLM_MODEL``.
    """
    resolver = _AGENT_MODEL_SETTINGS.get(agent_name)
    if resolver is None:
        # Unknown agent: use the global default model.
        return settings.LLM_MODEL
    return resolver()
|
||||
|
||||
|
||||
def get_agent_llm(
    agent_name: str,
    *,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Build the LLM client for *agent_name*, honouring its per-agent override.

    The model string comes from ``model_for_agent``, so an unknown agent name
    or an override left empty in ``.env`` ends up on ``settings.LLM_MODEL``.
    ``temperature`` is forwarded to ``get_llm`` unchanged.
    """
    return get_llm(model=model_for_agent(agent_name), temperature=temperature)
|
||||
|
||||
|
||||
async def embed(text: str) -> list[float]:
|
||||
"""Return an embedding vector for *text*.
|
||||
|
||||
|
||||
@@ -236,10 +236,11 @@ class AgentTriggerRequest(BaseModel):
|
||||
device_id: str = Field(default="")
|
||||
agent_id: str | None = None # FE stable agent ID (electron-store UUID)
|
||||
what_to_extract: list[str] = Field(min_length=1)
|
||||
actions_by_type: dict[str, list[str]] | None = None
|
||||
batch_interval: str = Field(min_length=1)
|
||||
custom_agent_prompt: str = Field(min_length=1)
|
||||
custom_agent_prompt: str | None = None
|
||||
agent_config: dict | None = None
|
||||
active_agents: int = Field(ge=0, default=0)
|
||||
last_run_at: int | None = None # epoch ms from FE — enables incremental scanning
|
||||
|
||||
|
||||
# ── Agent Run Log ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -791,7 +791,6 @@ async def test_trigger_run_local_agent_creates_run_log(client, db_session):
|
||||
json={
|
||||
"directory": "/home/user/docs",
|
||||
"what_to_extract": ["task", "note"],
|
||||
"actions_by_type": {"task": ["add", "update"], "note": ["add"]},
|
||||
"batch_interval": "0 */6 * * *",
|
||||
"custom_agent_prompt": "Extract tasks and notes.",
|
||||
"active_agents": 0,
|
||||
|
||||
Reference in New Issue
Block a user