diff --git a/.env.example b/.env.example index d8d134d..40e18c4 100644 --- a/.env.example +++ b/.env.example @@ -13,10 +13,45 @@ JWT_REFRESH_TOKEN_EXPIRE_DAYS=30 # ── LLM ─────────────────────────────────────────────────────────────────────── # LiteLLM model identifiers — change to swap providers without code changes. # Examples: gpt-4o, anthropic/claude-sonnet-4-20250514, gemini/gemini-pro, ollama/llama3 +# +# API keys — only the key(s) matching your chosen provider(s) are required. +# The correct key is picked automatically from the model prefix (e.g. +# "anthropic/..." → ANTHROPIC_API_KEY, "gemini/..." → GOOGLE_API_KEY). OPENAI_API_KEY= ANTHROPIC_API_KEY= GOOGLE_API_KEY= +CEREBRAS_API_KEY= + +# Default model used by any agent that does not have a specific override below. LLM_MODEL=gpt-5-mini +LLM_EMBED_MODEL=text-embedding-3-small + +# GitHub Copilot — leave empty to use the LiteLLM default token directory. +# In Docker, point this to a named-volume path so tokens survive restarts. +# GITHUB_COPILOT_TOKEN_DIR= + +# ── Per-agent model overrides ───────────────────────────────────────────────── +# Leave a value empty to fall back to LLM_MODEL. +# Each agent resolves its API key from the model prefix automatically. +# +# Intent classifier — routes user messages to the right domain agent. +# A small/fast model (e.g. gpt-4o-mini) is usually sufficient here. +LLM_MODEL_CLASSIFIER= + +# Home-agent — handles chat from the home screen (all tools available). +LLM_MODEL_HOME_AGENT= + +# Floating-agent — handles contextual chat triggered from a task/project/note. +LLM_MODEL_FLOATING_AGENT= + +# Unified-processor — processes local directory files (local agent runner). +LLM_MODEL_UNIFIED_PROCESSOR= + +# Cloud-processor — fetches and processes data from cloud connectors. +LLM_MODEL_CLOUD_PROCESSOR= + +# Setup-agent — guided journey to build an AgentConfig via WebSocket chat. 
+LLM_MODEL_SETUP_AGENT= # ── Stripe (leave empty to stub billing) ────────────────────────────────────── STRIPE_SECRET_KEY= diff --git a/app/api/routes/agent_setup.py b/app/api/routes/agent_setup.py index b54cea7..d833632 100644 --- a/app/api/routes/agent_setup.py +++ b/app/api/routes/agent_setup.py @@ -32,9 +32,8 @@ from typing import Any from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage from app.agents.filesystem_agent import make_directory_tools -from app.config.settings import settings from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback -from app.core.llm import get_llm +from app.core.llm import get_agent_llm, model_for_agent from app.schemas import AgentConfig logger = logging.getLogger(__name__) @@ -257,7 +256,7 @@ async def _call_llm_with_tools( else: messages.append(AIMessage(content=turn["content"])) - llm = get_llm(model=None, temperature=0.4) + llm = get_agent_llm("setup", temperature=0.4) llm_with_tools = llm.bind_tools(tools) tool_map = {tool_def.name: tool_def for tool_def in tools} @@ -278,7 +277,7 @@ async def _call_llm_with_tools( lf.start_as_current_observation( as_type="generation", name="journey-setup-llm", - model=settings.LLM_MODEL, + model=model_for_agent("setup"), prompt=langfuse_prompt, input=messages, ) diff --git a/app/api/routes/agents.py b/app/api/routes/agents.py index 30ecfc9..0a66a65 100644 --- a/app/api/routes/agents.py +++ b/app/api/routes/agents.py @@ -177,6 +177,12 @@ async def trigger_agent_run( _enforce_agent_limit(current_user.tier, body.active_agents) await _enforce_run_frequency(current_user.tier, current_user.id, db) + last_run_dt = ( + datetime.fromtimestamp(body.last_run_at / 1000, tz=timezone.utc) + if body.last_run_at + else None + ) + config = LocalAgentConfig( id=str(uuid.uuid4()), user_id=current_user.id, @@ -184,10 +190,12 @@ async def trigger_agent_run( name="Local Directory Monitor", directory_paths=[body.directory], 
data_types=_to_data_types(body.what_to_extract), - prompt_template=body.custom_agent_prompt, + prompt_template=body.custom_agent_prompt or "", + agent_config=body.agent_config, file_extensions=[], schedule_cron=body.batch_interval, enabled=True, + last_run_at=last_run_dt, ) # Use the FE's stable agent_id if provided, fall back to the ephemeral config id. diff --git a/app/config/settings.py b/app/config/settings.py index 823c5d1..f9eeabd 100644 --- a/app/config/settings.py +++ b/app/config/settings.py @@ -20,6 +20,14 @@ class Settings(BaseSettings): LLM_MODEL: str = "gpt-4o" LLM_EMBED_MODEL: str = "text-embedding-3-small" + # Per-agent model overrides. Leave empty to fall back to LLM_MODEL. + LLM_MODEL_CLASSIFIER: str = "" # _infer_floating_domain (intent routing) + LLM_MODEL_HOME_AGENT: str = "" # home-agent (run_single_agent / stream) + LLM_MODEL_FLOATING_AGENT: str = "" # floating-agent (contextual chat) + LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner) + LLM_MODEL_CLOUD_PROCESSOR: str = "" # cloud-processor (agent_runner) + LLM_MODEL_SETUP_AGENT: str = "" # agent-setup journey + # GitHub Copilot OAuth token storage directory. # Leave empty to use the LiteLLM default (~/.config/litellm/github_copilot). # In Docker, set this to a path backed by a named volume so tokens survive restarts. 
diff --git a/app/core/agent_runner.py b/app/core/agent_runner.py index 072bf7b..a91d1da 100644 --- a/app/core/agent_runner.py +++ b/app/core/agent_runner.py @@ -43,10 +43,9 @@ from app.agents.note_agent import NOTE_TOOLS from app.agents.project_agent import PROJECT_TOOLS from app.agents.task_agent import TASK_TOOLS from app.agents.timeline_agent import TIMELINE_TOOLS -from app.config.settings import settings from app.core.device_manager import DeviceConnectionManager from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback -from app.core.llm import get_llm +from app.core.llm import get_agent_llm, model_for_agent from app.core.preprocessors import detect_content_type, preprocess from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor from app.db import async_session @@ -74,13 +73,13 @@ _MAX_PROCESSING_STEPS: int = 12 _MAX_SCAN_DEPTH: int = 5 # ── Data-type to tool mapping ───────────────────────────────────────────── -# NOTE: "projects" is intentionally excluded — project creation/assignment is -# handled in code by the runner, never delegated to the Step 2 LLM. _DATA_TYPE_TOOLS: dict[str, list[Any]] = { "tasks": TASK_TOOLS, "notes": NOTE_TOOLS, "timelines": TIMELINE_TOOLS, + "timelineEvents": TIMELINE_TOOLS, + "projects": PROJECT_TOOLS, } # ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ── @@ -238,7 +237,7 @@ async def _run_agent_with_tools( run is appended to it (used by the caller to count ``create_*`` calls). 
""" lf = get_langfuse() - llm = get_llm() + llm = get_agent_llm(agent_name) llm_with_tools = llm.bind_tools(tools) messages: list[Any] = [ SystemMessage(content=system_prompt), @@ -264,7 +263,7 @@ async def _run_agent_with_tools( lf.start_as_current_observation( as_type="generation", name=f"{agent_name}-llm", - model=settings.LLM_MODEL, + model=model_for_agent(agent_name), prompt=langfuse_prompt, input=messages, ) @@ -696,6 +695,12 @@ async def run_local_agent( ) items_created += file_created + # Refresh project list when a project was created so + # subsequent files see it in the prompt context. + if "create_project" in file_tool_calls: + projects = await _fetch_projects() + projects_block = _format_projects(projects) + logger.info( "agent_runner: run=%s file=%r created=%d result=%s", run_id, file_path, file_created, result_text[:200], diff --git a/app/core/deep_agent.py b/app/core/deep_agent.py index 38e85d3..44a7d1d 100644 --- a/app/core/deep_agent.py +++ b/app/core/deep_agent.py @@ -17,8 +17,7 @@ from app.agents.project_agent import PROJECT_TOOLS from app.agents.task_agent import TASK_TOOLS from app.agents.timeline_agent import TIMELINE_TOOLS from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback -from app.core.llm import get_llm -from app.config.settings import settings +from app.core.llm import get_agent_llm, model_for_agent from app.core.memory_middleware import MemoryMiddleware from app.core.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector from app.db import async_session @@ -537,7 +536,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[ } try: - llm = get_llm() + llm = get_agent_llm("classifier") classifier_messages = [ SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT), HumanMessage( @@ -555,7 +554,7 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[ with lf.start_as_current_observation( 
as_type="generation", name="floating-classifier", - model=settings.LLM_MODEL, + model=model_for_agent("classifier"), prompt=classifier_prompt_obj, input=classifier_messages, ) as gen: @@ -592,7 +591,7 @@ async def _run_single_agent( ) -> str: trace_id = _trace_id_from_context(context) lf = get_langfuse() - llm = get_llm() + llm = get_agent_llm(agent_name) tools = _all_tools_for_user(user_id, trace_id) model_context = _context_for_model(context) logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id) @@ -628,7 +627,7 @@ async def _run_single_agent( lf.start_as_current_observation( as_type="generation", name=f"{agent_name}-llm", - model=settings.LLM_MODEL, + model=model_for_agent(agent_name), prompt=langfuse_prompt, input=messages, ) @@ -715,7 +714,7 @@ async def _run_single_agent_stream( ) -> AsyncGenerator[tuple[str, Any], None]: trace_id = _trace_id_from_context(context) lf = get_langfuse() - llm = get_llm() + llm = get_agent_llm(agent_name) tools = _all_tools_for_user(user_id, trace_id) model_context = _context_for_model(context) logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id) @@ -753,7 +752,7 @@ async def _run_single_agent_stream( lf.start_as_current_observation( as_type="generation", name=f"{agent_name}-llm", - model=settings.LLM_MODEL, + model=model_for_agent(agent_name), prompt=langfuse_prompt, input=messages, ) diff --git a/app/core/llm.py b/app/core/llm.py index 1787ce9..d833bf4 100644 --- a/app/core/llm.py +++ b/app/core/llm.py @@ -19,6 +19,7 @@ from __future__ import annotations import os import warnings +from collections.abc import Callable from openai import AsyncOpenAI import litellm @@ -95,6 +96,35 @@ def get_llm( ) +_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = { + "classifier": lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL, + "home-agent": lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL, + "floating-agent": lambda: 
settings.LLM_MODEL_FLOATING_AGENT or settings.LLM_MODEL, + "unified-processor": lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL, + "cloud-processor": lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL, + "setup": lambda: settings.LLM_MODEL_SETUP_AGENT or settings.LLM_MODEL, +} + + +def model_for_agent(agent_name: str) -> str: + """Return the model for *agent_name*: its override if set, else ``settings.LLM_MODEL``.""" + return _AGENT_MODEL_SETTINGS.get(agent_name, lambda: settings.LLM_MODEL)() + + +def get_agent_llm( + agent_name: str, + *, + temperature: float = 0, +) -> ChatOpenAI | ChatLiteLLM: + """Return an LLM configured for *agent_name*, respecting per-agent overrides. + + Falls back to ``settings.LLM_MODEL`` for unknown agent names or when the + per-agent override is left empty in ``.env``. + """ + model = model_for_agent(agent_name) + return get_llm(model=model, temperature=temperature) + + async def embed(text: str) -> list[float]: """Return an embedding vector for *text*. 
diff --git a/app/schemas.py b/app/schemas.py index d0301fd..80996ba 100644 --- a/app/schemas.py +++ b/app/schemas.py @@ -236,10 +236,11 @@ class AgentTriggerRequest(BaseModel): device_id: str = Field(default="") agent_id: str | None = None # FE stable agent ID (electron-store UUID) what_to_extract: list[str] = Field(min_length=1) - actions_by_type: dict[str, list[str]] | None = None batch_interval: str = Field(min_length=1) - custom_agent_prompt: str = Field(min_length=1) + custom_agent_prompt: str | None = None + agent_config: dict | None = None active_agents: int = Field(ge=0, default=0) + last_run_at: int | None = None # epoch ms from FE — enables incremental scanning # ── Agent Run Log ───────────────────────────────────────────────────── diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 2764f77..ee46b55 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -791,7 +791,6 @@ async def test_trigger_run_local_agent_creates_run_log(client, db_session): json={ "directory": "/home/user/docs", "what_to_extract": ["task", "note"], - "actions_by_type": {"task": ["add", "update"], "note": ["add"]}, "batch_interval": "0 */6 * * *", "custom_agent_prompt": "Extract tasks and notes.", "active_agents": 0,