contextual_request + contextual_scope_update are the only WS flows for ad-hoc contextual chat now. Floating system prompt constant removed; Langfuse 'floating_system' is deleted in a separate manual step. Also removes floating-agent LLM slot from llm.py and the associated LLM_MODEL_FLOATING_AGENT setting entry.
157 lines
6.1 KiB
Python
157 lines
6.1 KiB
Python
"""LLM factory — centralised model instantiation via LiteLLM.
|
||
|
||
Every agent and the orchestrator call ``get_llm()``
|
||
instead of directly constructing a provider-specific class. The model string
|
||
follows the `LiteLLM model naming convention
|
||
<https://docs.litellm.ai/docs/providers>`_:
|
||
|
||
* OpenAI: ``gpt-4o``, ``gpt-4o-mini``
|
||
* Anthropic: ``anthropic/claude-3.5-sonnet``
|
||
* Google: ``gemini/gemini-pro``
|
||
* Ollama: ``ollama/llama3``
|
||
* Bedrock: ``bedrock/anthropic.claude-v2``
|
||
|
||
Switch providers by changing **LLM_MODEL** in ``.env``
|
||
— no code changes required.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import warnings
|
||
from collections.abc import Callable
|
||
|
||
from openai import AsyncOpenAI
|
||
import litellm
|
||
|
||
from langchain_openai import ChatOpenAI
|
||
from langchain_litellm import ChatLiteLLM
|
||
from litellm import get_supported_openai_params # noqa: F401 – validates install
|
||
|
||
from app.config.settings import settings
|
||
|
||
# LiteLLM would otherwise raise UnsupportedParamsError for models that reject
# certain parameters (e.g. gpt-5 and the o-series do not accept temperature);
# with this flag the unsupported parameters are dropped silently instead.
litellm.drop_params = True

# Silence a noisy, non-fatal serialization warning: some provider responses
# carry a plain dict in the `usage` field where a richer Pydantic model is
# expected.
warnings.filterwarnings(
    action="ignore",
    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
    category=UserWarning,
)
|
||
|
||
|
||
def _api_key_for_model(model: str) -> str | None:
    """Pick the API key from ``settings`` that matches *model*'s provider prefix.

    Parameters
    ----------
    model:
        LiteLLM model identifier, e.g. ``anthropic/...`` or a plain
        OpenAI-style name such as ``gpt-4o``.

    Returns
    -------
    The configured key for the matching provider, or ``None`` when that
    setting is empty or the provider needs no key.  Identifiers without a
    recognised prefix use ``settings.OPENAI_API_KEY``.
    """
    # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM, so no
    # API key is required; returning None lets LiteLLM handle auth.
    if model.startswith("github_copilot/"):
        return None

    # (prefixes, settings attribute) pairs; the attribute is only read for
    # the entry that actually matches.
    provider_key_attrs = (
        (("anthropic/",), "ANTHROPIC_API_KEY"),
        (("gemini/", "google/"), "GOOGLE_API_KEY"),
        (("cerebras/",), "CEREBRAS_API_KEY"),
        (("groq/",), "GROQ_API_KEY"),
        (("deepseek/",), "DEEPSEEK_API_KEY"),
    )
    for prefixes, attr_name in provider_key_attrs:
        if model.startswith(prefixes):
            return getattr(settings, attr_name) or None

    # Default: OpenAI-compatible (covers plain model names like "gpt-4o").
    return settings.OPENAI_API_KEY or None
|
||
|
||
|
||
def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Build the LangChain chat model for a LiteLLM-style model identifier.

    Identifiers containing a ``/`` (``anthropic/…``, ``gemini/…``,
    ``github_copilot/…``, etc.) are wrapped in ``ChatLiteLLM`` so LiteLLM
    handles routing and auth.  Plain names such as ``gpt-4o`` are wrapped in
    ``ChatOpenAI`` with the key chosen by ``_api_key_for_model``.

    Parameters
    ----------
    model:
        LiteLLM model identifier.  Defaults to ``settings.LLM_MODEL`` when
        omitted or empty.
    temperature:
        Sampling temperature passed to the chat model (default ``0``).

    Returns
    -------
    A ``ChatLiteLLM`` for provider-prefixed identifiers, otherwise a
    ``ChatOpenAI``.
    """
    resolved_model = model or settings.LLM_MODEL

    # Point LiteLLM to the custom token directory when configured.
    # ``setdefault`` leaves a value already present in the environment alone.
    copilot_token_dir = settings.GITHUB_COPILOT_TOKEN_DIR
    if copilot_token_dir:
        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", copilot_token_dir)

    if "/" not in resolved_model:
        # Plain OpenAI model name: talk to OpenAI through ChatOpenAI.
        return ChatOpenAI(
            model=resolved_model,
            temperature=temperature,
            api_key=_api_key_for_model(resolved_model),
        )

    # Provider-prefixed model: no api_key is passed here.
    # NOTE(review): presumably LiteLLM resolves provider credentials from the
    # environment — confirm the keys held in ``settings`` are exported there.
    return ChatLiteLLM(model=resolved_model, temperature=temperature)
|
||
|
||
|
||
# Small model used as the fallback for the memory extractor/miner and as the
# fixed choice for the note summarizer.
_LIGHTWEIGHT_MODEL = "gpt-4o-mini"

# Agent name -> zero-argument resolver returning that agent's model string.
# The resolvers are lambdas so ``settings`` is read when they are called,
# not when this module is imported.
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    # Agents whose empty override falls back to the global LLM_MODEL.
    "classifier": lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
    "home-agent": lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
    "unified-processor": lambda: (
        settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL
    ),
    "cloud-processor": lambda: (
        settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL
    ),
    "brief-agent": lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,
    "task-brief-agent": lambda: (
        settings.LLM_MODEL_TASK_BRIEF_AGENT or settings.LLM_MODEL
    ),
    "setup": lambda: settings.LLM_MODEL_SETUP_AGENT or settings.LLM_MODEL,
    # Memory extraction/mining fall back to the lightweight model instead.
    "memory-extractor": lambda: (
        settings.LLM_MODEL_MEMORY_EXTRACTOR or _LIGHTWEIGHT_MODEL
    ),
    "memory-miner": lambda: settings.LLM_MODEL_MEMORY_MINER or _LIGHTWEIGHT_MODEL,
    "memory-auditor": lambda: (
        settings.LLM_MODEL_MEMORY_AUDITOR or settings.LLM_MODEL
    ),
    # No settings override is consulted for the note summarizer.
    "note-summarizer": lambda: _LIGHTWEIGHT_MODEL,
}
|
||
|
||
|
||
def model_for_agent(agent_name: str) -> str:
    """Resolve the model string used by *agent_name* (for Langfuse tracking).

    Agent names without an entry in ``_AGENT_MODEL_SETTINGS`` resolve to
    ``settings.LLM_MODEL``.
    """
    resolver = _AGENT_MODEL_SETTINGS.get(agent_name)
    if resolver is None:
        return settings.LLM_MODEL
    return resolver()
|
||
|
||
|
||
def get_agent_llm(
    agent_name: str,
    *,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Build the chat model for *agent_name*, honouring per-agent overrides.

    The model string comes from ``model_for_agent``; the chat model itself is
    constructed by ``get_llm``.

    Parameters
    ----------
    agent_name:
        Name of the agent whose model should be resolved.
    temperature:
        Sampling temperature forwarded to ``get_llm`` (default ``0``).
    """
    return get_llm(model=model_for_agent(agent_name), temperature=temperature)
|
||
|
||
|
||
async def embed(text: str) -> list[float]:
    """Return an embedding vector for *text*.

    The model is read from ``settings.LLM_EMBED_MODEL``, so the same provider
    switch in ``.env`` (e.g. ``github_copilot/text-embedding-3-small``)
    applies here without any code changes.

    Parameters
    ----------
    text:
        The string to embed.

    Returns
    -------
    The embedding of *text* taken from the first item of the provider
    response.
    """
    model = settings.LLM_EMBED_MODEL

    # Any provider-prefixed identifier (Copilot, Bedrock, etc.) goes through
    # LiteLLM so the provider's auth mechanism is applied correctly.  A
    # separate ``github_copilot/`` check is unnecessary: that prefix already
    # contains a "/".
    if "/" in model:
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # Plain OpenAI model name — use the raw AsyncOpenAI client.  The client
    # is created per call, so close it on exit to release its HTTP
    # connection pool instead of leaking one per embedding request.
    async with AsyncOpenAI(api_key=settings.OPENAI_API_KEY) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding
|