"""LLM factory — centralised model instantiation via LiteLLM.

Every agent and the orchestrator call ``get_llm()`` instead of directly
constructing a provider-specific class.  The model string follows the
`LiteLLM model naming convention <https://docs.litellm.ai/docs/providers>`_:

* OpenAI:    ``gpt-4o``, ``gpt-4o-mini``
* Anthropic: ``anthropic/claude-3.5-sonnet``
* Google:    ``gemini/gemini-pro``
* Ollama:    ``ollama/llama3``
* Bedrock:   ``bedrock/anthropic.claude-v2``

Switch providers by changing **LLM_MODEL** in ``.env`` — no code changes
required.
"""

from __future__ import annotations

import os
import warnings
from collections.abc import Callable

from openai import AsyncOpenAI
import litellm
from langchain_openai import ChatOpenAI
from langchain_litellm import ChatLiteLLM
from litellm import get_supported_openai_params  # noqa: F401 – validates install

from app.config.settings import settings

# Some models (e.g. gpt-5, o-series) reject unsupported params like temperature.
# Drop them silently instead of raising UnsupportedParamsError.
litellm.drop_params = True

# Some provider responses include a plain dict in the `usage` field where a
# richer Pydantic model is expected.  This warning is noisy but non-fatal.
warnings.filterwarnings(
    "ignore",
    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
    category=UserWarning,
)


def _api_key_for_model(model: str) -> str | None:
    """Return the most appropriate API key for the given LiteLLM model string.

    The provider is recognised by the prefix of *model*.  An empty key in
    ``settings`` is normalised to ``None`` so callers never pass ``""`` on
    to a client.

    Parameters
    ----------
    model:
        LiteLLM model identifier, e.g. ``"anthropic/claude-3.5-sonnet"`` or
        a plain name such as ``"gpt-4o"``.

    Returns
    -------
    str | None
        The configured key for the matched provider, or ``None`` when the
        key is unset or the provider needs no key.
    """
    if model.startswith("anthropic/"):
        return settings.ANTHROPIC_API_KEY or None
    if model.startswith(("gemini/", "google/")):
        return settings.GOOGLE_API_KEY or None
    if model.startswith("cerebras/"):
        return settings.CEREBRAS_API_KEY or None
    if model.startswith("groq/"):
        return settings.GROQ_API_KEY or None
    if model.startswith("deepseek/"):
        return settings.DEEPSEEK_API_KEY or None
    if model.startswith("github_copilot/"):
        # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
        # No API key is required; returning None lets LiteLLM handle auth.
        return None
    # Default: OpenAI-compatible (covers plain model names like "gpt-4o").
    # Any prefix not listed above also ends up here.
    return settings.OPENAI_API_KEY or None


def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Return a LangChain chat model for *model*.

    Model identifiers that contain a ``/`` (``anthropic/…``, ``gemini/…``,
    ``github_copilot/…``, etc.) are wrapped in ``ChatLiteLLM`` so LiteLLM
    handles routing and auth.  Plain names such as ``gpt-4o`` are wrapped in
    ``ChatOpenAI`` using the OpenAI key from ``settings``.

    Side effect: when ``settings.GITHUB_COPILOT_TOKEN_DIR`` is set, it is
    exported as the ``GITHUB_COPILOT_TOKEN_DIR`` environment variable unless
    that variable already has a value.

    Parameters
    ----------
    model:
        LiteLLM model identifier.  Defaults to ``settings.LLM_MODEL`` (also
        used when an empty string is passed).
    temperature:
        Sampling temperature.  ``0`` = deterministic.

    Returns
    -------
    ChatOpenAI | ChatLiteLLM
        ``ChatLiteLLM`` for provider-prefixed models, ``ChatOpenAI``
        otherwise.
    """
    model = model or settings.LLM_MODEL

    # Point LiteLLM to the custom token directory when configured.
    # setdefault() keeps a value that is already present in the environment.
    if settings.GITHUB_COPILOT_TOKEN_DIR:
        os.environ.setdefault(
            "GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR
        )

    # Use ChatLiteLLM for provider-prefixed models (github_copilot/, anthropic/, etc.)
    # so LiteLLM handles routing and auth.  ChatOpenAI for plain OpenAI model names.
    if "/" in model:
        # NOTE(review): the per-provider keys resolved by _api_key_for_model()
        # are not forwarded here; presumably LiteLLM reads them from the
        # environment — confirm.
        return ChatLiteLLM(model=model, temperature=temperature)

    return ChatOpenAI(
        model=model,
        temperature=temperature,
        api_key=_api_key_for_model(model),
    )


# Agent name -> zero-argument resolver for that agent's model string.
# Resolvers are callables so ``settings`` is read at call time, not at import.
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    "classifier": lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
    "home-agent": lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
    "floating-agent": lambda: settings.LLM_MODEL_FLOATING_AGENT or settings.LLM_MODEL,
    "unified-processor": lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
    "cloud-processor": lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
    "brief-agent": lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,
    "task-brief-agent": lambda: settings.LLM_MODEL_TASK_BRIEF_AGENT or settings.LLM_MODEL,
    "setup": lambda: settings.LLM_MODEL_SETUP_AGENT or settings.LLM_MODEL,
    "memory-extractor": lambda: settings.LLM_MODEL_MEMORY_EXTRACTOR or "gpt-4o-mini",
    "memory-miner": lambda: settings.LLM_MODEL_MEMORY_MINER or "gpt-4o-mini",
    "memory-auditor": lambda: settings.LLM_MODEL_MEMORY_AUDITOR or settings.LLM_MODEL,
    "note-summarizer": lambda: "gpt-4o-mini",
}


def model_for_agent(agent_name: str) -> str:
    """Return the resolved model string for *agent_name* (for Langfuse tracking).

    Unknown agent names resolve to ``settings.LLM_MODEL``.
    """
    resolver = _AGENT_MODEL_SETTINGS.get(agent_name)
    if resolver is None:
        return settings.LLM_MODEL
    return resolver()


def get_agent_llm(
    agent_name: str,
    *,
    temperature: float = 0,
) -> ChatOpenAI | ChatLiteLLM:
    """Return an LLM configured for *agent_name*, respecting per-agent overrides.

    The model is resolved by ``model_for_agent``.  Unknown agent names use
    ``settings.LLM_MODEL``.  When a per-agent override is left empty, most
    agents also fall back to ``settings.LLM_MODEL``; ``memory-extractor``
    and ``memory-miner`` fall back to ``gpt-4o-mini``, and
    ``note-summarizer`` always uses ``gpt-4o-mini``.

    Parameters
    ----------
    agent_name:
        Key into the per-agent model table (e.g. ``"classifier"``).
    temperature:
        Sampling temperature passed through to ``get_llm``.
    """
    return get_llm(model=model_for_agent(agent_name), temperature=temperature)


async def embed(text: str) -> list[float]:
    """Return an embedding vector for *text*.

    Uses ``settings.LLM_EMBED_MODEL`` so the same provider switch in ``.env``
    (e.g. ``github_copilot/text-embedding-3-small``) applies here without any
    code changes.  Provider-prefixed models go through
    ``litellm.aembedding``; plain OpenAI model names use the raw
    ``AsyncOpenAI`` client.

    Parameters
    ----------
    text:
        The string to embed.

    Returns
    -------
    list[float]
        The embedding of the single input item.
    """
    model = settings.LLM_EMBED_MODEL

    if "/" in model:
        # Use LiteLLM for all provider-prefixed models (Copilot, Bedrock, etc.)
        # so the provider's auth mechanism is applied correctly.
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # Plain OpenAI model name — use the raw AsyncOpenAI client.  The key is
    # resolved the same way as in get_llm() (empty string -> None), and the
    # client is closed on exit instead of leaving a connection pool open per call.
    async with AsyncOpenAI(api_key=_api_key_for_model(model)) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding