# api/app/core/llm.py

"""LLM factory — centralised model instantiation via LiteLLM.

Every agent and the orchestrator call ``get_llm()`` or ``get_router_llm()``
instead of directly constructing a provider-specific class.  The model string
follows the `LiteLLM model naming convention
<https://docs.litellm.ai/docs/providers>`_:

* OpenAI:     ``gpt-4o``, ``gpt-4o-mini``
* Anthropic:  ``anthropic/claude-3.5-sonnet``
* Google:     ``gemini/gemini-pro``
* Ollama:     ``ollama/llama3``
* Bedrock:    ``bedrock/anthropic.claude-v2``

Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in ``.env``
— no code changes required.
"""

from __future__ import annotations

from langchain_openai import ChatOpenAI
from litellm import get_supported_openai_params  # noqa: F401 – validates install

from app.config.settings import settings


def _api_key_for_model(model: str) -> str | None:
    """Return the API key configured for the provider implied by *model*.

    The provider is inferred from the prefix of the LiteLLM model string:

    * ``anthropic/…``              → ``settings.ANTHROPIC_API_KEY``
    * ``gemini/…`` or ``google/…`` → ``settings.GOOGLE_API_KEY``
    * anything else                → ``settings.OPENAI_API_KEY``

    The last case covers bare model names such as ``gpt-4o`` and also every
    prefix without a dedicated branch (e.g. ``ollama/…``, ``bedrock/…``).

    Parameters
    ----------
    model:
        LiteLLM model identifier, e.g. ``"anthropic/claude-3.5-sonnet"``.

    Returns
    -------
    str | None
        The configured key, or ``None`` when the selected setting is falsy
        (for instance an empty string).  The Anthropic and Google settings
        are optional: a missing attribute also yields ``None``.
        ``OPENAI_API_KEY`` is read directly, so it must exist on ``settings``.
    """
    if model.startswith("anthropic/"):
        return getattr(settings, "ANTHROPIC_API_KEY", None) or None
    # ``str.startswith`` accepts a tuple, so both Google spellings are
    # checked in a single call.
    if model.startswith(("gemini/", "google/")):
        return getattr(settings, "GOOGLE_API_KEY", None) or None
    # Default: OpenAI-compatible.  ``or None`` normalises an empty string so
    # callers never receive a blank key.
    return settings.OPENAI_API_KEY or None


def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
) -> ChatOpenAI:
    """Build a ``ChatOpenAI`` client for the requested model.

    The model identifier is resolved first (explicit argument, otherwise the
    configured default), then the matching API key is looked up with
    ``_api_key_for_model`` and both are handed to ``ChatOpenAI`` together
    with the sampling temperature.

    NOTE(review): no ``base_url`` is passed here, so whether requests for
    provider-prefixed models (``anthropic/…``, ``gemini/…``, …) actually
    reach a LiteLLM endpoint presumably depends on configuration outside
    this function — confirm.

    Parameters
    ----------
    model:
        LiteLLM model identifier.  ``None`` or an empty string selects
        ``settings.LLM_MODEL``.
    temperature:
        Sampling temperature forwarded unchanged to ``ChatOpenAI``.

    Returns
    -------
    ChatOpenAI
        A newly constructed chat model instance.
    """
    # Any falsy value (None, "") means "use the configured default".
    resolved_model = model if model else settings.LLM_MODEL
    client_kwargs = {
        "model": resolved_model,
        "temperature": temperature,
        "api_key": _api_key_for_model(resolved_model),
    }
    return ChatOpenAI(**client_kwargs)


def get_router_llm(
    *,
    temperature: float = 0,
) -> ChatOpenAI:
    """Return the lighter model used for intent classification / routing.

    Thin wrapper around ``get_llm`` that selects
    ``settings.LLM_ROUTER_MODEL``.  If that setting is falsy, ``get_llm``
    falls back to ``settings.LLM_MODEL``.

    Parameters
    ----------
    temperature:
        Sampling temperature forwarded to ``get_llm``.

    Returns
    -------
    ChatOpenAI
        The chat model instance produced by ``get_llm``.
    """
    router_model = settings.LLM_ROUTER_MODEL
    return get_llm(temperature=temperature, model=router_model)