"""LLM factory — centralised model instantiation via LiteLLM.

Every agent and the orchestrator call ``get_llm()`` or ``get_router_llm()``
instead of directly constructing a provider-specific class.

The model string follows the LiteLLM model naming convention:

* OpenAI:    ``gpt-4o``, ``gpt-4o-mini``
* Anthropic: ``anthropic/claude-3.5-sonnet``
* Google:    ``gemini/gemini-pro``
* Ollama:    ``ollama/llama3``
* Bedrock:   ``bedrock/anthropic.claude-v2``

Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in
``.env`` — no code changes required.
"""

from __future__ import annotations

import os

import litellm
from langchain_openai import ChatOpenAI
from litellm import get_supported_openai_params  # noqa: F401 – validates install
from openai import AsyncOpenAI

from app.config.settings import settings


def _api_key_for_model(model: str) -> str | None:
    """Return the most appropriate API key for the given LiteLLM model string.

    The provider is chosen from the model string's prefix. An unset or empty
    key in ``settings`` is normalised to ``None``.

    Parameters
    ----------
    model:
        LiteLLM model identifier, e.g. ``anthropic/claude-3.5-sonnet`` or a
        plain name such as ``gpt-4o``.

    Returns
    -------
    The configured API key for the matching provider, or ``None`` when no key
    is configured or the provider does not use one.
    """
    if model.startswith("anthropic/"):
        return settings.ANTHROPIC_API_KEY or None
    if model.startswith(("gemini/", "google/")):
        return settings.GOOGLE_API_KEY or None
    if model.startswith("cerebras/"):
        return settings.CEREBRAS_API_KEY or None
    if model.startswith("github_copilot/"):
        # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
        # No API key is required; returning None lets LiteLLM handle auth.
        return None
    # Default: OpenAI-compatible (covers plain model names like "gpt-4o").
    return settings.OPENAI_API_KEY or None


def _configure_copilot_token_dir() -> None:
    """Export ``GITHUB_COPILOT_TOKEN_DIR`` from settings when it is configured.

    ``setdefault`` is used so a value already present in the process
    environment takes precedence over the one from ``settings``.
    """
    if settings.GITHUB_COPILOT_TOKEN_DIR:
        os.environ.setdefault(
            "GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR
        )


def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
) -> ChatOpenAI:
    """Return a LangChain ``ChatOpenAI`` chat model for *model*.

    The instance is built with the model name, the sampling temperature and
    the API key selected by ``_api_key_for_model``. The design intent is that
    provider-prefixed model strings (``anthropic/…``, ``gemini/…``, etc.) are
    served through LiteLLM's OpenAI-compatible interface.

    NOTE(review): no ``base_url`` is passed here, so ``ChatOpenAI`` talks to
    its default endpoint unless one is supplied through the environment —
    confirm that provider-prefixed models are actually routed via a LiteLLM
    proxy in deployment.

    Parameters
    ----------
    model:
        LiteLLM model identifier. Defaults to ``settings.LLM_MODEL``.
    temperature:
        Sampling temperature. ``0`` = deterministic.

    Returns
    -------
    A configured ``ChatOpenAI`` instance.
    """
    model = model or settings.LLM_MODEL

    # Point LiteLLM to the custom token directory when configured.
    _configure_copilot_token_dir()

    return ChatOpenAI(
        model=model,
        temperature=temperature,
        api_key=_api_key_for_model(model),
    )


def get_router_llm(
    *,
    temperature: float = 0,
) -> ChatOpenAI:
    """Return the lighter model used for intent classification / routing.

    Equivalent to ``get_llm`` with ``settings.LLM_ROUTER_MODEL`` as the model.

    Parameters
    ----------
    temperature:
        Sampling temperature. ``0`` = deterministic.
    """
    return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature)


async def embed(text: str) -> list[float]:
    """Return an embedding vector for *text*.

    Uses ``settings.LLM_EMBED_MODEL`` so the same provider switch in ``.env``
    (e.g. ``github_copilot/text-embedding-3-small``) applies here without any
    code changes. Falls back to the raw ``AsyncOpenAI`` client for plain
    OpenAI model names to preserve existing behaviour.

    Parameters
    ----------
    text:
        The text to embed.

    Returns
    -------
    The embedding of *text* as returned by the provider.
    """
    model = settings.LLM_EMBED_MODEL

    if "/" in model:
        # Use LiteLLM for all provider-prefixed models (Copilot, Bedrock, etc.)
        # so the provider's auth mechanism is applied correctly.
        # Apply the token directory here too: embed() may run before any
        # get_llm() call has exported it.
        _configure_copilot_token_dir()
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # Plain OpenAI model name — use the raw AsyncOpenAI client (existing path).
    # The context manager closes the client's HTTP resources after the call
    # instead of leaving a fresh, unclosed client behind on every invocation.
    async with AsyncOpenAI(api_key=settings.OPENAI_API_KEY) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding