- Langfuse SDK v4: fix prompt-to-trace linking (as_type=generation) - tracing: compile_prompt with Langfuse managed prompt fallback - journey: remove journey CLI subcommand (keep only interactive) - LLM: add service-specific llm modules for batch-agent and chat - gitignore: exclude eval private test data - config: add LANGFUSE settings to shared config
78 lines
2.2 KiB
Python
78 lines
2.2 KiB
Python
"""LLM factory — centralised model instantiation via LiteLLM.
|
|
|
|
Adapted from app/core/llm.py for the Chat Service.
|
|
Uses shared.config.settings instead of app.config.settings.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import warnings
|
|
|
|
from openai import AsyncOpenAI
|
|
import litellm
|
|
|
|
from langchain_openai import ChatOpenAI
|
|
from langchain_litellm import ChatLiteLLM
|
|
|
|
from shared.config import settings
|
|
|
|
# Module-wide LiteLLM switch, applied once at import time.
# NOTE(review): per LiteLLM's documentation this makes it drop request
# parameters a provider does not support instead of raising — confirm.
litellm.drop_params = True

# Silence the UserWarning whose message matches the pattern below.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
)
|
|
|
|
|
|
def _api_key_for_model(model: str) -> str | None:
|
|
if model.startswith("anthropic/"):
|
|
return settings.ANTHROPIC_API_KEY or None
|
|
if model.startswith("gemini/") or model.startswith("google/"):
|
|
return settings.GOOGLE_API_KEY or None
|
|
if model.startswith("cerebras/"):
|
|
return settings.CEREBRAS_API_KEY or None
|
|
if model.startswith("github/"):
|
|
return settings.GITHUB_TOKEN or None
|
|
if model.startswith("github_copilot/"):
|
|
return None
|
|
return settings.OPENAI_API_KEY or None
|
|
|
|
|
|
def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
    callbacks: list | None = None,
) -> ChatOpenAI | ChatLiteLLM:
    """Instantiate a chat model, routing on whether the name has a ``/``.

    Args:
        model: Model identifier; falls back to ``settings.LLM_MODEL`` when
            omitted or empty.
        temperature: Sampling temperature forwarded to the model.
        callbacks: Optional callbacks forwarded to the model.

    Returns:
        A ``ChatLiteLLM`` when the model name contains ``/``, otherwise a
        ``ChatOpenAI`` configured with the key from ``_api_key_for_model``.

    Side effects:
        Copies ``settings.GITHUB_COPILOT_TOKEN_DIR`` and
        ``settings.GITHUB_TOKEN`` into ``os.environ`` when they are set and
        the environment does not already define them.
    """
    resolved_model = model or settings.LLM_MODEL

    # setdefault keeps any value already present in the process environment.
    for env_name in ("GITHUB_COPILOT_TOKEN_DIR", "GITHUB_TOKEN"):
        configured = getattr(settings, env_name)
        if configured:
            os.environ.setdefault(env_name, configured)

    shared_kwargs = {
        "model": resolved_model,
        "temperature": temperature,
        "callbacks": callbacks,
    }

    if "/" not in resolved_model:
        return ChatOpenAI(
            api_key=_api_key_for_model(resolved_model),
            **shared_kwargs,
        )

    # NOTE(review): no explicit key is passed on this path, so prefixed models
    # never reach _api_key_for_model from here; presumably LiteLLM resolves
    # provider credentials on its own — confirm.
    return ChatLiteLLM(**shared_kwargs)
|
|
|
|
|
|
async def embed(text: str) -> list[float]:
    """Return the embedding vector for *text*.

    The model is read from ``settings.LLM_EMBED_MODEL``. Names containing a
    ``/`` are sent through ``litellm.aembedding``; all other names go to the
    OpenAI embeddings endpoint using ``settings.OPENAI_API_KEY``.

    Args:
        text: The string to embed.

    Returns:
        The embedding of the first (and only) input item.
    """
    model = settings.LLM_EMBED_MODEL

    # Every "github_copilot/..." name contains "/", so this single check
    # covers the same models as the previous two-part condition.
    if "/" in model:
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # A client is created per call; the context manager closes its underlying
    # HTTP resources once the request completes instead of leaking them.
    async with AsyncOpenAI(api_key=settings.OPENAI_API_KEY) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding
|