feat(chat): integrate Langfuse tracing, prompt management & generation tracking

- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton, create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(), score_trace(), and flush/shutdown helpers. Every helper degrades to a no-op when the keys are missing.
- services/chat/app/llm.py: add a callbacks param to get_llm() for LangChain callback handler injection; remove the get_router_llm() helper
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and _run_single_agent* functions, pipe callbacks to LLM calls, and fetch managed prompts from Langfuse with a fallback to the hardcoded system prompts
- services/chat/app/redis_consumer.py: create a Langfuse trace per request (home_request/floating_request), pass the callback handler to deep_agent, link the prompt name to the trace, attach an output preview, and flush after each request
- services/chat/app/main.py: shut down the Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system', 'floating_system' — create these in the Langfuse dashboard to manage prompts. Without them, hardcoded defaults are used transparently.
2026-03-22 23:15:04 +01:00
parent aff68a9051
commit 0d5fa3e569
7 changed files with 362 additions and 18 deletions
--- a/services/chat/app/llm.py
+++ b/services/chat/app/llm.py
@@ -42,6 +42,7 @@ def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
+    callbacks: list | None = None,
 ) -> ChatOpenAI | ChatLiteLLM:
    model = model or settings.LLM_MODEL

@@ -49,22 +50,16 @@ def get_llm(
        os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)

    if "/" in model:
-        return ChatLiteLLM(model=model, temperature=temperature)
+        return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)

    return ChatOpenAI(
        model=model,
        temperature=temperature,
        api_key=_api_key_for_model(model),
+        callbacks=callbacks,
    )


-def get_router_llm(
-    *,
-    temperature: float = 0,
-) -> ChatOpenAI | ChatLiteLLM:
-    return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature)
-
-
 async def embed(text: str) -> list[float]:
    model = settings.LLM_EMBED_MODEL