feat(chat): integrate Langfuse tracing, prompt management & generation tracking

- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton, create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(), score_trace(), flush/shutdown helpers. Gracefully no-ops when keys are missing.
- services/chat/app/llm.py: add callbacks param to get_llm() for LangChain callback handler injection
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and _run_single_agent* functions, pipe callbacks to LLM calls, fetch managed prompts from Langfuse with fallback to hardcoded system prompts
- services/chat/app/redis_consumer.py: create Langfuse trace per request (home_request/floating_request), pass callback handler to deep_agent, link prompt name to trace, attach output preview, flush after each request
- services/chat/app/main.py: shut down Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system', 'floating_system' — create these in the Langfuse dashboard to manage prompts. Without them, hardcoded defaults are used transparently.
2026-03-22 23:15:04 +01:00
parent aff68a9051
commit 0d5fa3e569
7 changed files with 362 additions and 18 deletions
--- a/services/chat/app/redis_consumer.py
+++ b/services/chat/app/redis_consumer.py
@@ -18,6 +18,7 @@ from app.deep_agent import run_floating_stream, run_home_stream
 from app.memory_middleware import MemoryMiddleware
 from app.output_formatter import StreamFormatter
 from app.ws_context import clear_current_user, set_current_user
+from app import tracing

 logger = logging.getLogger(__name__)

@@ -84,6 +85,19 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
        user_id, request_id, message[:200],
    )

+    # Create Langfuse trace
+    trace = tracing.create_trace(
+        name="home_request",
+        user_id=user_id,
+        session_id=session_id,
+        trace_id=request_id,
+        metadata={"message_preview": message[:200]},
+        tags=["home"],
+    )
+    langfuse_handler = tracing.get_langfuse_callback(
+        trace=trace, span_name="home_agent",
+    )
+
    # Enrich with memory context
    async with async_session() as db:
        memory = MemoryMiddleware(db)
@@ -101,7 +115,7 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
    set_current_user(user_id)
    response_chunks: list[str] = []
    try:
-        event_stream = run_home_stream(user_id, message, context)
+        event_stream = run_home_stream(user_id, message, context, langfuse_handler=langfuse_handler)
        formatter = StreamFormatter(request_id=request_id)
        async for ws_frame in formatter.format(event_stream):
            await _publish_frame(user_id, ws_frame.model_dump_json())
@@ -112,6 +126,13 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
    finally:
        clear_current_user()

+    # Link prompt and flush trace
+    if trace is not None:
+        tracing.link_prompt_to_trace(trace, "home_system")
+        response_text = "".join(response_chunks)
+        trace.update(output=response_text[:500] if response_text else None)
+    tracing.flush()
+
    # Store episode
    async with async_session() as db:
        memory = MemoryMiddleware(db)
@@ -133,6 +154,19 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
        user_id, request_id, json.dumps(scope)[:200], message[:200],
    )

+    # Create Langfuse trace
+    trace = tracing.create_trace(
+        name="floating_request",
+        user_id=user_id,
+        session_id=session_id,
+        trace_id=request_id,
+        metadata={"message_preview": message[:200], "scope": scope},
+        tags=["floating"],
+    )
+    langfuse_handler = tracing.get_langfuse_callback(
+        trace=trace, span_name="floating_agent",
+    )
+
    # Enrich with memory context
    async with async_session() as db:
        memory = MemoryMiddleware(db)
@@ -150,7 +184,7 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
    set_current_user(user_id)
    response_chunks: list[str] = []
    try:
-        event_stream = run_floating_stream(user_id, message, context)
+        event_stream = run_floating_stream(user_id, message, context, langfuse_handler=langfuse_handler)
        formatter = StreamFormatter(request_id=request_id)
        async for ws_frame in formatter.format(event_stream):
            await _publish_frame(user_id, ws_frame.model_dump_json())
@@ -161,6 +195,13 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
    finally:
        clear_current_user()

+    # Link prompt and flush trace
+    if trace is not None:
+        tracing.link_prompt_to_trace(trace, "floating_system")
+        response_text = "".join(response_chunks)
+        trace.update(output=response_text[:500] if response_text else None)
+    tracing.flush()
+
    # Store episode
    async with async_session() as db:
        memory = MemoryMiddleware(db)