feat(chat): integrate Langfuse tracing, prompt management & generation tracking

- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton,
  create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(),
  score_trace(), and flush/shutdown helpers. The module gracefully no-ops when
  the Langfuse keys are missing.
- services/chat/app/llm.py: add callbacks param to get_llm() for LangChain
  callback handler injection
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and
  _run_single_agent* functions, pipe callbacks to LLM calls, fetch managed
  prompts from Langfuse with fallback to hardcoded system prompts
- services/chat/app/redis_consumer.py: create a Langfuse trace per request
  (home_request/floating_request), pass the callback handler to deep_agent,
  link the prompt name to the trace, attach a preview of the response (first
  500 characters) as the trace output, and flush after each request
- services/chat/app/main.py: shutdown Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system' and 'floating_system'. Create prompts
with these names in the Langfuse dashboard to manage them there. If they do
not exist, the hardcoded default system prompts are used as a fallback.
This commit is contained in:
Roberto Musso
2026-03-22 23:15:04 +01:00
parent aff68a9051
commit 0d5fa3e569
7 changed files with 362 additions and 18 deletions

View File

@@ -18,6 +18,7 @@ from app.deep_agent import run_floating_stream, run_home_stream
from app.memory_middleware import MemoryMiddleware
from app.output_formatter import StreamFormatter
from app.ws_context import clear_current_user, set_current_user
from app import tracing
logger = logging.getLogger(__name__)
@@ -84,6 +85,19 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
user_id, request_id, message[:200],
)
# Create Langfuse trace
trace = tracing.create_trace(
name="home_request",
user_id=user_id,
session_id=session_id,
trace_id=request_id,
metadata={"message_preview": message[:200]},
tags=["home"],
)
langfuse_handler = tracing.get_langfuse_callback(
trace=trace, span_name="home_agent",
)
# Enrich with memory context
async with async_session() as db:
memory = MemoryMiddleware(db)
@@ -101,7 +115,7 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
set_current_user(user_id)
response_chunks: list[str] = []
try:
event_stream = run_home_stream(user_id, message, context)
event_stream = run_home_stream(user_id, message, context, langfuse_handler=langfuse_handler)
formatter = StreamFormatter(request_id=request_id)
async for ws_frame in formatter.format(event_stream):
await _publish_frame(user_id, ws_frame.model_dump_json())
@@ -112,6 +126,13 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
finally:
clear_current_user()
# Link prompt and flush trace
if trace is not None:
tracing.link_prompt_to_trace(trace, "home_system")
response_text = "".join(response_chunks)
trace.update(output=response_text[:500] if response_text else None)
tracing.flush()
# Store episode
async with async_session() as db:
memory = MemoryMiddleware(db)
@@ -133,6 +154,19 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
user_id, request_id, json.dumps(scope)[:200], message[:200],
)
# Create Langfuse trace
trace = tracing.create_trace(
name="floating_request",
user_id=user_id,
session_id=session_id,
trace_id=request_id,
metadata={"message_preview": message[:200], "scope": scope},
tags=["floating"],
)
langfuse_handler = tracing.get_langfuse_callback(
trace=trace, span_name="floating_agent",
)
# Enrich with memory context
async with async_session() as db:
memory = MemoryMiddleware(db)
@@ -150,7 +184,7 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
set_current_user(user_id)
response_chunks: list[str] = []
try:
event_stream = run_floating_stream(user_id, message, context)
event_stream = run_floating_stream(user_id, message, context, langfuse_handler=langfuse_handler)
formatter = StreamFormatter(request_id=request_id)
async for ws_frame in formatter.format(event_stream):
await _publish_frame(user_id, ws_frame.model_dump_json())
@@ -161,6 +195,13 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
finally:
clear_current_user()
# Link prompt and flush trace
if trace is not None:
tracing.link_prompt_to_trace(trace, "floating_system")
response_text = "".join(response_chunks)
trace.update(output=response_text[:500] if response_text else None)
tracing.flush()
# Store episode
async with async_session() as db:
memory = MemoryMiddleware(db)