feat(chat): integrate Langfuse tracing, prompt management & generation tracking
- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton, create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(), score_trace(), flush/shutdown helpers. Gracefully no-ops when keys are missing.
- services/chat/app/llm.py: add callbacks param to get_llm() for LangChain callback handler injection
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and _run_single_agent* functions, pipe callbacks to LLM calls, fetch managed prompts from Langfuse with fallback to hardcoded system prompts
- services/chat/app/redis_consumer.py: create Langfuse trace per request (home_request/floating_request), pass callback handler to deep_agent, link prompt name to trace, attach output preview, flush after each request
- services/chat/app/main.py: shutdown Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system', 'floating_system' — create these in the Langfuse dashboard to manage prompts. Without them, hardcoded defaults are used transparently.
This commit is contained in:
@@ -18,6 +18,7 @@ from app.deep_agent import run_floating_stream, run_home_stream
|
||||
from app.memory_middleware import MemoryMiddleware
|
||||
from app.output_formatter import StreamFormatter
|
||||
from app.ws_context import clear_current_user, set_current_user
|
||||
from app import tracing
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -84,6 +85,19 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
|
||||
user_id, request_id, message[:200],
|
||||
)
|
||||
|
||||
# Create Langfuse trace
|
||||
trace = tracing.create_trace(
|
||||
name="home_request",
|
||||
user_id=user_id,
|
||||
session_id=session_id,
|
||||
trace_id=request_id,
|
||||
metadata={"message_preview": message[:200]},
|
||||
tags=["home"],
|
||||
)
|
||||
langfuse_handler = tracing.get_langfuse_callback(
|
||||
trace=trace, span_name="home_agent",
|
||||
)
|
||||
|
||||
# Enrich with memory context
|
||||
async with async_session() as db:
|
||||
memory = MemoryMiddleware(db)
|
||||
@@ -101,7 +115,7 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
|
||||
set_current_user(user_id)
|
||||
response_chunks: list[str] = []
|
||||
try:
|
||||
event_stream = run_home_stream(user_id, message, context)
|
||||
event_stream = run_home_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
||||
formatter = StreamFormatter(request_id=request_id)
|
||||
async for ws_frame in formatter.format(event_stream):
|
||||
await _publish_frame(user_id, ws_frame.model_dump_json())
|
||||
@@ -112,6 +126,13 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
|
||||
finally:
|
||||
clear_current_user()
|
||||
|
||||
# Link prompt and flush trace
|
||||
if trace is not None:
|
||||
tracing.link_prompt_to_trace(trace, "home_system")
|
||||
response_text = "".join(response_chunks)
|
||||
trace.update(output=response_text[:500] if response_text else None)
|
||||
tracing.flush()
|
||||
|
||||
# Store episode
|
||||
async with async_session() as db:
|
||||
memory = MemoryMiddleware(db)
|
||||
@@ -133,6 +154,19 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
|
||||
user_id, request_id, json.dumps(scope)[:200], message[:200],
|
||||
)
|
||||
|
||||
# Create Langfuse trace
|
||||
trace = tracing.create_trace(
|
||||
name="floating_request",
|
||||
user_id=user_id,
|
||||
session_id=session_id,
|
||||
trace_id=request_id,
|
||||
metadata={"message_preview": message[:200], "scope": scope},
|
||||
tags=["floating"],
|
||||
)
|
||||
langfuse_handler = tracing.get_langfuse_callback(
|
||||
trace=trace, span_name="floating_agent",
|
||||
)
|
||||
|
||||
# Enrich with memory context
|
||||
async with async_session() as db:
|
||||
memory = MemoryMiddleware(db)
|
||||
@@ -150,7 +184,7 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
|
||||
set_current_user(user_id)
|
||||
response_chunks: list[str] = []
|
||||
try:
|
||||
event_stream = run_floating_stream(user_id, message, context)
|
||||
event_stream = run_floating_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
||||
formatter = StreamFormatter(request_id=request_id)
|
||||
async for ws_frame in formatter.format(event_stream):
|
||||
await _publish_frame(user_id, ws_frame.model_dump_json())
|
||||
@@ -161,6 +195,13 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
|
||||
finally:
|
||||
clear_current_user()
|
||||
|
||||
# Link prompt and flush trace
|
||||
if trace is not None:
|
||||
tracing.link_prompt_to_trace(trace, "floating_system")
|
||||
response_text = "".join(response_chunks)
|
||||
trace.update(output=response_text[:500] if response_text else None)
|
||||
tracing.flush()
|
||||
|
||||
# Store episode
|
||||
async with async_session() as db:
|
||||
memory = MemoryMiddleware(db)
|
||||
|
||||
Reference in New Issue
Block a user