feat(chat): integrate Langfuse tracing, prompt management & generation tracking

- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton,
  create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(),
  score_trace(), flush/shutdown helpers. Gracefully no-ops when keys are missing.
- services/chat/app/llm.py: add callbacks param to get_llm() for LangChain
  callback handler injection
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and
  _run_single_agent* functions, pipe callbacks to LLM calls, fetch managed
  prompts from Langfuse with fallback to hardcoded system prompts
- services/chat/app/redis_consumer.py: create Langfuse trace per request
  (home_request/floating_request), pass callback handler to deep_agent,
  link prompt name to trace, attach output preview, flush after each request
- services/chat/app/main.py: shutdown Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system', 'floating_system' — create these in
the Langfuse dashboard to manage prompts. Without them, hardcoded defaults
are used transparently.
This commit is contained in:
Roberto Musso
2026-03-22 23:15:04 +01:00
parent aff68a9051
commit 0d5fa3e569
7 changed files with 362 additions and 18 deletions

View File

@@ -23,6 +23,7 @@ from app.agents.timeline_agent import TIMELINE_TOOLS
from app.llm import get_llm
from app.memory_middleware import MemoryMiddleware
from app.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector
from app import tracing
from shared.db import async_session
logger = logging.getLogger(__name__)
@@ -566,6 +567,19 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
return _infer_floating_domain_rule_based(message, context)
def _get_system_prompt(langfuse_name: str, fallback: str) -> str:
    """Resolve the system prompt registered under ``langfuse_name``.

    Looks the prompt up through ``tracing.get_prompt`` (passing
    ``fallback=None``) and returns the looked-up value whenever it is not
    ``None``. Only a ``None`` result selects the hardcoded ``fallback``;
    any other value, including an empty string, is returned unchanged.

    Args:
        langfuse_name: Name of the managed prompt to look up.
        fallback: Hardcoded prompt text used when the lookup yields ``None``.

    Returns:
        The managed prompt if one was returned, otherwise ``fallback``.
    """
    prompt = tracing.get_prompt(langfuse_name, fallback=None)
    if prompt is None:
        return fallback
    return prompt
def _build_callbacks(langfuse_handler: Any | None) -> list[Any] | None:
"""Return a callbacks list if a Langfuse handler is available."""
if langfuse_handler is None:
return None
return [langfuse_handler]
async def _run_single_agent(
*,
user_id: str,
@@ -573,9 +587,11 @@ async def _run_single_agent(
message: str,
context: dict[str, Any],
max_steps: int = 6,
langfuse_handler: Any | None = None,
) -> str:
trace_id = _trace_id_from_context(context)
llm = get_llm()
callbacks = _build_callbacks(langfuse_handler)
llm = get_llm(callbacks=callbacks)
tools = _all_tools_for_user(user_id, trace_id)
model_context = _context_for_model(context)
logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id)
@@ -658,9 +674,11 @@ async def _run_single_agent_stream(
message: str,
context: dict[str, Any],
max_steps: int = 6,
langfuse_handler: Any | None = None,
) -> AsyncGenerator[tuple[str, Any], None]:
trace_id = _trace_id_from_context(context)
llm = get_llm()
callbacks = _build_callbacks(langfuse_handler)
llm = get_llm(callbacks=callbacks)
tools = _all_tools_for_user(user_id, trace_id)
model_context = _context_for_model(context)
logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id)
@@ -751,25 +769,29 @@ async def _run_single_agent_stream(
clear_tool_result_collector()
async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
async def run_home(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> str:
prepared_context = await _prepare_context(message, context)
system_prompt = _get_system_prompt("home_system", _HOME_SINGLE_AGENT_SYSTEM)
response = await _run_single_agent(
user_id=user_id,
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
system_prompt=system_prompt,
message=message,
context=prepared_context,
langfuse_handler=langfuse_handler,
)
return _normalize_tagged_list_lines(response, message)
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, dict[str, str | None]]:
async def run_floating(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> tuple[str, dict[str, str | None]]:
prepared_context = await _prepare_context(message, context)
domain = await _infer_floating_domain(message, prepared_context)
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
response = await _run_single_agent(
user_id=user_id,
system_prompt=_FLOATING_SINGLE_AGENT_SYSTEM,
system_prompt=system_prompt,
message=message,
context=prepared_context,
langfuse_handler=langfuse_handler,
)
sanitized = _strip_floating_markup(response)
if not sanitized and response:
@@ -781,14 +803,18 @@ async def run_home_stream(
user_id: str,
message: str,
context: dict[str, Any],
*,
langfuse_handler: Any | None = None,
) -> AsyncGenerator[tuple[str, Any], None]:
prepared_context = await _prepare_context(message, context)
system_prompt = _get_system_prompt("home_system", _HOME_SINGLE_AGENT_SYSTEM)
text_chunks: list[str] = []
async for event in _run_single_agent_stream(
user_id=user_id,
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
system_prompt=system_prompt,
message=message,
context=prepared_context,
langfuse_handler=langfuse_handler,
):
event_type, data = event
if event_type != "token":
@@ -805,19 +831,23 @@ async def run_floating_stream(
user_id: str,
message: str,
context: dict[str, Any],
*,
langfuse_handler: Any | None = None,
) -> AsyncGenerator[tuple[str, Any], None]:
prepared_context = await _prepare_context(message, context)
domain = await _infer_floating_domain(message, prepared_context)
yield "floating_domain", domain
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
sanitizer = _FloatingStreamSanitizer()
emitted_sanitized = False
raw_chunks: list[str] = []
async for event in _run_single_agent_stream(
user_id=user_id,
system_prompt=_FLOATING_SINGLE_AGENT_SYSTEM,
system_prompt=system_prompt,
message=message,
context=prepared_context,
langfuse_handler=langfuse_handler,
):
event_type, data = event
if event_type != "token":