feat(chat): integrate Langfuse tracing, prompt management & generation tracking
- shared/config.py: add LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST
- services/chat/app/tracing.py: new module — Langfuse client singleton, create_trace(), get_langfuse_callback(), get_prompt(), link_prompt_to_trace(), score_trace(), flush/shutdown helpers. Gracefully no-ops when keys are missing.
- services/chat/app/llm.py: add callbacks param to get_llm() for LangChain callback handler injection
- services/chat/app/deep_agent.py: accept langfuse_handler in all run_* and _run_single_agent* functions, pipe callbacks to LLM calls, fetch managed prompts from Langfuse with fallback to hardcoded system prompts
- services/chat/app/redis_consumer.py: create Langfuse trace per request (home_request/floating_request), pass callback handler to deep_agent, link prompt name to trace, attach output preview, flush after each request
- services/chat/app/main.py: shut down Langfuse client in lifespan teardown
- services/chat/requirements.txt: add langfuse>=2.0.0

Langfuse prompt names: 'home_system', 'floating_system' — create these in the Langfuse dashboard to manage prompts. Without them, hardcoded defaults are used transparently.
This commit is contained in:
@@ -23,6 +23,7 @@ from app.agents.timeline_agent import TIMELINE_TOOLS
|
||||
from app.llm import get_llm
|
||||
from app.memory_middleware import MemoryMiddleware
|
||||
from app.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector
|
||||
from app import tracing
|
||||
from shared.db import async_session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -566,6 +567,19 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
|
||||
return _infer_floating_domain_rule_based(message, context)
|
||||
|
||||
|
||||
def _get_system_prompt(langfuse_name: str, fallback: str) -> str:
    """Resolve the system prompt text for an agent run.

    Asks ``tracing.get_prompt`` for the prompt registered under
    *langfuse_name*, passing ``fallback=None`` so that a missing prompt is
    reported back as ``None`` rather than substituted by the tracing layer.

    Args:
        langfuse_name: Name of the managed prompt to look up.
        fallback: Hardcoded prompt text to use when no managed prompt is
            returned.

    Returns:
        The managed prompt when ``tracing.get_prompt`` returns anything other
        than ``None``; otherwise *fallback*.
    """
    prompt_text = tracing.get_prompt(langfuse_name, fallback=None)
    # Only ``None`` triggers the fallback; any other value (including an
    # empty string) is returned unchanged.
    if prompt_text is None:
        return fallback
    return prompt_text
|
||||
|
||||
|
||||
def _build_callbacks(langfuse_handler: Any | None) -> list[Any] | None:
|
||||
"""Return a callbacks list if a Langfuse handler is available."""
|
||||
if langfuse_handler is None:
|
||||
return None
|
||||
return [langfuse_handler]
|
||||
|
||||
|
||||
async def _run_single_agent(
|
||||
*,
|
||||
user_id: str,
|
||||
@@ -573,9 +587,11 @@ async def _run_single_agent(
|
||||
message: str,
|
||||
context: dict[str, Any],
|
||||
max_steps: int = 6,
|
||||
langfuse_handler: Any | None = None,
|
||||
) -> str:
|
||||
trace_id = _trace_id_from_context(context)
|
||||
llm = get_llm()
|
||||
callbacks = _build_callbacks(langfuse_handler)
|
||||
llm = get_llm(callbacks=callbacks)
|
||||
tools = _all_tools_for_user(user_id, trace_id)
|
||||
model_context = _context_for_model(context)
|
||||
logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id)
|
||||
@@ -658,9 +674,11 @@ async def _run_single_agent_stream(
|
||||
message: str,
|
||||
context: dict[str, Any],
|
||||
max_steps: int = 6,
|
||||
langfuse_handler: Any | None = None,
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
trace_id = _trace_id_from_context(context)
|
||||
llm = get_llm()
|
||||
callbacks = _build_callbacks(langfuse_handler)
|
||||
llm = get_llm(callbacks=callbacks)
|
||||
tools = _all_tools_for_user(user_id, trace_id)
|
||||
model_context = _context_for_model(context)
|
||||
logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id)
|
||||
@@ -751,25 +769,29 @@ async def _run_single_agent_stream(
|
||||
clear_tool_result_collector()
|
||||
|
||||
|
||||
async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
|
||||
async def run_home(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> str:
|
||||
prepared_context = await _prepare_context(message, context)
|
||||
system_prompt = _get_system_prompt("home_system", _HOME_SINGLE_AGENT_SYSTEM)
|
||||
response = await _run_single_agent(
|
||||
user_id=user_id,
|
||||
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
|
||||
system_prompt=system_prompt,
|
||||
message=message,
|
||||
context=prepared_context,
|
||||
langfuse_handler=langfuse_handler,
|
||||
)
|
||||
return _normalize_tagged_list_lines(response, message)
|
||||
|
||||
|
||||
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, dict[str, str | None]]:
|
||||
async def run_floating(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> tuple[str, dict[str, str | None]]:
|
||||
prepared_context = await _prepare_context(message, context)
|
||||
domain = await _infer_floating_domain(message, prepared_context)
|
||||
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
||||
response = await _run_single_agent(
|
||||
user_id=user_id,
|
||||
system_prompt=_FLOATING_SINGLE_AGENT_SYSTEM,
|
||||
system_prompt=system_prompt,
|
||||
message=message,
|
||||
context=prepared_context,
|
||||
langfuse_handler=langfuse_handler,
|
||||
)
|
||||
sanitized = _strip_floating_markup(response)
|
||||
if not sanitized and response:
|
||||
@@ -781,14 +803,18 @@ async def run_home_stream(
|
||||
user_id: str,
|
||||
message: str,
|
||||
context: dict[str, Any],
|
||||
*,
|
||||
langfuse_handler: Any | None = None,
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
prepared_context = await _prepare_context(message, context)
|
||||
system_prompt = _get_system_prompt("home_system", _HOME_SINGLE_AGENT_SYSTEM)
|
||||
text_chunks: list[str] = []
|
||||
async for event in _run_single_agent_stream(
|
||||
user_id=user_id,
|
||||
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
|
||||
system_prompt=system_prompt,
|
||||
message=message,
|
||||
context=prepared_context,
|
||||
langfuse_handler=langfuse_handler,
|
||||
):
|
||||
event_type, data = event
|
||||
if event_type != "token":
|
||||
@@ -805,19 +831,23 @@ async def run_floating_stream(
|
||||
user_id: str,
|
||||
message: str,
|
||||
context: dict[str, Any],
|
||||
*,
|
||||
langfuse_handler: Any | None = None,
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
prepared_context = await _prepare_context(message, context)
|
||||
domain = await _infer_floating_domain(message, prepared_context)
|
||||
yield "floating_domain", domain
|
||||
|
||||
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
||||
sanitizer = _FloatingStreamSanitizer()
|
||||
emitted_sanitized = False
|
||||
raw_chunks: list[str] = []
|
||||
async for event in _run_single_agent_stream(
|
||||
user_id=user_id,
|
||||
system_prompt=_FLOATING_SINGLE_AGENT_SYSTEM,
|
||||
system_prompt=system_prompt,
|
||||
message=message,
|
||||
context=prepared_context,
|
||||
langfuse_handler=langfuse_handler,
|
||||
):
|
||||
event_type, data = event
|
||||
if event_type != "token":
|
||||
|
||||
Reference in New Issue
Block a user