feat(batch-agent): integrate Langfuse tracing

- tracing.py: init/shutdown, trace_span, get_langfuse_callback, prompt mgmt
- main.py: init_langfuse at startup, shutdown on teardown
- redis_consumer.py: trace_span around journey_start/message/agent_trigger
- agent_runner.py: thread langfuse_handler through classify + processing LLM
- journey.py: thread langfuse_handler through _call_llm_with_tools
- llm.py: accept callbacks param, forward to LLM constructors
- requirements.txt: add langfuse>=3.0.0
2026-03-23 08:43:15 +01:00
parent 333bba6fdd
commit 971f1dd84f
7 changed files with 347 additions and 14 deletions
--- a/services/batch-agent/app/redis_consumer.py
+++ b/services/batch-agent/app/redis_consumer.py
@@ -17,6 +17,7 @@ from typing import Any

 from shared.redis import redis_client, batch_request_channel, ws_out_channel

+import app.tracing as tracing
 from app.ws_context import set_current_user, clear_current_user

 logger = logging.getLogger(__name__)
@@ -32,15 +33,27 @@ async def _handle_journey_start(user_id: str, data: dict[str, Any]) -> None:
    """Handle a journey_start request from WS Gateway."""
    from app.journey import handle_journey_start

+    session_id = data.get("session_id", "")
    set_current_user(user_id)
    try:
-        reply = await handle_journey_start(user_id, data)
-        await _publish_to_user(user_id, reply)
+        with tracing.trace_span(
+            name="journey_start",
+            user_id=user_id,
+            session_id=session_id,
+            input=data.get("directory", ""),
+            metadata={"data_types": data.get("data_types", [])},
+            tags=["journey"],
+        ) as span:
+            langfuse_handler = tracing.get_langfuse_callback()
+            reply = await handle_journey_start(user_id, data, langfuse_handler=langfuse_handler)
+            span.update(output=reply.get("message", "")[:500])
+            await _publish_to_user(user_id, reply)
+        tracing.flush()
    except Exception as exc:
        logger.error("batch-agent: journey_start failed user=%s: %s", user_id, exc)
        await _publish_to_user(user_id, {
            "type": "journey_reply",
-            "session_id": data.get("session_id", ""),
+            "session_id": session_id,
            "message": f"Journey setup failed: {exc}",
            "done": True,
            "prompt_template": None,
@@ -53,15 +66,26 @@ async def _handle_journey_message(user_id: str, data: dict[str, Any]) -> None:
    """Handle a journey_message from WS Gateway."""
    from app.journey import handle_journey_message

+    session_id = data.get("session_id", "")
    set_current_user(user_id)
    try:
-        reply = await handle_journey_message(user_id, data)
-        await _publish_to_user(user_id, reply)
+        with tracing.trace_span(
+            name="journey_message",
+            user_id=user_id,
+            session_id=session_id,
+            input=data.get("message", "")[:200],
+            tags=["journey"],
+        ) as span:
+            langfuse_handler = tracing.get_langfuse_callback()
+            reply = await handle_journey_message(user_id, data, langfuse_handler=langfuse_handler)
+            span.update(output=reply.get("message", "")[:500])
+            await _publish_to_user(user_id, reply)
+        tracing.flush()
    except Exception as exc:
        logger.error("batch-agent: journey_message failed user=%s: %s", user_id, exc)
        await _publish_to_user(user_id, {
            "type": "journey_reply",
-            "session_id": data.get("session_id", ""),
+            "session_id": session_id,
            "message": f"Journey processing failed: {exc}",
            "done": True,
            "prompt_template": None,
@@ -74,15 +98,28 @@ async def _handle_agent_trigger(user_id: str, data: dict[str, Any]) -> None:
    """Handle an agent_trigger request from the REST route (forwarded via Redis)."""
    from app.agent_runner import run_local_agent

+    run_context = data.get("run_context", {})
+    agent_id = run_context.get("agent_id", "")
    set_current_user(user_id)
    try:
-        await run_local_agent(user_id, data)
+        with tracing.trace_span(
+            name="agent_trigger",
+            user_id=user_id,
+            trace_id=run_context.get("run_id"),
+            input={"agent_id": agent_id, "directory": data.get("directory", "")},
+            metadata={"data_types": data.get("data_types", [])},
+            tags=["batch", "agent_run"],
+        ) as span:
+            langfuse_handler = tracing.get_langfuse_callback()
+            await run_local_agent(user_id, data, langfuse_handler=langfuse_handler)
+            span.update(output={"status": "completed"})
+        tracing.flush()
    except Exception as exc:
        logger.error("batch-agent: agent_trigger failed user=%s: %s", user_id, exc)
        await _publish_to_user(user_id, {
            "type": "run_complete",
            "status": "error",
-            "run_context": data.get("run_context", {}),
+            "run_context": run_context,
        })
    finally:
        clear_current_user()