Add Langfuse observability: traces, prompt management, prompt-to-generation linking

- New app/core/langfuse_client.py: lazy singleton client, get_prompt_or_fallback() helper (returns raw template + prompt obj for linking), extract_usage() for token counts. No-ops when LANGFUSE_* env vars are not set.
- deep_agent.py: home-agent and floating-agent runs wrapped in spans; each ainvoke wrapped in a generation with model/input/output/usage; prompts fetched from Langfuse (adiuva-home-agent, adiuva-floating-agent, adiuva-floating-classifier) with hardcoded fallback.
- agent_runner.py: step1-classifier and step2-processor LLM calls traced; batch agent _run_agent_with_tools spans + generations; cloud-processor included. Prompts: adiuva-step1-classifier, adiuva-step2-processor, adiuva-cloud-processor.
- agent_setup.py: journey-setup span + generation per ainvoke; prompt_obj stored on JourneySession and reused across turns. Prompt: journey_system.
- settings.py: LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST added.
- .env.example: Langfuse section with EU/US/self-hosted host comments.
- requirements.txt: langfuse>=2.0.0.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 00:19:20 +02:00
parent 552b8eb305
commit 1ce1d492b0
7 changed files with 455 additions and 78 deletions
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -31,6 +31,8 @@ from typing import Any
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

 from app.agents.filesystem_agent import FILESYSTEM_TOOLS
+from app.config.settings import settings
+from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
 from app.core.llm import get_llm

 logger = logging.getLogger(__name__)
@@ -62,6 +64,7 @@ class JourneySession:
    data_types: list[str]
    history: list[dict[str, Any]] = field(default_factory=list)
    system_prompt: str = ""
+    langfuse_prompt: Any = None
    created_at: float = field(default_factory=time.monotonic)

    def is_expired(self) -> bool:
@@ -146,20 +149,25 @@ def _build_system_prompt(
    directory: str,
    data_types: list[str],
    existing_template: str | None = None,
-) -> str:
+) -> tuple[str, Any]:
+    """Return ``(compiled_system_prompt, langfuse_prompt_obj_or_None)``."""
    existing_section = (
        f"\nThe user already has the following prompt_template — refine it based on their answers:\n"
        f"---\n{existing_template}\n---\n"
        if existing_template
        else ""
    )
-    return _SYSTEM_PROMPT_TEMPLATE.format(
+    template, prompt_obj = get_prompt_or_fallback(
+        "journey_system", _SYSTEM_PROMPT_TEMPLATE
+    )
+    compiled = template.format(
        directory=directory,
        data_types=", ".join(data_types),
        template_start=_TEMPLATE_START,
        template_end=_TEMPLATE_END,
        existing_section=existing_section,
    )
+    return compiled, prompt_obj


 # ── Template extraction ───────────────────────────────────────────────────
@@ -199,12 +207,17 @@ async def _call_llm_with_tools(
    system_prompt: str,
    history: list[dict[str, Any]],
    tools: list[Any],
+    *,
+    user_id: str = "",
+    session_id: str = "",
+    langfuse_prompt: Any = None,
 ) -> str:
    """Build LangChain messages from history and invoke the LLM with tools.

    Handles tool-calling loops: if the LLM calls tools, execute them and
    continue until a final text response is produced.
    """
+    lf = get_langfuse()
    messages: list[Any] = [SystemMessage(content=system_prompt)]
    for turn in history:
        if turn["role"] == "user":
@@ -216,38 +229,76 @@ async def _call_llm_with_tools(
    llm_with_tools = llm.bind_tools(tools)
    tool_map = {tool_def.name: tool_def for tool_def in tools}

-    for _ in range(_MAX_TOOL_STEPS):
-        response: AIMessage = await llm_with_tools.ainvoke(messages)
-        messages.append(response)
+    _span_ctx = (
+        lf.start_as_current_observation(
+            as_type="span",
+            name="journey-setup",
+            user_id=user_id or None,
+            session_id=session_id or None,
+            input=history[-1]["content"] if history else "",
+        )
+        if lf else None
+    )
+    _span = _span_ctx.__enter__() if _span_ctx else None

-        if not response.tool_calls:
-            return _as_text(response.content)
-
-        for call in response.tool_calls:
-            call_name = str(call.get("name", ""))
-            call_args = call.get("args", {})
-            logger.info(
-                "agent_setup: journey tool_call name=%s args=%s",
-                call_name,
-                json.dumps(call_args, ensure_ascii=True)[:500],
+    try:
+        for _ in range(_MAX_TOOL_STEPS):
+            _gen_ctx = (
+                lf.start_as_current_observation(
+                    as_type="generation",
+                    name="journey-setup-llm",
+                    model=settings.LLM_MODEL,
+                    prompt=langfuse_prompt,
+                    input=messages,
+                )
+                if lf else None
            )
+            _gen = _gen_ctx.__enter__() if _gen_ctx else None
+            response: AIMessage = await llm_with_tools.ainvoke(messages)
+            if _gen_ctx:
+                _gen.update(output=_as_text(response.content), usage=extract_usage(response))
+                _gen_ctx.__exit__(None, None, None)

-            tool_fn = tool_map.get(call_name)
-            if tool_fn is None:
-                tool_output = f"Unknown tool: {call_name}"
-            else:
-                tool_output = await tool_fn.ainvoke(call_args)
+            messages.append(response)

-            logger.info(
-                "agent_setup: journey tool_result name=%s output=%s",
-                call_name,
-                str(tool_output)[:800],
-            )
-            messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
+            if not response.tool_calls:
+                if _span:
+                    _span.update(output=_as_text(response.content))
+                return _as_text(response.content)

-    # Fallback: exceeded max steps.
-    final = await llm.ainvoke(messages)
-    return _as_text(final.content)
+            for call in response.tool_calls:
+                call_name = str(call.get("name", ""))
+                call_args = call.get("args", {})
+                logger.info(
+                    "agent_setup: journey tool_call name=%s args=%s",
+                    call_name,
+                    json.dumps(call_args, ensure_ascii=True)[:500],
+                )
+
+                tool_fn = tool_map.get(call_name)
+                if tool_fn is None:
+                    tool_output = f"Unknown tool: {call_name}"
+                else:
+                    tool_output = await tool_fn.ainvoke(call_args)
+
+                logger.info(
+                    "agent_setup: journey tool_result name=%s output=%s",
+                    call_name,
+                    str(tool_output)[:800],
+                )
+                messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
+
+        # Fallback: exceeded max steps.
+        final = await llm.ainvoke(messages)
+        final_text = _as_text(final.content)
+        if _span:
+            _span.update(output=final_text)
+        return final_text
+    finally:
+        if _span_ctx:
+            _span_ctx.__exit__(None, None, None)
+        if lf:
+            lf.flush()


 # ── Journey handlers (called from device_ws.py) ──────────────────────────
@@ -270,7 +321,7 @@ async def handle_journey_start(
    # Use the session_id provided by the FE so the reply matches the
    # listener key; fall back to a generated one if absent.
    session_id = frame.get("session_id") or str(uuid.uuid4())
-    system_prompt = _build_system_prompt(directory, data_types, existing_template)
+    system_prompt, langfuse_prompt = _build_system_prompt(directory, data_types, existing_template)

    session = JourneySession(
        session_id=session_id,
@@ -279,6 +330,7 @@ async def handle_journey_start(
        directory=directory,
        data_types=data_types,
        system_prompt=system_prompt,
+        langfuse_prompt=langfuse_prompt,
    )

    # The LLM will explore the directory using FILESYSTEM_TOOLS via the
@@ -292,6 +344,9 @@ async def handle_journey_start(
        system_prompt=system_prompt,
        history=seed_history,
        tools=list(FILESYSTEM_TOOLS),
+        user_id=user_id,
+        session_id=session_id,
+        langfuse_prompt=langfuse_prompt,
    )

    session.history.extend(seed_history)
@@ -356,6 +411,9 @@ async def handle_journey_message(
        system_prompt=session.system_prompt,
        history=session.history,
        tools=list(FILESYSTEM_TOOLS),
+        user_id=session.user_id,
+        session_id=session_id,
+        langfuse_prompt=session.langfuse_prompt,
    )

    session.history.append({"role": "assistant", "content": ai_reply})
@@ -379,6 +437,9 @@ async def handle_journey_message(
                system_prompt=session.system_prompt,
                history=session.history,
                tools=list(FILESYSTEM_TOOLS),
+                user_id=session.user_id,
+                session_id=session_id,
+                langfuse_prompt=session.langfuse_prompt,
            )
            session.history.append({"role": "assistant", "content": nudge_reply})