feat(batch-agent): integrate Langfuse tracing
- tracing.py: init/shutdown, trace_span, get_langfuse_callback, prompt mgmt
- main.py: init_langfuse at startup, shutdown on teardown
- redis_consumer.py: trace_span around journey_start/message/agent_trigger
- agent_runner.py: thread langfuse_handler through classify + processing LLM
- journey.py: thread langfuse_handler through _call_llm_with_tools
- llm.py: accept callbacks param, forward to LLM constructors
- requirements.txt: add langfuse>=3.0.0
This commit is contained in:
264
services/batch-agent/app/tracing.py
Normal file
264
services/batch-agent/app/tracing.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""Langfuse tracing & prompt management for the Batch Agent Service (v4 SDK).
|
||||
|
||||
Provides:
|
||||
- ``init_langfuse()`` — initialise the singleton client at startup
|
||||
- ``trace_span()`` — context manager that creates a trace + span
|
||||
- ``get_langfuse_callback()`` — LangChain callback handler (auto-inherits trace)
|
||||
- ``get_prompt()`` — fetch a managed prompt from Langfuse by name
|
||||
- ``flush()`` / ``shutdown()`` — lifecycle management
|
||||
|
||||
All functions gracefully degrade to no-ops when Langfuse is not configured,
|
||||
so the service works identically with or without observability keys.
|
||||
|
||||
Requires ``langfuse >= 3.0.0`` (v4 / "Fast Preview" SDK).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from contextlib import contextmanager
|
||||
from typing import Any
|
||||
|
||||
from shared.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── State ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Set to True by init_langfuse() once the Langfuse client has been constructed;
# reset to False by shutdown().
_initialised: bool = False
|
||||
# Set to True by init_langfuse() when the keys are missing or client construction
# fails; while True, _get_client() returns None. Reset to False by shutdown().
_disabled: bool = False
|
||||
|
||||
|
||||
def _is_configured() -> bool:
    """Report whether both Langfuse keys in ``settings`` are truthy.

    The secret key is checked first; the public key is only consulted when
    the secret key is present.
    """
    if not settings.LANGFUSE_SECRET_KEY:
        return False
    return bool(settings.LANGFUSE_PUBLIC_KEY)
|
||||
|
||||
|
||||
def init_langfuse() -> None:
    """Construct the Langfuse client from ``settings`` (intended for startup).

    The call is a no-op once a decision has been made, i.e. when either
    ``_initialised`` or ``_disabled`` is already set.  Missing keys or any
    exception during construction mark tracing as disabled instead of
    raising, so the service can keep running without observability.
    """
    global _initialised, _disabled

    already_decided = _initialised or _disabled
    if already_decided:
        return

    if _is_configured():
        try:
            # Imported lazily so a missing/broken SDK only disables tracing.
            from langfuse import Langfuse

            client_options = {
                "secret_key": settings.LANGFUSE_SECRET_KEY,
                "public_key": settings.LANGFUSE_PUBLIC_KEY,
                "host": settings.LANGFUSE_HOST,
            }
            # The instance is not kept here; _get_client() later retrieves the
            # client via langfuse.get_client().
            Langfuse(**client_options)
            _initialised = True
            logger.info("tracing: Langfuse client initialised (host=%s)", settings.LANGFUSE_HOST)
        except Exception as exc:
            _disabled = True
            logger.warning("tracing: failed to initialise Langfuse: %s", exc)
    else:
        _disabled = True
        logger.info("tracing: Langfuse keys not set — tracing disabled")
|
||||
|
||||
|
||||
def _get_client() -> Any | None:
    """Return the Langfuse client, or *None* when tracing is unavailable.

    Triggers ``init_langfuse()`` on first use if no initialisation attempt
    has been made yet.  Any error while importing or calling
    ``langfuse.get_client`` is swallowed and reported as *None*.
    """
    # Lazy initialisation: only attempted when no decision exists yet.
    if not _disabled and not _initialised:
        init_langfuse()

    # Covers both "was already disabled" and "the attempt above just failed".
    if _disabled:
        return None

    client = None
    try:
        from langfuse import get_client

        client = get_client()
    except Exception:
        client = None
    return client
|
||||
|
||||
|
||||
# ── Null span (no-op when Langfuse is disabled) ─────────────────────────
|
||||
|
||||
|
||||
class _NullSpan:
|
||||
"""Drop-in replacement when Langfuse is disabled."""
|
||||
|
||||
def update(self, **_: Any) -> None: ...
|
||||
def set_trace_io(self, **_: Any) -> None: ...
|
||||
def score_trace(self, **_: Any) -> None: ...
|
||||
|
||||
|
||||
# ── Trace context manager ───────────────────────────────────────────────
|
||||
|
||||
|
||||
@contextmanager
def trace_span(
    *,
    name: str,
    user_id: str,
    session_id: str | None = None,
    trace_id: str | None = None,
    input: Any = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
):
    """Context manager that opens a Langfuse span for the enclosed block.

    Yields the span object, or a ``_NullSpan`` when Langfuse is disabled or
    the span could not be set up.  A ``CallbackHandler`` created inside this
    block is expected to inherit the trace context from the SDK, so trace
    IDs do not have to be passed around manually.

    Args:
        name: Span name.
        user_id: Propagated to the trace via ``propagate_attributes``.
        session_id: Optional session identifier, propagated likewise.
        trace_id: Optional seed; when given, a deterministic trace id is
            derived from it with ``Langfuse.create_trace_id(seed=...)``.
        input: Recorded as the span input.
        metadata: Span metadata; an empty dict is sent when omitted.
        tags: Tags to propagate; an empty list is sent when omitted.

    Raises:
        Any exception raised by the caller's ``with`` body is re-raised
        unchanged.  Only failures that happen *before* the span is handed to
        the caller are swallowed (logged, then a ``_NullSpan`` is yielded).
    """
    lf = _get_client()
    if lf is None:
        yield _NullSpan()
        return

    # Becomes True right before control is handed to the caller's block.  An
    # exception seen after that point came from the caller's code (or span
    # teardown) and must not be answered with a second ``yield``: contextlib
    # would raise RuntimeError("generator didn't stop after throw()") and the
    # caller's real exception would be lost.
    handed_over = False
    try:
        from langfuse import Langfuse, propagate_attributes

        trace_ctx: dict[str, str] = {}
        if trace_id is not None:
            trace_ctx["trace_id"] = Langfuse.create_trace_id(seed=trace_id)

        with lf.start_as_current_observation(
            as_type="span",
            name=name,
            input=input,
            metadata=metadata or {},
            # Only pass trace_context when a trace id was actually derived.
            **({"trace_context": trace_ctx} if trace_ctx else {}),
        ) as span:
            with propagate_attributes(
                user_id=user_id,
                session_id=session_id,
                tags=tags or [],
            ):
                handed_over = True
                yield span
    except Exception as exc:
        if handed_over:
            # Not a setup failure: propagate to the caller untouched.
            raise
        logger.warning("tracing: trace_span(%s) failed: %s", name, exc)
        yield _NullSpan()
|
||||
|
||||
|
||||
# ── LangChain callback handler ──────────────────────────────────────────
|
||||
|
||||
|
||||
def get_langfuse_callback() -> Any | None:
    """Return a LangChain ``CallbackHandler``, or *None* when tracing is off.

    Intended to be called inside a ``trace_span()`` block so the handler can
    link to the active trace.

    Availability is decided by ``_get_client()``, the same gate every other
    helper in this module uses.  This also covers the case where no
    initialisation attempt has happened yet (or the state was reset by
    ``shutdown()``), so no handler is built while Langfuse is unconfigured.

    Returns:
        A new ``langfuse.langchain.CallbackHandler`` instance, or *None* if
        Langfuse is disabled or the handler could not be created.
    """
    if _get_client() is None:
        return None

    try:
        from langfuse.langchain import CallbackHandler

        return CallbackHandler()
    except Exception as exc:
        logger.warning("tracing: get_langfuse_callback failed: %s", exc)
        return None
|
||||
|
||||
|
||||
# ── Prompt management ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_prompt(
    name: str,
    *,
    version: int | None = None,
    label: str | None = None,
    fallback: str | None = None,
    cache_ttl_seconds: int = 300,
) -> str | None:
    """Fetch a managed prompt from Langfuse by name.

    Args:
        name: Prompt name to look up.
        version: Optional version selector; omitted from the request when *None*.
        label: Optional label selector; omitted from the request when *None*.
        fallback: Value returned when Langfuse is disabled or the lookup fails.
        cache_ttl_seconds: Forwarded to the client's ``get_prompt`` call.

    Returns:
        The ``prompt`` attribute of the fetched prompt object, or *fallback*.
    """
    client = _get_client()
    if client is None:
        return fallback

    optional_selectors = {"version": version, "label": label}
    try:
        request: dict[str, Any] = {
            "name": name,
            "cache_ttl_seconds": cache_ttl_seconds,
        }
        # Only send the selectors the caller actually supplied.
        request.update(
            {key: value for key, value in optional_selectors.items() if value is not None}
        )
        return client.get_prompt(**request).prompt
    except Exception as exc:
        logger.warning("tracing: get_prompt(%s) failed: %s", name, exc)
        return fallback
|
||||
|
||||
|
||||
def link_prompt_to_trace(
    span: Any,
    prompt_name: str,
    *,
    version: int | None = None,
    label: str | None = None,
) -> None:
    """Record which managed prompt was used in the span's metadata.

    Looks the prompt up through the Langfuse client and writes
    ``{"prompt": {"name": ..., "version": ...}}`` to the span via
    ``span.update``.  Does nothing when Langfuse is disabled or *span* is a
    ``_NullSpan``; lookup/update errors are logged and swallowed.
    """
    client = _get_client()
    if client is None:
        return
    if isinstance(span, _NullSpan):
        return

    try:
        lookup: dict[str, Any] = {"name": prompt_name}
        # Selectors are only forwarded when explicitly provided.
        for key, value in (("version", version), ("label", label)):
            if value is not None:
                lookup[key] = value

        fetched = client.get_prompt(**lookup)
        prompt_info = {"name": prompt_name, "version": fetched.version}
        span.update(metadata={"prompt": prompt_info})
    except Exception as exc:
        logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
|
||||
|
||||
|
||||
# ── Scoring helper ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def score_trace(
    trace_id: str,
    name: str,
    value: float,
    *,
    comment: str | None = None,
) -> None:
    """Attach a score to a trace via the client's ``create_score``.

    Silently does nothing when Langfuse is disabled; a failing call is
    logged as a warning and not re-raised.
    """
    client = _get_client()
    if client is not None:
        try:
            client.create_score(trace_id=trace_id, name=name, value=value, comment=comment)
        except Exception as exc:
            logger.warning("tracing: score_trace failed: %s", exc)
|
||||
|
||||
|
||||
# ── Shutdown ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def flush() -> None:
    """Ask the Langfuse client to flush; errors are logged, never raised.

    A no-op when no client is available.
    """
    client = _get_client()
    if client is None:
        return

    try:
        client.flush()
    except Exception as exc:
        logger.warning("tracing: flush failed: %s", exc)
|
||||
|
||||
|
||||
def shutdown() -> None:
    """Flush and close the Langfuse client, then reset the module state.

    The client is only looked up when it was actually initialised.  Going
    through ``_get_client()`` unconditionally would lazily construct a
    brand-new client during teardown just to shut it down again.

    ``shutdown()`` on the client is attempted even if ``flush()`` raises.
    Errors from either call are logged and swallowed.  Both state flags are
    reset afterwards, so a later ``init_langfuse()`` starts from scratch.
    """
    global _initialised, _disabled

    lf = _get_client() if _initialised else None
    if lf is not None:
        try:
            try:
                lf.flush()
            finally:
                # Release the client even when the flush above failed.
                lf.shutdown()
        except Exception as exc:
            logger.warning("tracing: shutdown failed: %s", exc)

    _initialised = False
    _disabled = False
|
||||
Reference in New Issue
Block a user