feat(langfuse): propagate user_id and session_id to all traces

- Add hash_user_id() to SHA-256 hash user IDs before sending to Langfuse
- Add langfuse_context() helper wrapping propagate_attributes()
- deep_agent: extract session_id from _debug context, wrap all agent
  runs and classifier with langfuse_context(user_id, session_id)
- agent_runner: add session_id param, pass run_id as session for batch
- agent_setup: wrap journey LLM calls with langfuse_context
- Remove redundant metadata dicts (now handled by propagate_attributes)
This commit is contained in:
Roberto Musso
2026-04-10 22:38:02 +02:00
parent 90500a3462
commit a85f8fde29
5 changed files with 104 additions and 23 deletions

View File

@@ -39,8 +39,10 @@ Linking a prompt to a generation::
from __future__ import annotations
import hashlib
import logging
from typing import Any
from contextlib import contextmanager
from typing import Any, Generator
logger = logging.getLogger(__name__)
@@ -145,3 +147,44 @@ def extract_usage(response: Any) -> dict[str, int]:
"output": int(meta.get("output_tokens", 0)),
"total": int(meta.get("total_tokens", 0)),
}
def hash_user_id(user_id: str) -> str:
"""Return a SHA-256 hash of *user_id* for use as Langfuse ``user_id``.
This avoids sending raw database UUIDs to external observability services
while still providing a stable, deterministic identifier for per-user
metrics in the Langfuse dashboard.
"""
return hashlib.sha256(user_id.encode()).hexdigest()
@contextmanager
def langfuse_context(
user_id: str | None = None,
session_id: str | None = None,
) -> Generator[None, None, None]:
"""Propagate ``user_id`` (hashed) and ``session_id`` to all Langfuse observations.
No-op when Langfuse is not configured or parameters are empty.
"""
lf = get_langfuse()
if lf is None or (not user_id and not session_id):
yield
return
try:
from langfuse import propagate_attributes
except ImportError:
logger.debug("langfuse: propagate_attributes not available — skipping context")
yield
return
attrs: dict[str, str] = {}
if user_id:
attrs["user_id"] = hash_user_id(user_id)
if session_id:
attrs["session_id"] = session_id
with propagate_attributes(**attrs):
yield