fix: langfuse v4 SDK compatibility and pass user message as trace input
This commit is contained in:
@@ -25,7 +25,6 @@ OPENAI_API_KEY=
|
|||||||
ANTHROPIC_API_KEY=
|
ANTHROPIC_API_KEY=
|
||||||
GOOGLE_API_KEY=
|
GOOGLE_API_KEY=
|
||||||
LLM_MODEL=gpt-4o
|
LLM_MODEL=gpt-4o
|
||||||
LLM_ROUTER_MODEL=gpt-4o-mini
|
|
||||||
|
|
||||||
# ── Stripe (leave empty to stub billing) ──────────────────────────────────────
|
# ── Stripe (leave empty to stub billing) ──────────────────────────────────────
|
||||||
STRIPE_SECRET_KEY=
|
STRIPE_SECRET_KEY=
|
||||||
@@ -50,3 +49,8 @@ QDRANT_API_KEY=
|
|||||||
# ── CORS ──────────────────────────────────────────────────────────────────────
|
# ── CORS ──────────────────────────────────────────────────────────────────────
|
||||||
# Comma-separated list parsed by Settings (override default if needed)
|
# Comma-separated list parsed by Settings (override default if needed)
|
||||||
# CORS_ORIGINS=["app://.","http://localhost:3000"]
|
# CORS_ORIGINS=["app://.","http://localhost:3000"]
|
||||||
|
|
||||||
|
# ── Langfuse (observability) ─────────────────────────────────────────────────
|
||||||
|
LANGFUSE_SECRET_KEY=sk-lf-...
|
||||||
|
LANGFUSE_PUBLIC_KEY=pk-lf-...
|
||||||
|
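# Langfuse Cloud by default; replace with your self-hosted URL if applicable.
# (Kept on its own line: some env-file loaders treat a trailing "# ..." as part of the value.)
LANGFUSE_HOST=https://cloud.langfuse.com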
|
||||||
@@ -739,7 +739,7 @@ adiuva-api/
|
|||||||
│ │
|
│ │
|
||||||
│ ├── core/ # Orchestration engine
|
│ ├── core/ # Orchestration engine
|
||||||
│ │ ├── agent_registry.py # BaseAgent, ChatAgent, AgentRegistry
|
│ │ ├── agent_registry.py # BaseAgent, ChatAgent, AgentRegistry
|
||||||
│ │ ├── llm.py # LiteLLM factory (get_llm, get_router_llm)
|
│ │ ├── llm.py # LiteLLM factory (get_llm)
|
||||||
│ │ ├── orchestrator.py # Intent classification & routing
|
│ │ ├── orchestrator.py # Intent classification & routing
|
||||||
│ │ └── execution_plan.py # Plan builder, templates, cache
|
│ │ └── execution_plan.py # Plan builder, templates, cache
|
||||||
│ │
|
│ │
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""LLM factory — centralised model instantiation via LiteLLM.
|
"""LLM factory — centralised model instantiation via LiteLLM.
|
||||||
|
|
||||||
Every agent and the orchestrator call ``get_llm()`` or ``get_router_llm()``
|
Every agent and the orchestrator call ``get_llm()``
|
||||||
instead of directly constructing a provider-specific class. The model string
|
instead of directly constructing a provider-specific class. The model string
|
||||||
follows the `LiteLLM model naming convention
|
follows the `LiteLLM model naming convention
|
||||||
<https://docs.litellm.ai/docs/providers>`_:
|
<https://docs.litellm.ai/docs/providers>`_:
|
||||||
@@ -11,7 +11,7 @@ follows the `LiteLLM model naming convention
|
|||||||
* Ollama: ``ollama/llama3``
|
* Ollama: ``ollama/llama3``
|
||||||
* Bedrock: ``bedrock/anthropic.claude-v2``
|
* Bedrock: ``bedrock/anthropic.claude-v2``
|
||||||
|
|
||||||
Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in ``.env``
|
Switch providers by changing **LLM_MODEL** in ``.env``
|
||||||
— no code changes required.
|
— no code changes required.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -95,14 +95,6 @@ def get_llm(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_router_llm(
|
|
||||||
*,
|
|
||||||
temperature: float = 0,
|
|
||||||
) -> ChatOpenAI | ChatLiteLLM:
|
|
||||||
"""Return the lighter model used for intent classification / routing."""
|
|
||||||
return get_llm(model=settings.LLM_ROUTER_MODEL, temperature=temperature)
|
|
||||||
|
|
||||||
|
|
||||||
async def embed(text: str) -> list[float]:
|
async def embed(text: str) -> list[float]:
|
||||||
"""Return an embedding vector for *text*.
|
"""Return an embedding vector for *text*.
|
||||||
|
|
||||||
|
|||||||
@@ -33,4 +33,5 @@ google-auth-httplib2>=0.2.0
|
|||||||
msal>=1.28.0
|
msal>=1.28.0
|
||||||
cryptography>=42.0.0
|
cryptography>=42.0.0
|
||||||
redis>=5.0.0
|
redis>=5.0.0
|
||||||
|
langfuse>=3.0.0
|
||||||
ruff>=0.8.0
|
ruff>=0.8.0
|
||||||
|
|||||||
@@ -528,7 +528,9 @@ def _infer_floating_domain_rule_based(message: str, context: dict[str, Any]) ->
|
|||||||
return {"type": "task", "id": None, "section": None}
|
return {"type": "task", "id": None, "section": None}
|
||||||
|
|
||||||
|
|
||||||
async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[str, str | None]:
|
async def _infer_floating_domain(
|
||||||
|
message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None,
|
||||||
|
) -> dict[str, str | None]:
|
||||||
resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
|
resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
|
||||||
project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
|
project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
|
||||||
|
|
||||||
@@ -538,10 +540,14 @@ async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[
|
|||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
llm = get_llm()
|
classifier_prompt = _get_system_prompt(
|
||||||
|
"floating_domain_classifier", _FLOATING_DOMAIN_CLASSIFIER_SYSTEM,
|
||||||
|
)
|
||||||
|
callbacks = _build_callbacks(langfuse_handler)
|
||||||
|
llm = get_llm(callbacks=callbacks)
|
||||||
response = await llm.ainvoke(
|
response = await llm.ainvoke(
|
||||||
[
|
[
|
||||||
SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_SYSTEM),
|
SystemMessage(content=classifier_prompt),
|
||||||
HumanMessage(
|
HumanMessage(
|
||||||
content=(
|
content=(
|
||||||
f"Message:\n{message}\n\n"
|
f"Message:\n{message}\n\n"
|
||||||
@@ -784,7 +790,7 @@ async def run_home(user_id: str, message: str, context: dict[str, Any], *, langf
|
|||||||
|
|
||||||
async def run_floating(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> tuple[str, dict[str, str | None]]:
|
async def run_floating(user_id: str, message: str, context: dict[str, Any], *, langfuse_handler: Any | None = None) -> tuple[str, dict[str, str | None]]:
|
||||||
prepared_context = await _prepare_context(message, context)
|
prepared_context = await _prepare_context(message, context)
|
||||||
domain = await _infer_floating_domain(message, prepared_context)
|
domain = await _infer_floating_domain(message, prepared_context, langfuse_handler=langfuse_handler)
|
||||||
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
||||||
response = await _run_single_agent(
|
response = await _run_single_agent(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
@@ -835,7 +841,7 @@ async def run_floating_stream(
|
|||||||
langfuse_handler: Any | None = None,
|
langfuse_handler: Any | None = None,
|
||||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||||
prepared_context = await _prepare_context(message, context)
|
prepared_context = await _prepare_context(message, context)
|
||||||
domain = await _infer_floating_domain(message, prepared_context)
|
domain = await _infer_floating_domain(message, prepared_context, langfuse_handler=langfuse_handler)
|
||||||
yield "floating_domain", domain
|
yield "floating_domain", domain
|
||||||
|
|
||||||
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
system_prompt = _get_system_prompt("floating_system", _FLOATING_SINGLE_AGENT_SYSTEM)
|
||||||
|
|||||||
@@ -31,6 +31,11 @@ logging.getLogger("sqlalchemy.pool").setLevel(logging.WARNING)
|
|||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
|
# Initialise Langfuse tracing (no-op if keys are missing)
|
||||||
|
from app.tracing import init_langfuse
|
||||||
|
|
||||||
|
init_langfuse()
|
||||||
|
|
||||||
# Start Redis consumer in background
|
# Start Redis consumer in background
|
||||||
from app.redis_consumer import start_consumer
|
from app.redis_consumer import start_consumer
|
||||||
|
|
||||||
|
|||||||
@@ -85,52 +85,51 @@ async def _handle_home_request(user_id: str, frame: dict) -> None:
|
|||||||
user_id, request_id, message[:200],
|
user_id, request_id, message[:200],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create Langfuse trace
|
response_chunks: list[str] = []
|
||||||
trace = tracing.create_trace(
|
|
||||||
|
with tracing.trace_span(
|
||||||
name="home_request",
|
name="home_request",
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
trace_id=request_id,
|
trace_id=request_id,
|
||||||
|
input=message,
|
||||||
metadata={"message_preview": message[:200]},
|
metadata={"message_preview": message[:200]},
|
||||||
tags=["home"],
|
tags=["home"],
|
||||||
)
|
) as span:
|
||||||
langfuse_handler = tracing.get_langfuse_callback(
|
langfuse_handler = tracing.get_langfuse_callback()
|
||||||
trace=trace, span_name="home_agent",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Enrich with memory context
|
# Enrich with memory context
|
||||||
async with async_session() as db:
|
async with async_session() as db:
|
||||||
memory = MemoryMiddleware(db)
|
memory = MemoryMiddleware(db)
|
||||||
memory_context = await memory.enrich_context(
|
memory_context = await memory.enrich_context(
|
||||||
user_id, message,
|
user_id, message,
|
||||||
trace_id=request_id, session_id=session_id,
|
trace_id=request_id, session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
context: dict = {
|
context: dict = {
|
||||||
"conversation_history": frame.get("conversation_history", []),
|
"conversation_history": frame.get("conversation_history", []),
|
||||||
"_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
|
"_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
|
||||||
**memory_context,
|
**memory_context,
|
||||||
}
|
}
|
||||||
|
|
||||||
set_current_user(user_id)
|
set_current_user(user_id)
|
||||||
response_chunks: list[str] = []
|
try:
|
||||||
try:
|
event_stream = run_home_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
||||||
event_stream = run_home_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
formatter = StreamFormatter(request_id=request_id)
|
||||||
formatter = StreamFormatter(request_id=request_id)
|
async for ws_frame in formatter.format(event_stream):
|
||||||
async for ws_frame in formatter.format(event_stream):
|
await _publish_frame(user_id, ws_frame.model_dump_json())
|
||||||
await _publish_frame(user_id, ws_frame.model_dump_json())
|
if hasattr(ws_frame, "chunk"):
|
||||||
if hasattr(ws_frame, "chunk"):
|
response_chunks.append(ws_frame.chunk)
|
||||||
response_chunks.append(ws_frame.chunk)
|
except Exception as exc:
|
||||||
except Exception as exc:
|
logger.error("redis_consumer: home_request failed user=%s req=%s: %s", user_id, request_id, exc)
|
||||||
logger.error("redis_consumer: home_request failed user=%s req=%s: %s", user_id, request_id, exc)
|
finally:
|
||||||
finally:
|
clear_current_user()
|
||||||
clear_current_user()
|
|
||||||
|
|
||||||
# Link prompt and flush trace
|
# Link prompt and attach output preview
|
||||||
if trace is not None:
|
tracing.link_prompt_to_trace(span, "home_system")
|
||||||
tracing.link_prompt_to_trace(trace, "home_system")
|
|
||||||
response_text = "".join(response_chunks)
|
response_text = "".join(response_chunks)
|
||||||
trace.update(output=response_text[:500] if response_text else None)
|
span.update(output=response_text[:500] if response_text else None)
|
||||||
|
|
||||||
tracing.flush()
|
tracing.flush()
|
||||||
|
|
||||||
# Store episode
|
# Store episode
|
||||||
@@ -154,52 +153,51 @@ async def _handle_floating_request(user_id: str, frame: dict) -> None:
|
|||||||
user_id, request_id, json.dumps(scope)[:200], message[:200],
|
user_id, request_id, json.dumps(scope)[:200], message[:200],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create Langfuse trace
|
response_chunks: list[str] = []
|
||||||
trace = tracing.create_trace(
|
|
||||||
|
with tracing.trace_span(
|
||||||
name="floating_request",
|
name="floating_request",
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
trace_id=request_id,
|
trace_id=request_id,
|
||||||
|
input=message,
|
||||||
metadata={"message_preview": message[:200], "scope": scope},
|
metadata={"message_preview": message[:200], "scope": scope},
|
||||||
tags=["floating"],
|
tags=["floating"],
|
||||||
)
|
) as span:
|
||||||
langfuse_handler = tracing.get_langfuse_callback(
|
langfuse_handler = tracing.get_langfuse_callback()
|
||||||
trace=trace, span_name="floating_agent",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Enrich with memory context
|
# Enrich with memory context
|
||||||
async with async_session() as db:
|
async with async_session() as db:
|
||||||
memory = MemoryMiddleware(db)
|
memory = MemoryMiddleware(db)
|
||||||
memory_context = await memory.enrich_context(
|
memory_context = await memory.enrich_context(
|
||||||
user_id, message,
|
user_id, message,
|
||||||
trace_id=request_id, session_id=session_id,
|
trace_id=request_id, session_id=session_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
context: dict = {
|
context: dict = {
|
||||||
"scope": scope,
|
"scope": scope,
|
||||||
"_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
|
"_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
|
||||||
**memory_context,
|
**memory_context,
|
||||||
}
|
}
|
||||||
|
|
||||||
set_current_user(user_id)
|
set_current_user(user_id)
|
||||||
response_chunks: list[str] = []
|
try:
|
||||||
try:
|
event_stream = run_floating_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
||||||
event_stream = run_floating_stream(user_id, message, context, langfuse_handler=langfuse_handler)
|
formatter = StreamFormatter(request_id=request_id)
|
||||||
formatter = StreamFormatter(request_id=request_id)
|
async for ws_frame in formatter.format(event_stream):
|
||||||
async for ws_frame in formatter.format(event_stream):
|
await _publish_frame(user_id, ws_frame.model_dump_json())
|
||||||
await _publish_frame(user_id, ws_frame.model_dump_json())
|
if hasattr(ws_frame, "chunk"):
|
||||||
if hasattr(ws_frame, "chunk"):
|
response_chunks.append(ws_frame.chunk)
|
||||||
response_chunks.append(ws_frame.chunk)
|
except Exception as exc:
|
||||||
except Exception as exc:
|
logger.error("redis_consumer: floating_request failed user=%s req=%s: %s", user_id, request_id, exc)
|
||||||
logger.error("redis_consumer: floating_request failed user=%s req=%s: %s", user_id, request_id, exc)
|
finally:
|
||||||
finally:
|
clear_current_user()
|
||||||
clear_current_user()
|
|
||||||
|
|
||||||
# Link prompt and flush trace
|
# Link prompt and attach output preview
|
||||||
if trace is not None:
|
tracing.link_prompt_to_trace(span, "floating_system")
|
||||||
tracing.link_prompt_to_trace(trace, "floating_system")
|
|
||||||
response_text = "".join(response_chunks)
|
response_text = "".join(response_chunks)
|
||||||
trace.update(output=response_text[:500] if response_text else None)
|
span.update(output=response_text[:500] if response_text else None)
|
||||||
|
|
||||||
tracing.flush()
|
tracing.flush()
|
||||||
|
|
||||||
# Store episode
|
# Store episode
|
||||||
|
|||||||
@@ -1,137 +1,156 @@
|
|||||||
"""Langfuse tracing & prompt management for the Chat Service.
|
"""Langfuse tracing & prompt management for the Chat Service (v4 SDK).
|
||||||
|
|
||||||
Provides:
|
Provides:
|
||||||
- ``langfuse`` — singleton Langfuse client (lazy, no-op when keys are missing)
|
- ``init_langfuse()`` — initialise the singleton client at startup
|
||||||
- ``create_trace()`` — start a new trace for a chat request
|
- ``trace_span()`` — context manager that creates a trace + span
|
||||||
- ``get_langfuse_callback()`` — LangChain callback handler for a trace/span
|
- ``get_langfuse_callback()`` — LangChain callback handler (auto-inherits trace)
|
||||||
- ``get_prompt()`` — fetch a managed prompt from Langfuse by name
|
- ``get_prompt()`` — fetch a managed prompt from Langfuse by name
|
||||||
- ``flush()`` — ensure all events are sent before shutdown
|
- ``flush()`` / ``shutdown()`` — lifecycle management
|
||||||
|
|
||||||
All functions gracefully degrade to no-ops when Langfuse is not configured,
|
All functions gracefully degrade to no-ops when Langfuse is not configured,
|
||||||
so the service works identically with or without observability keys.
|
so the service works identically with or without observability keys.
|
||||||
|
|
||||||
|
Requires ``langfuse >= 3.0.0`` (v4 / "Fast Preview" SDK).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from contextlib import contextmanager
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from shared.config import settings
|
from shared.config import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Lazy singleton ───────────────────────────────────────────────────────
|
# ── State ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_langfuse_client: Any | None = None
|
_initialised: bool = False
|
||||||
_langfuse_disabled: bool = False
|
_disabled: bool = False
|
||||||
|
|
||||||
|
|
||||||
def _is_configured() -> bool:
|
def _is_configured() -> bool:
|
||||||
return bool(settings.LANGFUSE_SECRET_KEY and settings.LANGFUSE_PUBLIC_KEY)
|
return bool(settings.LANGFUSE_SECRET_KEY and settings.LANGFUSE_PUBLIC_KEY)
|
||||||
|
|
||||||
|
|
||||||
def _get_langfuse() -> Any | None:
|
def init_langfuse() -> None:
    """Create the process-wide Langfuse client, at most once.

    The outcome is recorded in the module flags ``_initialised`` and
    ``_disabled``; once either is set, further calls return immediately.
    Tracing is marked disabled when the Langfuse keys are missing or when
    constructing the client raises, so this function never propagates an
    error to the caller.
    """
    global _initialised, _disabled

    # A previous call already succeeded or already gave up — nothing to do.
    already_decided = _initialised or _disabled
    if already_decided:
        return

    if not _is_configured():
        _disabled = True
        logger.info("tracing: Langfuse keys not set — tracing disabled")
        return

    try:
        from langfuse import Langfuse

        client_options = {
            "secret_key": settings.LANGFUSE_SECRET_KEY,
            "public_key": settings.LANGFUSE_PUBLIC_KEY,
            "host": settings.LANGFUSE_HOST,
        }
        # The instance is not stored here; _get_client() retrieves the client
        # later through ``langfuse.get_client()``.
        Langfuse(**client_options)
        _initialised = True
        logger.info("tracing: Langfuse client initialised (host=%s)", settings.LANGFUSE_HOST)
    except Exception as exc:
        _disabled = True
        logger.warning("tracing: failed to initialise Langfuse: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_client() -> Any | None:
    """Return the singleton Langfuse client, or *None* if tracing is unavailable.

    Initialises the client lazily via ``init_langfuse()`` when that has not
    happened yet.  Returns *None* when tracing is disabled (missing keys or a
    failed initialisation) or when the client cannot be obtained; the latter
    is logged rather than silently swallowed so the failure is diagnosable.
    """
    if _disabled:
        return None

    if not _initialised:
        init_langfuse()
        # init_langfuse() reports failure through the module flag.
        if _disabled:
            return None

    try:
        from langfuse import get_client

        return get_client()
    except Exception as exc:
        logger.warning("tracing: could not obtain Langfuse client: %s", exc)
        return None
|
||||||
|
|
||||||
|
|
||||||
# ── Trace lifecycle ──────────────────────────────────────────────────────
|
# ── Null span (no-op when Langfuse is disabled) ─────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def create_trace(
|
class _NullSpan:
    """Inert stand-in for a span, handed out when Langfuse is disabled.

    Every method accepts arbitrary keyword arguments, ignores them and
    returns ``None``, so call sites can use the yielded object
    unconditionally.
    """

    def update(self, **_ignored: Any) -> None:
        """Accept and discard span updates."""
        return None

    def set_trace_io(self, **_ignored: Any) -> None:
        """Accept and discard trace input/output."""
        return None

    def score_trace(self, **_ignored: Any) -> None:
        """Accept and discard a score."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Trace context manager ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def trace_span(
    *,
    name: str,
    user_id: str,
    session_id: str | None = None,
    trace_id: str | None = None,
    input: Any = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
):
    """Context manager that opens a Langfuse span for one request.

    Yields the span object, or a ``_NullSpan`` when Langfuse is disabled or
    the span could not be opened.  ``user_id``, ``session_id`` and ``tags``
    are applied through ``propagate_attributes`` for the duration of the
    block.  When *trace_id* is given, the Langfuse trace id is derived from
    it with ``Langfuse.create_trace_id(seed=trace_id)``.

    Exactly one value is yielded on every path.  Exceptions raised by the
    caller's ``with`` body are re-raised unchanged after the span has been
    closed; only failures of the tracing machinery itself are logged and
    suppressed.  (Previously the ``yield`` sat inside the ``try``/``except``,
    so a body exception triggered a second ``yield`` and surfaced as
    ``RuntimeError: generator didn't stop after throw()``.)
    """
    # Function-scope import: ExitStack lets the two Langfuse context managers
    # be entered during setup and exited later, outside the setup try-block.
    from contextlib import ExitStack

    lf = _get_client()
    if lf is None:
        yield _NullSpan()
        return

    stack = ExitStack()
    span: Any = None
    started = False
    try:
        from langfuse import Langfuse, propagate_attributes

        trace_ctx: dict[str, str] = {}
        if trace_id is not None:
            trace_ctx["trace_id"] = Langfuse.create_trace_id(seed=trace_id)

        span = stack.enter_context(
            lf.start_as_current_observation(
                as_type="span",
                name=name,
                input=input,
                metadata=metadata or {},
                # Only pass trace_context when a trace id was requested.
                **({"trace_context": trace_ctx} if trace_ctx else {}),
            )
        )
        stack.enter_context(
            propagate_attributes(
                user_id=user_id,
                session_id=session_id,
                tags=tags or [],
            )
        )
        started = True
    except Exception as exc:
        logger.warning("tracing: trace_span(%s) failed: %s", name, exc)
        # Unwind anything that was entered before the failure.
        try:
            stack.close()
        except Exception as close_exc:
            logger.warning("tracing: trace_span(%s) cleanup failed: %s", name, close_exc)

    if not started:
        # Setup failed: degrade to a no-op span; body errors propagate as-is.
        yield _NullSpan()
        return

    try:
        yield span
    except BaseException as body_exc:
        # Give the Langfuse context managers the chance to record the error,
        # but never let tracing replace or suppress the caller's exception.
        try:
            stack.__exit__(type(body_exc), body_exc, body_exc.__traceback__)
        except Exception as exc:
            if exc is not body_exc:
                logger.warning("tracing: trace_span(%s) teardown failed: %s", name, exc)
        raise
    else:
        try:
            stack.close()
        except Exception as exc:
            logger.warning("tracing: trace_span(%s) teardown failed: %s", name, exc)
|
||||||
|
|
||||||
|
|
||||||
# ── LangChain callback handler ──────────────────────────────────────────
|
# ── LangChain callback handler ──────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def get_langfuse_callback() -> Any | None:
    """Return a LangChain ``CallbackHandler`` that auto-inherits the current trace.

    Must be called inside a ``trace_span()`` block for proper linking.
    Returns *None* when Langfuse is disabled, cannot be initialised, or the
    handler cannot be constructed (the last case is logged).
    """
    # Same availability check as the other helpers in this module; it also
    # performs lazy initialisation, so calling this before init_langfuse()
    # with no keys configured yields None instead of building a handler.
    if _get_client() is None:
        return None

    try:
        from langfuse.langchain import CallbackHandler

        return CallbackHandler()
    except Exception as exc:
        logger.warning("tracing: get_langfuse_callback failed: %s", exc)
        return None
|
||||||
@@ -152,21 +171,8 @@ def get_prompt(
|
|||||||
|
|
||||||
Returns the compiled prompt string, or *fallback* if the prompt is not
|
Returns the compiled prompt string, or *fallback* if the prompt is not
|
||||||
found or Langfuse is disabled.
|
found or Langfuse is disabled.
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
name : str
|
|
||||||
Prompt name as registered in Langfuse.
|
|
||||||
version : int, optional
|
|
||||||
Pin to a specific version; omit for the latest production version.
|
|
||||||
label : str, optional
|
|
||||||
Fetch by label (e.g. ``"production"``, ``"staging"``).
|
|
||||||
fallback : str, optional
|
|
||||||
Value returned when the prompt cannot be fetched.
|
|
||||||
cache_ttl_seconds : int
|
|
||||||
How long to cache the prompt locally (default 5 min).
|
|
||||||
"""
|
"""
|
||||||
lf = _get_langfuse()
|
lf = _get_client()
|
||||||
if lf is None:
|
if lf is None:
|
||||||
return fallback
|
return fallback
|
||||||
|
|
||||||
@@ -187,20 +193,15 @@ def get_prompt(
|
|||||||
|
|
||||||
|
|
||||||
def link_prompt_to_trace(
|
def link_prompt_to_trace(
|
||||||
trace: Any,
|
span: Any,
|
||||||
prompt_name: str,
|
prompt_name: str,
|
||||||
*,
|
*,
|
||||||
version: int | None = None,
|
version: int | None = None,
|
||||||
label: str | None = None,
|
label: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Attach a Langfuse prompt reference to a trace/generation.
|
"""Attach prompt metadata to a span/trace."""
|
||||||
|
lf = _get_client()
|
||||||
Call this *after* creating a generation on the trace to associate the
|
if lf is None or isinstance(span, _NullSpan):
|
||||||
prompt that was used. The prompt object is fetched and linked so
|
|
||||||
Langfuse can display prompt→trace associations in the dashboard.
|
|
||||||
"""
|
|
||||||
lf = _get_langfuse()
|
|
||||||
if lf is None or trace is None:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -210,7 +211,7 @@ def link_prompt_to_trace(
|
|||||||
if label is not None:
|
if label is not None:
|
||||||
kwargs["label"] = label
|
kwargs["label"] = label
|
||||||
prompt = lf.get_prompt(**kwargs)
|
prompt = lf.get_prompt(**kwargs)
|
||||||
trace.update(metadata={"prompt": {"name": prompt_name, "version": prompt.version}})
|
span.update(metadata={"prompt": {"name": prompt_name, "version": prompt.version}})
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
|
logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
|
||||||
|
|
||||||
@@ -226,12 +227,12 @@ def score_trace(
|
|||||||
comment: str | None = None,
|
comment: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Post a score to a trace (e.g. user feedback, latency, quality)."""
|
"""Post a score to a trace (e.g. user feedback, latency, quality)."""
|
||||||
lf = _get_langfuse()
|
lf = _get_client()
|
||||||
if lf is None:
|
if lf is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
lf.score(trace_id=trace_id, name=name, value=value, comment=comment)
|
lf.create_score(trace_id=trace_id, name=name, value=value, comment=comment)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("tracing: score_trace failed: %s", exc)
|
logger.warning("tracing: score_trace failed: %s", exc)
|
||||||
|
|
||||||
@@ -240,22 +241,24 @@ def score_trace(
|
|||||||
|
|
||||||
|
|
||||||
def flush() -> None:
    """Send any buffered Langfuse events now.

    Does nothing when no client is available.  A failing flush is logged
    as a warning and not re-raised.
    """
    client = _get_client()
    if client is None:
        return

    try:
        client.flush()
    except Exception as exc:
        logger.warning("tracing: flush failed: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
def shutdown() -> None:
    """Flush and close the Langfuse client, then reset the module state.

    Only an already-initialised client is touched: the client is not created
    just to be shut down.  ``flush()`` and ``shutdown()`` are attempted
    independently so a failing flush does not prevent the client from being
    closed.  Errors are logged, never raised.  Afterwards both state flags
    are cleared, so a later ``init_langfuse()`` starts from scratch.
    """
    global _initialised, _disabled

    # Skip _get_client() when nothing was initialised — it would lazily
    # construct a client only for it to be torn down immediately.
    lf = _get_client() if _initialised else None
    if lf is not None:
        try:
            lf.flush()
        except Exception as exc:
            logger.warning("tracing: shutdown failed: %s", exc)
        try:
            lf.shutdown()
        except Exception as exc:
            logger.warning("tracing: shutdown failed: %s", exc)

    _initialised = False
    _disabled = False
|
||||||
|
|||||||
@@ -14,4 +14,4 @@ langchain-litellm>=0.3.0
|
|||||||
litellm>=1.50.0
|
litellm>=1.50.0
|
||||||
openai>=1.50.0
|
openai>=1.50.0
|
||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
langfuse>=2.0.0
|
langfuse>=3.0.0
|
||||||
|
|||||||
@@ -6,8 +6,15 @@ and routes frames between Electron and downstream services via Redis pub/sub.
|
|||||||
This service has NO business logic — it only routes JSON frames.
|
This service has NO business logic — it only routes JSON frames.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ensure the repo root is on sys.path so "shared" is importable in local dev.
|
||||||
|
_repo_root = str(Path(__file__).resolve().parents[3])
|
||||||
|
if _repo_root not in sys.path:
|
||||||
|
sys.path.insert(0, _repo_root)
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from shared.config import settings
|
from shared.config import settings
|
||||||
|
|||||||
124
tests/test_e2e_flow.py
Normal file
124
tests/test_e2e_flow.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
"""End-to-end test: Auth → WS Gateway → Chat Service round-trip.
|
||||||
|
|
||||||
|
Usage (from repo root, with venv activated):
|
||||||
|
python tests/test_e2e_flow.py
|
||||||
|
|
||||||
|
Requires: Auth (8001), WS Gateway (8002), Chat (8003) all running.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import websockets
|
||||||
|
|
||||||
|
AUTH_URL = "http://127.0.0.1:8001/api/v1/auth"
|
||||||
|
WS_URL = "ws://127.0.0.1:8002/api/v1/ws/device"
|
||||||
|
|
||||||
|
# ── 1. Authenticate ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def get_token() -> str:
    """Obtain an access token for the fixed e2e test account.

    Tries ``POST {AUTH_URL}/login`` first.  On any non-200 response it falls
    back to ``POST {AUTH_URL}/register`` with the same credentials plus a
    name and surname, raising on an HTTP error status.  In both cases the
    token is read from the ``access_token`` field of the JSON response.
    """
    credentials = {"email": "e2e@test.com", "password": "Test1234!"}

    async with httpx.AsyncClient() as http:
        login = await http.post(f"{AUTH_URL}/login", json=credentials)
        if login.status_code != 200:
            # Login failed: create the account instead.
            signup = await http.post(
                f"{AUTH_URL}/register",
                json={**credentials, "name": "E2E", "surname": "Test"},
            )
            signup.raise_for_status()
            print("[1/4] Registered + logged in as e2e@test.com")
            return signup.json()["access_token"]

        print("[1/4] Logged in as e2e@test.com")
        return login.json()["access_token"]
|
||||||
|
|
||||||
|
|
||||||
|
# ── 2. WebSocket flow ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
async def run_e2e() -> bool:
    """Drive one home_request round-trip through the WS Gateway.

    Authenticates, connects to ``WS_URL`` with the token as a query
    parameter, sends ``device_hello`` and a ``home_request``, then reads
    frames until a ``final`` frame arrives or no frame is received for 60 s.
    ``tool_call`` frames are answered with an empty mock ``tool_result`` so
    the agent does not wait forever; ``ping`` frames are ignored.

    Returns ``True`` when any response text was collected and ``False``
    otherwise, so the script can report failure through its exit code.
    """
    # Function-scope import: escape the token so characters such as "+", "/"
    # or "=" cannot corrupt the query string.
    from urllib.parse import urlencode

    token = await get_token()

    uri = f"{WS_URL}?{urlencode({'token': token})}"
    full_response: list[str] = []

    async with websockets.connect(uri) as ws:
        # Send device_hello
        await ws.send(json.dumps({
            "type": "device_hello",
            "device_id": str(uuid.uuid4()),
            "agent_ids": ["task", "note", "project", "timeline"],
        }))
        print("[2/4] Device registered with WS Gateway")

        # Send a home_request (simple greeting — unlikely to need tools)
        await ws.send(json.dumps({
            "type": "home_request",
            "message": "Hello! How are you doing today?",
            "context": {},
        }))
        print("[3/4] Sent home_request → waiting for Chat Service response...")

        # Listen for response frames (text_chunk, tool_call, final)
        try:
            while True:
                raw = await asyncio.wait_for(ws.recv(), timeout=60)
                frame = json.loads(raw)
                ftype = frame.get("type")

                if ftype == "text_chunk":
                    chunk = frame.get("chunk", frame.get("text", ""))
                    full_response.append(chunk)
                    print(f" ← text_chunk: {chunk[:80]}")

                elif ftype == "tool_call":
                    # Respond with a mock tool_result so the agent doesn't hang
                    call_id = frame.get("id")
                    action = frame.get("action")
                    table = frame.get("table", "")
                    print(f" ← tool_call: {action} {table} (id={call_id})")

                    mock_result = {"rows": [], "row": None}
                    await ws.send(json.dumps({
                        "type": "tool_result",
                        "id": call_id,
                        **mock_result,
                    }))
                    print(f" → tool_result (mock) for {call_id}")

                elif ftype == "final":
                    text = frame.get("text", "")
                    if text:
                        full_response.append(text)
                    print(" ← final")
                    break

                elif ftype == "ping":
                    # Ignore heartbeats
                    continue

                else:
                    print(f" ← {ftype}: {json.dumps(frame)[:120]}")

        except asyncio.TimeoutError:
            print(" ⚠ Timed out waiting for response (60s)")

    print()
    if full_response:
        print(f"[4/4] Full response: {''.join(full_response)}")
    else:
        print("[4/4] No text response received (check Chat Service logs)")

    return bool(full_response)


if __name__ == "__main__":
    # Non-zero exit status when no response text was received.
    raise SystemExit(0 if asyncio.run(run_e2e()) else 1)
|
||||||
Reference in New Issue
Block a user