Add Langfuse observability: traces, prompt management, prompt-to-generation linking

- New app/core/langfuse_client.py: lazy singleton client, get_prompt_or_fallback()
  helper (returns raw template + prompt obj for linking), extract_usage() for token
  counts. No-ops when LANGFUSE_* env vars are not set.
- deep_agent.py: home-agent and floating-agent runs wrapped in spans; each ainvoke
  wrapped in a generation with model/input/output/usage; prompts fetched from
  Langfuse (adiuva-home-agent, adiuva-floating-agent, adiuva-floating-classifier)
  with hardcoded fallback.
- agent_runner.py: step1-classifier LLM call traced as a generation;
  _run_agent_with_tools wraps each run in a span with one generation per tool-loop
  LLM call (used by step2-processor and cloud-processor).
  Prompts: batch_file_classifier, batch_processing, batch_cloud_processing.
- agent_setup.py: journey-setup span + generation per ainvoke; prompt_obj stored
  on JourneySession and reused across turns. Prompt: journey_system.
- settings.py: LANGFUSE_SECRET_KEY, LANGFUSE_PUBLIC_KEY, LANGFUSE_HOST added.
- .env.example: Langfuse section with EU/US/self-hosted host comments.
- requirements.txt: langfuse>=2.0.0.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Roberto Musso
2026-04-07 00:19:20 +02:00
parent 552b8eb305
commit 1ce1d492b0
7 changed files with 455 additions and 78 deletions

View File

@@ -42,7 +42,9 @@ from app.agents.note_agent import NOTE_TOOLS
from app.agents.project_agent import PROJECT_TOOLS
from app.agents.task_agent import TASK_TOOLS
from app.agents.timeline_agent import TIMELINE_TOOLS
from app.config.settings import settings
from app.core.device_manager import DeviceConnectionManager
from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback
from app.core.llm import get_llm
from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor
from app.db import async_session
@@ -268,8 +270,12 @@ async def _run_agent_with_tools(
user_message: str,
tools: list[Any],
max_steps: int,
user_id: str = "",
langfuse_prompt: Any = None,
agent_name: str = "batch-agent",
) -> str:
"""Run an LLM agent with tool-calling, returning the final text response."""
lf = get_langfuse()
llm = get_llm()
llm_with_tools = llm.bind_tools(tools)
messages: list[Any] = [
@@ -279,38 +285,76 @@ async def _run_agent_with_tools(
tool_map = {tool_def.name: tool_def for tool_def in tools}
for _ in range(max_steps):
response: AIMessage = await llm_with_tools.ainvoke(messages)
messages.append(response)
_span_ctx = (
lf.start_as_current_observation(
as_type="span",
name=agent_name,
user_id=user_id or None,
input=user_message,
)
if lf else None
)
_span = _span_ctx.__enter__() if _span_ctx else None
if not response.tool_calls:
return _as_text(response.content)
for call in response.tool_calls:
call_id = str(call.get("id", ""))
call_name = str(call.get("name", ""))
call_args = call.get("args", {})
logger.info(
"agent_runner: tool_call name=%s args=%s",
call_name,
json.dumps(call_args, ensure_ascii=True)[:800],
try:
for _ in range(max_steps):
_gen_ctx = (
lf.start_as_current_observation(
as_type="generation",
name=f"{agent_name}-llm",
model=settings.LLM_MODEL,
prompt=langfuse_prompt,
input=messages,
)
if lf else None
)
_gen = _gen_ctx.__enter__() if _gen_ctx else None
response: AIMessage = await llm_with_tools.ainvoke(messages)
if _gen_ctx:
_gen.update(output=_as_text(response.content), usage=extract_usage(response))
_gen_ctx.__exit__(None, None, None)
tool_fn = tool_map.get(call_name)
if tool_fn is None:
tool_output = f"Unknown tool: {call_name}"
else:
tool_output = await tool_fn.ainvoke(call_args)
messages.append(response)
logger.info(
"agent_runner: tool_result name=%s output=%s",
call_name,
str(tool_output)[:200],
)
messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
if not response.tool_calls:
final_text = _as_text(response.content)
if _span:
_span.update(output=final_text)
return final_text
final = await llm.ainvoke(messages)
return _as_text(final.content)
for call in response.tool_calls:
call_id = str(call.get("id", ""))
call_name = str(call.get("name", ""))
call_args = call.get("args", {})
logger.info(
"agent_runner: tool_call name=%s args=%s",
call_name,
json.dumps(call_args, ensure_ascii=True)[:800],
)
tool_fn = tool_map.get(call_name)
if tool_fn is None:
tool_output = f"Unknown tool: {call_name}"
else:
tool_output = await tool_fn.ainvoke(call_args)
logger.info(
"agent_runner: tool_result name=%s output=%s",
call_name,
str(tool_output)[:200],
)
messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
final = await llm.ainvoke(messages)
final_text = _as_text(final.content)
if _span:
_span.update(output=final_text)
return final_text
finally:
if _span_ctx:
_span_ctx.__exit__(None, None, None)
if lf:
lf.flush()
# ── Tool list builder ─────────────────────────────────────────────────────
@@ -515,17 +559,33 @@ async def _classify_file(
if d in _DOMAIN_DESCRIPTIONS
)
system = _STEP1_SYSTEM_PROMPT.format(
step1_template, step1_prompt_obj = get_prompt_or_fallback(
"batch_file_classifier", _STEP1_SYSTEM_PROMPT
)
system = step1_template.format(
domain_definitions=domain_definitions,
projects_list=projects_list,
)
lf = get_langfuse()
llm = get_llm()
classifier_messages = [
SystemMessage(content=system),
HumanMessage(content=f"File: {file_path}\n\nContent:\n{file_content[:4000]}"),
]
try:
response = await llm.ainvoke([
SystemMessage(content=system),
HumanMessage(content=f"File: {file_path}\n\nContent:\n{file_content[:4000]}"),
])
if lf:
with lf.start_as_current_observation(
as_type="generation",
name="step1-classifier",
model=settings.LLM_ROUTER_MODEL,
prompt=step1_prompt_obj,
input=classifier_messages,
) as gen:
response = await llm.ainvoke(classifier_messages)
gen.update(output=_as_text(response.content), usage=extract_usage(response))
else:
response = await llm.ainvoke(classifier_messages)
raw = _as_text(response.content).strip()
# Strip markdown fences if the model wraps the JSON.
if raw.startswith("```"):
@@ -713,7 +773,10 @@ async def run_local_agent(
existing_context = "\n\n".join(existing_blocks)
system_prompt = _PROCESSING_SYSTEM_PROMPT.format(
step2_template, step2_prompt_obj = get_prompt_or_fallback(
"batch_processing", _PROCESSING_SYSTEM_PROMPT
)
system_prompt = step2_template.format(
existing_context=existing_context,
project_context=project_context,
data_types=", ".join(domains),
@@ -730,6 +793,9 @@ async def run_local_agent(
),
tools=processing_tools,
max_steps=_MAX_PROCESSING_STEPS,
user_id=user_id,
langfuse_prompt=step2_prompt_obj,
agent_name="step2-processor",
)
logger.info(
"agent_runner: run=%s file=%r result=%s",
@@ -928,7 +994,10 @@ async def run_cloud_agent(
continue
items_processed += 1
processing_prompt = _CLOUD_PROCESSING_PROMPT.format(
cloud_template, cloud_prompt_obj = get_prompt_or_fallback(
"batch_cloud_processing", _CLOUD_PROCESSING_PROMPT
)
processing_prompt = cloud_template.format(
data_types=", ".join(config.data_types),
project_context="Determine the appropriate project from the message context.",
file_list=f"Message from {config.provider} (id: {msg.id})",
@@ -941,6 +1010,9 @@ async def run_cloud_agent(
user_message=f"Process this message content:\n\n{content_text[:8000]}",
tools=processing_tools,
max_steps=_MAX_PROCESSING_STEPS,
user_id=user_id,
langfuse_prompt=cloud_prompt_obj,
agent_name="cloud-processor",
)
except Exception as exc:
errors.append(f"LLM processing error for message {msg.id!r}: {exc}")