fix: Langfuse SDK v4 migration, tracing improvements, and LLM config
- Langfuse SDK v4: fix prompt-to-trace linking (as_type=generation)
- tracing: compile_prompt with Langfuse managed prompt fallback
- journey: remove journey CLI subcommand (keep only interactive)
- LLM: add service-specific llm modules for batch-agent and chat
- gitignore: exclude eval private test data
- config: add LANGFUSE settings to shared config
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -35,3 +35,6 @@ Thumbs.db
|
|||||||
# Claude Code
|
# Claude Code
|
||||||
.claude/
|
.claude/
|
||||||
logs/
|
logs/
|
||||||
|
|
||||||
|
# Eval private test data
|
||||||
|
services/batch-agent/eval/fixtures/private_data/
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ class Settings(BaseSettings):
|
|||||||
ANTHROPIC_API_KEY: str = ""
|
ANTHROPIC_API_KEY: str = ""
|
||||||
GOOGLE_API_KEY: str = ""
|
GOOGLE_API_KEY: str = ""
|
||||||
CEREBRAS_API_KEY: str = ""
|
CEREBRAS_API_KEY: str = ""
|
||||||
|
GITHUB_TOKEN: str = ""
|
||||||
|
|
||||||
LLM_MODEL: str = "gpt-4o"
|
LLM_MODEL: str = "gpt-4o"
|
||||||
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ def _api_key_for_model(model: str) -> str | None:
|
|||||||
return settings.GOOGLE_API_KEY or None
|
return settings.GOOGLE_API_KEY or None
|
||||||
if model.startswith("cerebras/"):
|
if model.startswith("cerebras/"):
|
||||||
return settings.CEREBRAS_API_KEY or None
|
return settings.CEREBRAS_API_KEY or None
|
||||||
|
if model.startswith("github/"):
|
||||||
|
return settings.GITHUB_TOKEN or None
|
||||||
if model.startswith("github_copilot/"):
|
if model.startswith("github_copilot/"):
|
||||||
# GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
|
# GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
|
||||||
# No API key is required; returning None lets LiteLLM handle auth.
|
# No API key is required; returning None lets LiteLLM handle auth.
|
||||||
@@ -83,6 +85,9 @@ def get_llm(
|
|||||||
if settings.GITHUB_COPILOT_TOKEN_DIR:
|
if settings.GITHUB_COPILOT_TOKEN_DIR:
|
||||||
os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
|
os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
|
||||||
|
|
||||||
|
if settings.GITHUB_TOKEN:
|
||||||
|
os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
|
||||||
|
|
||||||
# Use ChatLiteLLM for provider-prefixed models (github_copilot/, anthropic/, etc.)
|
# Use ChatLiteLLM for provider-prefixed models (github_copilot/, anthropic/, etc.)
|
||||||
# so LiteLLM handles routing and auth. ChatOpenAI for plain OpenAI model names.
|
# so LiteLLM handles routing and auth. ChatOpenAI for plain OpenAI model names.
|
||||||
if "/" in model:
|
if "/" in model:
|
||||||
|
|||||||
@@ -80,17 +80,9 @@ def get_journey_session(session_id: str, user_id: str) -> JourneySession | None:
|
|||||||
_SYSTEM_PROMPT_TEMPLATE = """\
|
_SYSTEM_PROMPT_TEMPLATE = """\
|
||||||
You are a friendly assistant helping a freelancer configure a data-extraction agent.
|
You are a friendly assistant helping a freelancer configure a data-extraction agent.
|
||||||
Your job is to understand exactly what data the user wants to extract from their
|
Your job is to understand exactly what data the user wants to extract from their
|
||||||
local directory and produce a detailed prompt_template that a separate AI will use
|
local directory and produce a concise prompt_template that a separate AI will use
|
||||||
as its instruction set.
|
as its instruction set.
|
||||||
|
|
||||||
The extraction agent already has this base behaviour built in:
|
|
||||||
- Reads each file using file-system tools.
|
|
||||||
- Creates records (tasks, notes, timelines, projects) via CRUD tools.
|
|
||||||
- Sets isAiSuggested=1 on every new record.
|
|
||||||
- Only extracts data explicitly present in the files — it never invents information.
|
|
||||||
The user's custom prompt is appended AFTER this base behaviour, so focus on
|
|
||||||
what to look for and how to map it — not on the general extraction mechanics.
|
|
||||||
|
|
||||||
You have access to file-system tools to explore the user's directory:
|
You have access to file-system tools to explore the user's directory:
|
||||||
- list_directory: to see folder structure
|
- list_directory: to see folder structure
|
||||||
- read_file_content: to peek at file contents
|
- read_file_content: to peek at file contents
|
||||||
@@ -99,38 +91,43 @@ You have access to file-system tools to explore the user's directory:
|
|||||||
The user's configured directory is: {directory}
|
The user's configured directory is: {directory}
|
||||||
Target data types: {data_types}
|
Target data types: {data_types}
|
||||||
|
|
||||||
IMPORTANT — project assignment is handled automatically by the main agent runner
|
IMPORTANT — project assignment is handled automatically. You MUST NOT ask the user
|
||||||
before the custom prompt is ever used. You MUST NOT ask the user about projects,
|
about projects, projectId, or how to link records to projects. Never include
|
||||||
projectId, or how to link records to projects. Never include projectId logic or
|
projectId logic or project creation instructions in the generated prompt_template.
|
||||||
project creation instructions in the generated prompt_template.
|
|
||||||
|
|
||||||
Start by exploring the directory to understand its structure. Then ask concise,
|
Start by exploring the directory to understand its structure. Then ask concise,
|
||||||
focused questions one at a time. Cover these topics (not necessarily in this order):
|
focused questions one at a time. Cover only the topics relevant to the target
|
||||||
1. The type and format of the source content (confirmed by your exploration).
|
data types listed above:
|
||||||
2. How fields should be mapped (e.g. filename → task title).
|
|
||||||
3. Priority or status rules (e.g. "urgent" keyword → high priority).
|
|
||||||
4. Any special handling, date extraction, or exclusions.
|
|
||||||
|
|
||||||
Once you reach 90% confidence, output the final prompt_template between these exact
|
1. Content type and format — confirmed by your exploration.
|
||||||
markers on their own lines:
|
2. For TASKS (if in scope): field mapping for title, status, priority, content,
|
||||||
|
dueDate (where is the date found? what's the fallback when absent?),
|
||||||
|
and assignee (is there a person name to assign?).
|
||||||
|
3. For NOTES when TASKS are also in scope: note vs task distinction —
|
||||||
|
what makes something a note rather than a task?
|
||||||
|
4. For TIMELINES (if in scope): the date source — what marks a milestone or event?
|
||||||
|
5. Exclusions and special handling applicable to the target data types.
|
||||||
|
|
||||||
|
Keep asking focused questions until you are at least 90% confident. Then stop and
|
||||||
|
output the final prompt_template immediately, wrapped between these exact markers
|
||||||
|
on their own lines:
|
||||||
|
|
||||||
{template_start}
|
{template_start}
|
||||||
<the complete extraction prompt here>
|
<the complete extraction prompt here>
|
||||||
{template_end}
|
{template_end}
|
||||||
|
|
||||||
The prompt_template must be a self-contained instruction for an AI that reads files
|
The prompt_template must be concise (bullet points, ~15–25 lines maximum).
|
||||||
and must perform CRUD operations using tools to create records. It should specify:
|
Specify only:
|
||||||
- What entity types to create (tasks, notes, timelines) — never projects.
|
- Scope: what files/content qualify and what entity types to create.
|
||||||
- How to map file content to record fields (camelCase: title, status, priority,
|
- Field mapping rules per entity type (camelCase fields: title, status, priority,
|
||||||
dueDate, content, etc.) — never include projectId.
|
dueDate, content, assignee, etc.).
|
||||||
- That isAiSuggested must be set to 1 on every new record.
|
- dueDate rule (if tasks in scope): source and fallback behaviour.
|
||||||
- Concrete examples of mappings based on what you discovered in the directory.
|
- Note vs task rule (if both in scope): the criterion that separates them.
|
||||||
|
- Timeline date rule (if timelines in scope): what constitutes a timeline event.
|
||||||
|
- Exclusion/filtering rules.
|
||||||
|
- 2–3 concrete mapping examples based on what you discovered.
|
||||||
|
|
||||||
{existing_section}\
|
{existing_section}Begin by exploring the directory, then ask your first question.\
|
||||||
Keep asking clarifying questions until you are at least 90% confident you have
|
|
||||||
enough information to generate an accurate prompt_template. Once you reach that
|
|
||||||
confidence level, stop asking and produce the final template immediately.
|
|
||||||
Begin by exploring the directory, then ask your first question.\
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -152,8 +149,6 @@ def _build_system_prompt(
|
|||||||
variables={
|
variables={
|
||||||
"directory": directory,
|
"directory": directory,
|
||||||
"data_types": ", ".join(data_types),
|
"data_types": ", ".join(data_types),
|
||||||
"template_start": _TEMPLATE_START,
|
|
||||||
"template_end": _TEMPLATE_END,
|
|
||||||
"existing_section": existing_section,
|
"existing_section": existing_section,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
76
services/batch-agent/app/llm.py
Normal file
76
services/batch-agent/app/llm.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
"""LLM factory — centralised model instantiation via LiteLLM.
|
||||||
|
|
||||||
|
Identical to services/chat/app/llm.py. Uses shared.config.settings.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from langchain_litellm import ChatLiteLLM
|
||||||
|
|
||||||
|
from shared.config import settings
|
||||||
|
|
||||||
|
litellm.drop_params = True
|
||||||
|
|
||||||
|
warnings.filterwarnings(
|
||||||
|
"ignore",
|
||||||
|
message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
|
||||||
|
category=UserWarning,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _api_key_for_model(model: str) -> str | None:
    """Return the configured API key for the provider implied by *model*.

    The provider is read from the model name's prefix. A setting that is
    empty is reported as ``None`` rather than ``""``. Names that match no
    known prefix use the OpenAI key.
    """
    # ``github_copilot/`` models never receive a key from settings.
    if model.startswith("github_copilot/"):
        return None

    # str.startswith accepts either one prefix or a tuple of prefixes.
    key_setting_by_prefix = (
        ("anthropic/", "ANTHROPIC_API_KEY"),
        (("gemini/", "google/"), "GOOGLE_API_KEY"),
        ("cerebras/", "CEREBRAS_API_KEY"),
        ("github/", "GITHUB_TOKEN"),
    )
    setting_name = next(
        (name for prefix, name in key_setting_by_prefix if model.startswith(prefix)),
        "OPENAI_API_KEY",
    )
    return getattr(settings, setting_name) or None
|
||||||
|
|
||||||
|
|
||||||
|
def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
    callbacks: list | None = None,
) -> ChatOpenAI | ChatLiteLLM:
    """Build a chat model for *model*, defaulting to ``settings.LLM_MODEL``.

    Names containing ``/`` are handed to ``ChatLiteLLM``; every other name is
    handed to ``ChatOpenAI`` together with the key chosen by
    ``_api_key_for_model``. No key is passed explicitly on the
    ``ChatLiteLLM`` path.
    """
    # Publish GitHub credentials from settings to the process environment
    # without overriding values that are already set there.
    for env_name in ("GITHUB_COPILOT_TOKEN_DIR", "GITHUB_TOKEN"):
        configured = getattr(settings, env_name)
        if configured:
            os.environ.setdefault(env_name, configured)

    chosen_model = model if model else settings.LLM_MODEL

    if "/" not in chosen_model:
        return ChatOpenAI(
            model=chosen_model,
            temperature=temperature,
            api_key=_api_key_for_model(chosen_model),
            callbacks=callbacks,
        )

    return ChatLiteLLM(model=chosen_model, temperature=temperature, callbacks=callbacks)
|
||||||
|
|
||||||
|
|
||||||
|
async def embed(text: str) -> list[float]:
    """Return the embedding vector for *text* using ``settings.LLM_EMBED_MODEL``.

    Model names containing ``/`` are sent through ``litellm.aembedding``;
    plain names go to the OpenAI embeddings endpoint, authenticated with
    ``settings.OPENAI_API_KEY``.
    """
    model = settings.LLM_EMBED_MODEL

    # Every provider-prefixed name (``github_copilot/`` included) contains
    # "/", so one membership test covers all LiteLLM-routed models.
    if "/" in model:
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # The client is created per call, so close it on exit rather than leaving
    # its underlying HTTP connections open.
    async with AsyncOpenAI(api_key=settings.OPENAI_API_KEY) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding
|
||||||
@@ -138,6 +138,8 @@ async def _dispatch(user_id: str, message_data: dict[str, Any]) -> None:
|
|||||||
await _handle_journey_message(user_id, message_data)
|
await _handle_journey_message(user_id, message_data)
|
||||||
elif msg_type == "agent_trigger":
|
elif msg_type == "agent_trigger":
|
||||||
await _handle_agent_trigger(user_id, message_data)
|
await _handle_agent_trigger(user_id, message_data)
|
||||||
|
elif msg_type == "device_online":
|
||||||
|
logger.info("batch-agent: device_online user=%s device=%s", user_id, message_data.get("device_id", "?"))
|
||||||
else:
|
else:
|
||||||
logger.warning("batch-agent: unknown message type %r from user=%s", msg_type, user_id)
|
logger.warning("batch-agent: unknown message type %r from user=%s", msg_type, user_id)
|
||||||
|
|
||||||
|
|||||||
@@ -232,6 +232,38 @@ def compile_prompt(
|
|||||||
return fallback.format(**variables)
|
return fallback.format(**variables)
|
||||||
|
|
||||||
|
|
||||||
|
def get_prompt_object(
    name: str,
    *,
    version: int | None = None,
    label: str | None = None,
    cache_ttl_seconds: int = 300,
) -> Any | None:
    """Look up a Langfuse prompt and return the prompt object itself.

    Unlike a compiled prompt string, the returned object can be passed to
    ``start_observation(prompt=...)`` so that the prompt appears linked to
    the trace in the Langfuse UI.

    Returns ``None`` when ``_get_client()`` yields no client or when the
    lookup raises; failures are logged as warnings, never propagated.
    """
    client = _get_client()
    if client is None:
        return None

    # ``version`` and ``label`` are forwarded only when the caller set them.
    selectors = {"version": version, "label": label}
    request: dict[str, Any] = {"name": name, "cache_ttl_seconds": cache_ttl_seconds}
    request.update({key: value for key, value in selectors.items() if value is not None})

    try:
        return client.get_prompt(**request)
    except Exception as exc:
        logger.warning("tracing: get_prompt_object(%s) failed: %s", name, exc)
        return None
|
||||||
|
|
||||||
|
|
||||||
def link_prompt_to_trace(
|
def link_prompt_to_trace(
|
||||||
span: Any,
|
span: Any,
|
||||||
prompt_name: str,
|
prompt_name: str,
|
||||||
@@ -239,19 +271,19 @@ def link_prompt_to_trace(
|
|||||||
version: int | None = None,
|
version: int | None = None,
|
||||||
label: str | None = None,
|
label: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Attach prompt metadata to a span/trace."""
|
"""Link a Langfuse managed prompt to a span/observation.
|
||||||
|
|
||||||
|
Uses the SDK v4 ``prompt=`` parameter so that the prompt version
|
||||||
|
appears linked in the Langfuse UI with metrics tracking.
|
||||||
|
"""
|
||||||
lf = _get_client()
|
lf = _get_client()
|
||||||
if lf is None or isinstance(span, _NullSpan):
|
if lf is None or isinstance(span, _NullSpan):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
kwargs: dict[str, Any] = {"name": prompt_name}
|
prompt = get_prompt_object(prompt_name, version=version, label=label)
|
||||||
if version is not None:
|
if prompt is not None:
|
||||||
kwargs["version"] = version
|
span.update(prompt=prompt)
|
||||||
if label is not None:
|
|
||||||
kwargs["label"] = label
|
|
||||||
prompt = lf.get_prompt(**kwargs)
|
|
||||||
span.update(metadata={"prompt": {"name": prompt_name, "version": prompt.version}})
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
|
logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc)
|
||||||
|
|
||||||
|
|||||||
77
services/chat/app/llm.py
Normal file
77
services/chat/app/llm.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
"""LLM factory — centralised model instantiation via LiteLLM.
|
||||||
|
|
||||||
|
Adapted from app/core/llm.py for the Chat Service.
|
||||||
|
Uses shared.config.settings instead of app.config.settings.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
from langchain_openai import ChatOpenAI
|
||||||
|
from langchain_litellm import ChatLiteLLM
|
||||||
|
|
||||||
|
from shared.config import settings
|
||||||
|
|
||||||
|
litellm.drop_params = True
|
||||||
|
|
||||||
|
warnings.filterwarnings(
|
||||||
|
"ignore",
|
||||||
|
message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`",
|
||||||
|
category=UserWarning,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _api_key_for_model(model: str) -> str | None:
    """Map *model*'s provider prefix to the matching key from settings.

    An empty setting is returned as ``None``. ``github_copilot/`` models
    always yield ``None``; names with no recognised prefix use the OpenAI key.
    """
    setting_by_prefix = {
        "anthropic/": "ANTHROPIC_API_KEY",
        "gemini/": "GOOGLE_API_KEY",
        "google/": "GOOGLE_API_KEY",
        "cerebras/": "CEREBRAS_API_KEY",
        "github/": "GITHUB_TOKEN",
    }
    for prefix, setting_name in setting_by_prefix.items():
        if model.startswith(prefix):
            return getattr(settings, setting_name) or None

    if model.startswith("github_copilot/"):
        return None

    return settings.OPENAI_API_KEY or None
|
||||||
|
|
||||||
|
|
||||||
|
def get_llm(
    *,
    model: str | None = None,
    temperature: float = 0,
    callbacks: list | None = None,
) -> ChatOpenAI | ChatLiteLLM:
    """Create the chat model for *model* (default: ``settings.LLM_MODEL``).

    A name containing ``/`` produces a ``ChatLiteLLM``; any other name
    produces a ``ChatOpenAI`` that receives the key selected by
    ``_api_key_for_model``. The ``ChatLiteLLM`` branch passes no key
    explicitly.
    """
    resolved_model = model or settings.LLM_MODEL

    # Copy GitHub credentials from settings into the environment, keeping
    # any value that is already present there.
    github_env = {
        "GITHUB_COPILOT_TOKEN_DIR": settings.GITHUB_COPILOT_TOKEN_DIR,
        "GITHUB_TOKEN": settings.GITHUB_TOKEN,
    }
    for env_name, configured in github_env.items():
        if configured:
            os.environ.setdefault(env_name, configured)

    uses_provider_prefix = "/" in resolved_model
    if uses_provider_prefix:
        return ChatLiteLLM(
            model=resolved_model,
            temperature=temperature,
            callbacks=callbacks,
        )

    openai_key = _api_key_for_model(resolved_model)
    return ChatOpenAI(
        model=resolved_model,
        temperature=temperature,
        api_key=openai_key,
        callbacks=callbacks,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def embed(text: str) -> list[float]:
    """Return the embedding vector for *text* using ``settings.LLM_EMBED_MODEL``.

    Model names containing ``/`` are sent through ``litellm.aembedding``;
    plain names go to the OpenAI embeddings endpoint, authenticated with
    ``settings.OPENAI_API_KEY``.
    """
    model = settings.LLM_EMBED_MODEL

    # Every provider-prefixed name (``github_copilot/`` included) contains
    # "/", so one membership test covers all LiteLLM-routed models.
    if "/" in model:
        response = await litellm.aembedding(model=model, input=[text])
        return response.data[0]["embedding"]

    # The client is created per call, so close it on exit rather than leaving
    # its underlying HTTP connections open.
    async with AsyncOpenAI(api_key=settings.OPENAI_API_KEY) as client:
        response = await client.embeddings.create(model=model, input=text)
    return response.data[0].embedding
|
||||||
@@ -62,6 +62,7 @@ class Settings(BaseSettings):
|
|||||||
ANTHROPIC_API_KEY: str = ""
|
ANTHROPIC_API_KEY: str = ""
|
||||||
GOOGLE_API_KEY: str = ""
|
GOOGLE_API_KEY: str = ""
|
||||||
CEREBRAS_API_KEY: str = ""
|
CEREBRAS_API_KEY: str = ""
|
||||||
|
GITHUB_TOKEN: str = ""
|
||||||
|
|
||||||
LLM_MODEL: str = "gpt-4o"
|
LLM_MODEL: str = "gpt-4o"
|
||||||
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ def _api_key_for_model(model: str) -> str | None:
|
|||||||
return settings.GOOGLE_API_KEY or None
|
return settings.GOOGLE_API_KEY or None
|
||||||
if model.startswith("cerebras/"):
|
if model.startswith("cerebras/"):
|
||||||
return settings.CEREBRAS_API_KEY or None
|
return settings.CEREBRAS_API_KEY or None
|
||||||
|
if model.startswith("github/"):
|
||||||
|
return settings.GITHUB_TOKEN or None
|
||||||
if model.startswith("github_copilot/"):
|
if model.startswith("github_copilot/"):
|
||||||
return None
|
return None
|
||||||
return settings.OPENAI_API_KEY or None
|
return settings.OPENAI_API_KEY or None
|
||||||
@@ -49,6 +51,9 @@ def get_llm(
|
|||||||
if settings.GITHUB_COPILOT_TOKEN_DIR:
|
if settings.GITHUB_COPILOT_TOKEN_DIR:
|
||||||
os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
|
os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
|
||||||
|
|
||||||
|
if settings.GITHUB_TOKEN:
|
||||||
|
os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN)
|
||||||
|
|
||||||
if "/" in model:
|
if "/" in model:
|
||||||
return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)
|
return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user