diff --git a/.gitignore b/.gitignore index 8e6f860..a098c93 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ Thumbs.db # Claude Code .claude/ logs/ + +# Eval private test data +services/batch-agent/eval/fixtures/private_data/ diff --git a/app/config/settings.py b/app/config/settings.py index 03c9906..e566969 100644 --- a/app/config/settings.py +++ b/app/config/settings.py @@ -27,6 +27,7 @@ class Settings(BaseSettings): ANTHROPIC_API_KEY: str = "" GOOGLE_API_KEY: str = "" CEREBRAS_API_KEY: str = "" + GITHUB_TOKEN: str = "" LLM_MODEL: str = "gpt-4o" LLM_EMBED_MODEL: str = "text-embedding-3-small" diff --git a/app/core/llm.py b/app/core/llm.py index 1787ce9..cfd9d84 100644 --- a/app/core/llm.py +++ b/app/core/llm.py @@ -50,6 +50,8 @@ def _api_key_for_model(model: str) -> str | None: return settings.GOOGLE_API_KEY or None if model.startswith("cerebras/"): return settings.CEREBRAS_API_KEY or None + if model.startswith("github/"): + return settings.GITHUB_TOKEN or None if model.startswith("github_copilot/"): # GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM. # No API key is required; returning None lets LiteLLM handle auth. @@ -83,6 +85,9 @@ def get_llm( if settings.GITHUB_COPILOT_TOKEN_DIR: os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR) + if settings.GITHUB_TOKEN: + os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN) + # Use ChatLiteLLM for provider-prefixed models (github_copilot/, anthropic/, etc.) # so LiteLLM handles routing and auth. ChatOpenAI for plain OpenAI model names. if "/" in model: diff --git a/services/batch-agent/app/journey.py b/services/batch-agent/app/journey.py index 9dcafeb..fe6d8d0 100644 --- a/services/batch-agent/app/journey.py +++ b/services/batch-agent/app/journey.py @@ -80,17 +80,9 @@ def get_journey_session(session_id: str, user_id: str) -> JourneySession | None: _SYSTEM_PROMPT_TEMPLATE = """\ You are a friendly assistant helping a freelancer configure a data-extraction agent. Your job is to understand exactly what data the user wants to extract from their -local directory and produce a detailed prompt_template that a separate AI will use +local directory and produce a concise prompt_template that a separate AI will use as its instruction set. -The extraction agent already has this base behaviour built in: - - Reads each file using file-system tools. - - Creates records (tasks, notes, timelines, projects) via CRUD tools. - - Sets isAiSuggested=1 on every new record. - - Only extracts data explicitly present in the files — it never invents information. -The user's custom prompt is appended AFTER this base behaviour, so focus on -what to look for and how to map it — not on the general extraction mechanics. - You have access to file-system tools to explore the user's directory: - list_directory: to see folder structure - read_file_content: to peek at file contents @@ -99,38 +91,43 @@ You have access to file-system tools to explore the user's directory: The user's configured directory is: {directory} Target data types: {data_types} -IMPORTANT — project assignment is handled automatically by the main agent runner -before the custom prompt is ever used. You MUST NOT ask the user about projects, -projectId, or how to link records to projects. Never include projectId logic or -project creation instructions in the generated prompt_template. +IMPORTANT — project assignment is handled automatically. You MUST NOT ask the user +about projects, projectId, or how to link records to projects. Never include +projectId logic or project creation instructions in the generated prompt_template. Start by exploring the directory to understand its structure. Then ask concise, -focused questions one at a time. Cover these topics (not necessarily in this order): - 1. The type and format of the source content (confirmed by your exploration). - 2. How fields should be mapped (e.g. filename → task title). - 3. Priority or status rules (e.g. "urgent" keyword → high priority). - 4. Any special handling, date extraction, or exclusions. +focused questions one at a time. Cover only the topics relevant to the target +data types listed above: -Once you reach 90% confidence, output the final prompt_template between these exact -markers on their own lines: + 1. Content type and format — confirmed by your exploration. + 2. For TASKS (if in scope): field mapping for title, status, priority, content, + dueDate (where is the date found? what's the fallback when absent?), + and assignee (is there a person name to assign?). + 3. For NOTES when TASKS are also in scope: note vs task distinction — + what makes something a note rather than a task? + 4. For TIMELINES (if in scope): the date source — what marks a milestone or event? + 5. Exclusions and special handling applicable to the target data types. + +Keep asking focused questions until you are at least 90% confident. Then stop and +output the final prompt_template immediately, wrapped between these exact markers +on their own lines: {template_start} {template_end} -The prompt_template must be a self-contained instruction for an AI that reads files -and must perform CRUD operations using tools to create records. It should specify: - - What entity types to create (tasks, notes, timelines) — never projects. - - How to map file content to record fields (camelCase: title, status, priority, - dueDate, content, etc.) — never include projectId. - - That isAiSuggested must be set to 1 on every new record. - - Concrete examples of mappings based on what you discovered in the directory. +The prompt_template must be concise (bullet points, ~15–25 lines maximum). +Specify only: + - Scope: what files/content qualify and what entity types to create. + - Field mapping rules per entity type (camelCase fields: title, status, priority, + dueDate, content, assignee, etc.). + - dueDate rule (if tasks in scope): source and fallback behaviour. + - Note vs task rule (if both in scope): the criterion that separates them. + - Timeline date rule (if timelines in scope): what constitutes a timeline event. + - Exclusion/filtering rules. + - 2–3 concrete mapping examples based on what you discovered. -{existing_section}\ -Keep asking clarifying questions until you are at least 90% confident you have -enough information to generate an accurate prompt_template. Once you reach that -confidence level, stop asking and produce the final template immediately. -Begin by exploring the directory, then ask your first question.\ +{existing_section}Begin by exploring the directory, then ask your first question.\ """ @@ -152,8 +149,6 @@ def _build_system_prompt( variables={ "directory": directory, "data_types": ", ".join(data_types), - "template_start": _TEMPLATE_START, - "template_end": _TEMPLATE_END, "existing_section": existing_section, }, ) diff --git a/services/batch-agent/app/llm.py b/services/batch-agent/app/llm.py new file mode 100644 index 0000000..603e376 --- /dev/null +++ b/services/batch-agent/app/llm.py @@ -0,0 +1,76 @@ +"""LLM factory — centralised model instantiation via LiteLLM. + +Identical to services/chat/app/llm.py. Uses shared.config.settings. +""" + +from __future__ import annotations + +import os +import warnings + +from openai import AsyncOpenAI +import litellm + +from langchain_openai import ChatOpenAI +from langchain_litellm import ChatLiteLLM + +from shared.config import settings + +litellm.drop_params = True + +warnings.filterwarnings( + "ignore", + message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`", + category=UserWarning, +) + + +def _api_key_for_model(model: str) -> str | None: + if model.startswith("anthropic/"): + return settings.ANTHROPIC_API_KEY or None + if model.startswith("gemini/") or model.startswith("google/"): + return settings.GOOGLE_API_KEY or None + if model.startswith("cerebras/"): + return settings.CEREBRAS_API_KEY or None + if model.startswith("github/"): + return settings.GITHUB_TOKEN or None + if model.startswith("github_copilot/"): + return None + return settings.OPENAI_API_KEY or None + + +def get_llm( + *, + model: str | None = None, + temperature: float = 0, + callbacks: list | None = None, +) -> ChatOpenAI | ChatLiteLLM: + model = model or settings.LLM_MODEL + + if settings.GITHUB_COPILOT_TOKEN_DIR: + os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR) + + if settings.GITHUB_TOKEN: + os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN) + + if "/" in model: + return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks) + + return ChatOpenAI( + model=model, + temperature=temperature, + api_key=_api_key_for_model(model), + callbacks=callbacks, + ) + + +async def embed(text: str) -> list[float]: + model = settings.LLM_EMBED_MODEL + + if model.startswith("github_copilot/") or "/" in model: + response = await litellm.aembedding(model=model, input=[text]) + return response.data[0]["embedding"] + + client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY) + response = await client.embeddings.create(model=model, input=text) + return response.data[0].embedding diff --git a/services/batch-agent/app/redis_consumer.py b/services/batch-agent/app/redis_consumer.py index 8adb02f..cb2d8a3 100644 --- a/services/batch-agent/app/redis_consumer.py +++ b/services/batch-agent/app/redis_consumer.py @@ -138,6 +138,8 @@ async def _dispatch(user_id: str, message_data: dict[str, Any]) -> None: await _handle_journey_message(user_id, message_data) elif msg_type == "agent_trigger": await _handle_agent_trigger(user_id, message_data) + elif msg_type == "device_online": + logger.info("batch-agent: device_online user=%s device=%s", user_id, message_data.get("device_id", "?")) else: logger.warning("batch-agent: unknown message type %r from user=%s", msg_type, user_id) diff --git a/services/batch-agent/app/tracing.py b/services/batch-agent/app/tracing.py index 430edf1..75a0dbb 100644 --- a/services/batch-agent/app/tracing.py +++ b/services/batch-agent/app/tracing.py @@ -232,6 +232,38 @@ def compile_prompt( return fallback.format(**variables) +def get_prompt_object( + name: str, + *, + version: int | None = None, + label: str | None = None, + cache_ttl_seconds: int = 300, +) -> Any | None: + """Fetch the raw Langfuse prompt *object* (not the compiled string). + + Returns ``None`` when Langfuse is disabled or the prompt is not found. + Use this when you need to pass the prompt to ``start_observation(prompt=...)`` + for linking the prompt to a trace in the Langfuse UI. + """ + lf = _get_client() + if lf is None: + return None + + try: + kwargs: dict[str, Any] = { + "name": name, + "cache_ttl_seconds": cache_ttl_seconds, + } + if version is not None: + kwargs["version"] = version + if label is not None: + kwargs["label"] = label + return lf.get_prompt(**kwargs) + except Exception as exc: + logger.warning("tracing: get_prompt_object(%s) failed: %s", name, exc) + return None + + def link_prompt_to_trace( span: Any, prompt_name: str, @@ -239,19 +271,19 @@ def link_prompt_to_trace( version: int | None = None, label: str | None = None, ) -> None: - """Attach prompt metadata to a span/trace.""" + """Link a Langfuse managed prompt to a span/observation. + + Uses the SDK v4 ``prompt=`` parameter so that the prompt version + appears linked in the Langfuse UI with metrics tracking. + """ lf = _get_client() if lf is None or isinstance(span, _NullSpan): return try: - kwargs: dict[str, Any] = {"name": prompt_name} - if version is not None: - kwargs["version"] = version - if label is not None: - kwargs["label"] = label - prompt = lf.get_prompt(**kwargs) - span.update(metadata={"prompt": {"name": prompt_name, "version": prompt.version}}) + prompt = get_prompt_object(prompt_name, version=version, label=label) + if prompt is not None: + span.update(prompt=prompt) except Exception as exc: logger.warning("tracing: link_prompt_to_trace(%s) failed: %s", prompt_name, exc) diff --git a/services/chat/app/llm.py b/services/chat/app/llm.py new file mode 100644 index 0000000..b979362 --- /dev/null +++ b/services/chat/app/llm.py @@ -0,0 +1,77 @@ +"""LLM factory — centralised model instantiation via LiteLLM. + +Adapted from app/core/llm.py for the Chat Service. +Uses shared.config.settings instead of app.config.settings. +""" + +from __future__ import annotations + +import os +import warnings + +from openai import AsyncOpenAI +import litellm + +from langchain_openai import ChatOpenAI +from langchain_litellm import ChatLiteLLM + +from shared.config import settings + +litellm.drop_params = True + +warnings.filterwarnings( + "ignore", + message=r"PydanticSerializationUnexpectedValue\(Expected `ResponseAPIUsage`", + category=UserWarning, +) + + +def _api_key_for_model(model: str) -> str | None: + if model.startswith("anthropic/"): + return settings.ANTHROPIC_API_KEY or None + if model.startswith("gemini/") or model.startswith("google/"): + return settings.GOOGLE_API_KEY or None + if model.startswith("cerebras/"): + return settings.CEREBRAS_API_KEY or None + if model.startswith("github/"): + return settings.GITHUB_TOKEN or None + if model.startswith("github_copilot/"): + return None + return settings.OPENAI_API_KEY or None + + +def get_llm( + *, + model: str | None = None, + temperature: float = 0, + callbacks: list | None = None, +) -> ChatOpenAI | ChatLiteLLM: + model = model or settings.LLM_MODEL + + if settings.GITHUB_COPILOT_TOKEN_DIR: + os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR) + + if settings.GITHUB_TOKEN: + os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN) + + if "/" in model: + return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks) + + return ChatOpenAI( + model=model, + temperature=temperature, + api_key=_api_key_for_model(model), + callbacks=callbacks, + ) + + +async def embed(text: str) -> list[float]: + model = settings.LLM_EMBED_MODEL + + if model.startswith("github_copilot/") or "/" in model: + response = await litellm.aembedding(model=model, input=[text]) + return response.data[0]["embedding"] + + client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY) + response = await client.embeddings.create(model=model, input=text) + return response.data[0].embedding diff --git a/shared/config.py b/shared/config.py index dd41ee2..567d8fe 100644 --- a/shared/config.py +++ b/shared/config.py @@ -62,6 +62,7 @@ class Settings(BaseSettings): ANTHROPIC_API_KEY: str = "" GOOGLE_API_KEY: str = "" CEREBRAS_API_KEY: str = "" + GITHUB_TOKEN: str = "" LLM_MODEL: str = "gpt-4o" LLM_EMBED_MODEL: str = "text-embedding-3-small" diff --git a/shared/llm.py b/shared/llm.py index e2ed26f..cae1491 100644 --- a/shared/llm.py +++ b/shared/llm.py @@ -33,6 +33,8 @@ def _api_key_for_model(model: str) -> str | None: return settings.GOOGLE_API_KEY or None if model.startswith("cerebras/"): return settings.CEREBRAS_API_KEY or None + if model.startswith("github/"): + return settings.GITHUB_TOKEN or None if model.startswith("github_copilot/"): return None return settings.OPENAI_API_KEY or None @@ -49,6 +51,9 @@ def get_llm( if settings.GITHUB_COPILOT_TOKEN_DIR: os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR) + if settings.GITHUB_TOKEN: + os.environ.setdefault("GITHUB_TOKEN", settings.GITHUB_TOKEN) + if "/" in model: return ChatLiteLLM(model=model, temperature=temperature, callbacks=callbacks)