Files
api/app/core/agent_runner.py
Roberto 1ccb0282fe refactor(models): rename Agent classes to Scout
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 23:52:29 +02:00

1052 lines
40 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Agent run orchestrator.
Drives two agent types:
* **Local directory agent** — V2 unified flow per file:
Phase A (Detect + Preprocess, zero LLM): Python detects the content type
and strips markup/noise, producing clean text + metadata.
Phase B (Single LLM call with tools): the LLM identifies the project,
checks for duplicates via list_* tools, and creates/updates records.
``items_created`` is counted from ``create_*`` tool calls.
* **Cloud connector agent** — fetches data from third-party APIs (Gmail,
Teams, Outlook) and pushes extracted items to Electron.
Usage
-----
Background tasks are spawned with ``asyncio.create_task()``::
asyncio.create_task(run_local_agent(user_id, config, run_log, device_manager))
asyncio.create_task(trigger_pending_runs(user_id, device_id, device_manager))
The ``trigger_pending_runs`` function is called by the device WS endpoint
when Electron sends ``device_hello``, so any overdue runs fire immediately
when the device reconnects.
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
from datetime import datetime, timedelta, timezone
from typing import Any
from croniter import croniter
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from sqlalchemy import select
from app.agents.filesystem_agent import FILESYSTEM_TOOLS
from app.agents.note_agent import NOTE_TOOLS
from app.agents.project_agent import PROJECT_TOOLS
from app.agents.task_agent import TASK_TOOLS
from app.agents.timeline_agent import TIMELINE_TOOLS
from app.core.device_manager import DeviceConnectionManager
from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback, langfuse_context
from app.core.llm import get_agent_llm, model_for_agent
from app.core.preprocessors import detect_content_type, preprocess
from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor
from app.db import async_session
from app.models import ScoutRunLog, CloudScoutConfig, LocalScoutConfig
logger = logging.getLogger(__name__)
# ── Concurrency guard ─────────────────────────────────────────────────────
# Tracks agent IDs that currently have a run in progress.
# Prevents multiple simultaneous runs of the same agent within a single process.
_running_agents: set[str] = set()
def is_agent_running(agent_id: str) -> bool:
"""Return ``True`` if *agent_id* already has a run in progress."""
return agent_id in _running_agents
# ── Timeouts ───────────────────────────────────────────────────────────────
# Max seconds to wait for a single tool-call round-trip (FE → BE).
_TOOL_CALL_TIMEOUT: int = 30
# Max LLM reasoning steps for Step 2 processing.
_MAX_PROCESSING_STEPS: int = 12
# Max directory recursion depth during scan.
_MAX_SCAN_DEPTH: int = 5
# ── Data-type to tool mapping ─────────────────────────────────────────────
_DATA_TYPE_TOOLS: dict[str, list[Any]] = {
"tasks": TASK_TOOLS,
"notes": NOTE_TOOLS,
"timelines": TIMELINE_TOOLS,
"timelineEvents": TIMELINE_TOOLS,
"projects": PROJECT_TOOLS,
}
# ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ──
_UNIFIED_PROCESSING_PROMPT = """\
You are a data extraction assistant for a freelance project management tool.
## Your process (follow this exact order)
### 1. Identify the project
File: {filename}
{metadata_section}
Existing projects:
{projects_list}
Match this file to an existing project using the filename and content clues.
If no project matches, {no_match_behavior}.
### 2. Check existing records
Once you identify the project, use list_tasks / list_notes / list_timelines
(filtered by projectId) to see what already exists.
NEVER create a record that already exists under the same or similar title.
### 3. Extract and create / update
{extraction_rules}
### Rules
- Set isAiSuggested=1 on every new record.
- Set projectId on every record (use the id from the project list above).
- Update existing records when a match is found by title or topic.
- Do NOT invent data — only extract what is clearly stated in the content.
- Target entity types: {data_types}.
{global_rules}
"""
# ── Cloud processing prompt (kept separate for cloud agent) ───────────────
_BATCH_CLOUD_PROCESSING_PROMPT = """\
You are a data extraction and management assistant for a freelance project
management tool.
Available tools:
Filesystem : read_file_content, list_directory, get_file_metadata
Tasks : list_tasks, create_task, update_task, add_task_comment
Notes : list_notes, get_note, create_note, update_note
Timelines : list_timelines, create_timeline, update_timeline
Projects : list_all_projects, get_project, create_project, update_project
Your task:
1. Read the full content of each file below using read_file_content.
2. For each piece of information found, ALWAYS try to match and update an
existing record before creating a new one.
3. ONLY act on these entity types: {data_types}.
4. Do NOT invent data. Only extract what is clearly present in the files.
5. If a file contains no relevant data for the target entity types, skip it.
{project_context}
Files to process:
{file_list}
{custom_prompt_section}
After processing all files, respond with a brief summary of what you updated
and what you created.
"""
# ── Cron helper ────────────────────────────────────────────────────────────
def _is_overdue(schedule_cron: str, last_run_at: datetime | None) -> bool:
"""Return ``True`` if the next scheduled run time has already passed.
Always validates the cron expression first — an invalid expression returns
``False`` (fail-safe: never trigger an unparseable schedule).
"""
try:
now = datetime.now(timezone.utc)
if last_run_at is None:
croniter(schedule_cron, now)
return True
ts = last_run_at
if ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
cron = croniter(schedule_cron, ts)
next_run: datetime = cron.get_next(datetime)
return now >= next_run
except Exception as exc:
logger.warning("agent_runner: cannot parse cron %r: %s", schedule_cron, exc)
return False
# ── WS executor for agent context ─────────────────────────────────────────
def _make_agent_executor(
user_id: str,
device_mgr: DeviceConnectionManager,
run_context: dict | None = None,
) -> Any:
"""Create a WS callback for ``set_client_executor()`` so that all tools
can use ``execute_on_client()`` during an agent run.
If *run_context* is provided it is attached to every ``tool_call`` frame
so the Electron client can attribute actions to the correct agent run.
"""
async def _executor(payload: dict) -> dict:
payload["type"] = "tool_call"
if run_context:
payload["run_context"] = run_context
call_id = payload["id"]
fut = device_mgr.create_pending_call(user_id, call_id)
await device_mgr.send_frame(user_id, payload)
return await asyncio.wait_for(fut, timeout=_TOOL_CALL_TIMEOUT)
return _executor
# ── LLM tool-calling loop ─────────────────────────────────────────────────
def _as_text(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
parts.append(text)
return "".join(parts)
return str(content)
async def _run_agent_with_tools(
*,
system_prompt: str,
user_message: str,
tools: list[Any],
max_steps: int,
user_id: str = "",
session_id: str = "",
langfuse_prompt: Any = None,
agent_name: str = "batch-agent",
_tool_calls_out: list[str] | None = None,
) -> str:
"""Run an LLM agent with tool-calling, returning the final text response.
If *_tool_calls_out* is provided, the name of every tool called during the
run is appended to it (used by the caller to count ``create_*`` calls).
"""
lf = get_langfuse()
llm = get_agent_llm(agent_name)
llm_with_tools = llm.bind_tools(tools)
messages: list[Any] = [
SystemMessage(content=system_prompt),
HumanMessage(content=user_message),
]
tool_map = {tool_def.name: tool_def for tool_def in tools}
_lf_ctx = langfuse_context(user_id=user_id or None, session_id=session_id or None)
_lf_ctx.__enter__()
_span_ctx = (
lf.start_as_current_observation(
as_type="span",
name=agent_name,
metadata={"user_id": user_id} if user_id else None,
input=user_message,
)
if lf else None
)
_span = _span_ctx.__enter__() if _span_ctx else None
try:
for _ in range(max_steps):
_gen_ctx = (
lf.start_as_current_observation(
as_type="generation",
name=f"{agent_name}-llm",
model=model_for_agent(agent_name),
prompt=langfuse_prompt,
input=messages,
)
if lf else None
)
_gen = _gen_ctx.__enter__() if _gen_ctx else None
response: AIMessage = await llm_with_tools.ainvoke(messages)
if _gen_ctx:
_gen.update(output=_as_text(response.content), usage_details=extract_usage(response))
_gen_ctx.__exit__(None, None, None)
messages.append(response)
if not response.tool_calls:
final_text = _as_text(response.content)
if _span:
_span.update(output=final_text)
return final_text
for call in response.tool_calls:
call_name = str(call.get("name", ""))
call_args = call.get("args", {})
logger.info(
"agent_runner: tool_call name=%s args=%s",
call_name,
json.dumps(call_args, ensure_ascii=True)[:800],
)
if _tool_calls_out is not None:
_tool_calls_out.append(call_name)
tool_fn = tool_map.get(call_name)
if tool_fn is None:
tool_output = f"Unknown tool: {call_name}"
else:
tool_output = await tool_fn.ainvoke(call_args)
logger.info(
"agent_runner: tool_result name=%s output=%s",
call_name,
str(tool_output)[:200],
)
messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"]))
final = await llm.ainvoke(messages)
final_text = _as_text(final.content)
if _span:
_span.update(output=final_text)
return final_text
finally:
if _span_ctx:
_span_ctx.__exit__(None, None, None)
_lf_ctx.__exit__(None, None, None)
if lf:
lf.flush()
# ── Tool list builder ─────────────────────────────────────────────────────
def _build_processing_tools(data_types: list[str]) -> list[Any]:
"""Build the tool list for processing based on user's data_types selection."""
tools: list[Any] = list(FILESYSTEM_TOOLS)
for dt in data_types:
dt_tools = _DATA_TYPE_TOOLS.get(dt)
if dt_tools:
tools.extend(dt_tools)
return tools
# ── Code-based directory scanner ─────────────────────────────────────────
async def _scan_directories(
paths: list[str],
extensions: list[str],
last_run_at: datetime | None,
) -> list[str]:
"""Walk directories via WS tool calls and return filtered file paths.
Recursion is capped at ``_MAX_SCAN_DEPTH``. Files are filtered by
extension (if configured) and by modification date (if ``last_run_at``
is set). Fails open: if metadata cannot be read, the file is included.
"""
all_files: list[str] = []
ext_set = {e.lstrip(".").lower() for e in extensions} if extensions else set()
async def _walk(path: str, depth: int) -> None:
if depth > _MAX_SCAN_DEPTH:
return
try:
result = await execute_on_client(action="list_directory", data={"path": path})
except Exception as exc:
logger.warning("agent_runner: list_directory failed %r: %s", path, exc)
return
for entry in result.get("entries", []):
entry_path = entry.get("path", "")
if not entry_path:
continue
if entry.get("type") == "directory":
await _walk(entry_path, depth + 1)
elif entry.get("type") == "file":
if ext_set:
dot_pos = entry_path.rfind(".")
file_ext = entry_path[dot_pos + 1:].lower() if dot_pos != -1 else ""
if file_ext not in ext_set:
continue
all_files.append(entry_path)
for root in paths:
await _walk(root, depth=0)
if last_run_at is None:
return all_files
# Filter by modification date.
last_run_ms = int(last_run_at.timestamp() * 1000)
filtered: list[str] = []
for file_path in all_files:
try:
meta = await execute_on_client(action="get_file_metadata", data={"path": file_path})
# FE sends snake_case keys on the wire (toSnakeCase transform)
modified_at = meta.get("modified_at") or meta.get("modifiedAt")
if modified_at is None:
filtered.append(file_path)
continue
if isinstance(modified_at, (int, float)):
mod_ms = int(modified_at)
else:
mod_ms = int(datetime.fromisoformat(str(modified_at)).timestamp() * 1000)
if mod_ms > last_run_ms:
filtered.append(file_path)
except Exception:
filtered.append(file_path) # fail-open
return filtered
# ── Code-based entity fetchers ────────────────────────────────────────────
async def _fetch_projects() -> list[dict]:
"""Fetch all projects from the Electron client via WS."""
try:
result = await execute_on_client(action="select", table="projects")
return result.get("rows", [])
except Exception as exc:
logger.warning("agent_runner: failed to fetch projects: %s", exc)
return []
_DOMAIN_TABLE: dict[str, str] = {
"tasks": "tasks",
"notes": "notes",
"timelines": "timelines",
"projects": "projects",
}
async def _fetch_domain_entities(domain: str, project_id: str) -> list[dict]:
"""Fetch existing rows for a domain, scoped to a project where applicable."""
table = _DOMAIN_TABLE.get(domain)
if not table:
return []
filters: dict[str, Any] = {}
if project_id != "standalone" and domain != "projects":
filters["projectId"] = project_id
try:
result = await execute_on_client(
action="select",
table=table,
filters=filters if filters else None,
)
return result.get("rows", [])
except Exception as exc:
logger.warning("agent_runner: failed to fetch %s: %s", domain, exc)
return []
def _format_entities_for_context(domain: str, rows: list[dict]) -> str:
"""Format existing entity rows as a readable context block for the LLM.
Includes enough detail per record for the LLM to make a confident
update-vs-create decision without overwhelming the context.
Note content is truncated to 200 chars to stay within token budget.
"""
if not rows:
return f"No existing {domain}."
lines: list[str] = []
for r in rows:
if domain == "tasks":
desc = r.get("description") or ""
desc_part = f"{desc[:120]}" if desc else ""
assignee = r.get("assignee") or r.get("assignees") or ""
due = r.get("dueDate") or r.get("due_date") or ""
meta = ", ".join(filter(None, [
f"priority: {r.get('priority', '')}" if r.get("priority") else "",
f"assignee: {assignee}" if assignee else "",
f"due: {due}" if due else "",
]))
lines.append(
f" - [{r.get('status', '?')}] {r.get('title', '')}{desc_part}"
f" ({meta}, id: {r['id']})"
)
elif domain == "notes":
snippet = (r.get("content") or "")[:200].replace("\n", " ")
snippet_part = f"\n Preview: {snippet}" if snippet else ""
lines.append(
f" - {r.get('title', '')} (id: {r['id']}){snippet_part}"
)
elif domain == "timelines":
lines.append(
f" - {r.get('title', '')} date={r.get('date', '')} (id: {r['id']})"
)
elif domain == "projects":
summary = (r.get("aiSummary") or r.get("ai_summary") or "")[:120]
summary_part = f"{summary}" if summary else ""
lines.append(
f" - {r.get('name', '')} [{r.get('status', '')}]{summary_part}"
f" (id: {r['id']})"
)
return f"Existing {domain}:\n" + "\n".join(lines)
# ── V2 helper functions ───────────────────────────────────────────────────
def _format_projects(projects: list[dict]) -> str:
"""Format the project list for the unified system prompt."""
if not projects:
return " (no projects yet)"
lines: list[str] = []
for p in projects:
summary = (p.get("aiSummary") or p.get("ai_summary") or "").strip()
summary_part = f"{summary[:100]}" if summary else ""
lines.append(
f" - id={p['id']} | name={p.get('name', '')} | "
f"status={p.get('status', '')}{summary_part}"
)
return "\n".join(lines)
def _format_metadata(metadata: dict) -> str:
"""Format preprocessor metadata as a compact context block."""
if not metadata:
return ""
parts: list[str] = []
for key in ("subject", "from", "to", "date"):
if metadata.get(key):
parts.append(f"{key.capitalize()}: {metadata[key]}")
# any remaining keys
for key, val in metadata.items():
if key not in ("subject", "from", "to", "date") and val:
parts.append(f"{key}: {val}")
return "\n".join(parts)
def _get_extraction_rules(agent_config: dict, content_type: str) -> str:
"""Return the extraction_prompt for *content_type* from *agent_config*.
Falls back to a generic instruction when the type is not configured.
"""
for ct in agent_config.get("content_types", []):
if ct.get("id") == content_type:
prompt = ct.get("extraction_prompt", "").strip()
if prompt:
return prompt
return (
"Extract relevant information as tasks (action items), notes "
"(informational content), or timelines (dated events)."
)
def _get_no_match_behavior(agent_config: dict) -> str:
"""Derive the 'no project match' instruction from global_rules."""
rules = agent_config.get("global_rules", [])
for rule in rules:
lower = rule.lower()
if "no project" in lower or "no match" in lower or "skip" in lower:
return rule
return "create a new project with a concise name derived from the file content"
# ── Local agent runner (V2 — unified per-file flow) ───────────────────────
async def run_local_agent(
user_id: str,
config: LocalScoutConfig,
run_log: ScoutRunLog,
device_mgr: DeviceConnectionManager,
run_context: dict | None = None,
) -> None:
"""Execute a local directory agent run — V2 unified flow.
Phase A — Detect + Preprocess (zero LLM, per file):
Python detects the content type from filename + content patterns and
runs the appropriate handler (e.g. email_html) to produce clean text
and structured metadata.
Phase B — Single LLM call with tools (per file):
One LLM call handles project identification, duplicate checking, and
record creation/update. ``create_*`` tool calls are counted to
produce the accurate ``items_created`` metric.
"""
run_id = run_log.id
agent_id = (run_context or {}).get("agent_id") or config.id
_running_agents.add(agent_id)
# ── Device online check ─────────────────────────────────────────
target_device_id = config.device_id.strip() if isinstance(config.device_id, str) else ""
is_online = (
device_mgr.is_online(user_id, target_device_id)
if target_device_id
else device_mgr.is_online(user_id)
)
if not is_online:
logger.info(
"agent_runner: skip run=%s — device %r offline for user=%s",
run_id,
target_device_id or "<any>",
user_id,
)
await _finalize_run(
run_log,
status="error",
errors=[f"Device {target_device_id or '<any>'!r} is not connected"],
)
return
# ── Set up WS executor for tools ────────────────────────────────
executor = _make_agent_executor(user_id, device_mgr, run_context)
set_client_executor(executor)
errors: list[str] = []
items_processed = 0
items_created = 0
agent_config: dict = config.scout_config or {}
processing_tools = _build_processing_tools(config.data_types)
try:
# ── Code: scan directories ───────────────────────────────────
file_paths = await _scan_directories(
paths=config.directory_paths,
extensions=config.file_extensions or [],
last_run_at=config.last_run_at,
)
logger.info(
"agent_runner: run=%s found %d file(s) after filtering", run_id, len(file_paths)
)
if not file_paths:
await _finalize_run(run_log, status="success", items_processed=0, items_created=0)
return
# ── Code: fetch all projects once ────────────────────────────
projects = await _fetch_projects()
projects_block = _format_projects(projects)
# Prompt template + Langfuse version linking (hot-swappable from UI).
unified_template, prompt_obj = get_prompt_or_fallback(
"unified_processing", _UNIFIED_PROCESSING_PROMPT
)
for file_path in file_paths:
try:
# ── Phase A: read + detect + preprocess ─────────────
file_result = await execute_on_client(
action="read_file_content", data={"path": file_path}
)
raw_content: str = file_result.get("content", "")
if not raw_content.strip():
logger.debug(
"agent_runner: run=%s skipping empty file %r", run_id, file_path
)
continue
items_processed += 1
filename = os.path.basename(file_path)
content_type = detect_content_type(filename, raw_content)
preprocessed = preprocess(content_type, raw_content)
logger.info(
"agent_runner: run=%s file=%r content_type=%s clean_len=%d",
run_id, file_path, content_type, len(preprocessed.clean_text),
)
# ── Phase B: single LLM call ─────────────────────────
extraction_rules = _get_extraction_rules(agent_config, content_type)
no_match_behavior = _get_no_match_behavior(agent_config)
base_global_rules = list(agent_config.get("global_rules", []))
if "notes" in config.data_types:
base_global_rules.append(
"For notes: when updating an existing note use `propose_note_edit` "
"(type=append/insert/replace) so the user can review AI changes. "
"Only call `update_note` for complete content replacement without review."
)
global_rules_lines = "\n".join(f"- {r}" for r in base_global_rules)
metadata_section = _format_metadata(preprocessed.metadata)
system_prompt = compile_prompt(
unified_template,
prompt_obj,
filename=filename,
metadata_section=metadata_section,
projects_list=projects_block,
no_match_behavior=no_match_behavior,
extraction_rules=extraction_rules,
global_rules=global_rules_lines,
data_types=", ".join(config.data_types),
)
user_message = (
f"Process this file and extract relevant information.\n\n"
f"File: {file_path}\n\n"
f"Content:\n{preprocessed.clean_text}"
)
file_tool_calls: list[str] = []
result_text = await _run_agent_with_tools(
system_prompt=system_prompt,
user_message=user_message,
tools=processing_tools,
max_steps=_MAX_PROCESSING_STEPS,
user_id=user_id,
session_id=run_id,
langfuse_prompt=prompt_obj,
agent_name="unified-processor",
_tool_calls_out=file_tool_calls,
)
file_created = sum(
1 for name in file_tool_calls if name.startswith("create_")
)
items_created += file_created
# Refresh project list when a project was created so
# subsequent files see it in the prompt context.
if "create_project" in file_tool_calls:
projects = await _fetch_projects()
projects_block = _format_projects(projects)
logger.info(
"agent_runner: run=%s file=%r created=%d result=%s",
run_id, file_path, file_created, result_text[:200],
)
except Exception as exc:
errors.append(f"Error processing '{file_path}': {exc}")
logger.error(
"agent_runner: run=%s file=%r failed: %s", run_id, file_path, exc
)
except Exception as exc:
errors.append(f"Agent run failed: {exc}")
logger.error("agent_runner: run=%s failed: %s", run_id, exc)
finally:
_running_agents.discard(agent_id)
clear_client_executor()
# ── Finalise ────────────────────────────────────────────────────
if errors and items_processed == 0:
final_status = "error"
elif errors:
final_status = "partial"
else:
final_status = "success"
await _finalize_run(
run_log,
status=final_status,
items_processed=items_processed,
items_created=items_created,
errors=errors,
)
logger.info(
"agent_runner: run=%s done status=%s processed=%d created=%d errors=%d",
run_id,
final_status,
items_processed,
items_created,
len(errors),
)
# Notify Electron that the run is complete.
if run_context and device_mgr.is_online(user_id):
try:
await device_mgr.send_frame(user_id, {
"type": "run_complete",
"run_context": run_context,
"status": final_status,
})
except Exception as exc:
logger.warning(
"agent_runner: run=%s failed to send run_complete: %s", run_id, exc
)
# ── Cloud agent runner ─────────────────────────────────────────────────────
_CLOUD_DEFAULT_LOOKBACK_DAYS: int = 7
async def run_cloud_agent(
user_id: str,
config: CloudScoutConfig,
run_log: ScoutRunLog,
device_mgr: DeviceConnectionManager,
) -> None:
"""Execute a cloud connector agent run end-to-end.
Steps:
1. Verify the user's device is online.
2. Decrypt the stored OAuth token from ``config.oauth_token_encrypted``.
3. Instantiate the provider client (Gmail or MS Graph).
4. Fetch messages/emails since ``config.last_run_at`` (or 7 days ago for
the first run) applying ``config.filter_config`` filters.
5. For each message/email call the LLM to extract structured items.
6. Push each item to Electron as an ``insert`` tool-call.
7. If the provider refreshed its access token, re-encrypt and write it
back to ``config.oauth_token_encrypted``.
8. Persist the run outcome via ``_finalize_run``.
"""
run_id = run_log.id
# ── 1. Device online check ─────────────────────────────────────────
if not device_mgr.is_online(user_id):
logger.info(
"agent_runner: skip cloud run=%s — no device online for user=%s",
run_id,
user_id,
)
await _finalize_run(
run_log,
status="error",
errors=["No connected device — cloud agent results cannot be delivered"],
)
return
# ── 2. Decrypt OAuth token ─────────────────────────────────────────
from app.integrations import decrypt_token, encrypt_token, get_provider
if not config.oauth_token_encrypted:
await _finalize_run(
run_log,
status="error",
errors=[f"No OAuth token stored for cloud agent '{config.name}'"],
)
return
try:
credentials_info = decrypt_token(config.oauth_token_encrypted)
except ValueError as exc:
logger.error("agent_runner: failed to decrypt OAuth token for agent %s: %s", config.id, exc)
await _finalize_run(
run_log,
status="error",
errors=[f"Failed to decrypt OAuth token: {exc}"],
)
return
# ── 3. Instantiate provider client ────────────────────────────────
try:
provider = get_provider(config.provider, credentials_info)
except ValueError as exc:
await _finalize_run(run_log, status="error", errors=[str(exc)])
return
# ── 4. Fetch messages ─────────────────────────────────────────────
since: datetime | None = config.last_run_at
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=_CLOUD_DEFAULT_LOOKBACK_DAYS)
if since.tzinfo is None:
since = since.replace(tzinfo=timezone.utc)
errors: list[str] = []
items_processed = 0
items_created = 0
try:
if config.provider == "gmail":
raw_messages = await provider.fetch_messages( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
elif config.provider == "outlook":
raw_messages = await provider.fetch_emails( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
elif config.provider == "teams":
raw_messages = await provider.fetch_messages( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
else:
raw_messages = []
except RuntimeError as exc:
logger.error(
"agent_runner: provider fetch failed for cloud agent %s: %s", config.id, exc
)
await _finalize_run(
run_log,
status="error",
errors=[f"Provider fetch failed: {exc}"],
update_config_last_run=True,
config_id=config.id,
config_type="cloud",
)
return
logger.info(
"agent_runner: cloud agent %s fetched %d item(s) from %s for user=%s",
config.id,
len(raw_messages),
config.provider,
user_id,
)
# ── 56. Extract + insert via LLM with tools ─────────────────────
executor = _make_agent_executor(user_id, device_mgr)
set_client_executor(executor)
try:
processing_tools = _build_processing_tools(config.data_types)
custom_section = (
f"User instructions:\n{config.prompt_template}"
if config.prompt_template
else ""
)
for msg in raw_messages:
content_text = msg.as_text
if not content_text:
continue
items_processed += 1
cloud_template, cloud_prompt_obj = get_prompt_or_fallback(
"batch_cloud_processing", _BATCH_CLOUD_PROCESSING_PROMPT
)
processing_prompt = compile_prompt(
cloud_template,
cloud_prompt_obj,
data_types=", ".join(config.data_types),
project_context="Determine the appropriate project from the message context.",
file_list=f"Message from {config.provider} (id: {msg.id})",
custom_prompt_section=custom_section,
)
try:
await _run_agent_with_tools(
system_prompt=processing_prompt,
user_message=f"Process this message content:\n\n{content_text[:8000]}",
tools=processing_tools,
max_steps=_MAX_PROCESSING_STEPS,
user_id=user_id,
session_id=run_id,
langfuse_prompt=cloud_prompt_obj,
agent_name="cloud-processor",
)
except Exception as exc:
errors.append(f"LLM processing error for message {msg.id!r}: {exc}")
finally:
clear_client_executor()
# ── 7. Persist refreshed token (if any) ───────────────────────────
refreshed = getattr(provider, "refreshed_credentials", None)
if refreshed:
try:
new_encrypted = encrypt_token(refreshed)
async with async_session() as db:
cfg_result = await db.execute(
select(CloudScoutConfig).where(CloudScoutConfig.id == config.id)
)
cfg_row = cfg_result.scalar_one_or_none()
if cfg_row:
cfg_row.oauth_token_encrypted = new_encrypted
await db.commit()
logger.debug("agent_runner: refreshed OAuth token persisted for agent %s", config.id)
except Exception as exc:
logger.warning(
"agent_runner: failed to persist refreshed token for agent %s: %s",
config.id,
exc,
)
# ── 8. Finalise ────────────────────────────────────────────────────
if errors and items_created == 0:
final_status = "error"
elif errors:
final_status = "partial"
else:
final_status = "success"
await _finalize_run(
run_log,
status=final_status,
items_processed=items_processed,
items_created=items_created,
errors=errors,
update_config_last_run=True,
config_id=config.id,
config_type="cloud",
)
logger.info(
"agent_runner: cloud run=%s done status=%s processed=%d created=%d errors=%d",
run_id,
final_status,
items_processed,
items_created,
len(errors),
)
# ── Pending-run trigger ─────────────────────────────────────────────────────
async def trigger_pending_runs(
user_id: str,
device_id: str,
device_mgr: DeviceConnectionManager,
) -> None:
"""Dispatch any overdue agent runs after an Electron device connects.
Called as a background task from the device WS endpoint on ``device_hello``.
"""
logger.info(
"agent_runner: pending-run scan skipped for user=%s device=%s (client-owned agent config)",
user_id,
device_id,
)
return
# ── Internal helper ─────────────────────────────────────────────────────────
async def _finalize_run(
run_log: ScoutRunLog,
*,
status: str,
items_processed: int = 0,
items_created: int = 0,
errors: list[str] | None = None,
update_config_last_run: bool = False,
config_id: str | None = None,
config_type: str | None = None,
) -> None:
"""Persist the run outcome and optionally update ``last_run_at`` on the config."""
now = datetime.now(timezone.utc)
try:
async with async_session() as db:
managed = await db.merge(run_log)
managed.status = status
managed.items_processed = items_processed
managed.items_created = items_created
managed.errors = errors or []
managed.completed_at = now
if update_config_last_run and config_id:
if config_type == "local":
cfg_result = await db.execute(
select(LocalScoutConfig).where(LocalScoutConfig.id == config_id)
)
cfg = cfg_result.scalar_one_or_none()
if cfg:
cfg.last_run_at = now
elif config_type == "cloud":
cfg_result = await db.execute(
select(CloudScoutConfig).where(CloudScoutConfig.id == config_id)
)
cfg = cfg_result.scalar_one_or_none()
if cfg:
cfg.last_run_at = now
await db.commit()
except Exception as exc:
logger.error(
"agent_runner: failed to finalize run_log=%s: %s", run_log.id, exc
)