"""Agent run orchestrator. Drives two agent types: * **Local directory agent** — V2 unified flow per file: Phase A (Detect + Preprocess, zero LLM): Python detects the content type and strips markup/noise, producing clean text + metadata. Phase B (Single LLM call with tools): the LLM identifies the project, checks for duplicates via list_* tools, and creates/updates records. ``items_created`` is counted from ``create_*`` tool calls. * **Cloud connector agent** — fetches data from third-party APIs (Gmail, Teams, Outlook) and pushes extracted items to Electron. Usage ----- Background tasks are spawned with ``asyncio.create_task()``:: asyncio.create_task(run_local_agent(user_id, config, run_log, device_manager)) asyncio.create_task(trigger_pending_runs(user_id, device_id, device_manager)) The ``trigger_pending_runs`` function is called by the device WS endpoint when Electron sends ``device_hello``, so any overdue runs fire immediately when the device reconnects. """ from __future__ import annotations import asyncio import json import logging import os from datetime import datetime, timedelta, timezone from typing import Any from croniter import croniter from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage from sqlalchemy import select from app.agents.filesystem_agent import FILESYSTEM_TOOLS from app.agents.note_agent import NOTE_TOOLS from app.agents.project_agent import PROJECT_TOOLS from app.agents.task_agent import TASK_TOOLS from app.agents.timeline_agent import TIMELINE_TOOLS from app.core.device_manager import DeviceConnectionManager from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback, langfuse_context from app.core.llm import get_agent_llm, model_for_agent from app.core.preprocessors import detect_content_type, preprocess from app.core.ws_context import clear_client_executor, execute_on_client, set_client_executor from app.db import async_session from app.models import ScoutRunLog, CloudScoutConfig, LocalScoutConfig logger = logging.getLogger(__name__) # ── Concurrency guard ───────────────────────────────────────────────────── # Tracks agent IDs that currently have a run in progress. # Prevents multiple simultaneous runs of the same agent within a single process. _running_agents: set[str] = set() def is_agent_running(agent_id: str) -> bool: """Return ``True`` if *agent_id* already has a run in progress.""" return agent_id in _running_agents # ── Timeouts ─────────────────────────────────────────────────────────────── # Max seconds to wait for a single tool-call round-trip (FE → BE). _TOOL_CALL_TIMEOUT: int = 30 # Max LLM reasoning steps for Step 2 processing. _MAX_PROCESSING_STEPS: int = 12 # Max directory recursion depth during scan. _MAX_SCAN_DEPTH: int = 5 # ── Data-type to tool mapping ───────────────────────────────────────────── _DATA_TYPE_TOOLS: dict[str, list[Any]] = { "tasks": TASK_TOOLS, "notes": NOTE_TOOLS, "timelines": TIMELINE_TOOLS, "timelineEvents": TIMELINE_TOOLS, "projects": PROJECT_TOOLS, } # ── V2: Unified processing prompt (hot-swappable via Langfuse "unified_processing") ── _UNIFIED_PROCESSING_PROMPT = """\ You are a data extraction assistant for a freelance project management tool. ## Your process (follow this exact order) ### 1. Identify the project File: {filename} {metadata_section} Existing projects: {projects_list} Match this file to an existing project using the filename and content clues. If no project matches, {no_match_behavior}. ### 2. Check existing records Once you identify the project, use list_tasks / list_notes / list_timelines (filtered by projectId) to see what already exists. NEVER create a record that already exists under the same or similar title. ### 3. Extract and create / update {extraction_rules} ### Rules - Set isAiSuggested=1 on every new record. - Set projectId on every record (use the id from the project list above). - Update existing records when a match is found by title or topic. - Do NOT invent data — only extract what is clearly stated in the content. - Target entity types: {data_types}. {global_rules} """ # ── Cloud processing prompt (kept separate for cloud agent) ─────────────── _BATCH_CLOUD_PROCESSING_PROMPT = """\ You are a data extraction and management assistant for a freelance project management tool. Available tools: Filesystem : read_file_content, list_directory, get_file_metadata Tasks : list_tasks, create_task, update_task, add_task_comment Notes : list_notes, get_note, create_note, update_note Timelines : list_timelines, create_timeline, update_timeline Projects : list_all_projects, get_project, create_project, update_project Your task: 1. Read the full content of each file below using read_file_content. 2. For each piece of information found, ALWAYS try to match and update an existing record before creating a new one. 3. ONLY act on these entity types: {data_types}. 4. Do NOT invent data. Only extract what is clearly present in the files. 5. If a file contains no relevant data for the target entity types, skip it. {project_context} Files to process: {file_list} {custom_prompt_section} After processing all files, respond with a brief summary of what you updated and what you created. """ # ── Cron helper ──────────────────────────────────────────────────────────── def _is_overdue(schedule_cron: str, last_run_at: datetime | None) -> bool: """Return ``True`` if the next scheduled run time has already passed. Always validates the cron expression first — an invalid expression returns ``False`` (fail-safe: never trigger an unparseable schedule). """ try: now = datetime.now(timezone.utc) if last_run_at is None: croniter(schedule_cron, now) return True ts = last_run_at if ts.tzinfo is None: ts = ts.replace(tzinfo=timezone.utc) cron = croniter(schedule_cron, ts) next_run: datetime = cron.get_next(datetime) return now >= next_run except Exception as exc: logger.warning("scout_runner: cannot parse cron %r: %s", schedule_cron, exc) return False # ── WS executor for agent context ───────────────────────────────────────── def _make_agent_executor( user_id: str, device_mgr: DeviceConnectionManager, run_context: dict | None = None, ) -> Any: """Create a WS callback for ``set_client_executor()`` so that all tools can use ``execute_on_client()`` during an agent run. If *run_context* is provided it is attached to every ``tool_call`` frame so the Electron client can attribute actions to the correct agent run. """ async def _executor(payload: dict) -> dict: payload["type"] = "tool_call" if run_context: payload["run_context"] = run_context call_id = payload["id"] fut = device_mgr.create_pending_call(user_id, call_id) await device_mgr.send_frame(user_id, payload) return await asyncio.wait_for(fut, timeout=_TOOL_CALL_TIMEOUT) return _executor # ── LLM tool-calling loop ───────────────────────────────────────────────── def _as_text(content: Any) -> str: if content is None: return "" if isinstance(content, str): return content if isinstance(content, list): parts: list[str] = [] for item in content: if isinstance(item, str): parts.append(item) elif isinstance(item, dict): text = item.get("text") if isinstance(text, str): parts.append(text) return "".join(parts) return str(content) async def _run_agent_with_tools( *, system_prompt: str, user_message: str, tools: list[Any], max_steps: int, user_id: str = "", session_id: str = "", langfuse_prompt: Any = None, agent_name: str = "batch-agent", _tool_calls_out: list[str] | None = None, ) -> str: """Run an LLM agent with tool-calling, returning the final text response. If *_tool_calls_out* is provided, the name of every tool called during the run is appended to it (used by the caller to count ``create_*`` calls). """ lf = get_langfuse() llm = get_agent_llm(agent_name) llm_with_tools = llm.bind_tools(tools) messages: list[Any] = [ SystemMessage(content=system_prompt), HumanMessage(content=user_message), ] tool_map = {tool_def.name: tool_def for tool_def in tools} _lf_ctx = langfuse_context(user_id=user_id or None, session_id=session_id or None) _lf_ctx.__enter__() _span_ctx = ( lf.start_as_current_observation( as_type="span", name=agent_name, metadata={"user_id": user_id} if user_id else None, input=user_message, ) if lf else None ) _span = _span_ctx.__enter__() if _span_ctx else None try: for _ in range(max_steps): _gen_ctx = ( lf.start_as_current_observation( as_type="generation", name=f"{agent_name}-llm", model=model_for_agent(agent_name), prompt=langfuse_prompt, input=messages, ) if lf else None ) _gen = _gen_ctx.__enter__() if _gen_ctx else None response: AIMessage = await llm_with_tools.ainvoke(messages) if _gen_ctx: _gen.update(output=_as_text(response.content), usage_details=extract_usage(response)) _gen_ctx.__exit__(None, None, None) messages.append(response) if not response.tool_calls: final_text = _as_text(response.content) if _span: _span.update(output=final_text) return final_text for call in response.tool_calls: call_name = str(call.get("name", "")) call_args = call.get("args", {}) logger.info( "scout_runner: tool_call name=%s args=%s", call_name, json.dumps(call_args, ensure_ascii=True)[:800], ) if _tool_calls_out is not None: _tool_calls_out.append(call_name) tool_fn = tool_map.get(call_name) if tool_fn is None: tool_output = f"Unknown tool: {call_name}" else: tool_output = await tool_fn.ainvoke(call_args) logger.info( "scout_runner: tool_result name=%s output=%s", call_name, str(tool_output)[:200], ) messages.append(ToolMessage(content=str(tool_output), tool_call_id=call["id"])) final = await llm.ainvoke(messages) final_text = _as_text(final.content) if _span: _span.update(output=final_text) return final_text finally: if _span_ctx: _span_ctx.__exit__(None, None, None) _lf_ctx.__exit__(None, None, None) if lf: lf.flush() # ── Tool list builder ───────────────────────────────────────────────────── def _build_processing_tools(data_types: list[str]) -> list[Any]: """Build the tool list for processing based on user's data_types selection.""" tools: list[Any] = list(FILESYSTEM_TOOLS) for dt in data_types: dt_tools = _DATA_TYPE_TOOLS.get(dt) if dt_tools: tools.extend(dt_tools) return tools # ── Code-based directory scanner ───────────────────────────────────────── async def _scan_directories( paths: list[str], extensions: list[str], last_run_at: datetime | None, ) -> list[str]: """Walk directories via WS tool calls and return filtered file paths. Recursion is capped at ``_MAX_SCAN_DEPTH``. Files are filtered by extension (if configured) and by modification date (if ``last_run_at`` is set). Fails open: if metadata cannot be read, the file is included. """ all_files: list[str] = [] ext_set = {e.lstrip(".").lower() for e in extensions} if extensions else set() async def _walk(path: str, depth: int) -> None: if depth > _MAX_SCAN_DEPTH: return try: result = await execute_on_client(action="list_directory", data={"path": path}) except Exception as exc: logger.warning("scout_runner: list_directory failed %r: %s", path, exc) return for entry in result.get("entries", []): entry_path = entry.get("path", "") if not entry_path: continue if entry.get("type") == "directory": await _walk(entry_path, depth + 1) elif entry.get("type") == "file": if ext_set: dot_pos = entry_path.rfind(".") file_ext = entry_path[dot_pos + 1:].lower() if dot_pos != -1 else "" if file_ext not in ext_set: continue all_files.append(entry_path) for root in paths: await _walk(root, depth=0) if last_run_at is None: return all_files # Filter by modification date. last_run_ms = int(last_run_at.timestamp() * 1000) filtered: list[str] = [] for file_path in all_files: try: meta = await execute_on_client(action="get_file_metadata", data={"path": file_path}) # FE sends snake_case keys on the wire (toSnakeCase transform) modified_at = meta.get("modified_at") or meta.get("modifiedAt") if modified_at is None: filtered.append(file_path) continue if isinstance(modified_at, (int, float)): mod_ms = int(modified_at) else: mod_ms = int(datetime.fromisoformat(str(modified_at)).timestamp() * 1000) if mod_ms > last_run_ms: filtered.append(file_path) except Exception: filtered.append(file_path) # fail-open return filtered # ── Code-based entity fetchers ──────────────────────────────────────────── async def _fetch_projects() -> list[dict]: """Fetch all projects from the Electron client via WS.""" try: result = await execute_on_client(action="select", table="projects") return result.get("rows", []) except Exception as exc: logger.warning("scout_runner: failed to fetch projects: %s", exc) return [] _DOMAIN_TABLE: dict[str, str] = { "tasks": "tasks", "notes": "notes", "timelines": "timelines", "projects": "projects", } async def _fetch_domain_entities(domain: str, project_id: str) -> list[dict]: """Fetch existing rows for a domain, scoped to a project where applicable.""" table = _DOMAIN_TABLE.get(domain) if not table: return [] filters: dict[str, Any] = {} if project_id != "standalone" and domain != "projects": filters["projectId"] = project_id try: result = await execute_on_client( action="select", table=table, filters=filters if filters else None, ) return result.get("rows", []) except Exception as exc: logger.warning("scout_runner: failed to fetch %s: %s", domain, exc) return [] def _format_entities_for_context(domain: str, rows: list[dict]) -> str: """Format existing entity rows as a readable context block for the LLM. Includes enough detail per record for the LLM to make a confident update-vs-create decision without overwhelming the context. Note content is truncated to 200 chars to stay within token budget. """ if not rows: return f"No existing {domain}." lines: list[str] = [] for r in rows: if domain == "tasks": desc = r.get("description") or "" desc_part = f" — {desc[:120]}" if desc else "" assignee = r.get("assignee") or r.get("assignees") or "" due = r.get("dueDate") or r.get("due_date") or "" meta = ", ".join(filter(None, [ f"priority: {r.get('priority', '')}" if r.get("priority") else "", f"assignee: {assignee}" if assignee else "", f"due: {due}" if due else "", ])) lines.append( f" - [{r.get('status', '?')}] {r.get('title', '')}{desc_part}" f" ({meta}, id: {r['id']})" ) elif domain == "notes": snippet = (r.get("content") or "")[:200].replace("\n", " ") snippet_part = f"\n Preview: {snippet}" if snippet else "" lines.append( f" - {r.get('title', '')} (id: {r['id']}){snippet_part}" ) elif domain == "timelines": lines.append( f" - {r.get('title', '')} date={r.get('date', '')} (id: {r['id']})" ) elif domain == "projects": summary = (r.get("aiSummary") or r.get("ai_summary") or "")[:120] summary_part = f" — {summary}" if summary else "" lines.append( f" - {r.get('name', '')} [{r.get('status', '')}]{summary_part}" f" (id: {r['id']})" ) return f"Existing {domain}:\n" + "\n".join(lines) # ── V2 helper functions ─────────────────────────────────────────────────── def _format_projects(projects: list[dict]) -> str: """Format the project list for the unified system prompt.""" if not projects: return " (no projects yet)" lines: list[str] = [] for p in projects: summary = (p.get("aiSummary") or p.get("ai_summary") or "").strip() summary_part = f" — {summary[:100]}" if summary else "" lines.append( f" - id={p['id']} | name={p.get('name', '')} | " f"status={p.get('status', '')}{summary_part}" ) return "\n".join(lines) def _format_metadata(metadata: dict) -> str: """Format preprocessor metadata as a compact context block.""" if not metadata: return "" parts: list[str] = [] for key in ("subject", "from", "to", "date"): if metadata.get(key): parts.append(f"{key.capitalize()}: {metadata[key]}") # any remaining keys for key, val in metadata.items(): if key not in ("subject", "from", "to", "date") and val: parts.append(f"{key}: {val}") return "\n".join(parts) def _get_extraction_rules(agent_config: dict, content_type: str) -> str: """Return the extraction_prompt for *content_type* from *agent_config*. Falls back to a generic instruction when the type is not configured. """ for ct in agent_config.get("content_types", []): if ct.get("id") == content_type: prompt = ct.get("extraction_prompt", "").strip() if prompt: return prompt return ( "Extract relevant information as tasks (action items), notes " "(informational content), or timelines (dated events)." ) def _get_no_match_behavior(agent_config: dict) -> str: """Derive the 'no project match' instruction from global_rules.""" rules = agent_config.get("global_rules", []) for rule in rules: lower = rule.lower() if "no project" in lower or "no match" in lower or "skip" in lower: return rule return "create a new project with a concise name derived from the file content" # ── Local agent runner (V2 — unified per-file flow) ─────────────────────── async def run_local_agent( user_id: str, config: LocalScoutConfig, run_log: ScoutRunLog, device_mgr: DeviceConnectionManager, run_context: dict | None = None, ) -> None: """Execute a local directory agent run — V2 unified flow. Phase A — Detect + Preprocess (zero LLM, per file): Python detects the content type from filename + content patterns and runs the appropriate handler (e.g. email_html) to produce clean text and structured metadata. Phase B — Single LLM call with tools (per file): One LLM call handles project identification, duplicate checking, and record creation/update. ``create_*`` tool calls are counted to produce the accurate ``items_created`` metric. """ run_id = run_log.id agent_id = (run_context or {}).get("agent_id") or config.id _running_agents.add(agent_id) # ── Device online check ───────────────────────────────────────── target_device_id = config.device_id.strip() if isinstance(config.device_id, str) else "" is_online = ( device_mgr.is_online(user_id, target_device_id) if target_device_id else device_mgr.is_online(user_id) ) if not is_online: logger.info( "scout_runner: skip run=%s — device %r offline for user=%s", run_id, target_device_id or "", user_id, ) await _finalize_run( run_log, status="error", errors=[f"Device {target_device_id or ''!r} is not connected"], ) return # ── Set up WS executor for tools ──────────────────────────────── executor = _make_agent_executor(user_id, device_mgr, run_context) set_client_executor(executor) errors: list[str] = [] items_processed = 0 items_created = 0 agent_config: dict = config.scout_config or {} processing_tools = _build_processing_tools(config.data_types) try: # ── Code: scan directories ─────────────────────────────────── file_paths = await _scan_directories( paths=config.directory_paths, extensions=config.file_extensions or [], last_run_at=config.last_run_at, ) logger.info( "scout_runner: run=%s found %d file(s) after filtering", run_id, len(file_paths) ) if not file_paths: await _finalize_run(run_log, status="success", items_processed=0, items_created=0) return # ── Code: fetch all projects once ──────────────────────────── projects = await _fetch_projects() projects_block = _format_projects(projects) # Prompt template + Langfuse version linking (hot-swappable from UI). unified_template, prompt_obj = get_prompt_or_fallback( "unified_processing", _UNIFIED_PROCESSING_PROMPT ) for file_path in file_paths: try: # ── Phase A: read + detect + preprocess ───────────── file_result = await execute_on_client( action="read_file_content", data={"path": file_path} ) raw_content: str = file_result.get("content", "") if not raw_content.strip(): logger.debug( "scout_runner: run=%s skipping empty file %r", run_id, file_path ) continue items_processed += 1 filename = os.path.basename(file_path) content_type = detect_content_type(filename, raw_content) preprocessed = preprocess(content_type, raw_content) logger.info( "scout_runner: run=%s file=%r content_type=%s clean_len=%d", run_id, file_path, content_type, len(preprocessed.clean_text), ) # ── Phase B: single LLM call ───────────────────────── extraction_rules = _get_extraction_rules(agent_config, content_type) no_match_behavior = _get_no_match_behavior(agent_config) base_global_rules = list(agent_config.get("global_rules", [])) if "notes" in config.data_types: base_global_rules.append( "For notes: when updating an existing note use `propose_note_edit` " "(type=append/insert/replace) so the user can review AI changes. " "Only call `update_note` for complete content replacement without review." ) global_rules_lines = "\n".join(f"- {r}" for r in base_global_rules) metadata_section = _format_metadata(preprocessed.metadata) system_prompt = compile_prompt( unified_template, prompt_obj, filename=filename, metadata_section=metadata_section, projects_list=projects_block, no_match_behavior=no_match_behavior, extraction_rules=extraction_rules, global_rules=global_rules_lines, data_types=", ".join(config.data_types), ) user_message = ( f"Process this file and extract relevant information.\n\n" f"File: {file_path}\n\n" f"Content:\n{preprocessed.clean_text}" ) file_tool_calls: list[str] = [] result_text = await _run_agent_with_tools( system_prompt=system_prompt, user_message=user_message, tools=processing_tools, max_steps=_MAX_PROCESSING_STEPS, user_id=user_id, session_id=run_id, langfuse_prompt=prompt_obj, agent_name="unified-processor", _tool_calls_out=file_tool_calls, ) file_created = sum( 1 for name in file_tool_calls if name.startswith("create_") ) items_created += file_created # Refresh project list when a project was created so # subsequent files see it in the prompt context. if "create_project" in file_tool_calls: projects = await _fetch_projects() projects_block = _format_projects(projects) logger.info( "scout_runner: run=%s file=%r created=%d result=%s", run_id, file_path, file_created, result_text[:200], ) except Exception as exc: errors.append(f"Error processing '{file_path}': {exc}") logger.error( "scout_runner: run=%s file=%r failed: %s", run_id, file_path, exc ) except Exception as exc: errors.append(f"Agent run failed: {exc}") logger.error("scout_runner: run=%s failed: %s", run_id, exc) finally: _running_agents.discard(agent_id) clear_client_executor() # ── Finalise ──────────────────────────────────────────────────── if errors and items_processed == 0: final_status = "error" elif errors: final_status = "partial" else: final_status = "success" await _finalize_run( run_log, status=final_status, items_processed=items_processed, items_created=items_created, errors=errors, ) logger.info( "scout_runner: run=%s done status=%s processed=%d created=%d errors=%d", run_id, final_status, items_processed, items_created, len(errors), ) # Notify Electron that the run is complete. if run_context and device_mgr.is_online(user_id): try: await device_mgr.send_frame(user_id, { "type": "run_complete", "run_context": run_context, "status": final_status, }) except Exception as exc: logger.warning( "scout_runner: run=%s failed to send run_complete: %s", run_id, exc ) # ── Cloud agent runner ───────────────────────────────────────────────────── _CLOUD_DEFAULT_LOOKBACK_DAYS: int = 7 async def run_cloud_agent( user_id: str, config: CloudScoutConfig, run_log: ScoutRunLog, device_mgr: DeviceConnectionManager, ) -> None: """Execute a cloud connector agent run end-to-end. Steps: 1. Verify the user's device is online. 2. Decrypt the stored OAuth token from ``config.oauth_token_encrypted``. 3. Instantiate the provider client (Gmail or MS Graph). 4. Fetch messages/emails since ``config.last_run_at`` (or 7 days ago for the first run) applying ``config.filter_config`` filters. 5. For each message/email call the LLM to extract structured items. 6. Push each item to Electron as an ``insert`` tool-call. 7. If the provider refreshed its access token, re-encrypt and write it back to ``config.oauth_token_encrypted``. 8. Persist the run outcome via ``_finalize_run``. """ run_id = run_log.id # ── 1. Device online check ───────────────────────────────────────── if not device_mgr.is_online(user_id): logger.info( "scout_runner: skip cloud run=%s — no device online for user=%s", run_id, user_id, ) await _finalize_run( run_log, status="error", errors=["No connected device — cloud agent results cannot be delivered"], ) return # ── 2. Decrypt OAuth token ───────────────────────────────────────── from app.integrations import decrypt_token, encrypt_token, get_provider if not config.oauth_token_encrypted: await _finalize_run( run_log, status="error", errors=[f"No OAuth token stored for cloud agent '{config.name}'"], ) return try: credentials_info = decrypt_token(config.oauth_token_encrypted) except ValueError as exc: logger.error("scout_runner: failed to decrypt OAuth token for agent %s: %s", config.id, exc) await _finalize_run( run_log, status="error", errors=[f"Failed to decrypt OAuth token: {exc}"], ) return # ── 3. Instantiate provider client ──────────────────────────────── try: provider = get_provider(config.provider, credentials_info) except ValueError as exc: await _finalize_run(run_log, status="error", errors=[str(exc)]) return # ── 4. Fetch messages ───────────────────────────────────────────── since: datetime | None = config.last_run_at if since is None: since = datetime.now(timezone.utc) - timedelta(days=_CLOUD_DEFAULT_LOOKBACK_DAYS) if since.tzinfo is None: since = since.replace(tzinfo=timezone.utc) errors: list[str] = [] items_processed = 0 items_created = 0 try: if config.provider == "gmail": raw_messages = await provider.fetch_messages( # type: ignore[union-attr] filter_config=config.filter_config, since=since, ) elif config.provider == "outlook": raw_messages = await provider.fetch_emails( # type: ignore[union-attr] filter_config=config.filter_config, since=since, ) elif config.provider == "teams": raw_messages = await provider.fetch_messages( # type: ignore[union-attr] filter_config=config.filter_config, since=since, ) else: raw_messages = [] except RuntimeError as exc: logger.error( "scout_runner: provider fetch failed for cloud agent %s: %s", config.id, exc ) await _finalize_run( run_log, status="error", errors=[f"Provider fetch failed: {exc}"], update_config_last_run=True, config_id=config.id, config_type="cloud", ) return logger.info( "scout_runner: cloud agent %s fetched %d item(s) from %s for user=%s", config.id, len(raw_messages), config.provider, user_id, ) # ── 5–6. Extract + insert via LLM with tools ───────────────────── executor = _make_agent_executor(user_id, device_mgr) set_client_executor(executor) try: processing_tools = _build_processing_tools(config.data_types) custom_section = ( f"User instructions:\n{config.prompt_template}" if config.prompt_template else "" ) for msg in raw_messages: content_text = msg.as_text if not content_text: continue items_processed += 1 cloud_template, cloud_prompt_obj = get_prompt_or_fallback( "batch_cloud_processing", _BATCH_CLOUD_PROCESSING_PROMPT ) processing_prompt = compile_prompt( cloud_template, cloud_prompt_obj, data_types=", ".join(config.data_types), project_context="Determine the appropriate project from the message context.", file_list=f"Message from {config.provider} (id: {msg.id})", custom_prompt_section=custom_section, ) try: await _run_agent_with_tools( system_prompt=processing_prompt, user_message=f"Process this message content:\n\n{content_text[:8000]}", tools=processing_tools, max_steps=_MAX_PROCESSING_STEPS, user_id=user_id, session_id=run_id, langfuse_prompt=cloud_prompt_obj, agent_name="cloud-processor", ) except Exception as exc: errors.append(f"LLM processing error for message {msg.id!r}: {exc}") finally: clear_client_executor() # ── 7. Persist refreshed token (if any) ─────────────────────────── refreshed = getattr(provider, "refreshed_credentials", None) if refreshed: try: new_encrypted = encrypt_token(refreshed) async with async_session() as db: cfg_result = await db.execute( select(CloudScoutConfig).where(CloudScoutConfig.id == config.id) ) cfg_row = cfg_result.scalar_one_or_none() if cfg_row: cfg_row.oauth_token_encrypted = new_encrypted await db.commit() logger.debug("scout_runner: refreshed OAuth token persisted for agent %s", config.id) except Exception as exc: logger.warning( "scout_runner: failed to persist refreshed token for agent %s: %s", config.id, exc, ) # ── 8. Finalise ──────────────────────────────────────────────────── if errors and items_created == 0: final_status = "error" elif errors: final_status = "partial" else: final_status = "success" await _finalize_run( run_log, status=final_status, items_processed=items_processed, items_created=items_created, errors=errors, update_config_last_run=True, config_id=config.id, config_type="cloud", ) logger.info( "scout_runner: cloud run=%s done status=%s processed=%d created=%d errors=%d", run_id, final_status, items_processed, items_created, len(errors), ) # ── Pending-run trigger ───────────────────────────────────────────────────── async def trigger_pending_runs( user_id: str, device_id: str, device_mgr: DeviceConnectionManager, ) -> None: """Dispatch any overdue agent runs after an Electron device connects. Called as a background task from the device WS endpoint on ``device_hello``. """ logger.info( "scout_runner: pending-run scan skipped for user=%s device=%s (client-owned agent config)", user_id, device_id, ) return # ── Internal helper ───────────────────────────────────────────────────────── async def _finalize_run( run_log: ScoutRunLog, *, status: str, items_processed: int = 0, items_created: int = 0, errors: list[str] | None = None, update_config_last_run: bool = False, config_id: str | None = None, config_type: str | None = None, ) -> None: """Persist the run outcome and optionally update ``last_run_at`` on the config.""" now = datetime.now(timezone.utc) try: async with async_session() as db: managed = await db.merge(run_log) managed.status = status managed.items_processed = items_processed managed.items_created = items_created managed.errors = errors or [] managed.completed_at = now if update_config_last_run and config_id: if config_type == "local": cfg_result = await db.execute( select(LocalScoutConfig).where(LocalScoutConfig.id == config_id) ) cfg = cfg_result.scalar_one_or_none() if cfg: cfg.last_run_at = now elif config_type == "cloud": cfg_result = await db.execute( select(CloudScoutConfig).where(CloudScoutConfig.id == config_id) ) cfg = cfg_result.scalar_one_or_none() if cfg: cfg.last_run_at = now await db.commit() except Exception as exc: logger.error( "scout_runner: failed to finalize run_log=%s: %s", run_log.id, exc )