feat(batch-agent): extract Batch Agent Service (Step 3)

- agent_runner: local directory + cloud agent orchestration via Redis
- 5 domain agents: filesystem, task, note, project, timeline
- integrations: Gmail, MS Graph (Outlook + Teams)
- journey: guided chatbot conversation to build prompt_template
- routes: REST endpoints (catalog, can-create, trigger)
- redis_consumer: subscribes to batch:request:* pattern
- ws_context: Redis-based execute_on_client for tool round-trip
- Dockerfile with 300s timeout for long-running batch jobs
This commit is contained in:
Roberto Musso
2026-03-23 07:19:02 +01:00
parent 229e20d073
commit 333bba6fdd
18 changed files with 3157 additions and 0 deletions

View File

@@ -0,0 +1,135 @@
"""WebSocket context for Batch Agent Service — Redis-based tool call round-trip.
Same pattern as services/chat/app/ws_context.py: publishes tool_call frames
to Redis ws:out:{user_id} and awaits BRPOP on tool:result:{call_id}.
Additionally provides set_client_executor / clear_client_executor stubs
for backward compatibility with the agent_runner code (which originally
used a DeviceConnectionManager callback). In the microservice world these
are no-ops — execute_on_client() always uses the Redis path.
"""
from __future__ import annotations
import json
import logging
from contextvars import ContextVar
from typing import Any, Callable, Coroutine
from uuid import uuid4
from shared.redis import redis_client, tool_result_key, ws_out_channel
logger = logging.getLogger(__name__)
_TOOL_CALL_TIMEOUT = 30 # seconds — BRPOP timeout
# Per-request user_id context var (set before agent run)
_current_user_id: ContextVar[str | None] = ContextVar("_current_user_id", default=None)
# Optional collector for debug / logging
_tool_result_collector: ContextVar[list[dict] | None] = ContextVar(
"_tool_result_collector", default=None
)
def set_current_user(user_id: str) -> None:
_current_user_id.set(user_id)
def clear_current_user() -> None:
_current_user_id.set(None)
def set_tool_result_collector(lst: list[dict]) -> None:
_tool_result_collector.set(lst)
def clear_tool_result_collector() -> None:
_tool_result_collector.set(None)
# ── Compatibility shims ──────────────────────────────────────────────────
# agent_runner.py originally called set_client_executor / clear_client_executor
# with a DeviceConnectionManager callback. In the microservice world the
# Redis-based execute_on_client replaces this, so these are no-ops that
# keep the agent_runner code unchanged.
def set_client_executor(fn: Callable[[dict], Coroutine[Any, Any, dict]] | None) -> None:
"""No-op — kept for agent_runner compatibility."""
pass
def clear_client_executor() -> None:
"""No-op — kept for agent_runner compatibility."""
pass
async def execute_on_client(
action: str,
table: str | None = None,
data: dict[str, Any] | None = None,
filters: dict[str, Any] | None = None,
vector: list[float] | None = None,
limit: int | None = None,
) -> dict[str, Any]:
"""Send a tool_call to Electron via Redis and await the result.
1. Build tool_call payload
2. Publish to ws:out:{user_id} (WS Gateway forwards to Electron)
3. BRPOP on tool:result:{call_id} (WS Gateway pushes when Electron replies)
4. Return result dict
Raises RuntimeError if no user_id is set or if the call times out.
"""
user_id = _current_user_id.get()
if not user_id:
raise RuntimeError(
"execute_on_client() called without a user_id — "
"set_current_user() must be called first."
)
call_id = str(uuid4())
payload: dict[str, Any] = {
"type": "tool_call",
"id": call_id,
"action": action,
}
if table is not None:
payload["table"] = table
if data is not None:
payload["data"] = data
if filters is not None:
payload["filters"] = {k: v for k, v in filters.items() if v is not None}
if vector is not None:
payload["vector"] = vector
if limit is not None:
payload["limit"] = limit
# Publish tool_call to WS Gateway → Electron
channel = ws_out_channel(user_id)
await redis_client.publish(channel, json.dumps(payload))
# Wait for Electron's tool_result
result_key = tool_result_key(call_id)
response = await redis_client.brpop(result_key, timeout=_TOOL_CALL_TIMEOUT)
if response is None:
raise RuntimeError(
f"Tool call {call_id} timed out after {_TOOL_CALL_TIMEOUT}s — "
f"device may be offline or unresponsive."
)
# response is (key, value) tuple
_, raw = response
result = json.loads(raw)
# Collect for debug if requested
collector = _tool_result_collector.get(None)
if collector is not None:
collector.append({
"action": action,
"table": table,
"data": result,
})
return result