feat(step-3.6): cloud provider integrations (Gmail, Outlook, Teams)

- Add app/integrations/__init__.py: Fernet token encryption helpers,
  EmailMessage/ChatMessage dataclasses, get_provider() factory
- Add app/integrations/gmail.py: GmailClient with async fetch_messages(),
  token refresh, configurable label/sender/date filters
- Add app/integrations/ms_graph.py: MSGraphClient with fetch_emails()
  (Outlook) and fetch_messages() (Teams), MSAL token refresh, OData filters
- Update app/core/agent_runner.py: replace run_cloud_agent() stub with
  full 8-step implementation; extend _finalize_run() for cloud config type
- Update app/config/settings.py: add OAuth + Fernet encryption settings
- Update requirements.txt: google-api-python-client, google-auth-*,
  msal, cryptography
- Add tests/test_integrations.py: 47 tests covering all integration code
- Update tests/test_agent_runner.py: replace stub test with 7 real tests

All 76 new/updated tests pass.
This commit is contained in:
2026-03-05 18:05:07 +01:00
parent 24772f2b67
commit a775a2da18
11 changed files with 2063 additions and 35 deletions

View File

@@ -29,7 +29,7 @@ import asyncio
import json
import logging
import uuid
from datetime import datetime, timezone
from datetime import datetime, timedelta, timezone
from typing import Any
from croniter import croniter
@@ -383,7 +383,10 @@ async def run_local_agent(
)
# ── Cloud agent runner (stub) ───────────────────────────────────────────────
# ── Cloud agent runner ─────────────────────────────────────────────────────
# Default lookback window when an agent has never run before.
_CLOUD_DEFAULT_LOOKBACK_DAYS: int = 7
async def run_cloud_agent(
@@ -392,26 +395,199 @@ async def run_cloud_agent(
run_log: AgentRunLog,
device_mgr: DeviceConnectionManager,
) -> None:
"""Execute a cloud connector agent run.
"""Execute a cloud connector agent run end-to-end.
.. note::
This is a **stub** — provider integrations (Gmail, Teams, Outlook)
are implemented in Step 3.6. The run is immediately marked as an
error with an informative message.
Steps:
1. Verify the user's device is online — results are pushed to Electron
via WS tool-call frames. If no device is connected, abort.
2. Decrypt the stored OAuth token from ``config.oauth_token_encrypted``.
3. Instantiate the provider client (Gmail or MS Graph).
4. Fetch messages/emails since ``config.last_run_at`` (or 7 days ago for
the first run) applying ``config.filter_config`` filters.
5. For each message/email call ``_extract_items_from_content`` with
``config.prompt_template`` to get structured ``{table, data}`` items.
6. Push each item to Electron as an ``insert`` tool-call.
7. If the provider refreshed its access token, re-encrypt and write it
back to ``config.oauth_token_encrypted``.
8. Persist the run outcome via ``_finalize_run``.
"""
run_id = run_log.id
# ── 1. Device online check ─────────────────────────────────────────
if not device_mgr.is_online(user_id):
logger.info(
"agent_runner: skip cloud run=%s — no device online for user=%s",
run_id,
user_id,
)
await _finalize_run(
run_log,
status="error",
errors=["No connected device — cloud agent results cannot be delivered"],
)
return
# ── 2. Decrypt OAuth token ─────────────────────────────────────────
from app.integrations import decrypt_token, encrypt_token, get_provider
if not config.oauth_token_encrypted:
await _finalize_run(
run_log,
status="error",
errors=[f"No OAuth token stored for cloud agent '{config.name}'"],
)
return
try:
credentials_info = decrypt_token(config.oauth_token_encrypted)
except ValueError as exc:
logger.error("agent_runner: failed to decrypt OAuth token for agent %s: %s", config.id, exc)
await _finalize_run(
run_log,
status="error",
errors=[f"Failed to decrypt OAuth token: {exc}"],
)
return
# ── 3. Instantiate provider client ────────────────────────────────
try:
provider = get_provider(config.provider, credentials_info)
except ValueError as exc:
await _finalize_run(
run_log,
status="error",
errors=[str(exc)],
)
return
# ── 4. Fetch messages ─────────────────────────────────────────────
since: datetime | None = config.last_run_at
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=_CLOUD_DEFAULT_LOOKBACK_DAYS)
if since.tzinfo is None:
since = since.replace(tzinfo=timezone.utc)
errors: list[str] = []
items_processed = 0
items_created = 0
try:
if config.provider == "gmail":
raw_messages = await provider.fetch_messages( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
elif config.provider == "outlook":
raw_messages = await provider.fetch_emails( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
elif config.provider == "teams":
raw_messages = await provider.fetch_messages( # type: ignore[union-attr]
filter_config=config.filter_config,
since=since,
)
else:
raw_messages = []
except RuntimeError as exc:
logger.error(
"agent_runner: provider fetch failed for cloud agent %s: %s",
config.id,
exc,
)
await _finalize_run(
run_log,
status="error",
errors=[f"Provider fetch failed: {exc}"],
update_config_last_run=True,
config_id=config.id,
config_type="cloud",
)
return
logger.info(
"agent_runner: cloud agent %s (provider=%s) for user=%s — pending Step 3.6",
"agent_runner: cloud agent %s fetched %d item(s) from %s for user=%s",
config.id,
len(raw_messages),
config.provider,
user_id,
)
# ── 56. Extract + insert ─────────────────────────────────────────
for msg in raw_messages:
content_text = msg.as_text
if not content_text:
continue
items_processed += 1
try:
extracted = await _extract_items_from_content(
config.prompt_template, content_text, config.data_types
)
except Exception as exc:
errors.append(f"LLM extraction error for message {msg.id!r}: {exc}")
continue
for item in extracted:
try:
result = await _send_insert_to_client(
user_id, item["table"], item["data"], device_mgr
)
if result.get("error"):
errors.append(
f"Insert failed ({item['table']}, msg={msg.id!r}): {result['error']}"
)
else:
items_created += 1
except asyncio.TimeoutError:
errors.append(
f"Timed out awaiting insert ack ({item['table']}, msg={msg.id!r})"
)
except RuntimeError as exc:
errors.append(f"Insert error ({item['table']}, msg={msg.id!r}): {exc}")
# ── 7. Persist refreshed token (if any) ───────────────────────────
refreshed = getattr(provider, "refreshed_credentials", None)
if refreshed:
try:
new_encrypted = encrypt_token(refreshed)
async with async_session() as db:
cfg_result = await db.execute(
select(CloudAgentConfig).where(CloudAgentConfig.id == config.id)
)
cfg_row = cfg_result.scalar_one_or_none()
if cfg_row:
cfg_row.oauth_token_encrypted = new_encrypted
await db.commit()
logger.debug("agent_runner: refreshed OAuth token persisted for agent %s", config.id)
except Exception as exc:
logger.warning("agent_runner: failed to persist refreshed token for agent %s: %s", config.id, exc)
# ── 8. Finalise ────────────────────────────────────────────────────
if errors and items_created == 0:
final_status = "error"
elif errors:
final_status = "partial"
else:
final_status = "success"
await _finalize_run(
run_log,
status="error",
errors=[
f"Cloud provider integrations for '{config.provider}' are not yet "
"implemented. This feature arrives in Step 3.6."
],
status=final_status,
items_processed=items_processed,
items_created=items_created,
errors=errors,
update_config_last_run=True,
config_id=config.id,
config_type="cloud",
)
logger.info(
"agent_runner: cloud run=%s done status=%s processed=%d created=%d errors=%d",
run_id,
final_status,
items_processed,
items_created,
len(errors),
)
@@ -519,13 +695,21 @@ async def _finalize_run(
managed.errors = errors or []
managed.completed_at = now
if update_config_last_run and config_id and config_type == "local":
cfg_result = await db.execute(
select(LocalAgentConfig).where(LocalAgentConfig.id == config_id)
)
cfg = cfg_result.scalar_one_or_none()
if cfg:
cfg.last_run_at = now
if update_config_last_run and config_id:
if config_type == "local":
cfg_result = await db.execute(
select(LocalAgentConfig).where(LocalAgentConfig.id == config_id)
)
cfg = cfg_result.scalar_one_or_none()
if cfg:
cfg.last_run_at = now
elif config_type == "cloud":
cfg_result = await db.execute(
select(CloudAgentConfig).where(CloudAgentConfig.id == config_id)
)
cfg = cfg_result.scalar_one_or_none()
if cfg:
cfg.last_run_at = now
await db.commit()
except Exception as exc:

View File

@@ -17,7 +17,10 @@ Switch providers by changing **LLM_MODEL** / **LLM_ROUTER_MODEL** in ``.env``
from __future__ import annotations
import os
from openai import AsyncOpenAI
import litellm
from langchain_openai import ChatOpenAI
from litellm import get_supported_openai_params # noqa: F401 validates install
@@ -31,6 +34,10 @@ def _api_key_for_model(model: str) -> str | None:
return settings.ANTHROPIC_API_KEY or None
if model.startswith("gemini/") or model.startswith("google/"):
return settings.GOOGLE_API_KEY or None
if model.startswith("github_copilot/"):
# GitHub Copilot uses OAuth device-flow tokens managed by LiteLLM.
# No API key is required; returning None lets LiteLLM handle auth.
return None
# Default: OpenAI-compatible (covers plain model names like "gpt-4o")
return settings.OPENAI_API_KEY or None
@@ -55,6 +62,11 @@ def get_llm(
Sampling temperature. ``0`` = deterministic.
"""
model = model or settings.LLM_MODEL
# Point LiteLLM to the custom token directory when configured.
if settings.GITHUB_COPILOT_TOKEN_DIR:
os.environ.setdefault("GITHUB_COPILOT_TOKEN_DIR", settings.GITHUB_COPILOT_TOKEN_DIR)
return ChatOpenAI(
model=model,
temperature=temperature,
@@ -71,10 +83,22 @@ def get_router_llm(
async def embed(text: str) -> list[float]:
"""Return a 1536-dim embedding vector for *text* using text-embedding-3-small."""
"""Return an embedding vector for *text*.
Uses ``settings.LLM_EMBED_MODEL`` so the same provider switch in ``.env``
(e.g. ``github_copilot/text-embedding-3-small``) applies here without any
code changes. Falls back to the raw AsyncOpenAI client for plain OpenAI
model names to preserve existing behaviour.
"""
model = settings.LLM_EMBED_MODEL
if model.startswith("github_copilot/") or "/" in model:
# Use LiteLLM for all provider-prefixed models (Copilot, Bedrock, etc.)
# so the provider's auth mechanism is applied correctly.
response = await litellm.aembedding(model=model, input=[text])
return response.data[0]["embedding"]
# Plain OpenAI model name — use the raw AsyncOpenAI client (existing path).
client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
response = await client.embeddings.create(
model="text-embedding-3-small",
input=text,
)
response = await client.embeddings.create(model=model, input=text)
return response.data[0].embedding