- agent_runner: local directory + cloud agent orchestration via Redis - 5 domain agents: filesystem, task, note, project, timeline - integrations: Gmail, MS Graph (Outlook + Teams) - journey: guided chatbot conversation to build prompt_template - routes: REST endpoints (catalog, can-create, trigger) - redis_consumer: subscribes to batch:request:* pattern - ws_context: Redis-based execute_on_client for tool round-trip - Dockerfile with 300s timeout for long-running batch jobs
267 lines
9.7 KiB
Python
267 lines
9.7 KiB
Python
"""Microsoft Graph API client for Outlook and Teams.

Adapted for the Batch Agent Service: settings are imported from ``shared.config``.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from shared.config import settings
|
|
from app.integrations import ChatMessage, EmailMessage
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
_GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
|
|
|
_MAX_EMAILS = 200
|
|
_MAX_MESSAGES = 200
|
|
_BODY_TRUNCATE = 8_000
|
|
|
|
|
|
def _strip_html(raw: str) -> str:
|
|
no_tags = re.sub(r"<[^>]+>", " ", raw)
|
|
import html as _html
|
|
decoded = _html.unescape(no_tags)
|
|
return re.sub(r"\s+", " ", decoded).strip()
|
|
|
|
|
|
def _odata_datetime(dt: datetime) -> str:
|
|
utc = dt.astimezone(timezone.utc)
|
|
return utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
|
|
def _build_email_filter(
|
|
filter_config: dict[str, Any] | None,
|
|
since: datetime | None,
|
|
) -> str:
|
|
clauses: list[str] = []
|
|
cfg = filter_config or {}
|
|
|
|
senders: list[str] = cfg.get("senders", [])
|
|
if senders:
|
|
sender_clauses = [f"from/emailAddress/address eq '{s}'" for s in senders]
|
|
clauses.append("(" + " or ".join(sender_clauses) + ")")
|
|
|
|
date_range: dict = cfg.get("date_range", {})
|
|
from_str: str | None = date_range.get("from")
|
|
|
|
effective_since: datetime | None = since
|
|
if from_str:
|
|
try:
|
|
cfg_since = datetime.fromisoformat(from_str.replace("Z", "+00:00"))
|
|
if cfg_since.tzinfo is None:
|
|
cfg_since = cfg_since.replace(tzinfo=timezone.utc)
|
|
if effective_since is None or cfg_since > effective_since:
|
|
effective_since = cfg_since
|
|
except ValueError:
|
|
logger.warning("ms_graph: invalid date_range.from %r — ignoring", from_str)
|
|
|
|
if effective_since:
|
|
clauses.append(f"receivedDateTime ge {_odata_datetime(effective_since)}")
|
|
|
|
to_str: str | None = date_range.get("to")
|
|
if to_str:
|
|
try:
|
|
to_dt = datetime.fromisoformat(to_str.replace("Z", "+00:00"))
|
|
if to_dt.tzinfo is None:
|
|
to_dt = to_dt.replace(tzinfo=timezone.utc)
|
|
clauses.append(f"receivedDateTime le {_odata_datetime(to_dt)}")
|
|
except ValueError:
|
|
logger.warning("ms_graph: invalid date_range.to %r — ignoring", to_str)
|
|
|
|
return " and ".join(clauses)
|
|
|
|
|
|
class MSGraphClient:
    """Async client for reading Outlook mail and Teams chat messages via Microsoft Graph.

    The client is built from a credentials mapping that carries an
    ``access_token`` and, optionally, a ``refresh_token`` and a
    space-separated ``scope`` string. When a request is answered with 401 and
    a refresh token is available, the access token is refreshed once through
    MSAL and the request is retried. ``refreshed_credentials`` exposes the
    updated mapping afterwards.
    """

    def __init__(self, credentials_info: dict[str, Any]) -> None:
        """Store the credentials mapping and read the tokens from it.

        Args:
            credentials_info: Mapping with ``access_token`` and optionally
                ``refresh_token`` / ``scope``. The mapping is kept by
                reference and is updated in place when a refresh happens.
        """
        self._credentials_info = credentials_info
        self._access_token: str = credentials_info.get("access_token", "")
        # Snapshot used by ``refreshed_credentials`` to detect a refresh.
        self._original_access_token: str = self._access_token
        self._refresh_token: str | None = credentials_info.get("refresh_token")

    def _auth_headers(self) -> dict[str, str]:
        """Return the ``Authorization`` header carrying the current access token."""
        return {"Authorization": f"Bearer {self._access_token}"}

    async def _refresh_access_token(self) -> None:
        """Exchange the stored refresh token for a new access token via MSAL.

        On success the new access token (and a rotated refresh token, if one
        is returned) is stored on the instance and written back into the
        credentials mapping.

        Raises:
            RuntimeError: If the MSAL response contains no ``access_token``.
        """
        import asyncio

        import msal

        scopes: list[str] = (self._credentials_info.get("scope") or "").split()
        if not scopes:
            scopes = ["https://graph.microsoft.com/.default"]
        refresh_token = self._refresh_token

        def _acquire() -> dict[str, Any]:
            app = msal.ConfidentialClientApplication(
                client_id=settings.MS_CLIENT_ID,
                client_credential=settings.MS_CLIENT_SECRET,
                authority=f"https://login.microsoftonline.com/{settings.MS_TENANT_ID}",
            )
            return app.acquire_token_by_refresh_token(refresh_token, scopes=scopes)

        # The MSAL calls are synchronous (not awaitable); running them on the
        # default executor keeps the event loop free while the token endpoint
        # is contacted.
        result = await asyncio.get_running_loop().run_in_executor(None, _acquire)

        if "access_token" not in result:
            error = result.get("error_description", result.get("error", "unknown"))
            raise RuntimeError(f"MS Graph token refresh failed: {error}")

        self._access_token = result["access_token"]
        if "refresh_token" in result:
            self._refresh_token = result["refresh_token"]
            self._credentials_info["refresh_token"] = result["refresh_token"]
        self._credentials_info["access_token"] = self._access_token

    @property
    def refreshed_credentials(self) -> dict[str, Any] | None:
        """Credentials mapping with the current access token, or ``None``.

        ``None`` is returned while the access token is still the one the
        client was constructed with.
        """
        if self._access_token != self._original_access_token:
            return {**self._credentials_info, "access_token": self._access_token}
        return None

    async def _get(
        self,
        client: httpx.AsyncClient,
        url: str,
        params: dict[str, Any] | None = None,
        *,
        retry_on_401: bool = True,
    ) -> dict[str, Any]:
        """Issue an authenticated GET and return the decoded JSON body.

        Args:
            client: The HTTP client to send the request with.
            url: Absolute request URL.
            params: Query parameters, or ``None`` to send *url* as is.
            retry_on_401: When true and a refresh token is available, a 401
                response triggers one token refresh followed by one retry.

        Raises:
            RuntimeError: On HTTP 429, or when the token refresh fails.
            httpx.HTTPStatusError: On any other error status.
        """
        resp = await client.get(url, params=params, headers=self._auth_headers())
        if resp.status_code == 401 and retry_on_401 and self._refresh_token:
            await self._refresh_access_token()
            resp = await client.get(url, params=params, headers=self._auth_headers())
        if resp.status_code == 429:
            raise RuntimeError("MS Graph rate limit hit (429). Try again later.")
        resp.raise_for_status()
        return resp.json()

    async def fetch_emails(
        self,
        filter_config: dict[str, Any] | None = None,
        since: datetime | None = None,
    ) -> list[EmailMessage]:
        """Fetch up to ``_MAX_EMAILS`` messages from ``/me/messages``, newest first.

        Args:
            filter_config: Optional sender / date-range settings, passed to
                ``_build_email_filter``.
            since: Optional lower bound on ``receivedDateTime``.

        Returns:
            The parsed messages in the order Graph returned them.
        """
        odata_filter = _build_email_filter(filter_config, since)
        first_page_params: dict[str, Any] = {
            "$top": 50,
            "$select": "id,subject,from,receivedDateTime,body,bodyPreview",
            "$orderby": "receivedDateTime desc",
        }
        if odata_filter:
            first_page_params["$filter"] = odata_filter

        emails: list[EmailMessage] = []
        url = f"{_GRAPH_BASE}/me/messages"
        # Only the first request carries query parameters: an
        # ``@odata.nextLink`` is a complete URL with its own query string and
        # is requested unchanged.
        page_params: dict[str, Any] | None = first_page_params

        async with httpx.AsyncClient(timeout=30.0) as client:
            while url and len(emails) < _MAX_EMAILS:
                data = await self._get(client, url, page_params)
                for item in data.get("value") or []:
                    emails.append(self._parse_email(item))
                    if len(emails) >= _MAX_EMAILS:
                        break
                url = data.get("@odata.nextLink", "")
                page_params = None

        logger.info("ms_graph: fetched %d Outlook email(s)", len(emails))
        return emails

    async def fetch_messages(
        self,
        filter_config: dict[str, Any] | None = None,
        since: datetime | None = None,
    ) -> list[ChatMessage]:
        """Fetch up to ``_MAX_MESSAGES`` messages from ``/me/chats/getAllMessages``.

        A 403 or 404 from the endpoint is logged as a warning and ends the
        fetch with whatever was collected so far; other HTTP errors propagate.

        Args:
            filter_config: Optional settings. ``channels`` is a list of
                strings; a message that has a channel id is kept only if one
                of them occurs (case-insensitively) in that id. Messages
                without a channel id are always kept.
            since: Optional lower bound on ``createdDateTime``.

        Returns:
            The parsed messages in the order Graph returned them.
        """
        cfg = filter_config or {}
        channel_filter: list[str] = [c.lower() for c in cfg.get("channels") or []]
        first_page_params: dict[str, Any] = {"$top": 50}
        if since:
            first_page_params["$filter"] = f"createdDateTime ge {_odata_datetime(since)}"

        messages: list[ChatMessage] = []
        url = f"{_GRAPH_BASE}/me/chats/getAllMessages"
        # As in fetch_emails(): parameters go on the first request only, the
        # nextLink URL is followed unchanged.
        page_params: dict[str, Any] | None = first_page_params

        async with httpx.AsyncClient(timeout=30.0) as client:
            while url and len(messages) < _MAX_MESSAGES:
                try:
                    data = await self._get(client, url, page_params)
                except httpx.HTTPStatusError as exc:
                    if exc.response.status_code in (403, 404):
                        logger.warning(
                            "ms_graph: /me/chats/getAllMessages not available (%d)",
                            exc.response.status_code,
                        )
                        break
                    raise

                for item in data.get("value") or []:
                    msg = self._parse_teams_message(item)
                    if channel_filter and msg.channel:
                        channel_id = msg.channel.lower()
                        if not any(c in channel_id for c in channel_filter):
                            continue
                    messages.append(msg)
                    if len(messages) >= _MAX_MESSAGES:
                        break
                url = data.get("@odata.nextLink", "")
                page_params = None

        logger.info("ms_graph: fetched %d Teams message(s)", len(messages))
        return messages

    @staticmethod
    def _parse_graph_datetime(value: Any) -> datetime:
        """Parse an ISO-8601 timestamp string from a Graph payload.

        Returns the current UTC time when *value* is missing, not a string,
        or unparsable. A parsed value without an offset is tagged as UTC.
        """
        if isinstance(value, str) and value:
            try:
                # fromisoformat() rejects a trailing "Z" on older Pythons.
                parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
            except ValueError:
                pass
            else:
                if parsed.tzinfo is None:
                    parsed = parsed.replace(tzinfo=timezone.utc)
                return parsed
        return datetime.now(timezone.utc)

    @staticmethod
    def _parse_email(item: dict[str, Any]) -> EmailMessage:
        """Convert one Graph ``message`` resource into an ``EmailMessage``.

        HTML bodies are reduced to plain text; a text body that is empty
        falls back to ``bodyPreview``. The body is cut to ``_BODY_TRUNCATE``
        characters. Fields that are missing or ``null`` in the payload get
        placeholder values instead of raising.
        """
        subject: str = item.get("subject") or "(no subject)"
        sender_block = item.get("from") or {}
        sender_addr: str = (sender_block.get("emailAddress") or {}).get("address") or "unknown"
        date = MSGraphClient._parse_graph_datetime(item.get("receivedDateTime"))

        # ``or`` rather than a .get() default: the key may be present with a
        # JSON null value, which a default would not replace.
        body_block = item.get("body") or {}
        content_type: str = (body_block.get("contentType") or "text").lower()
        raw_body: str = body_block.get("content") or ""
        if content_type == "html":
            body_text = _strip_html(raw_body)
        else:
            body_text = raw_body or item.get("bodyPreview") or ""
        body_text = body_text[:_BODY_TRUNCATE]

        return EmailMessage(
            id=item.get("id", ""),
            subject=subject,
            sender=sender_addr,
            body_text=body_text,
            date=date,
        )

    @staticmethod
    def _parse_teams_message(item: dict[str, Any]) -> ChatMessage:
        """Convert one Graph ``chatMessage`` resource into a ``ChatMessage``.

        The sender is the ``from.user.displayName`` value, the channel is
        ``channelIdentity.channelId`` (``None`` when absent). HTML content is
        reduced to plain text and cut to ``_BODY_TRUNCATE`` characters.
        """
        msg_id: str = item.get("id", "")
        sender_block = (item.get("from") or {}).get("user") or {}
        sender: str = sender_block.get("displayName") or "unknown"
        channel: str | None = (item.get("channelIdentity") or {}).get("channelId")
        date = MSGraphClient._parse_graph_datetime(item.get("createdDateTime"))

        body_block = item.get("body") or {}
        content_type: str = (body_block.get("contentType") or "text").lower()
        raw_content: str = body_block.get("content") or ""
        content = _strip_html(raw_content) if content_type == "html" else raw_content
        content = content[:_BODY_TRUNCATE]

        return ChatMessage(
            id=msg_id,
            content=content,
            sender=sender,
            channel=channel,
            date=date,
        )
|