"""Microsoft Graph API client for Outlook and Teams cloud agent integration. Handles two data sources: * **Outlook email** (``provider="outlook"``) — ``fetch_emails()`` calls ``/me/messages`` with an OData ``$filter`` built from ``filter_config``. * **Teams messages** (``provider="teams"``) — ``fetch_messages()`` calls ``/me/chats/getAllMessages`` filtered by date. Authentication uses MSAL ``PublicClientApplication`` to acquire a token from a stored refresh token. The ``httpx.AsyncClient`` (already a project dependency) is used for all API calls. Credential dict shape (Microsoft OAuth2 / MSAL): { "access_token": "", "refresh_token": "", "token_type": "Bearer", "scope": "Mail.Read ChannelMessage.Read.All offline_access", "expires_in": 3600 } """ from __future__ import annotations import logging import re from datetime import datetime, timedelta, timezone from typing import Any import httpx from app.config.settings import settings from app.integrations import ChatMessage, EmailMessage logger = logging.getLogger(__name__) _GRAPH_BASE = "https://graph.microsoft.com/v1.0" # Max items fetched per run. _MAX_EMAILS = 200 _MAX_MESSAGES = 200 # Max characters of body forwarded to the LLM. _BODY_TRUNCATE = 8_000 def _strip_html(raw: str) -> str: """Strip HTML tags and collapse whitespace.""" no_tags = re.sub(r"<[^>]+>", " ", raw) import html as _html decoded = _html.unescape(no_tags) return re.sub(r"\s+", " ", decoded).strip() def _odata_datetime(dt: datetime) -> str: """Format a datetime as an OData datetime literal (UTC, ISO 8601).""" utc = dt.astimezone(timezone.utc) return utc.strftime("%Y-%m-%dT%H:%M:%SZ") def _build_email_filter( filter_config: dict[str, Any] | None, since: datetime | None, ) -> str: """Build an OData ``$filter`` expression for the ``/me/messages`` endpoint. Supported ``filter_config`` keys: senders (list[str]): Sender email addresses. date_range (dict): ``{from: "", to: ""}`` folders (list[str]): Folder display names (not directly filterable via OData, so ignored here — callers iterate folder IDs separately if needed; listed for completeness). A hard ``since`` date always overrides ``date_range.from`` when it is earlier. """ clauses: list[str] = [] cfg = filter_config or {} # Senders. senders: list[str] = cfg.get("senders", []) if senders: sender_clauses = [f"from/emailAddress/address eq '{s}'" for s in senders] clauses.append("(" + " or ".join(sender_clauses) + ")") # Date range. date_range: dict = cfg.get("date_range", {}) from_str: str | None = date_range.get("from") effective_since: datetime | None = since if from_str: try: cfg_since = datetime.fromisoformat(from_str.replace("Z", "+00:00")) if cfg_since.tzinfo is None: cfg_since = cfg_since.replace(tzinfo=timezone.utc) if effective_since is None or cfg_since > effective_since: effective_since = cfg_since except ValueError: logger.warning("ms_graph: invalid date_range.from %r — ignoring", from_str) if effective_since: clauses.append(f"receivedDateTime ge {_odata_datetime(effective_since)}") to_str: str | None = date_range.get("to") if to_str: try: to_dt = datetime.fromisoformat(to_str.replace("Z", "+00:00")) if to_dt.tzinfo is None: to_dt = to_dt.replace(tzinfo=timezone.utc) clauses.append(f"receivedDateTime le {_odata_datetime(to_dt)}") except ValueError: logger.warning("ms_graph: invalid date_range.to %r — ignoring", to_str) return " and ".join(clauses) class MSGraphClient: """Fetch emails and Teams messages via the Microsoft Graph REST API. Parameters ---------- credentials_info: Decrypted MSAL credential dict. """ def __init__(self, credentials_info: dict[str, Any]) -> None: self._credentials_info = credentials_info self._access_token: str = credentials_info.get("access_token", "") self._original_access_token: str = self._access_token self._refresh_token: str | None = credentials_info.get("refresh_token") # ── Token management ─────────────────────────────────────────────────── def _auth_headers(self) -> dict[str, str]: return {"Authorization": f"Bearer {self._access_token}"} async def _refresh_access_token(self) -> None: """Use MSAL to exchange the refresh token for a fresh access token. Updates ``self._access_token`` and ``self._credentials_info`` in-place. Raises: RuntimeError: MSAL reports an auth error. """ import msal app = msal.ConfidentialClientApplication( client_id=settings.MS_CLIENT_ID, client_credential=settings.MS_CLIENT_SECRET, authority=f"https://login.microsoftonline.com/{settings.MS_TENANT_ID}", ) scopes: list[str] = self._credentials_info.get("scope", "").split() if not scopes: scopes = ["https://graph.microsoft.com/.default"] result = app.acquire_token_by_refresh_token( self._refresh_token, scopes=scopes, ) if "access_token" not in result: error = result.get("error_description", result.get("error", "unknown")) raise RuntimeError(f"MS Graph token refresh failed: {error}") self._access_token = result["access_token"] # MSAL may issue a new refresh token. if "refresh_token" in result: self._refresh_token = result["refresh_token"] self._credentials_info["refresh_token"] = result["refresh_token"] self._credentials_info["access_token"] = self._access_token @property def refreshed_credentials(self) -> dict[str, Any] | None: """Return updated credential dict if the access token was refreshed. Returns ``None`` if no change was made. """ if self._access_token != self._original_access_token: return {**self._credentials_info, "access_token": self._access_token} return None # ── HTTP helpers ─────────────────────────────────────────────────────── async def _get( self, client: httpx.AsyncClient, url: str, params: dict[str, Any] | None = None, *, retry_on_401: bool = True, ) -> dict[str, Any]: """GET *url* with auth; refresh token on 401 and retry once.""" resp = await client.get(url, params=params, headers=self._auth_headers()) if resp.status_code == 401 and retry_on_401 and self._refresh_token: logger.debug("ms_graph: 401 on %s — refreshing token", url) await self._refresh_access_token() resp = await client.get(url, params=params, headers=self._auth_headers()) if resp.status_code == 429: raise RuntimeError("MS Graph rate limit hit (429). Try again later.") resp.raise_for_status() return resp.json() # ── Public API ───────────────────────────────────────────────────────── async def fetch_emails( self, filter_config: dict[str, Any] | None = None, since: datetime | None = None, ) -> list[EmailMessage]: """Return up to ``_MAX_EMAILS`` Outlook messages matching *filter_config*. Parameters ---------- filter_config: Optional dict with ``senders``, ``date_range``, ``folders`` keys. since: Hard lower-bound on email date (from last agent run). """ odata_filter = _build_email_filter(filter_config, since) params: dict[str, Any] = { "$top": 50, "$select": "id,subject,from,receivedDateTime,body,bodyPreview", "$orderby": "receivedDateTime desc", } if odata_filter: params["$filter"] = odata_filter emails: list[EmailMessage] = [] url = f"{_GRAPH_BASE}/me/messages" async with httpx.AsyncClient(timeout=30.0) as client: while url and len(emails) < _MAX_EMAILS: data = await self._get(client, url, params if url.startswith(_GRAPH_BASE) else None) for item in data.get("value", []): emails.append(self._parse_email(item)) if len(emails) >= _MAX_EMAILS: break url = data.get("@odata.nextLink", "") params = {} # nextLink already contains encoded params. logger.info("ms_graph: fetched %d Outlook email(s)", len(emails)) return emails async def fetch_messages( self, filter_config: dict[str, Any] | None = None, since: datetime | None = None, ) -> list[ChatMessage]: """Return up to ``_MAX_MESSAGES`` Teams messages matching *filter_config*. Fetches from ``/me/chats/getAllMessages`` (personal + group chats). The ``filter_config.channels`` key is checked as a text-filter on the channel name post-fetch (the API doesn't support channel OData filter directly on ``getAllMessages``). """ cfg = filter_config or {} channel_filter: list[str] = [c.lower() for c in cfg.get("channels", [])] params: dict[str, Any] = {"$top": 50} if since: params["$filter"] = f"createdDateTime ge {_odata_datetime(since)}" messages: list[ChatMessage] = [] url = f"{_GRAPH_BASE}/me/chats/getAllMessages" async with httpx.AsyncClient(timeout=30.0) as client: while url and len(messages) < _MAX_MESSAGES: try: data = await self._get(client, url, params if url.startswith(_GRAPH_BASE) else None) except httpx.HTTPStatusError as exc: # getAllMessages requires specific licensing; degrade gracefully. if exc.response.status_code in (403, 404): logger.warning( "ms_graph: /me/chats/getAllMessages not available (%d) — " "check Teams license or permissions", exc.response.status_code, ) break raise for item in data.get("value", []): msg = self._parse_teams_message(item) if channel_filter and msg.channel: if not any(c in msg.channel.lower() for c in channel_filter): continue messages.append(msg) if len(messages) >= _MAX_MESSAGES: break url = data.get("@odata.nextLink", "") params = {} logger.info("ms_graph: fetched %d Teams message(s)", len(messages)) return messages # ── Parsers ──────────────────────────────────────────────────────────── @staticmethod def _parse_email(item: dict[str, Any]) -> EmailMessage: subject: str = item.get("subject", "(no subject)") or "(no subject)" sender_block = item.get("from", {}) or {} sender_addr = ( (sender_block.get("emailAddress") or {}).get("address", "unknown") ) date_str: str = item.get("receivedDateTime", "") try: date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) except Exception: date = datetime.now(timezone.utc) body_block = item.get("body", {}) or {} content_type: str = body_block.get("contentType", "text") raw_body: str = body_block.get("content", "") if content_type == "html": body_text = _strip_html(raw_body) else: body_text = raw_body or item.get("bodyPreview", "") body_text = body_text[:_BODY_TRUNCATE] return EmailMessage( id=item.get("id", ""), subject=subject, sender=sender_addr, body_text=body_text, date=date, ) @staticmethod def _parse_teams_message(item: dict[str, Any]) -> ChatMessage: msg_id: str = item.get("id", "") sender_block = (item.get("from") or {}).get("user") or {} sender: str = sender_block.get("displayName", "unknown") channel: str | None = (item.get("channelIdentity") or {}).get("channelId") date_str: str = item.get("createdDateTime", "") try: date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) except Exception: date = datetime.now(timezone.utc) body_block = item.get("body", {}) or {} content_type: str = body_block.get("contentType", "text") raw_content: str = body_block.get("content", "") content = _strip_html(raw_content) if content_type == "html" else raw_content content = content[:_BODY_TRUNCATE] return ChatMessage( id=msg_id, content=content, sender=sender, channel=channel, date=date, )