"""Microsoft Graph API client for Outlook and Teams. Adapted for Batch Agent Service: import settings from shared.config. """ from __future__ import annotations import logging import re from datetime import datetime, timezone from typing import Any import httpx from shared.config import settings from app.integrations import ChatMessage, EmailMessage logger = logging.getLogger(__name__) _GRAPH_BASE = "https://graph.microsoft.com/v1.0" _MAX_EMAILS = 200 _MAX_MESSAGES = 200 _BODY_TRUNCATE = 8_000 def _strip_html(raw: str) -> str: no_tags = re.sub(r"<[^>]+>", " ", raw) import html as _html decoded = _html.unescape(no_tags) return re.sub(r"\s+", " ", decoded).strip() def _odata_datetime(dt: datetime) -> str: utc = dt.astimezone(timezone.utc) return utc.strftime("%Y-%m-%dT%H:%M:%SZ") def _build_email_filter( filter_config: dict[str, Any] | None, since: datetime | None, ) -> str: clauses: list[str] = [] cfg = filter_config or {} senders: list[str] = cfg.get("senders", []) if senders: sender_clauses = [f"from/emailAddress/address eq '{s}'" for s in senders] clauses.append("(" + " or ".join(sender_clauses) + ")") date_range: dict = cfg.get("date_range", {}) from_str: str | None = date_range.get("from") effective_since: datetime | None = since if from_str: try: cfg_since = datetime.fromisoformat(from_str.replace("Z", "+00:00")) if cfg_since.tzinfo is None: cfg_since = cfg_since.replace(tzinfo=timezone.utc) if effective_since is None or cfg_since > effective_since: effective_since = cfg_since except ValueError: logger.warning("ms_graph: invalid date_range.from %r — ignoring", from_str) if effective_since: clauses.append(f"receivedDateTime ge {_odata_datetime(effective_since)}") to_str: str | None = date_range.get("to") if to_str: try: to_dt = datetime.fromisoformat(to_str.replace("Z", "+00:00")) if to_dt.tzinfo is None: to_dt = to_dt.replace(tzinfo=timezone.utc) clauses.append(f"receivedDateTime le {_odata_datetime(to_dt)}") except ValueError: logger.warning("ms_graph: invalid date_range.to %r — ignoring", to_str) return " and ".join(clauses) class MSGraphClient: def __init__(self, credentials_info: dict[str, Any]) -> None: self._credentials_info = credentials_info self._access_token: str = credentials_info.get("access_token", "") self._original_access_token: str = self._access_token self._refresh_token: str | None = credentials_info.get("refresh_token") def _auth_headers(self) -> dict[str, str]: return {"Authorization": f"Bearer {self._access_token}"} async def _refresh_access_token(self) -> None: import msal app = msal.ConfidentialClientApplication( client_id=settings.MS_CLIENT_ID, client_credential=settings.MS_CLIENT_SECRET, authority=f"https://login.microsoftonline.com/{settings.MS_TENANT_ID}", ) scopes: list[str] = self._credentials_info.get("scope", "").split() if not scopes: scopes = ["https://graph.microsoft.com/.default"] result = app.acquire_token_by_refresh_token( self._refresh_token, scopes=scopes, ) if "access_token" not in result: error = result.get("error_description", result.get("error", "unknown")) raise RuntimeError(f"MS Graph token refresh failed: {error}") self._access_token = result["access_token"] if "refresh_token" in result: self._refresh_token = result["refresh_token"] self._credentials_info["refresh_token"] = result["refresh_token"] self._credentials_info["access_token"] = self._access_token @property def refreshed_credentials(self) -> dict[str, Any] | None: if self._access_token != self._original_access_token: return {**self._credentials_info, "access_token": self._access_token} return None async def _get( self, client: httpx.AsyncClient, url: str, params: dict[str, Any] | None = None, *, retry_on_401: bool = True, ) -> dict[str, Any]: resp = await client.get(url, params=params, headers=self._auth_headers()) if resp.status_code == 401 and retry_on_401 and self._refresh_token: await self._refresh_access_token() resp = await client.get(url, params=params, headers=self._auth_headers()) if resp.status_code == 429: raise RuntimeError("MS Graph rate limit hit (429). Try again later.") resp.raise_for_status() return resp.json() async def fetch_emails( self, filter_config: dict[str, Any] | None = None, since: datetime | None = None, ) -> list[EmailMessage]: odata_filter = _build_email_filter(filter_config, since) params: dict[str, Any] = { "$top": 50, "$select": "id,subject,from,receivedDateTime,body,bodyPreview", "$orderby": "receivedDateTime desc", } if odata_filter: params["$filter"] = odata_filter emails: list[EmailMessage] = [] url = f"{_GRAPH_BASE}/me/messages" async with httpx.AsyncClient(timeout=30.0) as client: while url and len(emails) < _MAX_EMAILS: data = await self._get(client, url, params if url.startswith(_GRAPH_BASE) else None) for item in data.get("value", []): emails.append(self._parse_email(item)) if len(emails) >= _MAX_EMAILS: break url = data.get("@odata.nextLink", "") params = {} logger.info("ms_graph: fetched %d Outlook email(s)", len(emails)) return emails async def fetch_messages( self, filter_config: dict[str, Any] | None = None, since: datetime | None = None, ) -> list[ChatMessage]: cfg = filter_config or {} channel_filter: list[str] = [c.lower() for c in cfg.get("channels", [])] params: dict[str, Any] = {"$top": 50} if since: params["$filter"] = f"createdDateTime ge {_odata_datetime(since)}" messages: list[ChatMessage] = [] url = f"{_GRAPH_BASE}/me/chats/getAllMessages" async with httpx.AsyncClient(timeout=30.0) as client: while url and len(messages) < _MAX_MESSAGES: try: data = await self._get(client, url, params if url.startswith(_GRAPH_BASE) else None) except httpx.HTTPStatusError as exc: if exc.response.status_code in (403, 404): logger.warning( "ms_graph: /me/chats/getAllMessages not available (%d)", exc.response.status_code, ) break raise for item in data.get("value", []): msg = self._parse_teams_message(item) if channel_filter and msg.channel: if not any(c in msg.channel.lower() for c in channel_filter): continue messages.append(msg) if len(messages) >= _MAX_MESSAGES: break url = data.get("@odata.nextLink", "") params = {} logger.info("ms_graph: fetched %d Teams message(s)", len(messages)) return messages @staticmethod def _parse_email(item: dict[str, Any]) -> EmailMessage: subject: str = item.get("subject", "(no subject)") or "(no subject)" sender_block = item.get("from", {}) or {} sender_addr = ( (sender_block.get("emailAddress") or {}).get("address", "unknown") ) date_str: str = item.get("receivedDateTime", "") try: date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) except Exception: date = datetime.now(timezone.utc) body_block = item.get("body", {}) or {} content_type: str = body_block.get("contentType", "text") raw_body: str = body_block.get("content", "") if content_type == "html": body_text = _strip_html(raw_body) else: body_text = raw_body or item.get("bodyPreview", "") body_text = body_text[:_BODY_TRUNCATE] return EmailMessage( id=item.get("id", ""), subject=subject, sender=sender_addr, body_text=body_text, date=date, ) @staticmethod def _parse_teams_message(item: dict[str, Any]) -> ChatMessage: msg_id: str = item.get("id", "") sender_block = (item.get("from") or {}).get("user") or {} sender: str = sender_block.get("displayName", "unknown") channel: str | None = (item.get("channelIdentity") or {}).get("channelId") date_str: str = item.get("createdDateTime", "") try: date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) except Exception: date = datetime.now(timezone.utc) body_block = item.get("body", {}) or {} content_type: str = body_block.get("contentType", "text") raw_content: str = body_block.get("content", "") content = _strip_html(raw_content) if content_type == "html" else raw_content content = content[:_BODY_TRUNCATE] return ChatMessage( id=msg_id, content=content, sender=sender, channel=channel, date=date, )