diff --git a/app/scouts/connectors/gmail.py b/app/scouts/connectors/gmail.py index d3abb3c..ee3bf96 100644 --- a/app/scouts/connectors/gmail.py +++ b/app/scouts/connectors/gmail.py @@ -16,12 +16,35 @@ from datetime import datetime, timezone from app.config.settings import settings from app.integrations import decrypt_token -from app.integrations.gmail import GmailClient from app.scouts.connectors.base import ItemContent, ItemMetadata, ItemRef logger = logging.getLogger(__name__) +def _extract_plain_text_body(payload: dict) -> str: + """Recursively walk a Gmail message payload to find text/plain content.""" + import base64 + mime_type = payload.get("mimeType", "") + if mime_type == "text/plain": + data = payload.get("body", {}).get("data", "") + if data: + return base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace") + return "" + if mime_type.startswith("multipart/"): + for part in payload.get("parts", []): + text = _extract_plain_text_body(part) + if text: + return text + # text/html fallback: strip tags rudimentarily if no text/plain part + if mime_type == "text/html": + data = payload.get("body", {}).get("data", "") + if data: + import re + html = base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace") + return re.sub(r"<[^>]+>", " ", html) + return "" + + def _get_gmail_service(scout): """Return a synchronous Google API client for low-level metadata/history calls.""" from googleapiclient.discovery import build @@ -118,32 +141,27 @@ class GmailConnector: # ── fetch_content ───────────────────────────────────────────────────── async def fetch_content(self, scout, ref: ItemRef) -> ItemContent: - """Fetch full body text via GmailClient — transient, must not be persisted.""" - creds_info = decrypt_token(scout.oauth_token_encrypted) - client = GmailClient(creds_info) - # fetch_messages returns EmailMessage dataclasses with body_text already - # extracted and decoded. We pass an empty filter to avoid narrowing by - # date — callers should only invoke fetch_content for known-new messages. - messages = await client.fetch_messages(filter_config=None, since=None) + """Fetch full body text for a single message — transient, must not be persisted.""" - # Pick the message matching our ref (or fall back to first if only one returned). - email_msg = next( - (m for m in messages if m.id == ref.source_msg_ref), - messages[0] if messages else None, - ) - if email_msg is None: - raise ValueError(f"Message {ref.source_msg_ref!r} not found via GmailClient") + def _sync() -> ItemContent: + service = _get_gmail_service(scout) + msg = service.users().messages().get( + userId="me", id=ref.source_msg_ref, format="full", + ).execute() + headers = {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])} + body_text = _extract_plain_text_body(msg.get("payload", {})) + return ItemContent( + metadata=ItemMetadata( + subject=headers.get("Subject"), + sender=headers.get("From"), + snippet=msg.get("snippet"), + received_at=None, + ), + body_text=body_text, + raw_headers=headers, + ) - return ItemContent( - metadata=ItemMetadata( - subject=email_msg.subject, - sender=email_msg.sender, - snippet=None, - received_at=email_msg.date, - ), - body_text=email_msg.body_text, - raw_headers={}, - ) + return await asyncio.to_thread(_sync) # ── archive ─────────────────────────────────────────────────────────── diff --git a/tests/test_scout_connectors_gmail.py b/tests/test_scout_connectors_gmail.py index 16c35aa..f54edd6 100644 --- a/tests/test_scout_connectors_gmail.py +++ b/tests/test_scout_connectors_gmail.py @@ -3,8 +3,7 @@ from __future__ import annotations import uuid -from datetime import datetime, timezone -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import pytest @@ -53,16 +52,24 @@ async def test_fetch_metadata_returns_subject_and_snippet(): @pytest.mark.asyncio async def test_fetch_content_returns_body_text(): + import base64 scout = _make_scout() conn = GmailConnector() - # decrypt_token is patched because the test doesn't set OAUTH_ENCRYPTION_KEY. - with patch("app.scouts.connectors.gmail.decrypt_token", return_value={}), \ - patch("app.scouts.connectors.gmail.GmailClient") as MockClient: - instance = MockClient.return_value - instance.fetch_messages = AsyncMock(return_value=[ - MagicMock(id="msg-1", subject="S", sender="a@b", body_text="hello world", - date=datetime.now(tz=timezone.utc), labels=[]), - ]) + body_data = base64.urlsafe_b64encode(b"hello world").decode() + fake_message = { + "id": "msg-1", + "snippet": "hello world", + "payload": { + "mimeType": "text/plain", + "headers": [ + {"name": "Subject", "value": "S"}, + {"name": "From", "value": "a@b"}, + ], + "body": {"data": body_data}, + }, + } + with patch("app.scouts.connectors.gmail._get_gmail_service") as mock_svc: + mock_svc.return_value.users().messages().get().execute.return_value = fake_message content = await conn.fetch_content(scout, ItemRef(source_msg_ref="msg-1")) assert content.body_text == "hello world" assert content.metadata.subject == "S"