Compare commits
1 Commits
11b31e5814
...
0833db239c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0833db239c |
@@ -16,12 +16,35 @@ from datetime import datetime, timezone
|
||||
|
||||
from app.config.settings import settings
|
||||
from app.integrations import decrypt_token
|
||||
from app.integrations.gmail import GmailClient
|
||||
from app.scouts.connectors.base import ItemContent, ItemMetadata, ItemRef
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_plain_text_body(payload: dict) -> str:
|
||||
"""Recursively walk a Gmail message payload to find text/plain content."""
|
||||
import base64
|
||||
mime_type = payload.get("mimeType", "")
|
||||
if mime_type == "text/plain":
|
||||
data = payload.get("body", {}).get("data", "")
|
||||
if data:
|
||||
return base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace")
|
||||
return ""
|
||||
if mime_type.startswith("multipart/"):
|
||||
for part in payload.get("parts", []):
|
||||
text = _extract_plain_text_body(part)
|
||||
if text:
|
||||
return text
|
||||
# text/html fallback: strip tags rudimentarily if no text/plain part
|
||||
if mime_type == "text/html":
|
||||
data = payload.get("body", {}).get("data", "")
|
||||
if data:
|
||||
import re
|
||||
html = base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace")
|
||||
return re.sub(r"<[^>]+>", " ", html)
|
||||
return ""
|
||||
|
||||
|
||||
def _get_gmail_service(scout):
|
||||
"""Return a synchronous Google API client for low-level metadata/history calls."""
|
||||
from googleapiclient.discovery import build
|
||||
@@ -118,32 +141,27 @@ class GmailConnector:
|
||||
# ── fetch_content ─────────────────────────────────────────────────────
|
||||
|
||||
async def fetch_content(self, scout, ref: ItemRef) -> ItemContent:
|
||||
"""Fetch full body text via GmailClient — transient, must not be persisted."""
|
||||
creds_info = decrypt_token(scout.oauth_token_encrypted)
|
||||
client = GmailClient(creds_info)
|
||||
# fetch_messages returns EmailMessage dataclasses with body_text already
|
||||
# extracted and decoded. We pass an empty filter to avoid narrowing by
|
||||
# date — callers should only invoke fetch_content for known-new messages.
|
||||
messages = await client.fetch_messages(filter_config=None, since=None)
|
||||
"""Fetch full body text for a single message — transient, must not be persisted."""
|
||||
|
||||
# Pick the message matching our ref (or fall back to first if only one returned).
|
||||
email_msg = next(
|
||||
(m for m in messages if m.id == ref.source_msg_ref),
|
||||
messages[0] if messages else None,
|
||||
)
|
||||
if email_msg is None:
|
||||
raise ValueError(f"Message {ref.source_msg_ref!r} not found via GmailClient")
|
||||
def _sync() -> ItemContent:
|
||||
service = _get_gmail_service(scout)
|
||||
msg = service.users().messages().get(
|
||||
userId="me", id=ref.source_msg_ref, format="full",
|
||||
).execute()
|
||||
headers = {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
|
||||
body_text = _extract_plain_text_body(msg.get("payload", {}))
|
||||
return ItemContent(
|
||||
metadata=ItemMetadata(
|
||||
subject=headers.get("Subject"),
|
||||
sender=headers.get("From"),
|
||||
snippet=msg.get("snippet"),
|
||||
received_at=None,
|
||||
),
|
||||
body_text=body_text,
|
||||
raw_headers=headers,
|
||||
)
|
||||
|
||||
return ItemContent(
|
||||
metadata=ItemMetadata(
|
||||
subject=email_msg.subject,
|
||||
sender=email_msg.sender,
|
||||
snippet=None,
|
||||
received_at=email_msg.date,
|
||||
),
|
||||
body_text=email_msg.body_text,
|
||||
raw_headers={},
|
||||
)
|
||||
return await asyncio.to_thread(_sync)
|
||||
|
||||
# ── archive ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -53,16 +52,24 @@ async def test_fetch_metadata_returns_subject_and_snippet():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fetch_content_returns_body_text():
|
||||
import base64
|
||||
scout = _make_scout()
|
||||
conn = GmailConnector()
|
||||
# decrypt_token is patched because the test doesn't set OAUTH_ENCRYPTION_KEY.
|
||||
with patch("app.scouts.connectors.gmail.decrypt_token", return_value={}), \
|
||||
patch("app.scouts.connectors.gmail.GmailClient") as MockClient:
|
||||
instance = MockClient.return_value
|
||||
instance.fetch_messages = AsyncMock(return_value=[
|
||||
MagicMock(id="msg-1", subject="S", sender="a@b", body_text="hello world",
|
||||
date=datetime.now(tz=timezone.utc), labels=[]),
|
||||
])
|
||||
body_data = base64.urlsafe_b64encode(b"hello world").decode()
|
||||
fake_message = {
|
||||
"id": "msg-1",
|
||||
"snippet": "hello world",
|
||||
"payload": {
|
||||
"mimeType": "text/plain",
|
||||
"headers": [
|
||||
{"name": "Subject", "value": "S"},
|
||||
{"name": "From", "value": "a@b"},
|
||||
],
|
||||
"body": {"data": body_data},
|
||||
},
|
||||
}
|
||||
with patch("app.scouts.connectors.gmail._get_gmail_service") as mock_svc:
|
||||
mock_svc.return_value.users().messages().get().execute.return_value = fake_message
|
||||
content = await conn.fetch_content(scout, ItemRef(source_msg_ref="msg-1"))
|
||||
assert content.body_text == "hello world"
|
||||
assert content.metadata.subject == "S"
|
||||
|
||||
Reference in New Issue
Block a user