fix(scouts): fetch single Gmail message instead of bulk in fetch_content
Replace bulk GmailClient.fetch_messages() + linear search with a direct service.users().messages().get(format="full") call. Adds _extract_plain_text_body helper for recursive MIME part walking. Update test to patch _get_gmail_service. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -16,12 +16,35 @@ from datetime import datetime, timezone
|
||||
|
||||
from app.config.settings import settings
|
||||
from app.integrations import decrypt_token
|
||||
from app.integrations.gmail import GmailClient
|
||||
from app.scouts.connectors.base import ItemContent, ItemMetadata, ItemRef
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_plain_text_body(payload: dict) -> str:
|
||||
"""Recursively walk a Gmail message payload to find text/plain content."""
|
||||
import base64
|
||||
mime_type = payload.get("mimeType", "")
|
||||
if mime_type == "text/plain":
|
||||
data = payload.get("body", {}).get("data", "")
|
||||
if data:
|
||||
return base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace")
|
||||
return ""
|
||||
if mime_type.startswith("multipart/"):
|
||||
for part in payload.get("parts", []):
|
||||
text = _extract_plain_text_body(part)
|
||||
if text:
|
||||
return text
|
||||
# text/html fallback: strip tags rudimentarily if no text/plain part
|
||||
if mime_type == "text/html":
|
||||
data = payload.get("body", {}).get("data", "")
|
||||
if data:
|
||||
import re
|
||||
html = base64.urlsafe_b64decode(data + "==").decode("utf-8", errors="replace")
|
||||
return re.sub(r"<[^>]+>", " ", html)
|
||||
return ""
|
||||
|
||||
|
||||
def _get_gmail_service(scout):
|
||||
"""Return a synchronous Google API client for low-level metadata/history calls."""
|
||||
from googleapiclient.discovery import build
|
||||
@@ -118,32 +141,27 @@ class GmailConnector:
|
||||
# ── fetch_content ─────────────────────────────────────────────────────
|
||||
|
||||
async def fetch_content(self, scout, ref: ItemRef) -> ItemContent:
|
||||
"""Fetch full body text via GmailClient — transient, must not be persisted."""
|
||||
creds_info = decrypt_token(scout.oauth_token_encrypted)
|
||||
client = GmailClient(creds_info)
|
||||
# fetch_messages returns EmailMessage dataclasses with body_text already
|
||||
# extracted and decoded. We pass an empty filter to avoid narrowing by
|
||||
# date — callers should only invoke fetch_content for known-new messages.
|
||||
messages = await client.fetch_messages(filter_config=None, since=None)
|
||||
"""Fetch full body text for a single message — transient, must not be persisted."""
|
||||
|
||||
# Pick the message matching our ref (or fall back to first if only one returned).
|
||||
email_msg = next(
|
||||
(m for m in messages if m.id == ref.source_msg_ref),
|
||||
messages[0] if messages else None,
|
||||
)
|
||||
if email_msg is None:
|
||||
raise ValueError(f"Message {ref.source_msg_ref!r} not found via GmailClient")
|
||||
def _sync() -> ItemContent:
|
||||
service = _get_gmail_service(scout)
|
||||
msg = service.users().messages().get(
|
||||
userId="me", id=ref.source_msg_ref, format="full",
|
||||
).execute()
|
||||
headers = {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
|
||||
body_text = _extract_plain_text_body(msg.get("payload", {}))
|
||||
return ItemContent(
|
||||
metadata=ItemMetadata(
|
||||
subject=headers.get("Subject"),
|
||||
sender=headers.get("From"),
|
||||
snippet=msg.get("snippet"),
|
||||
received_at=None,
|
||||
),
|
||||
body_text=body_text,
|
||||
raw_headers=headers,
|
||||
)
|
||||
|
||||
return ItemContent(
|
||||
metadata=ItemMetadata(
|
||||
subject=email_msg.subject,
|
||||
sender=email_msg.sender,
|
||||
snippet=None,
|
||||
received_at=email_msg.date,
|
||||
),
|
||||
body_text=email_msg.body_text,
|
||||
raw_headers={},
|
||||
)
|
||||
return await asyncio.to_thread(_sync)
|
||||
|
||||
# ── archive ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user