diff --git a/app/scouts/__init__.py b/app/scouts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/scouts/connectors/__init__.py b/app/scouts/connectors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/scouts/connectors/base.py b/app/scouts/connectors/base.py
new file mode 100644
index 0000000..2cbbb7c
--- /dev/null
+++ b/app/scouts/connectors/base.py
@@ -0,0 +1,90 @@
+"""Source connector Protocol and shared item types.
+
+A SourceConnector adapts a third-party data source (Gmail, Slack, ...) to the
+shared ScoutEngine interface. Each connector owns:
+
+  * how to enumerate new items since the last poll (``list_new``)
+  * how to fetch a single item's metadata cheaply (``fetch_metadata``)
+  * how to fetch a single item's full content for in-memory triage
+    (``fetch_content``) — this content MUST NOT be persisted by the engine
+  * how to archive/trash an item (``archive``) for spam handling
+  * optional push-notification setup (``setup_watch`` / ``renew_watch``)
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Literal, Protocol
+
+from pydantic import BaseModel, Field
+
+
+# Handle for one item in a source: ``list_new`` returns these, and every
+# per-item connector method takes one as ``ref``.
+class ItemRef(BaseModel):
+    # Presumably the source's own identifier for the message; verify against
+    # the concrete connectors.
+    source_msg_ref: str
+    received_at: datetime | None = None
+
+
+# Summary fields for one item. Every field is optional and defaults to None.
+class ItemMetadata(BaseModel):
+    subject: str | None = None
+    sender: str | None = None
+    snippet: str | None = None
+    received_at: datetime | None = None
+
+
+# Full content of one item, as returned by ``fetch_content``. Per the module
+# docstring it is meant for in-memory triage and must not be persisted.
+class ItemContent(BaseModel):
+    metadata: ItemMetadata
+    body_text: str
+    # default_factory gives each instance its own fresh, empty dict.
+    raw_headers: dict[str, str] = Field(default_factory=dict)
+
+
+# Result of triaging one item: a label, a free-text reason and a confidence.
+class TriageVerdict(BaseModel):
+    verdict: Literal["relevant", "spam"]
+    reason: str
+    # Validated by pydantic: 0.0 <= confidence <= 1.0 (both bounds inclusive).
+    confidence: float = Field(ge=0.0, le=1.0)
+
+
+class SourceConnector(Protocol):
+    """Adapter for a third-party data source (Gmail, Slack, ...).
+
+    This is a structural ``typing.Protocol``: a connector conforms by defining
+    ``source_type`` and the coroutine methods below, without having to inherit
+    from this class. Every method takes a ``scout`` argument, which is left
+    unannotated here.
+    """
+
+    source_type: str  # e.g. "gmail"
+
+    async def list_new(self, scout) -> list[ItemRef]:
+        """Enumerate the items that are new since the last poll."""
+
+    async def fetch_metadata(self, scout, ref: ItemRef) -> ItemMetadata:
+        """Fetch a single item's metadata cheaply."""
+
+    async def fetch_content(self, scout, ref: ItemRef) -> ItemContent:
+        """Fetch a single item's full content for in-memory triage.
+
+        The returned content MUST NOT be persisted by the engine.
+        """
+
+    async def archive(self, scout, ref: ItemRef) -> None:
+        """Archive/trash an item (used for spam handling)."""
+
+    # NOTE(review): the module docstring calls push-notification setup
+    # "optional", but both methods below are declared unconditionally, so a
+    # type checker requires every connector to define them (presumably as
+    # no-ops for sources without a push channel). Confirm the intended contract.
+    async def setup_watch(self, scout) -> None:
+        """Set up push notifications for the source."""
+
+    async def renew_watch(self, scout) -> None:
+        """Renew the source's push-notification watch."""
diff --git a/tests/test_scout_connectors_base.py b/tests/test_scout_connectors_base.py
new file mode 100644
index 0000000..a6ab60d
--- /dev/null
+++ b/tests/test_scout_connectors_base.py
@@ -0,0 +1,67 @@
+"""Tests for the SourceConnector base protocol and shared types."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+
+from app.scouts.connectors.base import (
+    ItemContent,
+    ItemMetadata,
+    ItemRef,
+    TriageVerdict,
+)
+
+
+def test_item_ref_round_trips_through_pydantic():
+    ref = ItemRef(source_msg_ref="abc123", received_at=datetime.now(tz=timezone.utc))
+    parsed = ItemRef.model_validate(ref.model_dump())
+    assert parsed.source_msg_ref == "abc123"
+    assert parsed.received_at == ref.received_at
+
+
+def test_item_metadata_allows_all_optional():
+    meta = ItemMetadata()
+    assert meta.subject is None
+    assert meta.sender is None
+    assert meta.snippet is None
+    assert meta.received_at is None
+
+
+def test_item_content_requires_metadata_and_body():
+    content = ItemContent(
+        metadata=ItemMetadata(subject="hi"),
+        body_text="hello world",
+        raw_headers={"X-Foo": "bar"},
+    )
+    assert content.metadata.subject == "hi"
+    assert content.body_text == "hello world"
+    assert content.raw_headers["X-Foo"] == "bar"
+
+    # The test name promises "requires": omitting either field must be rejected.
+    with pytest.raises(ValueError):
+        ItemContent(body_text="hello world")
+    with pytest.raises(ValueError):
+        ItemContent(metadata=ItemMetadata())
+
+    # raw_headers is the only optional field and defaults to an empty dict.
+    assert ItemContent(metadata=ItemMetadata(), body_text="").raw_headers == {}
+
+
+def test_triage_verdict_constraints():
+    v = TriageVerdict(verdict="relevant", reason="contains task language", confidence=0.92)
+    assert v.verdict == "relevant"
+
+    with pytest.raises(ValueError):
+        TriageVerdict(verdict="meh", reason="x", confidence=0.5)  # bad enum value
+
+    # confidence is constrained to [0.0, 1.0]; just outside either bound fails.
+    for out_of_range in (-0.01, 1.01):
+        with pytest.raises(ValueError):
+            TriageVerdict(verdict="spam", reason="x", confidence=out_of_range)
+
+    # Both bounds are inclusive (ge / le).
+    for bound in (0.0, 1.0):
+        edge = TriageVerdict(verdict="spam", reason="x", confidence=bound)
+        assert edge.confidence == bound