"""Base types for the preprocessor system."""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass
class PreprocessResult:
    """Output of a preprocessor handler.

    Attributes
    ----------
    content_type:
        The detected content type (e.g. ``"email_html"``, ``"plain_text"``).
    clean_text:
        Human-readable text stripped of markup/binary noise.
    metadata:
        Dict of extracted metadata (keys vary by handler).  Common keys:
        ``subject``, ``from``, ``to``, ``date``, ``filename``.  Optional;
        when omitted, each instance gets its own new empty dict.
    """

    content_type: str
    clean_text: str
    # default_factory (rather than a literal ``{}``) so instances never share
    # one mutable dict.
    metadata: dict = field(default_factory=dict)