# api/app/core/scout_session_buffer.py

"""In-process TTL buffer for per-session LangChain message history.

Stores the full message list (including AIMessage with tool_calls and ToolMessage)
keyed by (user_id, session_id), so agents can reconstruct tool-call context across
conversation turns without that context being lost over the wire.

Single-process only. For multi-worker deployments, replace the _SessionBuffer
implementation with one backed by Redis (serialize LangChain messages to dicts via
message_to_dict / messages_from_dict from langchain_core.messages).
"""
from __future__ import annotations

import time
from threading import Lock

from langchain_core.messages import BaseMessage

# Idle lifetime of a session, in seconds (30 minutes).  Compared against the
# monotonic timestamp stored with each session entry.
SESSION_TTL_SECONDS = 30 * 60
# Upper bound on messages retained per session, so memory use stays bounded.
MAX_MESSAGES_PER_SESSION = 80


class _SessionBuffer:
    def __init__(self) -> None:
        self._store: dict[tuple[str, str], tuple[float, list[BaseMessage]]] = {}
        self._lock = Lock()

    def _evict_stale(self) -> None:
        now = time.monotonic()
        stale = [k for k, (ts, _) in self._store.items() if now - ts > SESSION_TTL_SECONDS]
        for k in stale:
            del self._store[k]

    def get(self, user_id: str, session_id: str) -> list[BaseMessage] | None:
        key = (user_id, session_id)
        with self._lock:
            entry = self._store.get(key)
            if entry is None:
                return None
            ts, msgs = entry
            if time.monotonic() - ts > SESSION_TTL_SECONDS:
                del self._store[key]
                return None
            self._store[key] = (time.monotonic(), msgs)
            return list(msgs)

    def set(self, user_id: str, session_id: str, messages: list[BaseMessage]) -> None:
        key = (user_id, session_id)
        capped = messages[-MAX_MESSAGES_PER_SESSION:]
        with self._lock:
            self._evict_stale()
            self._store[key] = (time.monotonic(), capped)

    def clear(self, user_id: str, session_id: str) -> None:
        with self._lock:
            self._store.pop((user_id, session_id), None)

    def append_system_message(self, user_id: str, session_id: str, text: str) -> None:
        """Append a synthetic system message to the buffer for the given session.

        Creates the session slot if it does not yet exist.  Used by the
        contextual_scope_update handler to inject navigation events without
        making an LLM call.
        """
        from langchain_core.messages import SystemMessage  # noqa: PLC0415

        key = (user_id, session_id)
        with self._lock:
            entry = self._store.get(key)
            if entry is None:
                msgs: list[BaseMessage] = [SystemMessage(content=text)]
            else:
                _, existing = entry
                msgs = list(existing) + [SystemMessage(content=text)]
            capped = msgs[-MAX_MESSAGES_PER_SESSION:]
            self._store[key] = (time.monotonic(), capped)


class ContextualBufferProxy:
    """Session-bound facade over a _SessionBuffer.

    Remembers one ``(user_id, session_id)`` pair at construction time so that
    call sites only need to supply the message text:
    ``proxy.append_system_message(text)``.  Returned by get_session_buffer().
    """

    def __init__(self, buf: "_SessionBuffer", user_id: str, session_id: str) -> None:
        # Capture the target buffer and the session identity once.
        self._buf, self._user_id, self._session_id = buf, user_id, session_id

    def append_system_message(self, text: str) -> None:
        """Forward *text* to the wrapped buffer for the bound user and session."""
        owner, session = self._user_id, self._session_id
        self._buf.append_system_message(owner, session, text)


# Module-level singleton — same pattern as _pending_states in api/app/api/routes/auth.py
# Created once at import time; every importer of this module shares this one
# instance (and therefore the same in-memory store and lock).
session_buffer: _SessionBuffer = _SessionBuffer()