refactor(contextual): drop floating WS frame, runner, and prompt fallback

contextual_request + contextual_scope_update are the only WS flows for ad-hoc contextual chat now. Floating system prompt constant removed; Langfuse 'floating_system' is deleted in a separate manual step. Also removes floating-agent LLM slot from llm.py and the associated LLM_MODEL_FLOATING_AGENT setting entry.
2026-05-15 18:53:01 +02:00
parent d63fd5f3b9
commit 052c7e3741
4 changed files with 6 additions and 447 deletions
--- a/app/api/routes/device_ws.py
+++ b/app/api/routes/device_ws.py
@@ -44,7 +44,7 @@ from app.config.settings import settings
 from app.core.agent_runner import trigger_pending_runs
 from app.core.agent_session_buffer import session_buffer
 from app.core.brief_agent import run_home_brief, run_project_brief
-from app.core.deep_agent import run_contextual_stream, run_floating_stream, run_home_stream, run_task_brief_research_stream
+from app.core.deep_agent import run_contextual_stream, run_home_stream, run_task_brief_research_stream
 from app.core.output_formatter import extract_canvas_block
 from app.core.device_manager import device_manager
 from app.core.memory_middleware import MemoryMiddleware
@@ -161,11 +161,6 @@ async def _message_loop(websocket: WebSocket, user_id: str) -> None:
                _handle_home_request(websocket, user_id, frame)
            )
        elif frame_type == WsFrameType.floating_request:
            asyncio.create_task(
                _handle_floating_request(websocket, user_id, frame)
            )
        elif frame_type == WsFrameType.brief_request:
            asyncio.create_task(
                _handle_brief_request(websocket, user_id, frame)
@@ -301,76 +296,6 @@ async def _handle_home_request(
    )
 async def _handle_floating_request(
    websocket: WebSocket,
    user_id: str,
    frame: dict,
 ) -> None:
    """Handle a floating_request frame — streams FloatingFormatter output back on the socket."""
    request_id = frame.get("request_id") or str(uuid4())
    message: str = frame.get("message", "")
    session_id: str = frame.get("session_id") or str(uuid4())
    scope: dict = frame.get("scope", {})
    logger.info(
        "device_ws: floating_request_start user=%s req=%s session=%s scope=%s msg=%s",
        user_id,
        request_id,
        session_id,
        json.dumps(scope, ensure_ascii=True)[:200],
        message[:200],
    )
    # ── Memory: enrich context before LLM call ────────────────────────
    async with async_session() as db:
        memory = MemoryMiddleware(db)
        memory_context = await memory.enrich_context(
            user_id,
            message,
            trace_id=request_id,
            session_id=session_id,
        )
    context: dict = {
        "conversation_history": frame.get("conversation_history", []),
        "scope": scope,
        "_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
        "format_prefs": frame.get("format_prefs"),
        **memory_context,
    }
    executor = await _make_ws_executor(websocket, user_id)
    set_client_executor(executor)
    response_chunks: list[str] = []
    try:
        event_stream = run_floating_stream(user_id, message, context)
        formatter = StreamFormatter(request_id=request_id)
        async for ws_frame in formatter.format(event_stream):
            await websocket.send_text(ws_frame.model_dump_json())
            if ws_frame.type == "stream_text":  # type: ignore[union-attr]
                response_chunks.append(ws_frame.chunk)  # type: ignore[union-attr]
    except Exception as exc:
        logger.error(
            "device_ws: floating_request failed user=%s req=%s: %s",
            user_id, request_id, exc,
        )
    finally:
        clear_client_executor()
    # ── Memory: store episode after response ──────────────────────────
    async with async_session() as db:
        memory = MemoryMiddleware(db)
        await memory.store_episode(
            user_id, session_id, message, "".join(response_chunks), trace_id=request_id
        )
    logger.info(
        "device_ws: floating_request_end user=%s req=%s session=%s response_chars=%d",
        user_id,
        request_id,
        session_id,
        len("".join(response_chunks)),
    )
 # ── v8 Contextual Sidebar Handlers ───────────────────────────────────
--- a/app/config/settings.py
+++ b/app/config/settings.py
@@ -23,9 +23,8 @@ class Settings(BaseSettings):
    LLM_EMBED_MODEL: str = "text-embedding-3-small"
    # Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
-    LLM_MODEL_CLASSIFIER: str = ""        # _infer_floating_domain (intent routing)
+    LLM_MODEL_CLASSIFIER: str = ""        # classifier (intent routing, future use)
    LLM_MODEL_HOME_AGENT: str = ""        # home-agent (run_single_agent / stream)
    LLM_MODEL_FLOATING_AGENT: str = ""    # floating-agent (contextual chat)
    LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
    LLM_MODEL_CLOUD_PROCESSOR: str = ""   # cloud-processor (agent_runner)
    LLM_MODEL_BRIEF_AGENT: str = ""            # brief-agent (home + project text briefs)
--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
@@ -1,4 +1,4 @@
-"""Single-agent runners for home and floating chat contexts."""
+"""Single-agent runners for home and contextual chat contexts."""
 from __future__ import annotations
@@ -7,7 +7,7 @@ import logging
 import re
 from datetime import date
 from collections.abc import AsyncGenerator
-from typing import Any, Literal
+from typing import Any
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from langchain_core.tools import tool
@@ -29,9 +29,6 @@ logger = logging.getLogger(__name__)
 MAX_HISTORY_TURNS = 20
 FloatingDomainType = Literal["task", "timeline", "project", "node"]
 FloatingDomainSection = Literal["task", "timeline", "note"]
 # Mapping of core-memory language values to natural-language names for prompts.
 _LANGUAGE_NAMES: dict[str, str] = {
    "en": "English", "it": "Italian", "es": "Spanish",
@@ -354,44 +351,6 @@ For "today" / "tomorrow" queries, prefer list_tasks_due_today / list_timelines_t
 {request_context}\
 """
 _FLOATING_SYSTEM_PROMPT = """\
 You are adiuvAI's floating executive assistant.{user_identity}
 You are pinned to a specific entity (task, timeline event, project, or note) and you stay strictly within that scope.
 Be a proactive partner: anticipate the next useful action and close with a concrete suggestion or a clarifying question — but stay terse, one short paragraph at most.
 # How you work
 - Use tools before answering anything factual. Never guess.
 - Stay in the floating scope (see Request context). If the user asks something outside scope, answer briefly and suggest opening the home assistant.
 - Match the user's tone preference. Default to warm-but-direct.
 - When the user asks to remember, forget, or update something, use memory tools.
 # Filter discipline
 - Never set the `assignee` filter on list_tasks/count_tasks unless the user explicitly names a person ("Marco's tasks") or refers to themselves ("my tasks", "assigned to me", "mine").
 - The user's own name in the User profile block is for context only — it is NOT a default filter.
 - When in doubt, omit `assignee` and return the global result.
 # Output format
 Plain text only. Do NOT output XML/HTML-like tags such as <task>, <project>, <note>, <timeline>, or any bracketed-id wrappers, and do NOT output <chart> blocks — those are for the home assistant.
 # Date filtering
 {date_context}
 When filtering by date, take dueDateFrom / dueDateTo (ms epoch UTC) verbatim from the DATE CONTEXT boundary table above. Do NOT compute boundaries from now_ms yourself.
 For specific dates not listed, compute local-midnight in the user timezone and convert to UTC ms.
 # Language
 {language_instruction}
 # Known people & projects
 {relational_memory}
 # Behavioral hints
 {proactive_hints}
 # Request context
 {request_context}\
 """
 _CONTEXTUAL_SYSTEM_PROMPT = """You are adiuvAI's contextual assistant. The user is working inside the app and has opened a side chat anchored to a specific view ("current view"). Help them act on that view: recap, plan, create entities, answer questions.
 Rules:
@@ -486,19 +445,6 @@ Stay terse — your principal is a busy executive.
 {request_context}\
 """
 _FLOATING_DOMAIN_CLASSIFIER_PROMPT = (
    "You are a strict domain classifier for websocket floating requests. "
    "Return ONLY a JSON object with keys: type, id, section. "
    "Allowed type values: task, timeline, project, node. "
    "Allowed section values: task, timeline, note, or null. "
    "Rules: infer from user message intent first; do not blindly trust scope.type. "
    "If user asks tasks/timeline/notes for a project, set type=project and section accordingly. "
    "If project id is unknown but context.resolved_project_id exists, use it as id. "
    "If id is unknown, use null. "
    "No markdown, no prose, JSON only."
 )
 def _as_text(content: Any) -> str:
    if content is None:
        return ""
@@ -727,70 +673,6 @@ def _normalize_tagged_list_lines(text: str, message: str) -> str:
    return "\n".join(output_lines)
 _GENERIC_TAG_RE = re.compile(r"</?(task|project|note|timeline|chart)>", re.IGNORECASE)
 _BRACKETED_ID_RE = re.compile(r"\[(?:[0-9a-fA-F-]{8,}|[A-Za-z0-9_-]{8,})\]")
 _FLOATING_EMPTY_FALLBACK = "No results found."
 def _strip_floating_markup_fragment(text: str) -> str:
    if not text:
        return text
    cleaned = _GENERIC_TAG_RE.sub("", text)
    return _BRACKETED_ID_RE.sub("", cleaned)
 def _strip_floating_markup(text: str) -> str:
    """Ensure floating responses stay plain text with no XML-like tag wrappers."""
    if not text:
        return text
    cleaned = _strip_floating_markup_fragment(text)
    # Collapse excessive spaces introduced by tag/id removal while preserving lines.
    lines = [re.sub(r"[ \t]{2,}", " ", line).strip() for line in cleaned.splitlines()]
    return "\n".join(line for line in lines if line)
 def _fallback_from_raw_floating_text(raw_text: str) -> str:
    fallback = _strip_floating_markup_fragment(raw_text or "")
    fallback = re.sub(r"[ \t]{2,}", " ", fallback).strip()
    return fallback or _FLOATING_EMPTY_FALLBACK
 class _FloatingStreamSanitizer:
    """Streaming sanitizer that removes floating markup without buffering the full answer."""
    def __init__(self) -> None:
        self._pending = ""
    @staticmethod
    def _split_safe_boundary(text: str) -> tuple[str, str]:
        boundary = len(text)
        last_lt = text.rfind("<")
        if last_lt != -1 and ">" not in text[last_lt:]:
            boundary = min(boundary, last_lt)
        last_lb = text.rfind("[")
        if last_lb != -1 and "]" not in text[last_lb:]:
            boundary = min(boundary, last_lb)
        if boundary == len(text):
            return text, ""
        return text[:boundary], text[boundary:]
    def feed(self, chunk: str) -> str:
        combined = f"{self._pending}{chunk}"
        safe_text, self._pending = self._split_safe_boundary(combined)
        return _strip_floating_markup_fragment(safe_text)
    def finalize(self) -> str:
        # Drop dangling unfinished wrappers at the very end.
        tail = re.sub(r"<[^>\n]*$", "", self._pending)
        tail = re.sub(r"\[[^\]\n]*$", "", tail)
        self._pending = ""
        return _strip_floating_markup_fragment(tail)
 def _normalize_memory_label(path_or_label: str) -> str:
    value = path_or_label.strip()
    if value.startswith("/memories/"):
@@ -971,168 +853,6 @@ def _all_tools_for_user(user_id: str, trace_id: str | None) -> list[Any]:
    return [*_all_tools(), *_memory_tools(user_id, trace_id)]
 def _detect_domain_section(message: str) -> FloatingDomainSection | None:
    lowered = message.lower()
    if any(keyword in lowered for keyword in ["timeline", "milestone", "release", "schedule"]):
        return "timeline"
    if any(keyword in lowered for keyword in ["task", "tasks", "todo", "attivit", "azione"]):
        return "task"
    if any(keyword in lowered for keyword in ["note", "notes", "memo", "document"]):
        return "note"
    return None
 def _normalize_domain_payload(payload: dict[str, Any], fallback_id: str | None) -> dict[str, str | None]:
    type_raw = str(payload.get("type") or "").strip().lower()
    domain_type: FloatingDomainType = "task"
    if type_raw in {"task", "timeline", "project", "node"}:
        domain_type = type_raw
    id_value = payload.get("id")
    domain_id = id_value if isinstance(id_value, str) and id_value.strip() else None
    if domain_type == "project" and not domain_id:
        domain_id = fallback_id
    section_raw = payload.get("section")
    section: FloatingDomainSection | None = None
    if isinstance(section_raw, str):
        section_candidate = section_raw.strip().lower()
        if section_candidate in {"task", "timeline", "note"}:
            section = section_candidate
    if domain_type != "project":
        section = None
    return {
        "type": domain_type,
        "id": domain_id,
        "section": section,
    }
 def _parse_json_object(text: str) -> dict[str, Any] | None:
    raw = text.strip()
    if not raw:
        return None
    try:
        parsed = json.loads(raw)
        return parsed if isinstance(parsed, dict) else None
    except json.JSONDecodeError:
        pass
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if not match:
        return None
    try:
        parsed = json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None
 def _infer_floating_domain_rule_based(message: str, context: dict[str, Any]) -> dict[str, str | None]:
    section = _detect_domain_section(message)
    scope = context.get("scope") if isinstance(context, dict) else None
    resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
    project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
    if isinstance(scope, dict):
        scope_type = str(scope.get("type") or "").strip().lower()
        scope_id = scope.get("id")
        scope_id_value = scope_id if isinstance(scope_id, str) and scope_id else None
        if scope_type in {"task", "tasks"}:
            return {"type": "task", "id": scope_id_value, "section": None}
        if scope_type in {"project", "projects"}:
            project_scope_id = scope_id_value or project_id
            return {
                "type": "project",
                "id": project_scope_id,
                "section": section,
            }
        if scope_type in {"note", "notes"}:
            return {
                "type": "node",
                "id": scope_id_value,
                "section": None,
            }
        if scope_type in {"timeline", "timelines"}:
            return {"type": "timeline", "id": scope_id_value, "section": None}
    lowered = message.lower()
    if any(keyword in lowered for keyword in ["project", "progetto", "client"]) or project_id:
        return {
            "type": "project",
            "id": project_id,
            "section": section,
        }
    if section == "timeline":
        return {"type": "timeline", "id": None, "section": None}
    if section == "note":
        return {"type": "node", "id": None, "section": None}
    return {"type": "task", "id": None, "section": None}
 async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[str, str | None]:
    resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
    project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
    classifier_context = {
        "scope": context.get("scope") if isinstance(context.get("scope"), dict) else None,
        "resolved_project_id": project_id,
    }
    try:
        llm = get_agent_llm("classifier")
        classifier_messages = [
            SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT),
            HumanMessage(
                content=(
                    f"Message:\n{message}\n\n"
                    f"Context:\n{json.dumps(classifier_context, ensure_ascii=True)}"
                )
            ),
        ]
        lf = get_langfuse()
        _, classifier_prompt_obj = get_prompt_or_fallback(
            "floating_domain_classifier", _FLOATING_DOMAIN_CLASSIFIER_PROMPT
        )
        # Extract user/session from context for Langfuse attribution
        _debug = context.get("_debug") if isinstance(context, dict) else None
        _lf_user = (_debug or {}).get("user_id") if isinstance(_debug, dict) else None
        _lf_session = (_debug or {}).get("session_id") if isinstance(_debug, dict) else None
        with langfuse_context(user_id=_lf_user, session_id=_lf_session):
            if lf:
                with lf.start_as_current_observation(
                    as_type="generation",
                    name="floating-classifier",
                    model=model_for_agent("classifier"),
                    prompt=classifier_prompt_obj,
                    input=classifier_messages,
                ) as gen:
                    response = await llm.ainvoke(classifier_messages)
                    gen.update(output=_as_text(response.content), usage_details=extract_usage(response))
            else:
                response = await llm.ainvoke(classifier_messages)
        parsed = _parse_json_object(_as_text(response.content))
        if parsed is not None:
            domain = _normalize_domain_payload(parsed, project_id)
            logger.info(
                "deep_agent: floating_domain_classified type=%s id=%s section=%s",
                domain.get("type"),
                domain.get("id"),
                domain.get("section"),
            )
            return domain
        logger.warning("deep_agent: floating_domain classifier returned non-json output")
    except Exception as exc:
        logger.warning("deep_agent: floating_domain classifier failed: %s", exc)
    return _infer_floating_domain_rule_based(message, context)
 def _history_to_messages(history: list[dict[str, str]] | None) -> list[Any]:
    if not history:
        return []
@@ -1461,25 +1181,6 @@ async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
    return _normalize_tagged_list_lines(response, message)
 async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, dict[str, str | None]]:
    prepared_context = await _prepare_context(message, context)
    domain = await _infer_floating_domain(message, prepared_context)
    system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context)
    response = await _run_single_agent(
        user_id=user_id,
        system_prompt=system_prompt,
        message=message,
        context=prepared_context,
        langfuse_prompt=langfuse_prompt,
        agent_name="floating-agent",
        conversation_history=context.get("conversation_history"),
    )
    sanitized = _strip_floating_markup(response)
    if not sanitized and response:
        sanitized = _fallback_from_raw_floating_text(response)
    return sanitized, domain
 async def run_home_stream(
    user_id: str,
    message: str,
@@ -1526,71 +1227,6 @@ async def run_home_stream(
        yield "token", normalized
 async def run_floating_stream(
    user_id: str,
    message: str,
    context: dict[str, Any],
 ) -> AsyncGenerator[tuple[str, Any], None]:
    prepared_context = await _prepare_context(message, context)
    domain = await _infer_floating_domain(message, prepared_context)
    yield "floating_domain", domain
    brief_mode: bool = bool(context.get("brief_mode"))
    briefing_context_text: str = str(context.get("briefing_context") or "").strip()
    if brief_mode and briefing_context_text:
        # Stage 2: inject briefing as ground truth context.
        # Pre-substitute {briefing_context} in the template (handles both Langfuse {{}} and fallback {})
        # before compile_prompt sees the remaining standard variables.
        template, langfuse_prompt = get_prompt_or_fallback(
            "task_brief_followup_system",
            _TASK_BRIEF_FOLLOWUP_SYSTEM_PROMPT,
        )
        system_prompt = compile_prompt(
            template, langfuse_prompt,
            date_context=_datetime_context_injection(prepared_context).strip(),
            language_instruction=_language_instruction(prepared_context).strip(),
            user_identity=_user_identity_injection(prepared_context).strip(),
            relational_memory=_relational_memory_injection(prepared_context).strip(),
            proactive_hints=_proactive_hints_injection(prepared_context).strip(),
            request_context=_request_context_block(prepared_context),
            briefing_context=briefing_context_text,
        )
    else:
        system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context)
    sanitizer = _FloatingStreamSanitizer()
    emitted_sanitized = False
    raw_chunks: list[str] = []
    async for event in _run_single_agent_stream(
        user_id=user_id,
        system_prompt=system_prompt,
        message=message,
        context=prepared_context,
        langfuse_prompt=langfuse_prompt,
        agent_name="floating-agent",
        conversation_history=context.get("conversation_history"),
    ):
        event_type, data = event
        if event_type != "token":
            yield event
            continue
        raw_chunk = str(data or "")
        raw_chunks.append(raw_chunk)
        sanitized_chunk = sanitizer.feed(raw_chunk)
        if sanitized_chunk:
            emitted_sanitized = True
            yield "token", sanitized_chunk
    tail = sanitizer.finalize()
    if tail:
        emitted_sanitized = True
        yield "token", tail
    if not emitted_sanitized and raw_chunks:
        yield "token", _fallback_from_raw_floating_text("".join(raw_chunks))
 async def run_contextual_stream(
    user_id: str,
    message: str,
@@ -1599,8 +1235,8 @@ async def run_contextual_stream(
 ) -> AsyncGenerator[tuple[str, Any], None]:
    """Run the contextual agent for a single user turn.
-    Mirrors run_floating_stream's plumbing but injects the rendered scope
+    Injects the rendered scope block into the system prompt and exposes
-    block into the system prompt and exposes the contextual tool set.
+    the contextual tool set.
    Note-edit tools (propose_note_edit) are intentionally excluded.
    *context contract*: callers MUST include ``context["_debug"]["session_id"]``
--- a/app/core/llm.py
+++ b/app/core/llm.py
@@ -103,7 +103,6 @@ def get_llm(
 _AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
    "classifier":          lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
    "home-agent":          lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
    "floating-agent":      lambda: settings.LLM_MODEL_FLOATING_AGENT or settings.LLM_MODEL,
    "unified-processor":   lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
    "cloud-processor":     lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
    "brief-agent":         lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,