refactor(contextual): drop floating WS frame, runner, and prompt fallback
contextual_request + contextual_scope_update are the only WS flows for ad-hoc contextual chat now. Floating system prompt constant removed; Langfuse 'floating_system' is deleted in a separate manual step. Also removes floating-agent LLM slot from llm.py and the associated LLM_MODEL_FLOATING_AGENT setting entry.
This commit is contained in:
@@ -44,7 +44,7 @@ from app.config.settings import settings
|
|||||||
from app.core.agent_runner import trigger_pending_runs
|
from app.core.agent_runner import trigger_pending_runs
|
||||||
from app.core.agent_session_buffer import session_buffer
|
from app.core.agent_session_buffer import session_buffer
|
||||||
from app.core.brief_agent import run_home_brief, run_project_brief
|
from app.core.brief_agent import run_home_brief, run_project_brief
|
||||||
from app.core.deep_agent import run_contextual_stream, run_floating_stream, run_home_stream, run_task_brief_research_stream
|
from app.core.deep_agent import run_contextual_stream, run_home_stream, run_task_brief_research_stream
|
||||||
from app.core.output_formatter import extract_canvas_block
|
from app.core.output_formatter import extract_canvas_block
|
||||||
from app.core.device_manager import device_manager
|
from app.core.device_manager import device_manager
|
||||||
from app.core.memory_middleware import MemoryMiddleware
|
from app.core.memory_middleware import MemoryMiddleware
|
||||||
@@ -161,11 +161,6 @@ async def _message_loop(websocket: WebSocket, user_id: str) -> None:
|
|||||||
_handle_home_request(websocket, user_id, frame)
|
_handle_home_request(websocket, user_id, frame)
|
||||||
)
|
)
|
||||||
|
|
||||||
elif frame_type == WsFrameType.floating_request:
|
|
||||||
asyncio.create_task(
|
|
||||||
_handle_floating_request(websocket, user_id, frame)
|
|
||||||
)
|
|
||||||
|
|
||||||
elif frame_type == WsFrameType.brief_request:
|
elif frame_type == WsFrameType.brief_request:
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
_handle_brief_request(websocket, user_id, frame)
|
_handle_brief_request(websocket, user_id, frame)
|
||||||
@@ -301,76 +296,6 @@ async def _handle_home_request(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _handle_floating_request(
|
|
||||||
websocket: WebSocket,
|
|
||||||
user_id: str,
|
|
||||||
frame: dict,
|
|
||||||
) -> None:
|
|
||||||
"""Handle a floating_request frame — streams FloatingFormatter output back on the socket."""
|
|
||||||
request_id = frame.get("request_id") or str(uuid4())
|
|
||||||
message: str = frame.get("message", "")
|
|
||||||
session_id: str = frame.get("session_id") or str(uuid4())
|
|
||||||
scope: dict = frame.get("scope", {})
|
|
||||||
logger.info(
|
|
||||||
"device_ws: floating_request_start user=%s req=%s session=%s scope=%s msg=%s",
|
|
||||||
user_id,
|
|
||||||
request_id,
|
|
||||||
session_id,
|
|
||||||
json.dumps(scope, ensure_ascii=True)[:200],
|
|
||||||
message[:200],
|
|
||||||
)
|
|
||||||
|
|
||||||
# ── Memory: enrich context before LLM call ────────────────────────
|
|
||||||
async with async_session() as db:
|
|
||||||
memory = MemoryMiddleware(db)
|
|
||||||
memory_context = await memory.enrich_context(
|
|
||||||
user_id,
|
|
||||||
message,
|
|
||||||
trace_id=request_id,
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
context: dict = {
|
|
||||||
"conversation_history": frame.get("conversation_history", []),
|
|
||||||
"scope": scope,
|
|
||||||
"_debug": {"request_id": request_id, "session_id": session_id, "user_id": user_id},
|
|
||||||
"format_prefs": frame.get("format_prefs"),
|
|
||||||
**memory_context,
|
|
||||||
}
|
|
||||||
|
|
||||||
executor = await _make_ws_executor(websocket, user_id)
|
|
||||||
set_client_executor(executor)
|
|
||||||
response_chunks: list[str] = []
|
|
||||||
try:
|
|
||||||
event_stream = run_floating_stream(user_id, message, context)
|
|
||||||
formatter = StreamFormatter(request_id=request_id)
|
|
||||||
async for ws_frame in formatter.format(event_stream):
|
|
||||||
await websocket.send_text(ws_frame.model_dump_json())
|
|
||||||
if ws_frame.type == "stream_text": # type: ignore[union-attr]
|
|
||||||
response_chunks.append(ws_frame.chunk) # type: ignore[union-attr]
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error(
|
|
||||||
"device_ws: floating_request failed user=%s req=%s: %s",
|
|
||||||
user_id, request_id, exc,
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
clear_client_executor()
|
|
||||||
|
|
||||||
# ── Memory: store episode after response ──────────────────────────
|
|
||||||
async with async_session() as db:
|
|
||||||
memory = MemoryMiddleware(db)
|
|
||||||
await memory.store_episode(
|
|
||||||
user_id, session_id, message, "".join(response_chunks), trace_id=request_id
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
"device_ws: floating_request_end user=%s req=%s session=%s response_chars=%d",
|
|
||||||
user_id,
|
|
||||||
request_id,
|
|
||||||
session_id,
|
|
||||||
len("".join(response_chunks)),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ── v8 Contextual Sidebar Handlers ───────────────────────────────────
|
# ── v8 Contextual Sidebar Handlers ───────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -23,9 +23,8 @@ class Settings(BaseSettings):
|
|||||||
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
LLM_EMBED_MODEL: str = "text-embedding-3-small"
|
||||||
|
|
||||||
# Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
|
# Per-agent model overrides. Leave empty to fall back to LLM_MODEL.
|
||||||
LLM_MODEL_CLASSIFIER: str = "" # _infer_floating_domain (intent routing)
|
LLM_MODEL_CLASSIFIER: str = "" # classifier (intent routing, future use)
|
||||||
LLM_MODEL_HOME_AGENT: str = "" # home-agent (run_single_agent / stream)
|
LLM_MODEL_HOME_AGENT: str = "" # home-agent (run_single_agent / stream)
|
||||||
LLM_MODEL_FLOATING_AGENT: str = "" # floating-agent (contextual chat)
|
|
||||||
LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
|
LLM_MODEL_UNIFIED_PROCESSOR: str = "" # unified-processor (agent_runner)
|
||||||
LLM_MODEL_CLOUD_PROCESSOR: str = "" # cloud-processor (agent_runner)
|
LLM_MODEL_CLOUD_PROCESSOR: str = "" # cloud-processor (agent_runner)
|
||||||
LLM_MODEL_BRIEF_AGENT: str = "" # brief-agent (home + project text briefs)
|
LLM_MODEL_BRIEF_AGENT: str = "" # brief-agent (home + project text briefs)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
"""Single-agent runners for home and floating chat contexts."""
|
"""Single-agent runners for home and contextual chat contexts."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -7,7 +7,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
from typing import Any, Literal
|
from typing import Any
|
||||||
|
|
||||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
|
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
@@ -29,9 +29,6 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
MAX_HISTORY_TURNS = 20
|
MAX_HISTORY_TURNS = 20
|
||||||
|
|
||||||
FloatingDomainType = Literal["task", "timeline", "project", "node"]
|
|
||||||
FloatingDomainSection = Literal["task", "timeline", "note"]
|
|
||||||
|
|
||||||
# Mapping of core-memory language values to natural-language names for prompts.
|
# Mapping of core-memory language values to natural-language names for prompts.
|
||||||
_LANGUAGE_NAMES: dict[str, str] = {
|
_LANGUAGE_NAMES: dict[str, str] = {
|
||||||
"en": "English", "it": "Italian", "es": "Spanish",
|
"en": "English", "it": "Italian", "es": "Spanish",
|
||||||
@@ -354,44 +351,6 @@ For "today" / "tomorrow" queries, prefer list_tasks_due_today / list_timelines_t
|
|||||||
{request_context}\
|
{request_context}\
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_FLOATING_SYSTEM_PROMPT = """\
|
|
||||||
You are adiuvAI's floating executive assistant.{user_identity}
|
|
||||||
You are pinned to a specific entity (task, timeline event, project, or note) and you stay strictly within that scope.
|
|
||||||
Be a proactive partner: anticipate the next useful action and close with a concrete suggestion or a clarifying question — but stay terse, one short paragraph at most.
|
|
||||||
|
|
||||||
# How you work
|
|
||||||
- Use tools before answering anything factual. Never guess.
|
|
||||||
- Stay in the floating scope (see Request context). If the user asks something outside scope, answer briefly and suggest opening the home assistant.
|
|
||||||
- Match the user's tone preference. Default to warm-but-direct.
|
|
||||||
- When the user asks to remember, forget, or update something, use memory tools.
|
|
||||||
|
|
||||||
# Filter discipline
|
|
||||||
- Never set the `assignee` filter on list_tasks/count_tasks unless the user explicitly names a person ("Marco's tasks") or refers to themselves ("my tasks", "assigned to me", "mine").
|
|
||||||
- The user's own name in the User profile block is for context only — it is NOT a default filter.
|
|
||||||
- When in doubt, omit `assignee` and return the global result.
|
|
||||||
|
|
||||||
# Output format
|
|
||||||
Plain text only. Do NOT output XML/HTML-like tags such as <task>, <project>, <note>, <timeline>, or any bracketed-id wrappers, and do NOT output <chart> blocks — those are for the home assistant.
|
|
||||||
|
|
||||||
# Date filtering
|
|
||||||
{date_context}
|
|
||||||
|
|
||||||
When filtering by date, take dueDateFrom / dueDateTo (ms epoch UTC) verbatim from the DATE CONTEXT boundary table above. Do NOT compute boundaries from now_ms yourself.
|
|
||||||
For specific dates not listed, compute local-midnight in the user timezone and convert to UTC ms.
|
|
||||||
|
|
||||||
# Language
|
|
||||||
{language_instruction}
|
|
||||||
|
|
||||||
# Known people & projects
|
|
||||||
{relational_memory}
|
|
||||||
|
|
||||||
# Behavioral hints
|
|
||||||
{proactive_hints}
|
|
||||||
|
|
||||||
# Request context
|
|
||||||
{request_context}\
|
|
||||||
"""
|
|
||||||
|
|
||||||
_CONTEXTUAL_SYSTEM_PROMPT = """You are adiuvAI's contextual assistant. The user is working inside the app and has opened a side chat anchored to a specific view ("current view"). Help them act on that view: recap, plan, create entities, answer questions.
|
_CONTEXTUAL_SYSTEM_PROMPT = """You are adiuvAI's contextual assistant. The user is working inside the app and has opened a side chat anchored to a specific view ("current view"). Help them act on that view: recap, plan, create entities, answer questions.
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
@@ -486,19 +445,6 @@ Stay terse — your principal is a busy executive.
|
|||||||
{request_context}\
|
{request_context}\
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_FLOATING_DOMAIN_CLASSIFIER_PROMPT = (
|
|
||||||
"You are a strict domain classifier for websocket floating requests. "
|
|
||||||
"Return ONLY a JSON object with keys: type, id, section. "
|
|
||||||
"Allowed type values: task, timeline, project, node. "
|
|
||||||
"Allowed section values: task, timeline, note, or null. "
|
|
||||||
"Rules: infer from user message intent first; do not blindly trust scope.type. "
|
|
||||||
"If user asks tasks/timeline/notes for a project, set type=project and section accordingly. "
|
|
||||||
"If project id is unknown but context.resolved_project_id exists, use it as id. "
|
|
||||||
"If id is unknown, use null. "
|
|
||||||
"No markdown, no prose, JSON only."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _as_text(content: Any) -> str:
|
def _as_text(content: Any) -> str:
|
||||||
if content is None:
|
if content is None:
|
||||||
return ""
|
return ""
|
||||||
@@ -727,70 +673,6 @@ def _normalize_tagged_list_lines(text: str, message: str) -> str:
|
|||||||
return "\n".join(output_lines)
|
return "\n".join(output_lines)
|
||||||
|
|
||||||
|
|
||||||
_GENERIC_TAG_RE = re.compile(r"</?(task|project|note|timeline|chart)>", re.IGNORECASE)
|
|
||||||
_BRACKETED_ID_RE = re.compile(r"\[(?:[0-9a-fA-F-]{8,}|[A-Za-z0-9_-]{8,})\]")
|
|
||||||
_FLOATING_EMPTY_FALLBACK = "No results found."
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_floating_markup_fragment(text: str) -> str:
|
|
||||||
if not text:
|
|
||||||
return text
|
|
||||||
cleaned = _GENERIC_TAG_RE.sub("", text)
|
|
||||||
return _BRACKETED_ID_RE.sub("", cleaned)
|
|
||||||
|
|
||||||
|
|
||||||
def _strip_floating_markup(text: str) -> str:
|
|
||||||
"""Ensure floating responses stay plain text with no XML-like tag wrappers."""
|
|
||||||
if not text:
|
|
||||||
return text
|
|
||||||
|
|
||||||
cleaned = _strip_floating_markup_fragment(text)
|
|
||||||
# Collapse excessive spaces introduced by tag/id removal while preserving lines.
|
|
||||||
lines = [re.sub(r"[ \t]{2,}", " ", line).strip() for line in cleaned.splitlines()]
|
|
||||||
return "\n".join(line for line in lines if line)
|
|
||||||
|
|
||||||
|
|
||||||
def _fallback_from_raw_floating_text(raw_text: str) -> str:
|
|
||||||
fallback = _strip_floating_markup_fragment(raw_text or "")
|
|
||||||
fallback = re.sub(r"[ \t]{2,}", " ", fallback).strip()
|
|
||||||
return fallback or _FLOATING_EMPTY_FALLBACK
|
|
||||||
|
|
||||||
|
|
||||||
class _FloatingStreamSanitizer:
|
|
||||||
"""Streaming sanitizer that removes floating markup without buffering the full answer."""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self._pending = ""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _split_safe_boundary(text: str) -> tuple[str, str]:
|
|
||||||
boundary = len(text)
|
|
||||||
|
|
||||||
last_lt = text.rfind("<")
|
|
||||||
if last_lt != -1 and ">" not in text[last_lt:]:
|
|
||||||
boundary = min(boundary, last_lt)
|
|
||||||
|
|
||||||
last_lb = text.rfind("[")
|
|
||||||
if last_lb != -1 and "]" not in text[last_lb:]:
|
|
||||||
boundary = min(boundary, last_lb)
|
|
||||||
|
|
||||||
if boundary == len(text):
|
|
||||||
return text, ""
|
|
||||||
return text[:boundary], text[boundary:]
|
|
||||||
|
|
||||||
def feed(self, chunk: str) -> str:
|
|
||||||
combined = f"{self._pending}{chunk}"
|
|
||||||
safe_text, self._pending = self._split_safe_boundary(combined)
|
|
||||||
return _strip_floating_markup_fragment(safe_text)
|
|
||||||
|
|
||||||
def finalize(self) -> str:
|
|
||||||
# Drop dangling unfinished wrappers at the very end.
|
|
||||||
tail = re.sub(r"<[^>\n]*$", "", self._pending)
|
|
||||||
tail = re.sub(r"\[[^\]\n]*$", "", tail)
|
|
||||||
self._pending = ""
|
|
||||||
return _strip_floating_markup_fragment(tail)
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_memory_label(path_or_label: str) -> str:
|
def _normalize_memory_label(path_or_label: str) -> str:
|
||||||
value = path_or_label.strip()
|
value = path_or_label.strip()
|
||||||
if value.startswith("/memories/"):
|
if value.startswith("/memories/"):
|
||||||
@@ -971,168 +853,6 @@ def _all_tools_for_user(user_id: str, trace_id: str | None) -> list[Any]:
|
|||||||
return [*_all_tools(), *_memory_tools(user_id, trace_id)]
|
return [*_all_tools(), *_memory_tools(user_id, trace_id)]
|
||||||
|
|
||||||
|
|
||||||
def _detect_domain_section(message: str) -> FloatingDomainSection | None:
|
|
||||||
lowered = message.lower()
|
|
||||||
if any(keyword in lowered for keyword in ["timeline", "milestone", "release", "schedule"]):
|
|
||||||
return "timeline"
|
|
||||||
if any(keyword in lowered for keyword in ["task", "tasks", "todo", "attivit", "azione"]):
|
|
||||||
return "task"
|
|
||||||
if any(keyword in lowered for keyword in ["note", "notes", "memo", "document"]):
|
|
||||||
return "note"
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_domain_payload(payload: dict[str, Any], fallback_id: str | None) -> dict[str, str | None]:
|
|
||||||
type_raw = str(payload.get("type") or "").strip().lower()
|
|
||||||
domain_type: FloatingDomainType = "task"
|
|
||||||
if type_raw in {"task", "timeline", "project", "node"}:
|
|
||||||
domain_type = type_raw
|
|
||||||
|
|
||||||
id_value = payload.get("id")
|
|
||||||
domain_id = id_value if isinstance(id_value, str) and id_value.strip() else None
|
|
||||||
if domain_type == "project" and not domain_id:
|
|
||||||
domain_id = fallback_id
|
|
||||||
|
|
||||||
section_raw = payload.get("section")
|
|
||||||
section: FloatingDomainSection | None = None
|
|
||||||
if isinstance(section_raw, str):
|
|
||||||
section_candidate = section_raw.strip().lower()
|
|
||||||
if section_candidate in {"task", "timeline", "note"}:
|
|
||||||
section = section_candidate
|
|
||||||
|
|
||||||
if domain_type != "project":
|
|
||||||
section = None
|
|
||||||
|
|
||||||
return {
|
|
||||||
"type": domain_type,
|
|
||||||
"id": domain_id,
|
|
||||||
"section": section,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_json_object(text: str) -> dict[str, Any] | None:
|
|
||||||
raw = text.strip()
|
|
||||||
if not raw:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
parsed = json.loads(raw)
|
|
||||||
return parsed if isinstance(parsed, dict) else None
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
match = re.search(r"\{.*\}", raw, re.DOTALL)
|
|
||||||
if not match:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
parsed = json.loads(match.group(0))
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
return None
|
|
||||||
return parsed if isinstance(parsed, dict) else None
|
|
||||||
|
|
||||||
|
|
||||||
def _infer_floating_domain_rule_based(message: str, context: dict[str, Any]) -> dict[str, str | None]:
|
|
||||||
section = _detect_domain_section(message)
|
|
||||||
scope = context.get("scope") if isinstance(context, dict) else None
|
|
||||||
resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
|
|
||||||
project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
|
|
||||||
|
|
||||||
if isinstance(scope, dict):
|
|
||||||
scope_type = str(scope.get("type") or "").strip().lower()
|
|
||||||
scope_id = scope.get("id")
|
|
||||||
scope_id_value = scope_id if isinstance(scope_id, str) and scope_id else None
|
|
||||||
|
|
||||||
if scope_type in {"task", "tasks"}:
|
|
||||||
return {"type": "task", "id": scope_id_value, "section": None}
|
|
||||||
if scope_type in {"project", "projects"}:
|
|
||||||
project_scope_id = scope_id_value or project_id
|
|
||||||
return {
|
|
||||||
"type": "project",
|
|
||||||
"id": project_scope_id,
|
|
||||||
"section": section,
|
|
||||||
}
|
|
||||||
if scope_type in {"note", "notes"}:
|
|
||||||
return {
|
|
||||||
"type": "node",
|
|
||||||
"id": scope_id_value,
|
|
||||||
"section": None,
|
|
||||||
}
|
|
||||||
if scope_type in {"timeline", "timelines"}:
|
|
||||||
return {"type": "timeline", "id": scope_id_value, "section": None}
|
|
||||||
|
|
||||||
lowered = message.lower()
|
|
||||||
if any(keyword in lowered for keyword in ["project", "progetto", "client"]) or project_id:
|
|
||||||
return {
|
|
||||||
"type": "project",
|
|
||||||
"id": project_id,
|
|
||||||
"section": section,
|
|
||||||
}
|
|
||||||
if section == "timeline":
|
|
||||||
return {"type": "timeline", "id": None, "section": None}
|
|
||||||
if section == "note":
|
|
||||||
return {"type": "node", "id": None, "section": None}
|
|
||||||
return {"type": "task", "id": None, "section": None}
|
|
||||||
|
|
||||||
|
|
||||||
async def _infer_floating_domain(message: str, context: dict[str, Any]) -> dict[str, str | None]:
|
|
||||||
resolved_project_id = context.get("resolved_project_id") if isinstance(context, dict) else None
|
|
||||||
project_id = resolved_project_id if isinstance(resolved_project_id, str) and resolved_project_id else None
|
|
||||||
|
|
||||||
classifier_context = {
|
|
||||||
"scope": context.get("scope") if isinstance(context.get("scope"), dict) else None,
|
|
||||||
"resolved_project_id": project_id,
|
|
||||||
}
|
|
||||||
|
|
||||||
try:
|
|
||||||
llm = get_agent_llm("classifier")
|
|
||||||
classifier_messages = [
|
|
||||||
SystemMessage(content=_FLOATING_DOMAIN_CLASSIFIER_PROMPT),
|
|
||||||
HumanMessage(
|
|
||||||
content=(
|
|
||||||
f"Message:\n{message}\n\n"
|
|
||||||
f"Context:\n{json.dumps(classifier_context, ensure_ascii=True)}"
|
|
||||||
)
|
|
||||||
),
|
|
||||||
]
|
|
||||||
lf = get_langfuse()
|
|
||||||
_, classifier_prompt_obj = get_prompt_or_fallback(
|
|
||||||
"floating_domain_classifier", _FLOATING_DOMAIN_CLASSIFIER_PROMPT
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract user/session from context for Langfuse attribution
|
|
||||||
_debug = context.get("_debug") if isinstance(context, dict) else None
|
|
||||||
_lf_user = (_debug or {}).get("user_id") if isinstance(_debug, dict) else None
|
|
||||||
_lf_session = (_debug or {}).get("session_id") if isinstance(_debug, dict) else None
|
|
||||||
|
|
||||||
with langfuse_context(user_id=_lf_user, session_id=_lf_session):
|
|
||||||
if lf:
|
|
||||||
with lf.start_as_current_observation(
|
|
||||||
as_type="generation",
|
|
||||||
name="floating-classifier",
|
|
||||||
model=model_for_agent("classifier"),
|
|
||||||
prompt=classifier_prompt_obj,
|
|
||||||
input=classifier_messages,
|
|
||||||
) as gen:
|
|
||||||
response = await llm.ainvoke(classifier_messages)
|
|
||||||
gen.update(output=_as_text(response.content), usage_details=extract_usage(response))
|
|
||||||
else:
|
|
||||||
response = await llm.ainvoke(classifier_messages)
|
|
||||||
parsed = _parse_json_object(_as_text(response.content))
|
|
||||||
if parsed is not None:
|
|
||||||
domain = _normalize_domain_payload(parsed, project_id)
|
|
||||||
logger.info(
|
|
||||||
"deep_agent: floating_domain_classified type=%s id=%s section=%s",
|
|
||||||
domain.get("type"),
|
|
||||||
domain.get("id"),
|
|
||||||
domain.get("section"),
|
|
||||||
)
|
|
||||||
return domain
|
|
||||||
logger.warning("deep_agent: floating_domain classifier returned non-json output")
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("deep_agent: floating_domain classifier failed: %s", exc)
|
|
||||||
|
|
||||||
return _infer_floating_domain_rule_based(message, context)
|
|
||||||
|
|
||||||
|
|
||||||
def _history_to_messages(history: list[dict[str, str]] | None) -> list[Any]:
|
def _history_to_messages(history: list[dict[str, str]] | None) -> list[Any]:
|
||||||
if not history:
|
if not history:
|
||||||
return []
|
return []
|
||||||
@@ -1461,25 +1181,6 @@ async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
|
|||||||
return _normalize_tagged_list_lines(response, message)
|
return _normalize_tagged_list_lines(response, message)
|
||||||
|
|
||||||
|
|
||||||
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, dict[str, str | None]]:
|
|
||||||
prepared_context = await _prepare_context(message, context)
|
|
||||||
domain = await _infer_floating_domain(message, prepared_context)
|
|
||||||
system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context)
|
|
||||||
response = await _run_single_agent(
|
|
||||||
user_id=user_id,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
message=message,
|
|
||||||
context=prepared_context,
|
|
||||||
langfuse_prompt=langfuse_prompt,
|
|
||||||
agent_name="floating-agent",
|
|
||||||
conversation_history=context.get("conversation_history"),
|
|
||||||
)
|
|
||||||
sanitized = _strip_floating_markup(response)
|
|
||||||
if not sanitized and response:
|
|
||||||
sanitized = _fallback_from_raw_floating_text(response)
|
|
||||||
return sanitized, domain
|
|
||||||
|
|
||||||
|
|
||||||
async def run_home_stream(
|
async def run_home_stream(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
message: str,
|
message: str,
|
||||||
@@ -1526,71 +1227,6 @@ async def run_home_stream(
|
|||||||
yield "token", normalized
|
yield "token", normalized
|
||||||
|
|
||||||
|
|
||||||
async def run_floating_stream(
|
|
||||||
user_id: str,
|
|
||||||
message: str,
|
|
||||||
context: dict[str, Any],
|
|
||||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
|
||||||
prepared_context = await _prepare_context(message, context)
|
|
||||||
domain = await _infer_floating_domain(message, prepared_context)
|
|
||||||
yield "floating_domain", domain
|
|
||||||
|
|
||||||
brief_mode: bool = bool(context.get("brief_mode"))
|
|
||||||
briefing_context_text: str = str(context.get("briefing_context") or "").strip()
|
|
||||||
|
|
||||||
if brief_mode and briefing_context_text:
|
|
||||||
# Stage 2: inject briefing as ground truth context.
|
|
||||||
# Pre-substitute {briefing_context} in the template (handles both Langfuse {{}} and fallback {})
|
|
||||||
# before compile_prompt sees the remaining standard variables.
|
|
||||||
template, langfuse_prompt = get_prompt_or_fallback(
|
|
||||||
"task_brief_followup_system",
|
|
||||||
_TASK_BRIEF_FOLLOWUP_SYSTEM_PROMPT,
|
|
||||||
)
|
|
||||||
system_prompt = compile_prompt(
|
|
||||||
template, langfuse_prompt,
|
|
||||||
date_context=_datetime_context_injection(prepared_context).strip(),
|
|
||||||
language_instruction=_language_instruction(prepared_context).strip(),
|
|
||||||
user_identity=_user_identity_injection(prepared_context).strip(),
|
|
||||||
relational_memory=_relational_memory_injection(prepared_context).strip(),
|
|
||||||
proactive_hints=_proactive_hints_injection(prepared_context).strip(),
|
|
||||||
request_context=_request_context_block(prepared_context),
|
|
||||||
briefing_context=briefing_context_text,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context)
|
|
||||||
sanitizer = _FloatingStreamSanitizer()
|
|
||||||
emitted_sanitized = False
|
|
||||||
raw_chunks: list[str] = []
|
|
||||||
async for event in _run_single_agent_stream(
|
|
||||||
user_id=user_id,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
message=message,
|
|
||||||
context=prepared_context,
|
|
||||||
langfuse_prompt=langfuse_prompt,
|
|
||||||
agent_name="floating-agent",
|
|
||||||
conversation_history=context.get("conversation_history"),
|
|
||||||
):
|
|
||||||
event_type, data = event
|
|
||||||
if event_type != "token":
|
|
||||||
yield event
|
|
||||||
continue
|
|
||||||
|
|
||||||
raw_chunk = str(data or "")
|
|
||||||
raw_chunks.append(raw_chunk)
|
|
||||||
sanitized_chunk = sanitizer.feed(raw_chunk)
|
|
||||||
if sanitized_chunk:
|
|
||||||
emitted_sanitized = True
|
|
||||||
yield "token", sanitized_chunk
|
|
||||||
|
|
||||||
tail = sanitizer.finalize()
|
|
||||||
if tail:
|
|
||||||
emitted_sanitized = True
|
|
||||||
yield "token", tail
|
|
||||||
|
|
||||||
if not emitted_sanitized and raw_chunks:
|
|
||||||
yield "token", _fallback_from_raw_floating_text("".join(raw_chunks))
|
|
||||||
|
|
||||||
|
|
||||||
async def run_contextual_stream(
|
async def run_contextual_stream(
|
||||||
user_id: str,
|
user_id: str,
|
||||||
message: str,
|
message: str,
|
||||||
@@ -1599,8 +1235,8 @@ async def run_contextual_stream(
|
|||||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||||
"""Run the contextual agent for a single user turn.
|
"""Run the contextual agent for a single user turn.
|
||||||
|
|
||||||
Mirrors run_floating_stream's plumbing but injects the rendered scope
|
Injects the rendered scope block into the system prompt and exposes
|
||||||
block into the system prompt and exposes the contextual tool set.
|
the contextual tool set.
|
||||||
Note-edit tools (propose_note_edit) are intentionally excluded.
|
Note-edit tools (propose_note_edit) are intentionally excluded.
|
||||||
|
|
||||||
*context contract*: callers MUST include ``context["_debug"]["session_id"]``
|
*context contract*: callers MUST include ``context["_debug"]["session_id"]``
|
||||||
|
|||||||
@@ -103,7 +103,6 @@ def get_llm(
|
|||||||
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
|
_AGENT_MODEL_SETTINGS: dict[str, Callable[[], str]] = {
|
||||||
"classifier": lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
|
"classifier": lambda: settings.LLM_MODEL_CLASSIFIER or settings.LLM_MODEL,
|
||||||
"home-agent": lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
|
"home-agent": lambda: settings.LLM_MODEL_HOME_AGENT or settings.LLM_MODEL,
|
||||||
"floating-agent": lambda: settings.LLM_MODEL_FLOATING_AGENT or settings.LLM_MODEL,
|
|
||||||
"unified-processor": lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
|
"unified-processor": lambda: settings.LLM_MODEL_UNIFIED_PROCESSOR or settings.LLM_MODEL,
|
||||||
"cloud-processor": lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
|
"cloud-processor": lambda: settings.LLM_MODEL_CLOUD_PROCESSOR or settings.LLM_MODEL,
|
||||||
"brief-agent": lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,
|
"brief-agent": lambda: settings.LLM_MODEL_BRIEF_AGENT or settings.LLM_MODEL,
|
||||||
|
|||||||
Reference in New Issue
Block a user