fix: normalize home task/timeline responses to tag-only lines
This commit is contained in:
@@ -5,6 +5,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import date
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any, Literal
|
||||
|
||||
@@ -29,8 +30,12 @@ _HOME_SINGLE_AGENT_SYSTEM = (
|
||||
"Always use tools for factual data retrieval before answering. "
|
||||
"When the user asks to remember, forget, or update what you know about them, use memory tools. "
|
||||
"If context.context.resolved_project_id exists, use it as project_id for scoped list calls. "
|
||||
"Return markdown and embed inline tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
|
||||
"<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>."
|
||||
"Return markdown and use tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
|
||||
"<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>. "
|
||||
"When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. "
|
||||
"Never put titles, priorities, or dates on the same line as <task> or <timeline> tags. "
|
||||
"For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. "
|
||||
"For upcoming tasks, after tag lines add a short recommendation based on due date and priority."
|
||||
)
|
||||
|
||||
_FLOATING_SINGLE_AGENT_SYSTEM = (
|
||||
@@ -136,6 +141,75 @@ def _context_for_model(context: dict[str, Any]) -> dict[str, Any]:
|
||||
return sanitized
|
||||
|
||||
|
||||
# Matches one inline id tag such as <task>[id]</task> or <timeline>[id]</timeline>.
# The \1 backreference forces the closing tag to repeat the opening tag name.
_TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]</\1>")
|
||||
# Finds a DD/MM/YYYY date (two-digit day, two-digit month, four-digit year)
# anywhere in a string and exposes the parts as named groups d, m and y.
_TIMELINE_DMY_RE = re.compile(r"(?P<d>\d{2})/(?P<m>\d{2})/(?P<y>\d{4})")
|
||||
|
||||
|
||||
def _is_upcoming_timeline_query(message: str) -> bool:
|
||||
lowered = message.lower()
|
||||
has_upcoming = "prossim" in lowered or "upcoming" in lowered or "next" in lowered
|
||||
has_timeline_topic = any(
|
||||
token in lowered
|
||||
for token in ("event", "evento", "eventi", "timeline", "milestone", "scaden")
|
||||
)
|
||||
return has_upcoming and has_timeline_topic
|
||||
|
||||
|
||||
def _timeline_date_in_current_month_or_future(dmy: str, today: date | None = None) -> bool:
    """Return whether the first DD/MM/YYYY date in *dmy* is still upcoming this month.

    Despite the name, the check is "on or after the reference day AND in the
    same calendar month and year as the reference day". Text that contains no
    DD/MM/YYYY date, or whose digits do not form a real calendar date, is
    treated as acceptable (True) so that undated lines are never filtered out.

    Args:
        dmy: Text to scan; only the first DD/MM/YYYY occurrence is considered.
        today: Reference day. Defaults to ``date.today()``; pass an explicit
            value to make the result deterministic.

    Returns:
        True if the line should be kept, False if its date is in the past or
        outside the reference month.
    """
    match = _TIMELINE_DMY_RE.search(dmy)
    if match is None:
        return True

    try:
        parsed = date(int(match["y"]), int(match["m"]), int(match["d"]))
    except ValueError:
        # Digits matched the pattern but are not a valid date (e.g. 31/02/2025).
        return True

    reference = date.today() if today is None else today
    same_month = (parsed.year, parsed.month) == (reference.year, reference.month)
    return parsed >= reference and same_month
|
||||
|
||||
|
||||
def _normalize_tagged_list_lines(text: str, message: str) -> str:
    """Rewrite every line that carries <task>/<timeline> tags into tag-only lines.

    Lines without such a tag are passed through unchanged. A line with one or
    more tags is replaced by one output line per tag, and any other text on
    that line (bullets, titles, priorities, dates) is discarded. When
    *message* is an upcoming-timeline query, timeline tags are dropped if the
    date found on their original line is not upcoming within the current
    month; task tags are never filtered.

    The input is split with ``str.splitlines`` and re-joined with "\\n", so
    line endings are normalized and a trailing newline is not preserved.

    Args:
        text: The model reply to normalize. Falsy input is returned as is.
        message: The user message, used only to decide whether the date
            filter for timelines applies.

    Returns:
        The normalized reply text.
    """
    if not text:
        return text

    upcoming_timeline_only = _is_upcoming_timeline_query(message)
    output_lines: list[str] = []

    for line in text.splitlines():
        matches = list(_TAG_LINE_RE.finditer(line))
        if not matches:
            output_lines.append(line)
            continue

        # The date belongs to the line, not to an individual tag, so the check
        # is evaluated once per line instead of once per tag.
        drop_timelines = (
            upcoming_timeline_only
            and not _timeline_date_in_current_month_or_future(line)
        )
        output_lines.extend(
            match.group(0)
            for match in matches
            # group(1) is the tag name captured by the pattern.
            if not (drop_timelines and match.group(1) == "timeline")
        )

    return "\n".join(output_lines)
|
||||
|
||||
|
||||
def _normalize_memory_label(path_or_label: str) -> str:
|
||||
value = path_or_label.strip()
|
||||
if value.startswith("/memories/"):
|
||||
@@ -475,12 +549,13 @@ async def _run_single_agent_stream(
|
||||
|
||||
async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
    """Run the home single-agent flow and return its normalized reply.

    The context is first passed through ``_prepare_context``, the agent is run
    with the home system prompt, and the resulting text is post-processed by
    ``_normalize_tagged_list_lines``.

    Args:
        user_id: Forwarded to the agent run.
        message: The user message; also used by the normalizer.
        context: Request context, prepared before being handed to the agent.

    Returns:
        The agent reply after tag-line normalization.
    """
    prepared_context = await _prepare_context(message, context)
    response = await _run_single_agent(
        user_id=user_id,
        system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
        message=message,
        context=prepared_context,
    )
    # Normalize the reply instead of returning the raw agent output.
    return _normalize_tagged_list_lines(response, message)
|
||||
|
||||
|
||||
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]:
|
||||
@@ -501,13 +576,22 @@ async def run_home_stream(
|
||||
context: dict[str, Any],
|
||||
) -> AsyncGenerator[tuple[str, Any], None]:
|
||||
prepared_context = await _prepare_context(message, context)
|
||||
text_chunks: list[str] = []
|
||||
async for event in _run_single_agent_stream(
|
||||
user_id=user_id,
|
||||
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
|
||||
message=message,
|
||||
context=prepared_context,
|
||||
):
|
||||
yield event
|
||||
event_type, data = event
|
||||
if event_type != "token":
|
||||
yield event
|
||||
continue
|
||||
text_chunks.append(str(data or ""))
|
||||
|
||||
normalized = _normalize_tagged_list_lines("".join(text_chunks), message)
|
||||
if normalized:
|
||||
yield "token", normalized
|
||||
|
||||
|
||||
async def run_floating_stream(
|
||||
|
||||
@@ -2,13 +2,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, timedelta
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, ToolMessage
|
||||
|
||||
from app.core.deep_agent import run_floating_stream, run_home
|
||||
from app.core.deep_agent import _normalize_tagged_list_lines, run_floating_stream, run_home
|
||||
|
||||
|
||||
class _FakeTool:
|
||||
@@ -79,3 +80,39 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res
|
||||
assert events[0] == ("floating_domain", "timelines")
|
||||
assert ("token", "stream-") in events
|
||||
assert ("token", "ok") in events
|
||||
|
||||
|
||||
def test_normalize_tagged_list_lines_rewrites_mixed_task_lines_to_tag_only_lines():
    """Lines that mix a title with a <task> tag must keep only the tag."""
    reply_parts = [
        "Certo!\n\n",
        "1. **Task A** — priorita high <task>[task-1]</task>\n",
        "2. **Task B** — priorita medium <task>[task-2]</task>\n",
    ]

    normalized = _normalize_tagged_list_lines(
        "".join(reply_parts),
        "quali sono le prossime attivita?",
    )

    # Every tag survives the rewrite...
    for kept_tag in ("<task>[task-1]</task>", "<task>[task-2]</task>"):
        assert kept_tag in normalized
    # ...while the titles that shared a line with a tag are gone.
    for dropped_title in ("Task A", "Task B"):
        assert dropped_title not in normalized
|
||||
|
||||
|
||||
def test_normalize_tagged_list_lines_filters_upcoming_timeline_query_to_current_month_future_only():
    """Upcoming-timeline queries keep only timelines dated from today to month end."""
    today = date.today()
    tomorrow = today + timedelta(days=1)
    # On the last day of a month "tomorrow" already belongs to the next month
    # and would be filtered out, making the test fail once a month. Today
    # itself still counts as upcoming because the filter compares with >=.
    upcoming = tomorrow if tomorrow.month == today.month else today
    yesterday = today - timedelta(days=1)
    # Day 28 exists in every month; adding 5 days always lands in the next one.
    next_month = (today.replace(day=28) + timedelta(days=5)).replace(day=1)

    raw = "\n".join(
        [
            f"- Milestone old — {yesterday.strftime('%d/%m/%Y')} <timeline>[tl-old]</timeline>",
            f"- Milestone next — {upcoming.strftime('%d/%m/%Y')} <timeline>[tl-next]</timeline>",
            f"- Milestone future — {next_month.strftime('%d/%m/%Y')} <timeline>[tl-future]</timeline>",
        ]
    )

    out = _normalize_tagged_list_lines(raw, "invece i miei eventi prossimi?")

    assert "<timeline>[tl-next]</timeline>" in out
    assert "<timeline>[tl-old]</timeline>" not in out
    assert "<timeline>[tl-future]</timeline>" not in out
|
||||
|
||||
Reference in New Issue
Block a user