fix: normalize home task/timeline responses to tag-only lines

This commit is contained in:
2026-03-13 12:16:58 +01:00
parent 9bd629cb59
commit 13fd8677c1
2 changed files with 126 additions and 5 deletions

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import json
import logging
import re
from datetime import date
from collections.abc import AsyncGenerator
from typing import Any, Literal
@@ -29,8 +30,12 @@ _HOME_SINGLE_AGENT_SYSTEM = (
"Always use tools for factual data retrieval before answering. "
"When the user asks to remember, forget, or update what you know about them, use memory tools. "
"If context.context.resolved_project_id exists, use it as project_id for scoped list calls. "
"Return markdown and embed inline tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
"<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>."
"Return markdown and use tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
"<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>. "
"When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. "
"Never put titles, priorities, or dates on the same line as <task> or <timeline> tags. "
"For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. "
"For upcoming tasks, after tag lines add a short recommendation based on due date and priority."
)
_FLOATING_SINGLE_AGENT_SYSTEM = (
@@ -136,6 +141,75 @@ def _context_for_model(context: dict[str, Any]) -> dict[str, Any]:
return sanitized
_TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]</\1>")
_TIMELINE_DMY_RE = re.compile(r"(?P<d>\d{2})/(?P<m>\d{2})/(?P<y>\d{4})")
def _is_upcoming_timeline_query(message: str) -> bool:
lowered = message.lower()
has_upcoming = "prossim" in lowered or "upcoming" in lowered or "next" in lowered
has_timeline_topic = any(
token in lowered
for token in ("event", "evento", "eventi", "timeline", "milestone", "scaden")
)
return has_upcoming and has_timeline_topic
def _timeline_date_in_current_month_or_future(dmy: str) -> bool:
match = _TIMELINE_DMY_RE.search(dmy)
if not match:
return True
try:
parsed = date(
int(match.group("y")),
int(match.group("m")),
int(match.group("d")),
)
except ValueError:
return True
today = date.today()
return parsed >= today and parsed.year == today.year and parsed.month == today.month
def _normalize_tagged_list_lines(text: str, message: str) -> str:
if not text:
return text
upcoming_timeline_only = _is_upcoming_timeline_query(message)
output_lines: list[str] = []
for line in text.splitlines():
matches = list(_TAG_LINE_RE.finditer(line))
if not matches:
output_lines.append(line)
continue
had_non_tag_text = _TAG_LINE_RE.sub("", line).strip(" -\t0123456789.*:)")
if not had_non_tag_text and len(matches) == 1:
tag_text = matches[0].group(0)
if (
upcoming_timeline_only
and "<timeline>" in tag_text
and not _timeline_date_in_current_month_or_future(line)
):
continue
output_lines.append(tag_text)
continue
for match in matches:
tag_text = match.group(0)
if (
upcoming_timeline_only
and "<timeline>" in tag_text
and not _timeline_date_in_current_month_or_future(line)
):
continue
output_lines.append(tag_text)
return "\n".join(output_lines)
def _normalize_memory_label(path_or_label: str) -> str:
value = path_or_label.strip()
if value.startswith("/memories/"):
@@ -475,12 +549,13 @@ async def _run_single_agent_stream(
async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
prepared_context = await _prepare_context(message, context)
return await _run_single_agent(
response = await _run_single_agent(
user_id=user_id,
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
message=message,
context=prepared_context,
)
return _normalize_tagged_list_lines(response, message)
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]:
@@ -501,13 +576,22 @@ async def run_home_stream(
context: dict[str, Any],
) -> AsyncGenerator[tuple[str, Any], None]:
prepared_context = await _prepare_context(message, context)
text_chunks: list[str] = []
async for event in _run_single_agent_stream(
user_id=user_id,
system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
message=message,
context=prepared_context,
):
yield event
event_type, data = event
if event_type != "token":
yield event
continue
text_chunks.append(str(data or ""))
normalized = _normalize_tagged_list_lines("".join(text_chunks), message)
if normalized:
yield "token", normalized
async def run_floating_stream(

View File

@@ -2,13 +2,14 @@
from __future__ import annotations
from datetime import date, timedelta
from types import SimpleNamespace
from unittest.mock import patch
import pytest
from langchain_core.messages import AIMessage, ToolMessage
from app.core.deep_agent import run_floating_stream, run_home
from app.core.deep_agent import _normalize_tagged_list_lines, run_floating_stream, run_home
class _FakeTool:
@@ -79,3 +80,39 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res
assert events[0] == ("floating_domain", "timelines")
assert ("token", "stream-") in events
assert ("token", "ok") in events
def test_normalize_tagged_list_lines_rewrites_mixed_task_lines_to_tag_only_lines():
raw = (
"Certo!\n\n"
"1. **Task A** — priorita high <task>[task-1]</task>\n"
"2. **Task B** — priorita medium <task>[task-2]</task>\n"
)
out = _normalize_tagged_list_lines(raw, "quali sono le prossime attivita?")
assert "<task>[task-1]</task>" in out
assert "<task>[task-2]</task>" in out
assert "Task A" not in out
assert "Task B" not in out
def test_normalize_tagged_list_lines_filters_upcoming_timeline_query_to_current_month_future_only():
today = date.today()
tomorrow = today + timedelta(days=1)
yesterday = today - timedelta(days=1)
next_month = (today.replace(day=28) + timedelta(days=5)).replace(day=1)
raw = "\n".join(
[
f"- Milestone old — {yesterday.strftime('%d/%m/%Y')} <timeline>[tl-old]</timeline>",
f"- Milestone next — {tomorrow.strftime('%d/%m/%Y')} <timeline>[tl-next]</timeline>",
f"- Milestone future — {next_month.strftime('%d/%m/%Y')} <timeline>[tl-future]</timeline>",
]
)
out = _normalize_tagged_list_lines(raw, "invece i miei eventi prossimi?")
assert "<timeline>[tl-next]</timeline>" in out
assert "<timeline>[tl-old]</timeline>" not in out
assert "<timeline>[tl-future]</timeline>" not in out