fix: normalize home task/timeline responses to tag-only lines

This commit is contained in:
2026-03-13 12:16:58 +01:00
parent 9bd629cb59
commit 13fd8677c1
2 changed files with 126 additions and 5 deletions

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import json import json
import logging import logging
import re import re
from datetime import date
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from typing import Any, Literal from typing import Any, Literal
@@ -29,8 +30,12 @@ _HOME_SINGLE_AGENT_SYSTEM = (
"Always use tools for factual data retrieval before answering. " "Always use tools for factual data retrieval before answering. "
"When the user asks to remember, forget, or update what you know about them, use memory tools. " "When the user asks to remember, forget, or update what you know about them, use memory tools. "
"If context.context.resolved_project_id exists, use it as project_id for scoped list calls. " "If context.context.resolved_project_id exists, use it as project_id for scoped list calls. "
"Return markdown and embed inline tags when relevant: <project>[ids]</project>, <task>[ids]</task>, " "Return markdown and use tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
"<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>. " "<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>. "
"When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. "
"Never put titles, priorities, or dates on the same line as <task> or <timeline> tags. "
"For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. "
"For upcoming tasks, after tag lines add a short recommendation based on due date and priority."
) )
_FLOATING_SINGLE_AGENT_SYSTEM = ( _FLOATING_SINGLE_AGENT_SYSTEM = (
@@ -136,6 +141,75 @@ def _context_for_model(context: dict[str, Any]) -> dict[str, Any]:
return sanitized return sanitized
# Matches one id tag such as <task>[...]</task> or <timeline>[...]</timeline>.
# The \1 backreference forces the closing tag to repeat the opening tag name,
# and the bracketed id list must be non-empty and contain no "]".
# The pattern is unanchored, so it finds tags anywhere inside a line.
_TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]</\1>")
# Matches a dd/mm/yyyy date (two-digit day and month, four-digit year) and
# exposes the parts through the named groups "d", "m" and "y".
_TIMELINE_DMY_RE = re.compile(r"(?P<d>\d{2})/(?P<m>\d{2})/(?P<y>\d{4})")
def _is_upcoming_timeline_query(message: str) -> bool:
lowered = message.lower()
has_upcoming = "prossim" in lowered or "upcoming" in lowered or "next" in lowered
has_timeline_topic = any(
token in lowered
for token in ("event", "evento", "eventi", "timeline", "milestone", "scaden")
)
return has_upcoming and has_timeline_topic
def _timeline_date_in_current_month_or_future(dmy: str) -> bool:
    """Tell whether the first dd/mm/yyyy date in *dmy* is still upcoming this month.

    Despite the name, the date must satisfy both conditions: it is today or
    later, and it falls in the current calendar month and year.

    The check fails open: text without a dd/mm/yyyy date, or with digits
    that do not form a real calendar date, yields True.

    Args:
        dmy: Any text that may contain a dd/mm/yyyy date; only the first
            match is considered.

    Returns:
        False only when a valid date is found and it is in the past or
        outside the current month; True otherwise.
    """
    found = _TIMELINE_DMY_RE.search(dmy)
    if found is None:
        return True

    try:
        year, month, day = (int(found.group(key)) for key in ("y", "m", "d"))
        candidate = date(year, month, day)
    except ValueError:
        # E.g. 31/02/2026: not a real date, so do not filter on it.
        return True

    now = date.today()
    same_month = (candidate.year, candidate.month) == (now.year, now.month)
    return same_month and candidate >= now
def _normalize_tagged_list_lines(text: str, message: str) -> str:
    """Rewrite lines carrying <task>/<timeline> tags so each tag stands alone.

    Lines without any task/timeline tag are kept as they are. A line with
    one or more such tags is replaced by the tags themselves, one per
    output line; all other text on that line (list markers, titles, dates)
    is dropped.

    When *message* looks like an upcoming-timeline query, timeline tags are
    also dropped if their source line carries a dd/mm/yyyy date that is in
    the past or outside the current month. Lines without a recognisable
    date are never filtered. Task tags are never filtered.

    Args:
        text: The model response to normalize. Falsy values are returned
            unchanged.
        message: The user message, used only to decide whether the
            upcoming-timeline filter applies.

    Returns:
        The normalized text, with lines joined by ``"\\n"``.
    """
    if not text:
        return text

    upcoming_timeline_only = _is_upcoming_timeline_query(message)
    output_lines: list[str] = []
    for line in text.splitlines():
        tags = [found.group(0) for found in _TAG_LINE_RE.finditer(line)]
        if not tags:
            output_lines.append(line)
            continue

        # The date lives on the line, not in the tag, so decide once per
        # line whether its timeline tags have to go.
        drop_timelines = (
            upcoming_timeline_only
            and not _timeline_date_in_current_month_or_future(line)
        )
        output_lines.extend(
            tag for tag in tags if not (drop_timelines and "<timeline>" in tag)
        )
    return "\n".join(output_lines)
def _normalize_memory_label(path_or_label: str) -> str: def _normalize_memory_label(path_or_label: str) -> str:
value = path_or_label.strip() value = path_or_label.strip()
if value.startswith("/memories/"): if value.startswith("/memories/"):
@@ -475,12 +549,13 @@ async def _run_single_agent_stream(
async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
    """Run the home single-agent flow and return its normalized reply.

    The context is prepared first, the single agent is run with the home
    system prompt, and the reply is passed through
    ``_normalize_tagged_list_lines`` before being returned.
    """
    agent_context = await _prepare_context(message, context)
    raw_reply = await _run_single_agent(
        user_id=user_id,
        system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
        message=message,
        context=agent_context,
    )
    return _normalize_tagged_list_lines(raw_reply, message)
async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]: async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]:
@@ -501,13 +576,22 @@ async def run_home_stream(
context: dict[str, Any], context: dict[str, Any],
) -> AsyncGenerator[tuple[str, Any], None]: ) -> AsyncGenerator[tuple[str, Any], None]:
prepared_context = await _prepare_context(message, context) prepared_context = await _prepare_context(message, context)
text_chunks: list[str] = []
async for event in _run_single_agent_stream( async for event in _run_single_agent_stream(
user_id=user_id, user_id=user_id,
system_prompt=_HOME_SINGLE_AGENT_SYSTEM, system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
message=message, message=message,
context=prepared_context, context=prepared_context,
): ):
event_type, data = event
if event_type != "token":
yield event yield event
continue
text_chunks.append(str(data or ""))
normalized = _normalize_tagged_list_lines("".join(text_chunks), message)
if normalized:
yield "token", normalized
async def run_floating_stream( async def run_floating_stream(

View File

@@ -2,13 +2,14 @@
from __future__ import annotations from __future__ import annotations
from datetime import date, timedelta
from types import SimpleNamespace from types import SimpleNamespace
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
from langchain_core.messages import AIMessage, ToolMessage from langchain_core.messages import AIMessage, ToolMessage
from app.core.deep_agent import run_floating_stream, run_home from app.core.deep_agent import _normalize_tagged_list_lines, run_floating_stream, run_home
class _FakeTool: class _FakeTool:
@@ -79,3 +80,39 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res
assert events[0] == ("floating_domain", "timelines") assert events[0] == ("floating_domain", "timelines")
assert ("token", "stream-") in events assert ("token", "stream-") in events
assert ("token", "ok") in events assert ("token", "ok") in events
def test_normalize_tagged_list_lines_rewrites_mixed_task_lines_to_tag_only_lines():
    """List items mixing prose and a task tag keep the tag and lose the prose."""
    model_reply = "".join(
        [
            "Certo!\n\n",
            "1. **Task A** — priorita high <task>[task-1]</task>\n",
            "2. **Task B** — priorita medium <task>[task-2]</task>\n",
        ]
    )

    normalized = _normalize_tagged_list_lines(
        model_reply, "quali sono le prossime attivita?"
    )

    for expected_tag in ("<task>[task-1]</task>", "<task>[task-2]</task>"):
        assert expected_tag in normalized
    for dropped_title in ("Task A", "Task B"):
        assert dropped_title not in normalized
def test_normalize_tagged_list_lines_filters_upcoming_timeline_query_to_current_month_future_only():
    """Only timeline items dated today-or-later within the current month survive.

    "Today" is frozen to a mid-month date. With the real clock the test
    fails on the last day of any month, because "tomorrow" then belongs to
    the next month and is filtered out.
    """

    class _FrozenDate(date):
        # Stand-in for the `date` name used by the module under test.
        @classmethod
        def today(cls):
            return cls(2026, 3, 13)

    today = _FrozenDate.today()
    tomorrow = today + timedelta(days=1)
    yesterday = today - timedelta(days=1)
    # Day 28 plus five days always lands in the following month.
    next_month = (today.replace(day=28) + timedelta(days=5)).replace(day=1)

    raw = "\n".join(
        [
            f"- Milestone old — {yesterday.strftime('%d/%m/%Y')} <timeline>[tl-old]</timeline>",
            f"- Milestone next — {tomorrow.strftime('%d/%m/%Y')} <timeline>[tl-next]</timeline>",
            f"- Milestone future — {next_month.strftime('%d/%m/%Y')} <timeline>[tl-future]</timeline>",
        ]
    )

    with patch("app.core.deep_agent.date", _FrozenDate):
        out = _normalize_tagged_list_lines(raw, "invece i miei eventi prossimi?")

    assert "<timeline>[tl-next]</timeline>" in out
    assert "<timeline>[tl-old]</timeline>" not in out
    assert "<timeline>[tl-future]</timeline>" not in out