fix: normalize home task/timeline responses to tag-only lines

2026-03-13 12:16:58 +01:00
parent 9bd629cb59
commit 13fd8677c1
2 changed files with 126 additions and 5 deletions
--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+from datetime import date
 from collections.abc import AsyncGenerator
 from typing import Any, Literal

@@ -29,8 +30,12 @@ _HOME_SINGLE_AGENT_SYSTEM = (
    "Always use tools for factual data retrieval before answering. "
    "When the user asks to remember, forget, or update what you know about them, use memory tools. "
    "If context.context.resolved_project_id exists, use it as project_id for scoped list calls. "
-    "Return markdown and embed inline tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
-    "<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>."
+    "Return markdown and use tags when relevant: <project>[ids]</project>, <task>[ids]</task>, "
+    "<note>[ids]</note>, <timeline>[ids]</timeline>, <chart>{json}</chart>. "
+    "When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. "
+    "Never put titles, priorities, or dates on the same line as <task> or <timeline> tags. "
+    "For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. "
+    "For upcoming tasks, after tag lines add a short recommendation based on due date and priority."
 )

 _FLOATING_SINGLE_AGENT_SYSTEM = (
@@ -136,6 +141,75 @@ def _context_for_model(context: dict[str, Any]) -> dict[str, Any]:
    return sanitized


+_TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]</\1>")
+_TIMELINE_DMY_RE = re.compile(r"(?P<d>\d{2})/(?P<m>\d{2})/(?P<y>\d{4})")
+
+
+def _is_upcoming_timeline_query(message: str) -> bool:
+    lowered = message.lower()
+    has_upcoming = "prossim" in lowered or "upcoming" in lowered or "next" in lowered
+    has_timeline_topic = any(
+        token in lowered
+        for token in ("event", "evento", "eventi", "timeline", "milestone", "scaden")
+    )
+    return has_upcoming and has_timeline_topic
+
+
+def _timeline_date_in_current_month_or_future(dmy: str) -> bool:
+    match = _TIMELINE_DMY_RE.search(dmy)
+    if not match:
+        return True
+    try:
+        parsed = date(
+            int(match.group("y")),
+            int(match.group("m")),
+            int(match.group("d")),
+        )
+    except ValueError:
+        return True
+
+    today = date.today()
+    return parsed >= today and parsed.year == today.year and parsed.month == today.month
+
+
+def _normalize_tagged_list_lines(text: str, message: str) -> str:
+    if not text:
+        return text
+
+    upcoming_timeline_only = _is_upcoming_timeline_query(message)
+    output_lines: list[str] = []
+
+    for line in text.splitlines():
+        matches = list(_TAG_LINE_RE.finditer(line))
+        if not matches:
+            output_lines.append(line)
+            continue
+
+        had_non_tag_text = _TAG_LINE_RE.sub("", line).strip(" -\t0123456789.*:)")
+        if not had_non_tag_text and len(matches) == 1:
+            tag_text = matches[0].group(0)
+            if (
+                upcoming_timeline_only
+                and "<timeline>" in tag_text
+                and not _timeline_date_in_current_month_or_future(line)
+            ):
+                continue
+            output_lines.append(tag_text)
+            continue
+
+        for match in matches:
+            tag_text = match.group(0)
+            if (
+                upcoming_timeline_only
+                and "<timeline>" in tag_text
+                and not _timeline_date_in_current_month_or_future(line)
+            ):
+                continue
+            output_lines.append(tag_text)
+
+    return "\n".join(output_lines)
+
+
 def _normalize_memory_label(path_or_label: str) -> str:
    value = path_or_label.strip()
    if value.startswith("/memories/"):
@@ -475,12 +549,13 @@ async def _run_single_agent_stream(

 async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str:
    prepared_context = await _prepare_context(message, context)
-    return await _run_single_agent(
+    response = await _run_single_agent(
        user_id=user_id,
        system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
        message=message,
        context=prepared_context,
    )
+    return _normalize_tagged_list_lines(response, message)


 async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]:
@@ -501,13 +576,22 @@ async def run_home_stream(
    context: dict[str, Any],
 ) -> AsyncGenerator[tuple[str, Any], None]:
    prepared_context = await _prepare_context(message, context)
+    text_chunks: list[str] = []
    async for event in _run_single_agent_stream(
        user_id=user_id,
        system_prompt=_HOME_SINGLE_AGENT_SYSTEM,
        message=message,
        context=prepared_context,
    ):
-        yield event
+        event_type, data = event
+        if event_type != "token":
+            yield event
+            continue
+        text_chunks.append(str(data or ""))
+
+    normalized = _normalize_tagged_list_lines("".join(text_chunks), message)
+    if normalized:
+        yield "token", normalized


 async def run_floating_stream(
--- a/tests/test_deep_agent.py
+++ b/tests/test_deep_agent.py
@@ -2,13 +2,14 @@

 from __future__ import annotations

+from datetime import date, timedelta
 from types import SimpleNamespace
 from unittest.mock import patch

 import pytest
 from langchain_core.messages import AIMessage, ToolMessage

-from app.core.deep_agent import run_floating_stream, run_home
+from app.core.deep_agent import _normalize_tagged_list_lines, run_floating_stream, run_home


 class _FakeTool:
@@ -79,3 +80,39 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res
    assert events[0] == ("floating_domain", "timelines")
    assert ("token", "stream-") in events
    assert ("token", "ok") in events
+
+
+def test_normalize_tagged_list_lines_rewrites_mixed_task_lines_to_tag_only_lines():
+    raw = (
+        "Certo!\n\n"
+        "1. **Task A** — priorita high <task>[task-1]</task>\n"
+        "2. **Task B** — priorita medium <task>[task-2]</task>\n"
+    )
+
+    out = _normalize_tagged_list_lines(raw, "quali sono le prossime attivita?")
+
+    assert "<task>[task-1]</task>" in out
+    assert "<task>[task-2]</task>" in out
+    assert "Task A" not in out
+    assert "Task B" not in out
+
+
+def test_normalize_tagged_list_lines_filters_upcoming_timeline_query_to_current_month_future_only():
+    today = date.today()
+    tomorrow = today + timedelta(days=1)
+    yesterday = today - timedelta(days=1)
+    next_month = (today.replace(day=28) + timedelta(days=5)).replace(day=1)
+
+    raw = "\n".join(
+        [
+            f"- Milestone old — {yesterday.strftime('%d/%m/%Y')} <timeline>[tl-old]</timeline>",
+            f"- Milestone next — {tomorrow.strftime('%d/%m/%Y')} <timeline>[tl-next]</timeline>",
+            f"- Milestone future — {next_month.strftime('%d/%m/%Y')} <timeline>[tl-future]</timeline>",
+        ]
+    )
+
+    out = _normalize_tagged_list_lines(raw, "invece i miei eventi prossimi?")
+
+    assert "<timeline>[tl-next]</timeline>" in out
+    assert "<timeline>[tl-old]</timeline>" not in out
+    assert "<timeline>[tl-future]</timeline>" not in out