From 13fd8677c1660f0f506a2b623626cbd46ae125a3 Mon Sep 17 00:00:00 2001 From: roberto Date: Fri, 13 Mar 2026 12:16:58 +0100 Subject: [PATCH] fix: normalize home task/timeline responses to tag-only lines --- app/core/deep_agent.py | 92 ++++++++++++++++++++++++++++++++++++++-- tests/test_deep_agent.py | 39 ++++++++++++++++- 2 files changed, 126 insertions(+), 5 deletions(-) diff --git a/app/core/deep_agent.py b/app/core/deep_agent.py index f27f5de..ad34767 100644 --- a/app/core/deep_agent.py +++ b/app/core/deep_agent.py @@ -5,6 +5,7 @@ from __future__ import annotations import json import logging import re +from datetime import date from collections.abc import AsyncGenerator from typing import Any, Literal @@ -29,8 +30,12 @@ _HOME_SINGLE_AGENT_SYSTEM = ( "Always use tools for factual data retrieval before answering. " "When the user asks to remember, forget, or update what you know about them, use memory tools. " "If context.context.resolved_project_id exists, use it as project_id for scoped list calls. " - "Return markdown and embed inline tags when relevant: [ids], [ids], " - "[ids], [ids], {json}." + "Return markdown and use tags when relevant: [ids], [ids], " + "[ids], [ids], {json}. " + "When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. " + "Never put titles, priorities, or dates on the same line as or tags. " + "For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. " + "For upcoming tasks, after tag lines add a short recommendation based on due date and priority." 
) _FLOATING_SINGLE_AGENT_SYSTEM = ( @@ -136,6 +141,75 @@ def _context_for_model(context: dict[str, Any]) -> dict[str, Any]: return sanitized +_TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]") +_TIMELINE_DMY_RE = re.compile(r"(?P\d{2})/(?P\d{2})/(?P\d{4})") + + +def _is_upcoming_timeline_query(message: str) -> bool: + lowered = message.lower() + has_upcoming = "prossim" in lowered or "upcoming" in lowered or "next" in lowered + has_timeline_topic = any( + token in lowered + for token in ("event", "evento", "eventi", "timeline", "milestone", "scaden") + ) + return has_upcoming and has_timeline_topic + + +def _timeline_date_in_current_month_or_future(dmy: str) -> bool: + match = _TIMELINE_DMY_RE.search(dmy) + if not match: + return True + try: + parsed = date( + int(match.group("y")), + int(match.group("m")), + int(match.group("d")), + ) + except ValueError: + return True + + today = date.today() + return parsed >= today and parsed.year == today.year and parsed.month == today.month + + +def _normalize_tagged_list_lines(text: str, message: str) -> str: + if not text: + return text + + upcoming_timeline_only = _is_upcoming_timeline_query(message) + output_lines: list[str] = [] + + for line in text.splitlines(): + matches = list(_TAG_LINE_RE.finditer(line)) + if not matches: + output_lines.append(line) + continue + + had_non_tag_text = _TAG_LINE_RE.sub("", line).strip(" -\t0123456789.*:)") + if not had_non_tag_text and len(matches) == 1: + tag_text = matches[0].group(0) + if ( + upcoming_timeline_only + and "" in tag_text + and not _timeline_date_in_current_month_or_future(line) + ): + continue + output_lines.append(tag_text) + continue + + for match in matches: + tag_text = match.group(0) + if ( + upcoming_timeline_only + and "" in tag_text + and not _timeline_date_in_current_month_or_future(line) + ): + continue + output_lines.append(tag_text) + + return "\n".join(output_lines) + + def _normalize_memory_label(path_or_label: str) -> str: value = 
path_or_label.strip() if value.startswith("/memories/"): @@ -475,12 +549,13 @@ async def _run_single_agent_stream( async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str: prepared_context = await _prepare_context(message, context) - return await _run_single_agent( + response = await _run_single_agent( user_id=user_id, system_prompt=_HOME_SINGLE_AGENT_SYSTEM, message=message, context=prepared_context, ) + return _normalize_tagged_list_lines(response, message) async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, str]: @@ -501,13 +576,22 @@ async def run_home_stream( context: dict[str, Any], ) -> AsyncGenerator[tuple[str, Any], None]: prepared_context = await _prepare_context(message, context) + text_chunks: list[str] = [] async for event in _run_single_agent_stream( user_id=user_id, system_prompt=_HOME_SINGLE_AGENT_SYSTEM, message=message, context=prepared_context, ): - yield event + event_type, data = event + if event_type != "token": + yield event + continue + text_chunks.append(str(data or "")) + + normalized = _normalize_tagged_list_lines("".join(text_chunks), message) + if normalized: + yield "token", normalized async def run_floating_stream( diff --git a/tests/test_deep_agent.py b/tests/test_deep_agent.py index deddfa3..729eedc 100644 --- a/tests/test_deep_agent.py +++ b/tests/test_deep_agent.py @@ -2,13 +2,14 @@ from __future__ import annotations +from datetime import date, timedelta from types import SimpleNamespace from unittest.mock import patch import pytest from langchain_core.messages import AIMessage, ToolMessage -from app.core.deep_agent import run_floating_stream, run_home +from app.core.deep_agent import _normalize_tagged_list_lines, run_floating_stream, run_home class _FakeTool: @@ -79,3 +80,39 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res assert events[0] == ("floating_domain", "timelines") assert ("token", "stream-") in events assert ("token", "ok") 
class _FrozenDate(date):
    """``date`` subclass whose ``today()`` is pinned to a mid-month day.

    The month filter under test compares against ``date.today()``. With the real
    clock, "tomorrow" falls into the next month on the last day of every month
    and the test fails, so the clock is pinned instead.
    """

    @classmethod
    def today(cls) -> "_FrozenDate":
        return cls(2026, 3, 13)


def test_normalize_tagged_list_lines_rewrites_mixed_task_lines_to_tag_only_lines():
    """Rows mixing title text and a task tag collapse to bare tag lines."""
    raw = (
        "Certo!\n\n"
        "1. **Task A** — priorita high <task>[task-1]\n"
        "2. **Task B** — priorita medium <task>[task-2]\n"
    )

    out = _normalize_tagged_list_lines(raw, "quali sono le prossime attivita?")

    # Each tag must end up alone on its own line.
    assert "<task>[task-1]" in out.splitlines()
    assert "<task>[task-2]" in out.splitlines()
    assert "Task A" not in out
    assert "Task B" not in out


def test_normalize_tagged_list_lines_filters_upcoming_timeline_query_to_current_month_future_only():
    """Upcoming-timeline queries keep only future items in the current month."""
    today = _FrozenDate.today()
    tomorrow = today + timedelta(days=1)
    yesterday = today - timedelta(days=1)
    next_month = (today.replace(day=28) + timedelta(days=5)).replace(day=1)

    raw = "\n".join(
        [
            f"- Milestone old — {yesterday.strftime('%d/%m/%Y')} <timeline>[tl-old]",
            f"- Milestone next — {tomorrow.strftime('%d/%m/%Y')} <timeline>[tl-next]",
            f"- Milestone future — {next_month.strftime('%d/%m/%Y')} <timeline>[tl-future]",
        ]
    )

    # Patch the name where the module under test looks it up.
    with patch("app.core.deep_agent.date", _FrozenDate):
        out = _normalize_tagged_list_lines(raw, "invece i miei eventi prossimi?")

    assert "<timeline>[tl-next]" in out
    assert "<timeline>[tl-old]" not in out
    assert "<timeline>[tl-future]" not in out