From 6787e690bacfef3b163e176e576c54d0e9da5cea Mon Sep 17 00:00:00 2001 From: Roberto Date: Mon, 27 Apr 2026 09:15:08 +0200 Subject: [PATCH] fix tools calls --- app/agents/task_agent.py | 148 +++++++++++++++++++--- app/agents/timeline_agent.py | 179 +++++++++++++++++++++++---- app/core/deep_agent.py | 231 +++++++++++++++++++++++------------ tests/test_deep_agent.py | 221 ++++++++++++++++++++++++++++++++- 4 files changed, 659 insertions(+), 120 deletions(-) diff --git a/app/agents/task_agent.py b/app/agents/task_agent.py index 8ce4dbe..9dd85dd 100644 --- a/app/agents/task_agent.py +++ b/app/agents/task_agent.py @@ -26,32 +26,137 @@ def _is_uuid(value: str) -> bool: async def list_tasks( project_id: str = "", status: str = "", + priority: str = "", + assignee: str = "", search: str = "", order_by: str = "", + order_dir: str = "", + due_date_from: int = -1, + due_date_to: int = -1, + created_at_from: int = -1, + created_at_to: int = -1, + completed_at_from: int = -1, + completed_at_to: int = -1, + is_ai_suggested: int = -1, + limit: int = 50, + offset: int = 0, ) -> str: - """List tasks, optionally filtered by project_id, status (todo|in_progress|done), - a search string, or an order_by field name (dueDate|priority|createdAt).""" + """List tasks with optional filters. Returns up to `limit` results (default 50). + + project_id: UUID of the project to scope results to. + status: filter by status — todo | in_progress | done. + priority: filter by priority — high | medium | low. + assignee: substring to match against assignee names. + search: substring search across title and description. + order_by: sort field — dueDate | priority | createdAt | completedAt. + order_dir: asc (default) | desc. + due_date_from / due_date_to: ms epoch range for dueDate. Use -1 to omit. + created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit. + completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit. + is_ai_suggested: 0 or 1 to filter by AI-suggested flag; -1 = any. + limit: max rows to return (default 50). Use with offset to paginate. + offset: skip first N rows (default 0). + + Tip — combine *_from and *_to for a closed range; pass only one for open-ended. + Tip — prefer count_tasks for "how many" questions to avoid listing rows. + Tip — for natural-language windows ("today", "tomorrow", "this week", "last month", etc.) + take due_date_from / due_date_to verbatim from the DATE CONTEXT block in the system prompt; + do not compute boundaries from the current UTC instant. + """ normalized_project_id = project_id if (project_id and _is_uuid(project_id)) else "" - result = await execute_on_client( - action="select", - table="tasks", - filters={ - "projectId": normalized_project_id or None, - "status": status or None, - "search": search or None, - "orderBy": order_by or None, - }, - ) + filters: dict[str, Any] = { + "projectId": normalized_project_id or None, + "status": status or None, + "priority": priority or None, + "search": search or None, + "orderBy": order_by or None, + "orderDir": order_dir or None, + "limit": limit, + "offset": offset, + } + if assignee: + filters["assignee"] = assignee + if due_date_from != -1: + filters["dueDateFrom"] = due_date_from + if due_date_to != -1: + filters["dueDateTo"] = due_date_to + if created_at_from != -1: + filters["createdAtFrom"] = created_at_from + if created_at_to != -1: + filters["createdAtTo"] = created_at_to + if completed_at_from != -1: + filters["completedAtFrom"] = completed_at_from + if completed_at_to != -1: + filters["completedAtTo"] = completed_at_to + if is_ai_suggested != -1: + filters["isAiSuggested"] = is_ai_suggested + + result = await execute_on_client(action="select", table="tasks", filters=filters) rows = result.get("rows", []) if not rows: return "No tasks found matching the given filters." lines = [ - f"- {r['title']} (status: {r['status']}, priority: {r['priority']}, id: {r['id']})" + f"- {r['title']} (status: {r['status']}, priority: {r['priority']}, " + f"dueDate: {r.get('dueDate')}, completedAt: {r.get('completedAt')}, id: {r['id']})" for r in rows ] return f"Found {len(rows)} task(s):\n" + "\n".join(lines) +@tool +async def count_tasks( + project_id: str = "", + status: str = "", + priority: str = "", + assignee: str = "", + search: str = "", + due_date_from: int = -1, + due_date_to: int = -1, + created_at_from: int = -1, + created_at_to: int = -1, + completed_at_from: int = -1, + completed_at_to: int = -1, + is_ai_suggested: int = -1, +) -> str: + """Count tasks matching the given filters without returning rows. + + Use this instead of list_tasks for "how many" questions — it is much cheaper. + Same filter parameters as list_tasks (no limit/offset/order_by needed). + + due_date_from / due_date_to: ms epoch range for dueDate. Use -1 to omit. + created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit. + completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit. + Tip — for natural-language windows take due_date_from / due_date_to from the DATE CONTEXT block; + do not compute boundaries from the current UTC instant. + """ + normalized_project_id = project_id if (project_id and _is_uuid(project_id)) else "" + filters: dict[str, Any] = { + "projectId": normalized_project_id or None, + "status": status or None, + "priority": priority or None, + "search": search or None, + } + if assignee: + filters["assignee"] = assignee + if due_date_from != -1: + filters["dueDateFrom"] = due_date_from + if due_date_to != -1: + filters["dueDateTo"] = due_date_to + if created_at_from != -1: + filters["createdAtFrom"] = created_at_from + if created_at_to != -1: + filters["createdAtTo"] = created_at_to + if completed_at_from != -1: + filters["completedAtFrom"] = completed_at_from + if completed_at_to != -1: + filters["completedAtTo"] = completed_at_to + if is_ai_suggested != -1: + filters["isAiSuggested"] = is_ai_suggested + + result = await execute_on_client(action="count", table="tasks", filters=filters) + return f"Task count: {result.get('count', 0)}" + + @tool async def create_task( title: str, @@ -72,6 +177,8 @@ async def create_task( due_date: Unix timestamp in milliseconds; 0 means no due date project_id: optional UUID of the parent project is_ai_suggested: 1 if proactively suggested, 0 if user-requested + + completedAt is set automatically when status is 'done'. """ result = await execute_on_client( action="insert", @@ -108,6 +215,10 @@ async def update_task( """Update fields on an existing task. Only pass fields you want to change. task_id: the task's UUID (required) due_date: -1 means unchanged; 0 clears the due date; any positive value sets it + + completedAt is managed automatically: + - setting status to 'done' records the current timestamp + - changing status away from 'done' clears completedAt """ updates: dict[str, Any] = {} if title: @@ -141,11 +252,12 @@ async def delete_task(task_id: str) -> str: @tool -async def list_tasks_due_today(user_timezone: str = "UTC") -> str: +async def list_tasks_due_today(user_timezone: str = "UTC", include_done: bool = False) -> str: """List all tasks whose due date falls on today's date. user_timezone: IANA timezone name (e.g. 'Europe/Rome', 'America/New_York'). Always pass the user's timezone so 'today' is computed in their local time. + include_done: set True to also include already-completed tasks due today (default False). """ try: from zoneinfo import ZoneInfo @@ -156,10 +268,13 @@ async def list_tasks_due_today(user_timezone: str = "UTC") -> str: start_dt = datetime(now_local.year, now_local.month, now_local.day, tzinfo=tz) start_ms = int(start_dt.timestamp() * 1000) end_ms = start_ms + 86_400_000 - 1 + filters: dict[str, Any] = {"dueDateFrom": start_ms, "dueDateTo": end_ms} + if not include_done: + filters["status"] = "todo" result = await execute_on_client( action="select", table="tasks", - filters={"dueDateFrom": start_ms, "dueDateTo": end_ms}, + filters=filters, ) rows = result.get("rows", []) if not rows: @@ -203,7 +318,6 @@ async def add_task_comment(task_id: str, author: str, content: str) -> str: ) row = result.get("row", {}) row_author = row.get("author", author) - # Electron payloads can vary (taskId vs task_id). Fall back to input task_id. row_task_id = row.get("taskId") or row.get("task_id") or task_id row_comment_id = row.get("id", "unknown") return f"Comment added by {row_author} on task {row_task_id} (comment id: {row_comment_id})." @@ -221,6 +335,7 @@ async def delete_task_comment(comment_id: str) -> str: TASK_TOOLS: list[Any] = [ list_tasks, + count_tasks, create_task, update_task, delete_task, @@ -232,6 +347,7 @@ TASK_TOOLS: list[Any] = [ TASK_READ_TOOLS: list[Any] = [ list_tasks, + count_tasks, list_tasks_due_today, list_task_comments, ] diff --git a/app/agents/timeline_agent.py b/app/agents/timeline_agent.py index 2939972..0f777a1 100644 --- a/app/agents/timeline_agent.py +++ b/app/agents/timeline_agent.py @@ -20,19 +20,127 @@ def _is_uuid(value: str) -> bool: @tool -async def list_timelines(project_id: str = "") -> str: - """List timelines. Provide project_id to scope to a specific project.""" +async def list_timelines( + project_id: str = "", + type: str = "", + is_completed: int = -1, + is_ai_suggested: int = -1, + order_by: str = "", + order_dir: str = "", + date_from: int = -1, + date_to: int = -1, + created_at_from: int = -1, + created_at_to: int = -1, + completed_at_from: int = -1, + completed_at_to: int = -1, + limit: int = 50, + offset: int = 0, +) -> str: + """List timeline events (milestones, checkpoints, activities) with optional filters. + + project_id: UUID to scope results to a specific project. + type: filter by event type — milestone | checkpoint | activity. + is_completed: 0 = incomplete only, 1 = completed only, -1 = any (default). + is_ai_suggested: 0 or 1 to filter by AI-suggested flag; -1 = any. + order_by: sort field — date (default) | createdAt | completedAt. + order_dir: asc (default) | desc. + date_from / date_to: ms epoch range for the event date. Use -1 to omit. + created_at_from / created_at_to: ms epoch range for createdAt. Use -1 to omit. + completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit. + limit: max rows to return (default 50). Use with offset to paginate. + offset: skip first N rows (default 0). + + Tip — combine *_from and *_to for a closed range; pass only one for open-ended. + Tip — prefer count_timelines for "how many" questions to avoid listing rows. + Tip — for natural-language windows ("today", "this week", "last month", etc.) + take date_from / date_to verbatim from the DATE CONTEXT block in the system prompt; + do not compute boundaries from the current UTC instant. + """ normalized_project_id = project_id if (project_id and _is_uuid(project_id)) else "" - result = await execute_on_client( - action="select", - table="timelines", - filters={"projectId": normalized_project_id or None}, - ) + filters: dict[str, Any] = { + "projectId": normalized_project_id or None, + "orderBy": order_by or None, + "orderDir": order_dir or None, + "limit": limit, + "offset": offset, + } + if type: + filters["type"] = type + if is_completed != -1: + filters["isCompleted"] = is_completed + if is_ai_suggested != -1: + filters["isAiSuggested"] = is_ai_suggested + if date_from != -1: + filters["dateFrom"] = date_from + if date_to != -1: + filters["dateTo"] = date_to + if created_at_from != -1: + filters["createdAtFrom"] = created_at_from + if created_at_to != -1: + filters["createdAtTo"] = created_at_to + if completed_at_from != -1: + filters["completedAtFrom"] = completed_at_from + if completed_at_to != -1: + filters["completedAtTo"] = completed_at_to + + result = await execute_on_client(action="select", table="timelines", filters=filters) rows = result.get("rows", []) if not rows: - return "No timelines found." - lines = [f"- {r['title']} (date: {r['date']}, id: {r['id']})" for r in rows] - return f"Found {len(rows)} timeline(s):\n" + "\n".join(lines) + return "No timeline events found." + lines = [ + f"- {r['title']} (date: {r['date']}, type: {r.get('type')}, " + f"completed: {bool(r.get('isCompleted'))}, completedAt: {r.get('completedAt')}, id: {r['id']})" + for r in rows + ] + return f"Found {len(rows)} timeline event(s):\n" + "\n".join(lines) + + +@tool +async def count_timelines( + project_id: str = "", + type: str = "", + is_completed: int = -1, + is_ai_suggested: int = -1, + date_from: int = -1, + date_to: int = -1, + created_at_from: int = -1, + created_at_to: int = -1, + completed_at_from: int = -1, + completed_at_to: int = -1, +) -> str: + """Count timeline events matching the given filters without returning rows. + + Use this instead of list_timelines for "how many" questions — it is much cheaper. + Same filter parameters as list_timelines (no limit/offset/order_by needed). + + date_from / date_to: ms epoch range for the event date. Use -1 to omit. + completed_at_from / completed_at_to: ms epoch range for completedAt. Use -1 to omit. + Tip — for natural-language windows take date_from / date_to from the DATE CONTEXT block; + do not compute boundaries from the current UTC instant. + """ + normalized_project_id = project_id if (project_id and _is_uuid(project_id)) else "" + filters: dict[str, Any] = {"projectId": normalized_project_id or None} + if type: + filters["type"] = type + if is_completed != -1: + filters["isCompleted"] = is_completed + if is_ai_suggested != -1: + filters["isAiSuggested"] = is_ai_suggested + if date_from != -1: + filters["dateFrom"] = date_from + if date_to != -1: + filters["dateTo"] = date_to + if created_at_from != -1: + filters["createdAtFrom"] = created_at_from + if created_at_to != -1: + filters["createdAtTo"] = created_at_to + if completed_at_from != -1: + filters["completedAtFrom"] = completed_at_from + if completed_at_to != -1: + filters["completedAtTo"] = completed_at_to + + result = await execute_on_client(action="count", table="timelines", filters=filters) + return f"Timeline event count: {result.get('count', 0)}" @tool @@ -40,13 +148,19 @@ async def create_timeline( project_id: str, title: str, date: int, + type: str = "milestone", + is_completed: int = 0, is_ai_suggested: int = 0, ) -> str: - """Create a project timeline (milestone). + """Create a project timeline event. project_id: REQUIRED UUID of the parent project - title: descriptive name for the milestone - date: Unix timestamp in milliseconds + title: descriptive name for the event + date: Unix timestamp in milliseconds for the event date + type: milestone (default) | checkpoint | activity + is_completed: 1 if already completed, 0 if not (default 0) is_ai_suggested: 1 if proactively suggested, 0 if user-requested + + completedAt is set automatically when is_completed is 1. """ result = await execute_on_client( action="insert", @@ -55,11 +169,13 @@ async def create_timeline( "projectId": project_id, "title": title, "date": date, + "type": type, + "isCompleted": is_completed, "isAiSuggested": is_ai_suggested, }, ) row = result["row"] - return f"Timeline created: '{row['title']}' (id: {row['id']}, date: {row['date']})" + return f"Timeline event created: '{row['title']}' (id: {row['id']}, date: {row['date']}, type: {row.get('type')})" @tool @@ -67,38 +183,47 @@ async def update_timeline( timeline_id: str, title: str = "", date: int = -1, + is_completed: int = -1, ) -> str: - """Update a timeline. Only pass fields that should change. - timeline_id: UUID of the timeline (required) + """Update a timeline event. Only pass fields that should change. + timeline_id: UUID of the event (required) date: -1 means unchanged; any other value sets the new date (ms timestamp) + is_completed: 0 = mark incomplete, 1 = mark complete, -1 = unchanged + + completedAt is managed automatically: + - setting is_completed to 1 records the current timestamp + - setting is_completed to 0 clears completedAt """ updates: dict[str, Any] = {} if title: updates["title"] = title if date != -1: updates["date"] = date + if is_completed != -1: + updates["isCompleted"] = is_completed result = await execute_on_client( action="update", table="timelines", data={"id": timeline_id, "updates": updates}, ) row = result["row"] - return f"Timeline updated: '{row['title']}' (id: {row['id']})" + return f"Timeline event updated: '{row['title']}' (id: {row['id']})" @tool async def delete_timeline(timeline_id: str) -> str: - """Delete a timeline permanently by its UUID.""" + """Delete a timeline event permanently by its UUID.""" await execute_on_client(action="delete", table="timelines", data={"id": timeline_id}) - return f"Timeline {timeline_id} deleted." + return f"Timeline event {timeline_id} deleted." @tool -async def list_timelines_today(user_timezone: str = "UTC") -> str: - """List all timeline events (milestones) whose date falls on today. +async def list_timelines_today(user_timezone: str = "UTC", include_completed: bool = True) -> str: + """List all timeline events whose date falls on today. user_timezone: IANA timezone name (e.g. 'Europe/Rome', 'America/New_York'). Always pass the user's timezone so 'today' is computed in their local time. + include_completed: set False to exclude already-completed events (default True). """ try: from zoneinfo import ZoneInfo @@ -109,20 +234,27 @@ async def list_timelines_today(user_timezone: str = "UTC") -> str: start_dt = datetime(now_local.year, now_local.month, now_local.day, tzinfo=tz) start_ms = int(start_dt.timestamp() * 1000) end_ms = start_ms + 86_400_000 - 1 + filters: dict[str, Any] = {"dateFrom": start_ms, "dateTo": end_ms} + if not include_completed: + filters["isCompleted"] = 0 result = await execute_on_client( action="select", table="timelines", - filters={"dateFrom": start_ms, "dateTo": end_ms}, + filters=filters, ) rows = result.get("rows", []) if not rows: return "No timeline events today." - lines = [f"- {r['title']} (date: {r['date']}, id: {r['id']})" for r in rows] + lines = [ + f"- {r['title']} (date: {r['date']}, type: {r.get('type')}, completed: {bool(r.get('isCompleted'))}, id: {r['id']})" + for r in rows + ] return f"Timeline events today ({len(rows)}):\n" + "\n".join(lines) TIMELINE_TOOLS: list[Any] = [ list_timelines, + count_timelines, list_timelines_today, create_timeline, update_timeline, @@ -131,5 +263,6 @@ TIMELINE_TOOLS: list[Any] = [ TIMELINE_READ_TOOLS: list[Any] = [ list_timelines, + count_timelines, list_timelines_today, ] diff --git a/app/core/deep_agent.py b/app/core/deep_agent.py index a885ea1..252cb72 100644 --- a/app/core/deep_agent.py +++ b/app/core/deep_agent.py @@ -16,7 +16,7 @@ from app.agents.note_agent import NOTE_TOOLS from app.agents.project_agent import PROJECT_TOOLS from app.agents.task_agent import TASK_TOOLS from app.agents.timeline_agent import TIMELINE_TOOLS -from app.core.langfuse_client import extract_usage, get_langfuse, get_prompt_or_fallback, langfuse_context +from app.core.langfuse_client import compile_prompt, extract_usage, get_langfuse, get_prompt_or_fallback, langfuse_context from app.core.llm import get_agent_llm, model_for_agent from app.core.memory_middleware import MemoryMiddleware from app.core.ws_context import clear_tool_result_collector, execute_on_client, set_tool_result_collector @@ -56,36 +56,89 @@ def _language_instruction(context: dict[str, Any]) -> str: ) def _datetime_context_injection(context: dict[str, Any]) -> str: - """Build a system-prompt paragraph with current timestamp, user timezone, and format prefs.""" + """Build a comprehensive DATE CONTEXT block with pre-computed ms-epoch boundaries for common ranges.""" fp = context.get("format_prefs") if not fp or not isinstance(fp, dict): return "" try: from zoneinfo import ZoneInfo - from datetime import datetime as _dt, timezone as _utc + from datetime import datetime as _dt, timezone as _utc, timedelta as _td + tz_name: str = str(fp.get("timezone") or "UTC") now_iso: str = str(fp.get("now_iso") or "") date_fmt: str = str(fp.get("date_format") or "dd/MM/yyyy") time_fmt: str = str(fp.get("time_format") or "24h") + tz = ZoneInfo(tz_name) if now_iso: now_utc = _dt.fromisoformat(now_iso.replace("Z", "+00:00")) else: now_utc = _dt.now(_utc.utc) - tz = ZoneInfo(tz_name) + now_ms = int(now_utc.timestamp() * 1000) now_local = now_utc.astimezone(tz) - today_local = now_local.strftime("%Y-%m-%d") - weekday_local = now_local.strftime("%A") + now_local_str = now_local.strftime("%Y-%m-%d %H:%M") + weekday_str = now_local.strftime("%A") + y, m, d = now_local.year, now_local.month, now_local.day + + def _day(year: int, month: int, day: int) -> tuple[int, int]: + s = _dt(year, month, day, tzinfo=tz) + e = s + _td(days=1) + return int(s.timestamp() * 1000), int(e.timestamp() * 1000) - 1 + + def _between(start: "_dt", end_excl: "_dt") -> tuple[int, int]: + return int(start.timestamp() * 1000), int(end_excl.timestamp() * 1000) - 1 + + today_s, today_e = _day(y, m, d) + yd = now_local - _td(days=1) + yesterday_s, yesterday_e = _day(yd.year, yd.month, yd.day) + tm = now_local + _td(days=1) + tomorrow_s, tomorrow_e = _day(tm.year, tm.month, tm.day) + + # ISO week (Mon–Sun) + monday = _dt(y, m, d, tzinfo=tz) - _td(days=now_local.weekday()) + last_monday = monday - _td(weeks=1) + next_monday = monday + _td(weeks=1) + this_week_s, this_week_e = _between(monday, next_monday) + last_week_s, last_week_e = _between(last_monday, monday) + next_week_s, next_week_e = _between(next_monday, next_monday + _td(weeks=1)) + + # Calendar months + this_m_start = _dt(y, m, 1, tzinfo=tz) + next_m_start = _dt(y + (m // 12), m % 12 + 1, 1, tzinfo=tz) + last_m_start = _dt(y - (1 if m == 1 else 0), 12 if m == 1 else m - 1, 1, tzinfo=tz) + next2_m = next_m_start.month % 12 + 1 + next2_y = next_m_start.year + (1 if next_m_start.month == 12 else 0) + next2_m_start = _dt(next2_y, next2_m, 1, tzinfo=tz) + this_month_s, this_month_e = _between(this_m_start, next_m_start) + last_month_s, last_month_e = _between(last_m_start, this_m_start) + next_month_s, next_month_e = _between(next_m_start, next2_m_start) + + # Calendar years + this_yr_s, this_yr_e = _between(_dt(y, 1, 1, tzinfo=tz), _dt(y + 1, 1, 1, tzinfo=tz)) + last_yr_s, last_yr_e = _between(_dt(y - 1, 1, 1, tzinfo=tz), _dt(y, 1, 1, tzinfo=tz)) + + sunday = monday + _td(days=6) + last_sunday = last_monday + _td(days=6) + next_sunday = next_monday + _td(days=6) return ( - f"\n\nCurrent instant: {now_utc.isoformat()}. " - f"User local date: {today_local} ({weekday_local}). " - f"Timezone: {tz_name}. " - f"Display preference: dateFormat={date_fmt}, timeFormat={time_fmt}. " - f"When calling tools with date fields, always pass integer Unix milliseconds (ms since epoch, UTC). " - f"When calling list_tasks_due_today or list_timelines_today, always pass user_timezone=\"{tz_name}\". " - f"When presenting dates to the user in chat, format using the display preference above." + f"\n\nDATE CONTEXT (timezone: {tz_name}, dateFormat: {date_fmt}, timeFormat: {time_fmt})\n" + f"now_local: {now_local_str} ({weekday_str})\n" + f"now_ms: {now_ms}\n\n" + f"today [{today_s}, {today_e}] {y:04d}-{m:02d}-{d:02d}\n" + f"tomorrow [{tomorrow_s}, {tomorrow_e}] {tm.strftime('%Y-%m-%d')}\n" + f"yesterday [{yesterday_s}, {yesterday_e}] {yd.strftime('%Y-%m-%d')}\n" + f"this_week [{this_week_s}, {this_week_e}] {monday.strftime('%Y-%m-%d')} → {sunday.strftime('%Y-%m-%d')} (Mon–Sun)\n" + f"last_week [{last_week_s}, {last_week_e}] {last_monday.strftime('%Y-%m-%d')} → {last_sunday.strftime('%Y-%m-%d')}\n" + f"next_week [{next_week_s}, {next_week_e}] {next_monday.strftime('%Y-%m-%d')} → {next_sunday.strftime('%Y-%m-%d')}\n" + f"this_month [{this_month_s}, {this_month_e}] {y:04d}-{m:02d}\n" + f"last_month [{last_month_s}, {last_month_e}] {last_m_start.strftime('%Y-%m')}\n" + f"next_month [{next_month_s}, {next_month_e}] {next_m_start.strftime('%Y-%m')}\n" + f"this_year [{this_yr_s}, {this_yr_e}] {y:04d}\n" + f"last_year [{last_yr_s}, {last_yr_e}] {y - 1:04d}\n\n" + f"When calling list_tasks_due_today or list_timelines_today, always pass user_timezone=\"{tz_name}\".\n" + f"When presenting dates, format using dateFormat={date_fmt} and timeFormat={time_fmt}." ) except Exception: return "" @@ -123,27 +176,75 @@ def _relational_memory_injection(context: dict[str, Any]) -> str: return section -_HOME_SYSTEM_PROMPT = ( - "You are the home assistant with direct access to all tools: tasks, projects, notes, timelines, and memory tools. " - "Always use tools for factual data retrieval before answering. " - "When the user asks to remember, forget, or update what you know about them, use memory tools. " - "If context.context.resolved_project_id exists, use it as project_id for scoped list calls. " - "Return markdown and use tags when relevant: [ids], [ids], " - "[ids], [ids], {json}. " - "When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. " - "Never put titles, priorities, or dates on the same line as or tags. " - "For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. " - "For upcoming tasks, after tag lines add a short recommendation based on due date and priority." -) +def _request_context_block(context: dict[str, Any]) -> str: + """Return a small block with per-request scope and resolved project context.""" + parts: list[str] = [] + scope = context.get("scope") + if scope and isinstance(scope, dict): + parts.append(f"scope: {json.dumps(scope, ensure_ascii=True)}") + resolved = context.get("resolved_project_id") + if resolved and isinstance(resolved, str): + parts.append(f"resolved_project_id: {resolved}") + return "\n".join(parts) -_FLOATING_SYSTEM_PROMPT = ( - "You are the floating assistant with direct access to all tools: tasks, projects, notes, timelines, and memory tools. " - "Stay focused on the floating scope in context.scope and answer concisely. " - "Return plain text only. Do not output XML/HTML-like tags such as , , , , or any bracketed id tag wrappers. " - "Always use tools for factual data retrieval before answering. " - "When the user asks to remember, forget, or update what you know about them, use memory tools. " - "If context.context.resolved_project_id exists, use it as project_id for scoped list calls. " -) + +_HOME_SYSTEM_PROMPT = """\ +You are the home assistant for adiuvAI with direct access to all tools: tasks, projects, notes, timelines, and memory tools. +Always use tools for factual data retrieval before answering. +When the user asks to remember, forget, or update what you know about them, use memory tools. + +# Output format +Return markdown and use tags when relevant: [ids], [ids], [ids], [ids], {{json}}. +When listing tasks or timelines, each id tag must be on its own line with no prefix/suffix text. +Never put titles, priorities, or dates on the same line as or tags. +For questions about upcoming timelines (e.g. 'prossimi eventi'), include only future items in the current month unless the user asks a different range. +For upcoming tasks, after tag lines add a short recommendation based on due date and priority. + +# Date filtering +{date_context} + +When filtering tasks/timelines/notes by date, take dueDateFrom / dueDateTo (ms epoch UTC) verbatim from the DATE CONTEXT boundary table above. Do NOT compute boundaries from now_ms yourself. +For specific dates not listed, compute local-midnight in the user timezone and convert to UTC ms. +For "today" / "tomorrow" queries, prefer list_tasks_due_today / list_timelines_today with user_timezone from DATE CONTEXT. + +# Language +{language_instruction} + +# Known people & projects +{relational_memory} + +# Behavioral hints +{proactive_hints} + +# Request context +{request_context}\ +""" + +_FLOATING_SYSTEM_PROMPT = """\ +You are the floating assistant for adiuvAI with direct access to all tools: tasks, projects, notes, timelines, and memory tools. +Stay focused on the floating scope and answer concisely. +Return plain text only. Do not output XML/HTML-like tags such as , , , , or any bracketed id tag wrappers. +Always use tools for factual data retrieval before answering. +When the user asks to remember, forget, or update what you know about them, use memory tools. + +# Date filtering +{date_context} + +When filtering by date, take dueDateFrom / dueDateTo (ms epoch UTC) verbatim from the DATE CONTEXT boundary table above. Do NOT compute boundaries from now_ms yourself. +For specific dates not listed, compute local-midnight in the user timezone and convert to UTC ms. + +# Language +{language_instruction} + +# Known people & projects +{relational_memory} + +# Behavioral hints +{proactive_hints} + +# Request context +{request_context}\ +""" _FLOATING_DOMAIN_CLASSIFIER_PROMPT = ( "You are a strict domain classifier for websocket floating requests. " @@ -253,10 +354,18 @@ def _session_id_from_context(context: dict[str, Any]) -> str | None: return None -def _context_for_model(context: dict[str, Any]) -> dict[str, Any]: - sanitized = dict(context) - sanitized.pop("_debug", None) - return sanitized +def _build_system_prompt(name: str, fallback: str, context: dict[str, Any]) -> tuple[str, Any]: + """Fetch Langfuse template and compile all per-request slots into one system prompt.""" + template, prompt_obj = get_prompt_or_fallback(name, fallback) + text = compile_prompt( + template, prompt_obj, + date_context=_datetime_context_injection(context).strip(), + language_instruction=_language_instruction(context).strip(), + relational_memory=_relational_memory_injection(context).strip(), + proactive_hints=_proactive_hints_injection(context).strip(), + request_context=_request_context_block(context), + ) + return text, prompt_obj _TAG_LINE_RE = re.compile(r"<(task|timeline)>\[[^\]]+\]") @@ -713,17 +822,11 @@ async def _run_single_agent( lf = get_langfuse() llm = get_agent_llm(agent_name) tools = _all_tools_for_user(user_id, trace_id) - model_context = _context_for_model(context) logger.info("deep_agent: run_single_agent_start trace=%s user=%s", trace_id or "-", user_id) llm_with_tools = llm.bind_tools(tools) messages: list[Any] = [ SystemMessage(content=system_prompt), - HumanMessage( - content=( - f"User message:\n{message}\n\n" - f"Context:\n{json.dumps({'context': model_context}, ensure_ascii=True)[:3500]}" - ) - ), + HumanMessage(content=message), ] tool_calls_count = 0 @@ -843,17 +946,11 @@ async def _run_single_agent_stream( llm = get_agent_llm(agent_name) if tools is None: tools = _all_tools_for_user(user_id, trace_id) - model_context = _context_for_model(context) logger.info("deep_agent: run_single_agent_stream_start trace=%s user=%s", trace_id or "-", user_id) llm_with_tools = llm.bind_tools(tools) messages: list[Any] = [ SystemMessage(content=system_prompt), - HumanMessage( - content=( - f"User message:\n{message}\n\n" - f"Context:\n{json.dumps({'context': model_context}, ensure_ascii=True)[:3500]}" - ) - ), + HumanMessage(content=message), ] tool_calls_count = 0 @@ -969,13 +1066,7 @@ async def _run_single_agent_stream( async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str: prepared_context = await _prepare_context(message, context) - system_prompt, langfuse_prompt = get_prompt_or_fallback( - "home_system", _HOME_SYSTEM_PROMPT - ) - system_prompt += _relational_memory_injection(context) - system_prompt += _proactive_hints_injection(context) - system_prompt += _datetime_context_injection(context) - system_prompt += _language_instruction(context) + system_prompt, langfuse_prompt = _build_system_prompt("home_system", _HOME_SYSTEM_PROMPT, prepared_context) response = await _run_single_agent( user_id=user_id, system_prompt=system_prompt, @@ -990,13 +1081,7 @@ async def run_home(user_id: str, message: str, context: dict[str, Any]) -> str: async def run_floating(user_id: str, message: str, context: dict[str, Any]) -> tuple[str, dict[str, str | None]]: prepared_context = await _prepare_context(message, context) domain = await _infer_floating_domain(message, prepared_context) - system_prompt, langfuse_prompt = get_prompt_or_fallback( - "floating_system", _FLOATING_SYSTEM_PROMPT - ) - system_prompt += _relational_memory_injection(context) - system_prompt += _proactive_hints_injection(context) - system_prompt += _datetime_context_injection(context) - system_prompt += _language_instruction(context) + system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context) response = await _run_single_agent( user_id=user_id, system_prompt=system_prompt, @@ -1017,13 +1102,7 @@ async def run_home_stream( context: dict[str, Any], ) -> AsyncGenerator[tuple[str, Any], None]: prepared_context = await _prepare_context(message, context) - system_prompt, langfuse_prompt = get_prompt_or_fallback( - "home_system", _HOME_SYSTEM_PROMPT - ) - system_prompt += _relational_memory_injection(context) - system_prompt += _proactive_hints_injection(context) - system_prompt += _datetime_context_injection(context) - system_prompt += _language_instruction(context) + system_prompt, langfuse_prompt = _build_system_prompt("home_system", _HOME_SYSTEM_PROMPT, prepared_context) text_chunks: list[str] = [] async for event in _run_single_agent_stream( user_id=user_id, @@ -1053,13 +1132,7 @@ async def run_floating_stream( domain = await _infer_floating_domain(message, prepared_context) yield "floating_domain", domain - system_prompt, langfuse_prompt = get_prompt_or_fallback( - "floating_system", _FLOATING_SYSTEM_PROMPT - ) - system_prompt += _relational_memory_injection(context) - system_prompt += _proactive_hints_injection(context) - system_prompt += _datetime_context_injection(context) - system_prompt += _language_instruction(context) + system_prompt, langfuse_prompt = _build_system_prompt("floating_system", _FLOATING_SYSTEM_PROMPT, prepared_context) sanitizer = _FloatingStreamSanitizer() emitted_sanitized = False raw_chunks: list[str] = [] diff --git a/tests/test_deep_agent.py b/tests/test_deep_agent.py index 5fce456..231ce0d 100644 --- a/tests/test_deep_agent.py +++ b/tests/test_deep_agent.py @@ -10,8 +10,11 @@ import pytest from langchain_core.messages import AIMessage, ToolMessage from app.core.deep_agent import ( + _build_system_prompt, + _datetime_context_injection, _infer_floating_domain, _normalize_tagged_list_lines, + _request_context_block, run_floating, run_floating_stream, run_home, @@ -91,8 +94,12 @@ async def test_run_floating_stream_emits_domain_then_tokens_with_mocked_tool_res "floating_domain", {"type": "timeline", "id": "tl-1", "section": None}, ) - assert ("token", "stream-") in events - assert ("token", "ok") in events + # _run_single_agent_stream uses ainvoke (not astream); the final token is + # the second LLM response which echoes the tool result. + token_events = [e for e in events if e[0] == "token"] + assert token_events, "Expected at least one token event" + combined = "".join(str(e[1]) for e in token_events) + assert "Mock Task" in combined @pytest.mark.asyncio @@ -286,3 +293,213 @@ async def test_run_floating_stream_returns_fallback_when_sanitization_would_empt events.append(event) assert ("token", "No results found.") in events + + +# ── _datetime_context_injection ──────────────────────────────────────────────── + +def _fp(tz: str, now_iso: str) -> dict: + return {"timezone": tz, "now_iso": now_iso, "date_format": "dd/MM/yyyy", "time_format": "24h"} + + +def _parse_ms(block: str, key: str) -> tuple[int, int]: + """Extract [start, end] from a 'key [start, end]' line in the DATE CONTEXT block.""" + import re + m = re.search(rf"^{key}\s+\[(\d+),\s*(\d+)\]", block, re.MULTILINE) + assert m, f"Key '{key}' not found in block:\n{block}" + return int(m.group(1)), int(m.group(2)) + + +def test_datetime_context_injection_europe_rome_late_evening(): + """22:16 CEST on 2026-04-26 — 'tomorrow' must be 2026-04-27 00:00→23:59:59.999 CEST.""" + from zoneinfo import ZoneInfo + from datetime import datetime, timezone + + block = _datetime_context_injection({"format_prefs": _fp("Europe/Rome", "2026-04-26T20:16:02.155Z")}) + assert "DATE CONTEXT" in block + assert "Europe/Rome" in block + + tz = ZoneInfo("Europe/Rome") + today_start = int(datetime(2026, 4, 26, tzinfo=tz).timestamp() * 1000) + today_end = int(datetime(2026, 4, 27, tzinfo=tz).timestamp() * 1000) - 1 + tomorrow_start = today_end + 1 + tomorrow_end = int(datetime(2026, 4, 28, tzinfo=tz).timestamp() * 1000) - 1 + + t_s, t_e = _parse_ms(block, "today") + assert t_s == today_start + assert t_e == today_end + + tm_s, tm_e = _parse_ms(block, "tomorrow") + assert tm_s == tomorrow_start + assert tm_e == tomorrow_end + + # Sanity: window is exactly 86 400 000 ms (1 day, CEST has no DST jump on this date) + assert today_end - today_start + 1 == 86_400_000 + assert tomorrow_end - tomorrow_start + 1 == 86_400_000 + + +def test_datetime_context_injection_utc(): + """UTC timezone: boundaries are clean UTC midnights.""" + from datetime import datetime, timezone + + block = _datetime_context_injection({"format_prefs": _fp("UTC", "2026-01-15T10:00:00Z")}) + t_s, t_e = _parse_ms(block, "today") + expected_start = int(datetime(2026, 1, 15, tzinfo=timezone.utc).timestamp() * 1000) + assert t_s == expected_start + assert t_e == expected_start + 86_400_000 - 1 + + +def test_datetime_context_injection_dst_spring_forward(): + """Europe/Rome DST spring-forward 2026-03-29: that day is 23h, not 24h.""" + from zoneinfo import ZoneInfo + from datetime import datetime + + block = _datetime_context_injection({"format_prefs": _fp("Europe/Rome", "2026-03-29T08:00:00Z")}) + tz = ZoneInfo("Europe/Rome") + day_start = int(datetime(2026, 3, 29, tzinfo=tz).timestamp() * 1000) + day_end = int(datetime(2026, 3, 30, tzinfo=tz).timestamp() * 1000) - 1 + + t_s, t_e = _parse_ms(block, "today") + assert t_s == day_start + assert t_e == day_end + assert t_e - t_s + 1 == 23 * 3_600_000 # 23-hour day + + +def test_datetime_context_injection_dst_fall_back(): + """Europe/Rome DST fall-back 2026-10-25: that day is 25h.""" + from zoneinfo import ZoneInfo + from datetime import datetime + + block = _datetime_context_injection({"format_prefs": _fp("Europe/Rome", "2026-10-25T08:00:00Z")}) + tz = ZoneInfo("Europe/Rome") + day_start = int(datetime(2026, 10, 25, tzinfo=tz).timestamp() * 1000) + day_end = int(datetime(2026, 10, 26, tzinfo=tz).timestamp() * 1000) - 1 + + t_s, t_e = _parse_ms(block, "today") + assert t_s == day_start + assert t_e == day_end + assert t_e - t_s + 1 == 25 * 3_600_000 # 25-hour day + + +def test_datetime_context_injection_year_boundary(): + """Dec 31 → Jan 1: last_year, this_year, next_month cross year boundary correctly.""" + from zoneinfo import ZoneInfo + from datetime import datetime + + block = _datetime_context_injection({"format_prefs": _fp("UTC", "2026-12-31T23:00:00Z")}) + tz = ZoneInfo("UTC") + + yr_s, yr_e = _parse_ms(block, "this_year") + assert yr_s == int(datetime(2026, 1, 1, tzinfo=tz).timestamp() * 1000) + assert yr_e == int(datetime(2027, 1, 1, tzinfo=tz).timestamp() * 1000) - 1 + + ly_s, ly_e = _parse_ms(block, "last_year") + assert ly_s == int(datetime(2025, 1, 1, tzinfo=tz).timestamp() * 1000) + assert ly_e == yr_s - 1 + + nm_s, _ = _parse_ms(block, "next_month") + assert nm_s == int(datetime(2027, 1, 1, tzinfo=tz).timestamp() * 1000) + + +def test_datetime_context_injection_missing_format_prefs(): + assert _datetime_context_injection({}) == "" + assert _datetime_context_injection({"format_prefs": None}) == "" + assert _datetime_context_injection({"format_prefs": "bad"}) == "" + + +# ── _request_context_block ───────────────────────────────────────────────────── + +def test_request_context_block_scope_and_project(): + ctx = {"scope": {"type": "task", "id": "t-1"}, "resolved_project_id": "proj-uuid"} + block = _request_context_block(ctx) + assert "scope" in block + assert "resolved_project_id: proj-uuid" in block + + +def test_request_context_block_empty(): + assert _request_context_block({}) == "" + assert _request_context_block({"scope": None}) == "" + + +# ── _build_system_prompt ─────────────────────────────────────────────────────── + +def test_build_system_prompt_substitutes_all_slots(monkeypatch): + """All five slots must appear in the compiled output; no raw placeholder remains.""" + # Patch get_prompt_or_fallback to return None prompt_obj so we use fallback .format() path + import app.core.deep_agent as da + monkeypatch.setattr(da, "get_prompt_or_fallback", lambda name, fallback: (fallback, None)) + + ctx = { + "format_prefs": _fp("Europe/Rome", "2026-04-26T20:16:02.155Z"), + "core_memory": {"language": "it"}, + "relational_memory": ["Alice — client"], + "proactive_hints": ["User prefers morning meetings"], + "scope": {"type": "task"}, + "resolved_project_id": "proj-1", + } + from app.core.deep_agent import _HOME_SYSTEM_PROMPT + text, _ = _build_system_prompt("home_system", _HOME_SYSTEM_PROMPT, ctx) + + # No unresolved placeholders + assert "{date_context}" not in text + assert "{language_instruction}" not in text + assert "{relational_memory}" not in text + assert "{proactive_hints}" not in text + assert "{request_context}" not in text + + # Content was injected + assert "DATE CONTEXT" in text + assert "Italian" in text + assert "Alice" in text + assert "morning meetings" in text + assert "proj-1" in text + + +def test_build_system_prompt_empty_format_prefs(monkeypatch): + """Missing format_prefs must not raise — date_context slot renders empty string.""" + import app.core.deep_agent as da + monkeypatch.setattr(da, "get_prompt_or_fallback", lambda name, fallback: (fallback, None)) + + from app.core.deep_agent import _HOME_SYSTEM_PROMPT + text, _ = _build_system_prompt("home_system", _HOME_SYSTEM_PROMPT, {}) + # Prompt renders without error; date section is empty but structure holds + assert "# Date filtering" in text + assert "{date_context}" not in text + + +def test_human_message_is_bare_message(monkeypatch): + """After the refactor HumanMessage content must equal the raw user message exactly.""" + import app.core.deep_agent as da + from langchain_core.messages import HumanMessage as LCHumanMessage + + captured: list[list] = [] + + class _CaptureLLM: + def bind_tools(self, _): + return self + + async def ainvoke(self, messages): + captured.append(list(messages)) + return AIMessage(content="risposta") + + monkeypatch.setattr(da, "get_prompt_or_fallback", lambda n, f: (f, None)) + monkeypatch.setattr(da, "get_agent_llm", lambda _: _CaptureLLM()) + monkeypatch.setattr(da, "_all_tools_for_user", lambda *_: []) + monkeypatch.setattr(da, "get_langfuse", lambda: None) + monkeypatch.setattr(da, "set_tool_result_collector", lambda _: None) + monkeypatch.setattr(da, "clear_tool_result_collector", lambda: None) + + import asyncio + + async def _run(): + chunks = [] + ctx = {"format_prefs": _fp("UTC", "2026-04-27T10:00:00Z")} + async for ev in da.run_home_stream("u1", "Cosa devo fare domani?", ctx): + chunks.append(ev) + + asyncio.get_event_loop().run_until_complete(_run()) + + assert captured, "LLM was never called" + messages = captured[0] + human = next(m for m in messages if isinstance(m, LCHumanMessage)) + assert human.content == "Cosa devo fare domani?" + assert "Context:" not in human.content