From 5a03bd1cfb80b4abf7e521ce19ee894be36a6a97 Mon Sep 17 00:00:00 2001
From: Roberto Musso <roberto.musso@hpe.com>
Date: Tue, 17 Mar 2026 23:52:54 +0100
Subject: [PATCH] Clean up agent catalog and improve extraction agent prompts

- Remove unused config_schema from AgentCatalogItem (schema + route)
- Fix agent_setup system prompt: add extraction agent base behaviour
  context so journey LLM knows what is already handled and focuses on
  field mappings only; remove redundant data-types question (already
  known from user selection); derive data types list dynamically
- Rewrite processing base prompt to use actual tool names
  (list_tasks, update_task, add_task_comment, list_notes, update_note,
  list_timelines, update_timeline, list_all_projects, create_project)
  and enforce update-first strategy before falling back to creation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 app/api/routes/agent_setup.py | 15 ++++++++---
 app/api/routes/agents.py      | 18 --------------
 app/core/agent_runner.py      | 47 +++++++++++++++++++++++++++--------
 app/schemas.py                |  1 -
 4 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/app/api/routes/agent_setup.py b/app/api/routes/agent_setup.py
index d5bae95..a551f8a 100644
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -89,6 +89,14 @@ Your job is to understand exactly what data the user wants to extract from their
 local directory and produce a detailed prompt_template that a separate AI will use
 as its instruction set.
 
+The extraction agent already has this base behaviour built in:
+  - Reads each file using file-system tools.
+  - Updates or creates records (tasks, notes, timelines, projects) via CRUD tools.
+  - Sets isAiSuggested=1 and isApproved=0 on every record it creates.
+  - Only extracts data explicitly present in the files — it never invents information.
+The user's custom prompt is appended AFTER this base behaviour, so focus on
+what to look for and how to map it — not on the general extraction mechanics.
+
 You have access to file-system tools to explore the user's directory:
 - list_directory: to see folder structure
 - read_file_content: to peek at file contents
@@ -100,10 +108,9 @@ Target data types: {data_types}
 Start by exploring the directory to understand its structure.  Then ask concise,
 focused questions one at a time.  Cover these topics (not necessarily in this order):
   1. The type and format of the source content (confirmed by your exploration).
-  2. Which data types to extract: tasks, notes, timelines, and/or projects.
-  3. How fields should be mapped (e.g. filename → task title).
-  4. Priority or status rules (e.g. "urgent" keyword → high priority).
-  5. Any special handling, date extraction, or exclusions.
+  2. How fields should be mapped (e.g. filename → task title).
+  3. Priority or status rules (e.g. "urgent" keyword → high priority).
+  4. Any special handling, date extraction, or exclusions.
 
 After 3-5 questions (when you have enough information), output the final prompt_template
 between these exact markers on their own lines:
diff --git a/app/api/routes/agents.py b/app/api/routes/agents.py
index 4b016ed..65844de 100644
--- a/app/api/routes/agents.py
+++ b/app/api/routes/agents.py
@@ -121,24 +121,6 @@ async def get_agent_catalog(
             type="local_directory",
             name="Local Directory Monitor",
             description="Watches local directories, extracts data from files using AI",
-            config_schema={
-                "directory": {"type": "string", "required": True},
-                "what_to_extract": {
-                    "type": "array",
-                    "items": ["task", "note", "timeline", "project"],
-                    "required": True,
-                },
-                "actions_by_type": {
-                    "type": "object",
-                    "example": {
-                        "task": ["add", "update"],
-                        "note": ["add", "update"],
-                    },
-                    "required": False,
-                },
-                "batch_interval": {"type": "string", "required": True},
-                "custom_agent_prompt": {"type": "string", "required": True},
-            },
         ),
         AgentCatalogItem(
             type="gmail",
diff --git a/app/core/agent_runner.py b/app/core/agent_runner.py
index c4c420b..aaa8aef 100644
--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -107,18 +107,42 @@ Return ONLY the JSON object as your final message.
 
 _PROCESSING_BASE_PROMPT = """\
 You are a data extraction and management assistant for a freelance project
-management tool.  You have access to tools for reading files and performing
-CRUD operations on the user's workspace.
+management tool.
+
+Available tools:
+  Filesystem : read_file_content, list_directory, get_file_metadata
+  Tasks      : list_tasks, create_task, update_task, add_task_comment
+  Notes      : list_notes, get_note, create_note, update_note
+  Timelines  : list_timelines, create_timeline, update_timeline
+  Projects   : list_all_projects, get_project, create_project, update_project
 
 Your task:
-1. Read the full content of each file listed below using read_file_content.
-2. Based on the content and the user's instructions, create the appropriate
-   records using the CRUD tools available to you (create_task, create_note,
-   create_timeline, create_project, etc.).
-3. ONLY create records of these entity types: {data_types}.
-4. For every record you create, set isAiSuggested=1 and isApproved=0.
-5. Do NOT invent data.  Only extract what is clearly present in the files.
-6. If a file contains no relevant data for the target entity types, skip it.
+1. Read the full content of each file below using read_file_content.
+2. For each piece of information found, ALWAYS try to match and update an
+   existing record before creating a new one.
+3. ONLY act on these entity types: {data_types}.
+4. Do NOT invent data. Only extract what is clearly present in the files.
+5. If a file contains no relevant data for the target entity types, skip it.
+
+Update-first rules (apply in this order):
+  Tasks:
+    - Call list_tasks to find a match by title or context.
+    - If found: call add_task_comment (author "Adiuva"), update_task to set
+      assignees, state (ToDo / In Progress / Completed), or other fields.
+    - If NOT found: call create_task with isAiSuggested=1, isApproved=0.
+  Timelines:
+    - Call list_timelines to find a match by title or date.
+    - If found: call update_timeline to edit fields or mark it complete.
+    - If NOT found: call create_timeline with isAiSuggested=1, isApproved=0.
+  Notes:
+    - Call list_notes to find a match by title or topic, then get_note to
+      read its current content.
+    - If found: call update_note with the merged content.
+    - If NOT found: call create_note with isAiSuggested=1, isApproved=0.
+  Projects:
+    - Call list_all_projects to check for a match first.
+    - Only call create_project if the information is clearly significant and
+      no existing project matches. Set isAiSuggested=1, isApproved=0.
 
 {project_context}
 
@@ -127,7 +151,8 @@ Files to process:
 
 {custom_prompt_section}
 
-After processing all files, respond with a brief summary of what you created.
+After processing all files, respond with a brief summary of what you updated
+and what you created.
 """
 
 
diff --git a/app/schemas.py b/app/schemas.py
index 73eb2ee..e4399ec 100644
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -279,7 +279,6 @@ class AgentCatalogItem(BaseModel):
     type: str
     name: str
     description: str
-    config_schema: dict[str, Any] = Field(default_factory=dict)
 
 
 class AgentCreationCheckRequest(BaseModel):