From e7cdce82875ba44da250377c3959a8d2c567860d Mon Sep 17 00:00:00 2001
From: Roberto Musso <roberto.musso@hpe.com>
Date: Fri, 20 Mar 2026 23:45:29 +0100
Subject: [PATCH] Improve Step 1 project matching and Step 2 update-first
 enforcement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rewrite _STEP1_SYSTEM_PROMPT: lower matching threshold (no longer requires
  "clear" match), strongly prefer existing projects over creating new ones,
  and render the projects list as structured id=|name=|status= lines with a
  truncated aiSummary (first 100 chars) for richer context
- Add code-level project-id validation: reject hallucinated ids that are not in
  the fetched projects list, fall back to "new" instead of creating a bad link
- Rewrite _PROCESSING_SYSTEM_PROMPT: enforce explicit scan-before-create
  process (read existing → search → update if found → create only if not)
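  with hard rule against calling create_* without checking existing records
- Replace the "standalone" classification result with "new": _classify_file
  now returns (project_id, domains, new_project_name), and for "new" files the
  agent is told to create the project first and link extracted records to it
- Build the processing tools per file from the classified domains (adding
  "projects" when a project must be created) instead of once per run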

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 app/core/agent_runner.py | 113 ++++++++++++++++++++++++++-------------
 1 file changed, 77 insertions(+), 36 deletions(-)
diff --git a/app/core/agent_runner.py b/app/core/agent_runner.py
index 7292848..0f1478e 100644
--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -102,19 +102,29 @@ _DOMAIN_DESCRIPTIONS: dict[str, str] = {
 _STEP1_SYSTEM_PROMPT = """\
 You are a file classifier for a freelance project management tool.
 
-Given a file's content and a list of existing projects, your job is to:
-1. Identify which project this file belongs to (or "standalone" if none match).
-2. Identify which data domains are relevant to extract from this file,
-   limited to the allowed domains listed below.
+Your job is to match a file to an existing project and identify which data domains to extract.
 
-Domain definitions (only consider domains in the allowed list):
-{domain_definitions}
+## Project matching rules (STRICT — follow in order)
+
+1. Search the file content for any mention of a project name, client name, acronym, or topic
+   that overlaps with the existing projects listed below.
+2. The match does NOT need to be exact — partial name, abbreviation, or topic similarity is enough.
+3. STRONGLY PREFER matching an existing project. Only return "new" as an absolute last resort
+   when the file has zero meaningful connection to any listed project.
+4. When in doubt, pick the closest match from the list.
+
+## Response format
 
 Respond ONLY with a JSON object — no markdown, no explanation:
 
-{{"project_id": "<uuid> or standalone", "domains": ["tasks", "notes"]}}
+{{"project_id": "<exact id from the list below, or new>", "new_project_name": "<concise 2-5 word name, only when project_id is new>", "domains": ["tasks", "notes"]}}
+
+## Domain definitions (only consider domains in the allowed list)
+
+{domain_definitions}
+
+## Existing projects
 
-Existing projects:
 {projects_list}
 """
 
@@ -123,20 +133,26 @@ Existing projects:
 _PROCESSING_SYSTEM_PROMPT = """\
 You are a data extraction assistant for a freelance project management tool.
 
-Your task is to read the file content provided and create or update records
-using the available tools.
+Your task: extract structured data from the file content and persist it using the available tools.
 
-IMPORTANT — update-first rules:
-  The existing records below are the source of truth.
-  If an existing record semantically matches the content (by title, topic,
-  or context), update it instead of creating a duplicate.
-  Only create a new record when no existing match is found.
-  Set isAiSuggested=1 on all new records.
+## Mandatory process — follow this order for EVERY item you extract
+
+1. READ the existing records listed below for the relevant domain.
+2. SEARCH for a match by title, topic, or semantic similarity.
+3. If a match exists → call the update_* tool with the existing record's id.
+4. If no match exists → call the create_* tool and set isAiSuggested=1.
+
+NEVER call create_* without first checking the existing records.
+NEVER duplicate a record that already exists under a different wording.
+
+## Existing records (source of truth)
 
 {existing_context}
 
-Project context: {project_context}
-Target domains: {data_types}
+## Context
+
+Project: {project_context}
+Domains to extract: {data_types}
 
 {custom_prompt_section}
 """
@@ -470,21 +486,27 @@ async def _classify_file(
     file_content: str,
     projects: list[dict],
     config_data_types: list[str],
-) -> tuple[str, list[str]]:
+) -> tuple[str, list[str], str | None]:
     """Call the LLM to classify a file by project and relevant domains.
 
-    Returns ``(project_id_or_"standalone", domains)``.
-    Falls back to ``("standalone", config_data_types)`` on any error.
+    Returns ``(project_id_or_"new", domains, new_project_name_or_None)``.
+    - ``project_id`` is an existing project UUID, or ``"new"`` when no match found.
+    - ``new_project_name`` is only set when ``project_id == "new"``.
+    Falls back to ``("new", config_data_types, None)`` on any error.
     """
-    fallback = ("standalone", list(config_data_types))
+    fallback: tuple[str, list[str], str | None] = ("new", list(config_data_types), None)
 
     if not file_content.strip():
         return fallback
 
-    projects_list = "\n".join(
-        f"  - {p.get('name', '')} (id: {p['id']}, status: {p.get('status', '')})"
-        for p in projects
-    ) or "  (none — all files are standalone)"
+    valid_project_ids = {p["id"] for p in projects}
+
+    def _fmt_project(p: dict) -> str:
+        summary = (p.get("aiSummary") or p.get("ai_summary") or "").strip()
+        summary_part = f" — {summary[:100]}" if summary else ""
+        return f"  - id={p['id']} | name={p.get('name', '')} | status={p.get('status', '')}{summary_part}"
+
+    projects_list = "\n".join(_fmt_project(p) for p in projects) or "  (none yet)"
 
     domain_definitions = "\n".join(
         f"  - {d}: {_DOMAIN_DESCRIPTIONS[d]}"
@@ -510,14 +532,21 @@ async def _classify_file(
             if raw.startswith("json"):
                 raw = raw[4:]
         parsed = json.loads(raw.strip())
-        project_id: str = str(parsed.get("project_id") or "standalone")
+        raw_project_id: str = str(parsed.get("project_id") or "new")
+        # Reject hallucinated UUIDs — only accept ids that exist in the fetched list.
+        project_id = raw_project_id if raw_project_id in valid_project_ids else "new"
+        new_project_name: str | None = (
+            str(parsed["new_project_name"]).strip() or None
+            if project_id == "new" and parsed.get("new_project_name")
+            else None
+        )
         domains: list[str] = [
             d for d in parsed.get("domains", [])
             if d in config_data_types
         ]
         if not domains:
             domains = list(config_data_types)
-        return project_id, domains
+        return project_id, domains, new_project_name
     except Exception as exc:
         logger.warning(
             "agent_runner: step1 classification failed for %r: %s", file_path, exc
@@ -605,9 +634,6 @@ async def run_local_agent(
         # ── Code: fetch all projects once ────────────────────────────
         projects = await _fetch_projects()
 
-        # ── Per-file processing ──────────────────────────────────────
-        processing_tools = _build_processing_tools(config.data_types)
-
         for file_path in file_paths:
             try:
                 # Read file content via code.
@@ -622,30 +648,43 @@ async def run_local_agent(
                 items_processed += 1
 
                 # Step 1 — classify file.
-                project_id, domains = await _classify_file(
+                project_id, domains, new_project_name = await _classify_file(
                     file_path=file_path,
                     file_content=file_content,
                     projects=projects,
                     config_data_types=config.data_types,
                 )
                 logger.info(
-                    "agent_runner: run=%s file=%r → project=%s domains=%s",
+                    "agent_runner: run=%s file=%r → project=%s new_name=%r domains=%s",
                     run_id,
                     file_path,
                     project_id,
+                    new_project_name,
                     domains,
                 )
 
                 # Step 2 — fetch existing entities for this project + domains.
+                # "new" has no project to filter by yet, so entities are looked up with the
+                # "standalone" sentinel; the LLM will create the project and link records to it.
+                effective_project_id = project_id if project_id != "new" else "standalone"
+
                 existing_blocks: list[str] = []
                 for domain in domains:
-                    rows = await _fetch_domain_entities(domain, project_id)
+                    rows = await _fetch_domain_entities(domain, effective_project_id)
                     existing_blocks.append(_format_entities_for_context(domain, rows))
 
                 existing_context = "\n\n".join(existing_blocks)
 
-                if project_id == "standalone":
-                    project_context = "This file is not associated with any existing project."
+                if project_id == "new":
+                    name_hint = f' Use "{new_project_name}" as the project name.' if new_project_name else ""
+                    project_context = (
+                        f"No existing project matches this file. "
+                        f"Create a new project first using the create_project tool, "
+                        f"then link all extracted records to its id.{name_hint}"
+                    )
+                    # Ensure the LLM has the project tools available.
+                    if "projects" not in domains:
+                        domains = ["projects"] + domains
                 else:
                     project_context = (
                         f"This file belongs to project ID: {project_id}. "
@@ -659,6 +698,8 @@ async def run_local_agent(
                     custom_prompt_section=custom_section,
                 )
 
+                processing_tools = _build_processing_tools(domains)
+
                 result_text = await _run_agent_with_tools(
                     system_prompt=system_prompt,
                     user_message=(