Fix project creation: code-based in runner, not delegated to Step 2 LLM

Root causes fixed:

1. PROJECT_TOOLS removed from the Step 2 tool set — project assignment is now handled exclusively by the runner in code, never by the LLM.
2. When Step 1 returns "new", the runner calls execute_on_client (insert into projects) directly, before Step 2, reads the created id, and passes it to Step 2 as context.
3. Newly created projects are appended to the local `projects` list so that subsequent files in the same run can be matched to them by Step 1. This prevents creating one project per file when multiple files share the same topic.

Also add tests/test_classify_file.py with pytest cases for _classify_file and a CLI runner:

    python -m tests.test_classify_file <file> [project...]

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Exclude project/projectId questions from agent setup journey
2026-03-21 23:40:38 +01:00 · 2026-03-21 22:58:05 +01:00 · 2026-03-21 22:54:34 +01:00
3 changed files with 248 additions and 34 deletions
--- a/app/api/routes/agent_setup.py
+++ b/app/api/routes/agent_setup.py
@@ -43,8 +43,10 @@ _SESSION_TTL_SECONDS: int = 1800  # 30 minutes
 _TEMPLATE_START = "PROMPT_TEMPLATE_START"
 _TEMPLATE_END = "PROMPT_TEMPLATE_END"

-# Maximum number of conversation turns before the LLM is nudged to wrap up.
-_MAX_TURNS: int = 5
+# Minimum turns before we consider nudging the LLM to wrap up.
+_MIN_TURNS_BEFORE_NUDGE: int = 3
+# Hard cap to avoid infinite loops (safety net, not the primary stopping criterion).
+_MAX_TURNS: int = 15
 # Max tool-calling steps per LLM invocation.
 _MAX_TOOL_STEPS: int = 6

@@ -92,7 +94,7 @@ as its instruction set.
 The extraction agent already has this base behaviour built in:
  - Reads each file using file-system tools.
  - Creates records (tasks, notes, timelines, projects) via CRUD tools.
-  - Sets isAiSuggested=1 and isApproved=0 on every record.
+  - Sets isAiSuggested=1 on every new record.
  - Only extracts data explicitly present in the files — it never invents information.
 The user's custom prompt is appended AFTER this base behaviour, so focus on
 what to look for and how to map it — not on the general extraction mechanics.
@@ -105,6 +107,11 @@ You have access to file-system tools to explore the user's directory:
 The user's configured directory is: {directory}
 Target data types: {data_types}

+IMPORTANT — project assignment is handled automatically by the main agent runner
+before the custom prompt is ever used.  You MUST NOT ask the user about projects,
+projectId, or how to link records to projects.  Never include projectId logic or
+project creation instructions in the generated prompt_template.
+
 Start by exploring the directory to understand its structure.  Then ask concise,
 focused questions one at a time.  Cover these topics (not necessarily in this order):
  1. The type and format of the source content (confirmed by your exploration).
@@ -112,8 +119,8 @@ focused questions one at a time.  Cover these topics (not necessarily in this or
  3. Priority or status rules (e.g. "urgent" keyword → high priority).
  4. Any special handling, date extraction, or exclusions.

-After 3-5 questions (when you have enough information), output the final prompt_template
-between these exact markers on their own lines:
+Once you reach 90% confidence, output the final prompt_template between these exact
+markers on their own lines:

 {template_start}
 <the complete extraction prompt here>
@@ -121,15 +128,17 @@ between these exact markers on their own lines:

 The prompt_template must be a self-contained instruction for an AI that reads files
 and must perform CRUD operations using tools to create records.  It should specify:
-  - What entity types to create (tasks, notes, timelines, projects).
+  - What entity types to create (tasks, notes, timelines) — never projects.
  - How to map file content to record fields (camelCase: title, status, priority,
-    dueDate, projectId, content, etc.).
-  - That isAiSuggested must be set to 1 and isApproved to 0 on every record.
+    dueDate, content, etc.) — never include projectId.
+  - That isAiSuggested must be set to 1 on every new record.
  - Concrete examples of mappings based on what you discovered in the directory.

 {existing_section}\
-Do not ask more than {max_turns} questions total.  Begin by exploring the directory,
-then ask your first question.\
+Keep asking clarifying questions until you are at least 90% confident you have
+enough information to generate an accurate prompt_template.  Once you reach that
+confidence level, stop asking and produce the final template immediately.
+Begin by exploring the directory, then ask your first question.\
 """


@@ -150,7 +159,6 @@ def _build_system_prompt(
        template_start=_TEMPLATE_START,
        template_end=_TEMPLATE_END,
        existing_section=existing_section,
-        max_turns=_MAX_TURNS,
    )


@@ -356,8 +364,8 @@ async def handle_journey_message(
    prompt_template = _extract_template(ai_reply)
    done = prompt_template is not None

-    # If the LLM didn't produce a template but we've hit max turns, nudge it
-    # and call the LLM one more time to force template generation.
+    # If the LLM didn't produce a template, nudge it to generate one only once
+    # the hard safety cap (_MAX_TURNS user turns) has been reached.
    if not done:
        turns = sum(1 for t in session.history if t["role"] == "user")
        if turns >= _MAX_TURNS:
--- a/app/core/agent_runner.py
+++ b/app/core/agent_runner.py
@@ -70,10 +70,11 @@ _MAX_PROCESSING_STEPS: int = 12
 _MAX_SCAN_DEPTH: int = 5

 # ── Data-type to tool mapping ─────────────────────────────────────────────
+# NOTE: "projects" is intentionally excluded — project creation/assignment is
+# handled in code by the runner, never delegated to the Step 2 LLM.

 _DATA_TYPE_TOOLS: dict[str, list[Any]] = {
    "tasks": TASK_TOOLS,
-    "projects": PROJECT_TOOLS,
    "notes": NOTE_TOOLS,
    "timelines": TIMELINE_TOOLS,
 }
@@ -663,10 +664,47 @@ async def run_local_agent(
                    domains,
                )

-                # Step 2 — fetch existing entities for this project + domains.
-                # When project_id is "new", entities are fetched without a project
-                # filter; the LLM will create the project and link records to it.
-                effective_project_id = project_id if project_id != "new" else "standalone"
+                # Step 2 — resolve project_id via CODE, then fetch entities.
+                # Project creation is NEVER delegated to the Step 2 LLM.
+                if project_id == "new":
+                    proj_name = new_project_name or "Untitled Project"
+                    try:
+                        proj_result = await execute_on_client(
+                            action="insert",
+                            table="projects",
+                            data={"name": proj_name, "clientId": None},
+                        )
+                        created = proj_result.get("row", {})
+                        effective_project_id = created.get("id", "standalone")
+                        # Add to local list so subsequent files can match it.
+                        if "id" in created:
+                            projects.append(created)
+                        logger.info(
+                            "agent_runner: run=%s created project %r id=%s",
+                            run_id, proj_name, effective_project_id,
+                        )
+                    except Exception as exc:
+                        logger.warning(
+                            "agent_runner: run=%s failed to create project %r: %s",
+                            run_id, proj_name, exc,
+                        )
+                        effective_project_id = "standalone"
+                        proj_name = "unknown"
+                    project_context = (
+                        f"Project: {proj_name} (id: {effective_project_id}). "
+                        "Always set projectId to this id on every record you create."
+                    )
+                else:
+                    effective_project_id = project_id
+                    proj = next((p for p in projects if p["id"] == project_id), None)
+                    proj_name = proj.get("name", project_id) if proj else project_id
+                    project_context = (
+                        f"Project: {proj_name} (id: {project_id}). "
+                        "Always set projectId to this id on every record you create."
+                    )
+
+                # "projects" domain is never passed to Step 2 — handled above in code.
+                domains = [d for d in domains if d != "projects"]

                existing_blocks: list[str] = []
                for domain in domains:
@@ -675,22 +713,6 @@ async def run_local_agent(

                existing_context = "\n\n".join(existing_blocks)

-                if project_id == "new":
-                    name_hint = f' Use "{new_project_name}" as the project name.' if new_project_name else ""
-                    project_context = (
-                        f"No existing project matches this file. "
-                        f"Create a new project first using the create_project tool, "
-                        f"then link all extracted records to its id.{name_hint}"
-                    )
-                    # Ensure the LLM has the project tools available.
-                    if "projects" not in domains:
-                        domains = ["projects"] + domains
-                else:
-                    project_context = (
-                        f"This file belongs to project ID: {project_id}. "
-                        "Use this project_id when creating records."
-                    )
-
                system_prompt = _PROCESSING_SYSTEM_PROMPT.format(
                    existing_context=existing_context,
                    project_context=project_context,
--- a/tests/test_classify_file.py
+++ b/tests/test_classify_file.py
@@ -0,0 +1,184 @@
+"""Unit tests for Step 1 file classification (_classify_file).
+
+These tests call the real LLM so they require OPENAI_API_KEY / LLM env vars.
+Run with: pytest tests/test_classify_file.py -v
+
+To run a quick manual check against a real file without the full UI:
+    python -m tests.test_classify_file <path/to/file.txt> [project_name...]
+"""
+
+from __future__ import annotations
+
+import asyncio
+import sys
+
+import pytest
+
+from app.core.agent_runner import _classify_file
+
+
+# ── Fixtures ──────────────────────────────────────────────────────────────
+
+PROJECTS_SAMPLE = [
+    {
+        "id": "aaaa-0001-0000-0000-000000000001",
+        "name": "ARPA Sicilia POC",
+        "status": "active",
+        "aiSummary": "Proof of concept for AI features targeting ARPA Sicilia agency.",
+    },
+    {
+        "id": "bbbb-0002-0000-0000-000000000002",
+        "name": "SNAM AI Meeting Prep",
+        "status": "active",
+        "aiSummary": "AI-assisted preparation of meeting materials for SNAM.",
+    },
+    {
+        "id": "cccc-0003-0000-0000-000000000003",
+        "name": "SFERA+ Wave 2",
+        "status": "active",
+        "aiSummary": "Second wave of the SFERA+ whitelist project.",
+    },
+]
+
+ARPA_EMAIL = """\
+to: roberto.musso@hpe.com; luca.tondin@hpecds.com
+isImportance: normal
+hasAttachment: True
+---
+## Body
+Buongiorno,
+
+In riferimento alla riunione di ieri sul POC ARPA Sicilia, vi invio il riassunto
+dei deliverable concordati:
+- Preparare demo entro il 30 marzo
+- Condividere documentazione tecnica con il team ARPA
+- Fissare call di follow-up la prossima settimana
+
+Cordiali saluti
+Roberto Marchetti
+"""
+
+SNAM_EMAIL = """\
+to: roberto.musso@hpe.com
+isImportance: high
+hasAttachment: False
+---
+## Body
+Ciao,
+ti invio l'agenda per la riunione SNAM di domani.
+Per favore conferma la tua presenza.
+"""
+
+UNRELATED_EMAIL = """\
+to: roberto.musso@hpe.com
+isImportance: normal
+---
+## Body
+Benvenuto nel programma HPE Employee Learning Series.
+Completa la formazione richiesta entro la fine del trimestre.
+"""
+
+
+# ── Tests ─────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_classify_arpa_matches_existing():
+    project_id, domains, new_name = await _classify_file(
+        file_path="arpa_email.txt",
+        file_content=ARPA_EMAIL,
+        projects=PROJECTS_SAMPLE,
+        config_data_types=["tasks", "notes", "timelines"],
+    )
+    assert project_id == "aaaa-0001-0000-0000-000000000001", (
+        f"Expected ARPA project, got project_id={project_id!r} new_name={new_name!r}"
+    )
+    assert new_name is None
+
+
+@pytest.mark.asyncio
+async def test_classify_snam_matches_existing():
+    project_id, domains, new_name = await _classify_file(
+        file_path="snam_email.txt",
+        file_content=SNAM_EMAIL,
+        projects=PROJECTS_SAMPLE,
+        config_data_types=["tasks", "notes"],
+    )
+    assert project_id == "bbbb-0002-0000-0000-000000000002", (
+        f"Expected SNAM project, got project_id={project_id!r} new_name={new_name!r}"
+    )
+
+
+@pytest.mark.asyncio
+async def test_classify_unrelated_returns_new():
+    project_id, domains, new_name = await _classify_file(
+        file_path="learning_email.txt",
+        file_content=UNRELATED_EMAIL,
+        projects=PROJECTS_SAMPLE,
+        config_data_types=["tasks", "notes"],
+    )
+    assert project_id == "new"
+    assert new_name is not None  # LLM should suggest a name
+
+
+@pytest.mark.asyncio
+async def test_classify_empty_file_returns_new():
+    project_id, domains, new_name = await _classify_file(
+        file_path="empty.txt",
+        file_content="   ",
+        projects=PROJECTS_SAMPLE,
+        config_data_types=["tasks"],
+    )
+    assert project_id == "new"
+
+
+@pytest.mark.asyncio
+async def test_classify_no_projects_returns_new():
+    project_id, domains, new_name = await _classify_file(
+        file_path="arpa_email.txt",
+        file_content=ARPA_EMAIL,
+        projects=[],
+        config_data_types=["tasks", "notes"],
+    )
+    assert project_id == "new"
+    assert new_name is not None
+
+
+# ── CLI quick-test runner ─────────────────────────────────────────────────
+
+
+async def _cli_test(file_path: str, project_names: list[str]) -> None:
+    """Run Step 1 classification against a real file from the CLI."""
+    import json
+    from pathlib import Path
+
+    content = Path(file_path).read_text(encoding="utf-8", errors="replace")
+    projects = [
+        {"id": f"test-id-{i:04d}", "name": name, "status": "active", "aiSummary": ""}
+        for i, name in enumerate(project_names)
+    ]
+
+    print(f"\nClassifying: {file_path}")
+    print(f"Projects in context: {[p['name'] for p in projects]}\n")
+
+    project_id, domains, new_name = await _classify_file(
+        file_path=file_path,
+        file_content=content,
+        projects=projects,
+        config_data_types=["tasks", "notes", "timelines"],
+    )
+
+    result = {
+        "project_id": project_id,
+        "matched_name": next((p["name"] for p in projects if p["id"] == project_id), None),
+        "new_project_name": new_name,
+        "domains": domains,
+    }
+    print(json.dumps(result, indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python -m tests.test_classify_file <file_path> [project_name ...]")
+        sys.exit(1)
+    asyncio.run(_cli_test(sys.argv[1], sys.argv[2:]))