Fix project creation: code-based in runner, not delegated to Step 2 LLM

Root causes fixed:
1. PROJECT_TOOLS removed from Step 2 tool set — project assignment is now
   exclusively handled by the runner in code, never by the LLM.
2. When Step 1 returns "new", runner calls execute_on_client insert/projects
   directly (before Step 2), gets the created id, and passes it as context.
3. Newly created projects are appended to the local `projects` list so that
   subsequent files in the same run can match to them via Step 1 — prevents
   one project per file when multiple files share the same topic.

Also add tests/test_classify_file.py with pytest cases for _classify_file
and a CLI runner: python -m tests.test_classify_file <file> [project...]

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Roberto Musso
2026-03-21 23:40:38 +01:00
parent 0d93b3960d
commit 552b8eb305
2 changed files with 227 additions and 21 deletions

View File

@@ -70,10 +70,11 @@ _MAX_PROCESSING_STEPS: int = 12
_MAX_SCAN_DEPTH: int = 5
# ── Data-type to tool mapping ─────────────────────────────────────────────
# NOTE: "projects" is intentionally excluded — project creation/assignment is
# handled in code by the runner, never delegated to the Step 2 LLM.
_DATA_TYPE_TOOLS: dict[str, list[Any]] = {
"tasks": TASK_TOOLS,
"projects": PROJECT_TOOLS,
"notes": NOTE_TOOLS,
"timelines": TIMELINE_TOOLS,
}
@@ -663,10 +664,47 @@ async def run_local_agent(
domains,
)
# Step 2 — fetch existing entities for this project + domains.
# When project_id is "new", entities are fetched without a project
# filter; the LLM will create the project and link records to it.
effective_project_id = project_id if project_id != "new" else "standalone"
# Step 2 — resolve project_id via CODE, then fetch entities.
# Project creation is NEVER delegated to the Step 2 LLM.
if project_id == "new":
proj_name = new_project_name or "Untitled Project"
try:
proj_result = await execute_on_client(
action="insert",
table="projects",
data={"name": proj_name, "clientId": None},
)
created = proj_result.get("row", {})
effective_project_id = created.get("id", "standalone")
# Add to local list so subsequent files can match it.
if "id" in created:
projects.append(created)
logger.info(
"agent_runner: run=%s created project %r id=%s",
run_id, proj_name, effective_project_id,
)
except Exception as exc:
logger.warning(
"agent_runner: run=%s failed to create project %r: %s",
run_id, proj_name, exc,
)
effective_project_id = "standalone"
proj_name = "unknown"
project_context = (
f"Project: {proj_name} (id: {effective_project_id}). "
"Always set projectId to this id on every record you create."
)
else:
effective_project_id = project_id
proj = next((p for p in projects if p["id"] == project_id), None)
proj_name = proj.get("name", project_id) if proj else project_id
project_context = (
f"Project: {proj_name} (id: {project_id}). "
"Always set projectId to this id on every record you create."
)
# "projects" domain is never passed to Step 2 — handled above in code.
domains = [d for d in domains if d != "projects"]
existing_blocks: list[str] = []
for domain in domains:
@@ -675,22 +713,6 @@ async def run_local_agent(
existing_context = "\n\n".join(existing_blocks)
if project_id == "new":
name_hint = f' Use "{new_project_name}" as the project name.' if new_project_name else ""
project_context = (
f"No existing project matches this file. "
f"Create a new project first using the create_project tool, "
f"then link all extracted records to its id.{name_hint}"
)
# Ensure the LLM has the project tools available.
if "projects" not in domains:
domains = ["projects"] + domains
else:
project_context = (
f"This file belongs to project ID: {project_id}. "
"Use this project_id when creating records."
)
system_prompt = _PROCESSING_SYSTEM_PROMPT.format(
existing_context=existing_context,
project_context=project_context,