diff --git a/app/core/agent_runner.py b/app/core/agent_runner.py index 0f1478e..c11324e 100644 --- a/app/core/agent_runner.py +++ b/app/core/agent_runner.py @@ -70,10 +70,11 @@ _MAX_PROCESSING_STEPS: int = 12 _MAX_SCAN_DEPTH: int = 5 # ── Data-type to tool mapping ───────────────────────────────────────────── +# NOTE: "projects" is intentionally excluded — project creation/assignment is +# handled in code by the runner, never delegated to the Step 2 LLM. _DATA_TYPE_TOOLS: dict[str, list[Any]] = { "tasks": TASK_TOOLS, - "projects": PROJECT_TOOLS, "notes": NOTE_TOOLS, "timelines": TIMELINE_TOOLS, } @@ -663,10 +664,47 @@ async def run_local_agent( domains, ) - # Step 2 — fetch existing entities for this project + domains. - # When project_id is "new", entities are fetched without a project - # filter; the LLM will create the project and link records to it. - effective_project_id = project_id if project_id != "new" else "standalone" + # Step 2 — resolve project_id via CODE, then fetch entities. + # Project creation is NEVER delegated to the Step 2 LLM. + if project_id == "new": + proj_name = new_project_name or "Untitled Project" + try: + proj_result = await execute_on_client( + action="insert", + table="projects", + data={"name": proj_name, "clientId": None}, + ) + created = proj_result.get("row", {}) + effective_project_id = created.get("id", "standalone") + # Add to local list so subsequent files can match it. + if "id" in created: + projects.append(created) + logger.info( + "agent_runner: run=%s created project %r id=%s", + run_id, proj_name, effective_project_id, + ) + except Exception as exc: + logger.warning( + "agent_runner: run=%s failed to create project %r: %s", + run_id, proj_name, exc, + ) + effective_project_id = "standalone" + proj_name = "unknown" + project_context = ( + f"Project: {proj_name} (id: {effective_project_id}). " + "Always set projectId to this id on every record you create." 
+ ) + else: + effective_project_id = project_id + proj = next((p for p in projects if p["id"] == project_id), None) + proj_name = proj.get("name", project_id) if proj else project_id + project_context = ( + f"Project: {proj_name} (id: {project_id}). " + "Always set projectId to this id on every record you create." + ) + + # "projects" domain is never passed to Step 2 — handled above in code. + domains = [d for d in domains if d != "projects"] existing_blocks: list[str] = [] for domain in domains: @@ -675,22 +713,6 @@ async def run_local_agent( existing_context = "\n\n".join(existing_blocks) - if project_id == "new": - name_hint = f' Use "{new_project_name}" as the project name.' if new_project_name else "" - project_context = ( - f"No existing project matches this file. " - f"Create a new project first using the create_project tool, " - f"then link all extracted records to its id.{name_hint}" - ) - # Ensure the LLM has the project tools available. - if "projects" not in domains: - domains = ["projects"] + domains - else: - project_context = ( - f"This file belongs to project ID: {project_id}. " - "Use this project_id when creating records." - ) - system_prompt = _PROCESSING_SYSTEM_PROMPT.format( existing_context=existing_context, project_context=project_context, diff --git a/tests/test_classify_file.py b/tests/test_classify_file.py new file mode 100644 index 0000000..2d16a54 --- /dev/null +++ b/tests/test_classify_file.py @@ -0,0 +1,184 @@ +"""Unit tests for Step 1 file classification (_classify_file). + +These tests call the real LLM so they require OPENAI_API_KEY / LLM env vars. +Run with: pytest tests/test_classify_file.py -v + +To run a quick manual check against a real file without the full UI: + python -m tests.test_classify_file FILE_PATH [PROJECT_NAME ...] 
+""" + +from __future__ import annotations + +import asyncio +import sys + +import pytest + +from app.core.agent_runner import _classify_file + + +# ── Fixtures ────────────────────────────────────────────────────────────── + +PROJECTS_SAMPLE = [ + { + "id": "aaaa-0001-0000-0000-000000000001", + "name": "ARPA Sicilia POC", + "status": "active", + "aiSummary": "Proof of concept for AI features targeting ARPA Sicilia agency.", + }, + { + "id": "bbbb-0002-0000-0000-000000000002", + "name": "SNAM AI Meeting Prep", + "status": "active", + "aiSummary": "AI-assisted preparation of meeting materials for SNAM.", + }, + { + "id": "cccc-0003-0000-0000-000000000003", + "name": "SFERA+ Wave 2", + "status": "active", + "aiSummary": "Second wave of the SFERA+ whitelist project.", + }, +] + +ARPA_EMAIL = """\ +to: roberto.musso@hpe.com; luca.tondin@hpecds.com +isImportance: normal +hasAttachment: True +--- +## Body +Buongiorno, + +In riferimento alla riunione di ieri sul POC ARPA Sicilia, vi invio il riassunto +dei deliverable concordati: +- Preparare demo entro il 30 marzo +- Condividere documentazione tecnica con il team ARPA +- Fissare call di follow-up la prossima settimana + +Cordiali saluti +Roberto Marchetti +""" + +SNAM_EMAIL = """\ +to: roberto.musso@hpe.com +isImportance: high +hasAttachment: False +--- +## Body +Ciao, +ti invio l'agenda per la riunione SNAM di domani. +Per favore conferma la tua presenza. +""" + +UNRELATED_EMAIL = """\ +to: roberto.musso@hpe.com +isImportance: normal +--- +## Body +Benvenuto nel programma HPE Employee Learning Series. +Completa la formazione richiesta entro la fine del trimestre. 
+""" + + +# ── Tests ───────────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_classify_arpa_matches_existing(): + project_id, _domains, new_name = await _classify_file( + file_path="arpa_email.txt", + file_content=ARPA_EMAIL, + projects=PROJECTS_SAMPLE, + config_data_types=["tasks", "notes", "timelines"], + ) + assert project_id == "aaaa-0001-0000-0000-000000000001", ( + f"Expected ARPA project, got project_id={project_id!r} new_name={new_name!r}" + ) + assert new_name is None + + +@pytest.mark.asyncio +async def test_classify_snam_matches_existing(): + project_id, _domains, new_name = await _classify_file( + file_path="snam_email.txt", + file_content=SNAM_EMAIL, + projects=PROJECTS_SAMPLE, + config_data_types=["tasks", "notes"], + ) + assert project_id == "bbbb-0002-0000-0000-000000000002", ( + f"Expected SNAM project, got project_id={project_id!r} new_name={new_name!r}" + ) + + +@pytest.mark.asyncio +async def test_classify_unrelated_returns_new(): + project_id, _domains, new_name = await _classify_file( + file_path="learning_email.txt", + file_content=UNRELATED_EMAIL, + projects=PROJECTS_SAMPLE, + config_data_types=["tasks", "notes"], + ) + assert project_id == "new" + assert new_name is not None # LLM should suggest a name + + +@pytest.mark.asyncio +async def test_classify_empty_file_returns_new(): + project_id, _domains, _new_name = await _classify_file( + file_path="empty.txt", + file_content=" ", + projects=PROJECTS_SAMPLE, + config_data_types=["tasks"], + ) + assert project_id == "new" + + +@pytest.mark.asyncio +async def test_classify_no_projects_returns_new(): + project_id, _domains, new_name = await _classify_file( + file_path="arpa_email.txt", + file_content=ARPA_EMAIL, + projects=[], + config_data_types=["tasks", "notes"], + ) + assert project_id == "new" + assert new_name is not None + + +# ── CLI quick-test runner ───────────────────────────────────────────────── + + +async def _cli_test(file_path: str, 
project_names: list[str]) -> None: + """Classify the file at file_path against stub projects built from project_names and print the result as JSON.""" + import json + from pathlib import Path + + content = Path(file_path).read_text(encoding="utf-8", errors="replace") + projects = [ + {"id": f"test-id-{i:04d}", "name": name, "status": "active", "aiSummary": ""} + for i, name in enumerate(project_names) + ] + + print(f"\nClassifying: {file_path}") + print(f"Projects in context: {[p['name'] for p in projects]}\n") + + project_id, domains, new_name = await _classify_file( + file_path=file_path, + file_content=content, + projects=projects, + config_data_types=["tasks", "notes", "timelines"], + ) + + result = { + "project_id": project_id, + "matched_name": next((p["name"] for p in projects if p["id"] == project_id), None), + "new_project_name": new_name, + "domains": domains, + } + print(json.dumps(result, indent=2, ensure_ascii=False)) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python -m tests.test_classify_file FILE_PATH [PROJECT_NAME ...]", file=sys.stderr) + sys.exit(1) + asyncio.run(_cli_test(sys.argv[1], sys.argv[2:]))