"""Unit tests for Step 1 file classification (_classify_file).

These tests call the real LLM so they require OPENAI_API_KEY / LLM env vars.
Run with:
    pytest tests/test_classify_file.py -v

To run a quick manual check against a real file without the full UI:
    python -m tests.test_classify_file FILE_PATH [project_name...]
"""
from __future__ import annotations

import asyncio
import sys

import pytest

from app.core.agent_runner import _classify_file

# ── Fixtures ──────────────────────────────────────────────────────────────

# Candidate projects handed to the classifier in every test below.
PROJECTS_SAMPLE = [
    {
        "id": "aaaa-0001-0000-0000-000000000001",
        "name": "ARPA Sicilia POC",
        "status": "active",
        "aiSummary": "Proof of concept for AI features targeting ARPA Sicilia agency.",
    },
    {
        "id": "bbbb-0002-0000-0000-000000000002",
        "name": "SNAM AI Meeting Prep",
        "status": "active",
        "aiSummary": "AI-assisted preparation of meeting materials for SNAM.",
    },
    {
        "id": "cccc-0003-0000-0000-000000000003",
        "name": "SFERA+ Wave 2",
        "status": "active",
        "aiSummary": "Second wave of the SFERA+ whitelist project.",
    },
]

# Email that explicitly mentions the "ARPA Sicilia" POC.
ARPA_EMAIL = """\
to: roberto.musso@hpe.com; luca.tondin@hpecds.com
isImportance: normal
hasAttachment: True
---
## Body

Buongiorno,

In riferimento alla riunione di ieri sul POC ARPA Sicilia, vi invio il riassunto dei deliverable concordati:

- Preparare demo entro il 30 marzo
- Condividere documentazione tecnica con il team ARPA
- Fissare call di follow-up la prossima settimana

Cordiali saluti
Roberto Marchetti
"""

# Email that explicitly mentions a SNAM meeting.
SNAM_EMAIL = """\
to: roberto.musso@hpe.com
isImportance: high
hasAttachment: False
---
## Body

Ciao,

ti invio l'agenda per la riunione SNAM di domani. Per favore conferma la tua presenza.
"""

# Email that mentions none of the sample projects.
UNRELATED_EMAIL = """\
to: roberto.musso@hpe.com
isImportance: normal
---
## Body

Benvenuto nel programma HPE Employee Learning Series.
Completa la formazione richiesta entro la fine del trimestre.
"""


# ── Tests ─────────────────────────────────────────────────────────────────


@pytest.mark.asyncio
async def test_classify_arpa_matches_existing():
    """The ARPA email maps to the existing ARPA project with no new name."""
    project_id, _domains, new_name = await _classify_file(
        file_path="arpa_email.txt",
        file_content=ARPA_EMAIL,
        projects=PROJECTS_SAMPLE,
        config_data_types=["tasks", "notes", "timelines"],
    )
    assert project_id == "aaaa-0001-0000-0000-000000000001", (
        f"Expected ARPA project, got project_id={project_id!r} new_name={new_name!r}"
    )
    assert new_name is None


@pytest.mark.asyncio
async def test_classify_snam_matches_existing():
    """The SNAM email maps to the existing SNAM project."""
    project_id, _domains, new_name = await _classify_file(
        file_path="snam_email.txt",
        file_content=SNAM_EMAIL,
        projects=PROJECTS_SAMPLE,
        config_data_types=["tasks", "notes"],
    )
    assert project_id == "bbbb-0002-0000-0000-000000000002", (
        f"Expected SNAM project, got project_id={project_id!r} new_name={new_name!r}"
    )


@pytest.mark.asyncio
async def test_classify_unrelated_returns_new():
    """An email unrelated to every sample project is classified as "new"."""
    project_id, _domains, new_name = await _classify_file(
        file_path="learning_email.txt",
        file_content=UNRELATED_EMAIL,
        projects=PROJECTS_SAMPLE,
        config_data_types=["tasks", "notes"],
    )
    assert project_id == "new"
    assert new_name is not None  # LLM should suggest a name


@pytest.mark.asyncio
async def test_classify_empty_file_returns_new():
    """Whitespace-only content is classified as "new"."""
    project_id, _domains, _new_name = await _classify_file(
        file_path="empty.txt",
        file_content=" ",
        projects=PROJECTS_SAMPLE,
        config_data_types=["tasks"],
    )
    assert project_id == "new"


@pytest.mark.asyncio
async def test_classify_no_projects_returns_new():
    """With no candidate projects, the result is "new" plus a suggested name."""
    project_id, _domains, new_name = await _classify_file(
        file_path="arpa_email.txt",
        file_content=ARPA_EMAIL,
        projects=[],
        config_data_types=["tasks", "notes"],
    )
    assert project_id == "new"
    assert new_name is not None


# ── CLI quick-test runner ─────────────────────────────────────────────────


async def _cli_test(file_path: str, project_names: list[str]) -> None:
    """Run Step 1 classification against a real file from the CLI.

    Args:
        file_path: Path of the file to classify; read as UTF-8 with
            undecodable bytes replaced.
        project_names: Names used to build the candidate project list. Each
            one gets a synthetic ``test-id-NNNN`` id, status "active" and an
            empty ``aiSummary``.

    Prints the classification result to stdout as indented JSON.
    """
    # Imported here so they are only loaded when the CLI runner is used.
    import json
    from pathlib import Path

    content = Path(file_path).read_text(encoding="utf-8", errors="replace")
    projects = [
        {"id": f"test-id-{i:04d}", "name": name, "status": "active", "aiSummary": ""}
        for i, name in enumerate(project_names)
    ]

    print(f"\nClassifying: {file_path}")
    print(f"Projects in context: {[p['name'] for p in projects]}\n")

    project_id, domains, new_name = await _classify_file(
        file_path=file_path,
        file_content=content,
        projects=projects,
        config_data_types=["tasks", "notes", "timelines"],
    )

    result = {
        "project_id": project_id,
        # Resolve the returned id back to a project name; None when no
        # candidate has that id.
        "matched_name": next(
            (p["name"] for p in projects if p["id"] == project_id), None
        ),
        "new_project_name": new_name,
        "domains": domains,
    }
    print(json.dumps(result, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    # argv[1] is the mandatory file to classify; the remaining arguments are
    # optional project names offered to the classifier as candidates.
    if len(sys.argv) < 2:
        print(
            "Usage: python -m tests.test_classify_file FILE_PATH [project_name ...]"
        )
        sys.exit(1)
    asyncio.run(_cli_test(sys.argv[1], sys.argv[2:]))