From 506f517851dd9ba2ca139eace788f6ab40d5112c Mon Sep 17 00:00:00 2001
From: Roberto
Date: Tue, 12 May 2026 11:28:13 +0200
Subject: [PATCH] feat(api): manifest formatter with token-budget truncation

---
 app/core/deep_agent.py           | 64 ++++++++++++++++++++++++++++++++
 tests/test_manifest_injection.py | 51 +++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 tests/test_manifest_injection.py

diff --git a/app/core/deep_agent.py b/app/core/deep_agent.py
index 4141f47..a36f8c2 100644
--- a/app/core/deep_agent.py
+++ b/app/core/deep_agent.py
@@ -60,6 +60,70 @@ def _language_instruction(context: dict[str, Any]) -> str:
         f"All your output text must be written in {lang}."
     )
 
+MANIFEST_TOKEN_BUDGET = 3000  # rough token budget for the manifest block
+
+
+def format_folder_manifest(manifest: dict | None) -> str:
+    """Format a folder manifest into a newline-delimited listing block.
+
+    Files are listed newest first (``mtimeMs`` descending). When the listing
+    would exceed ``MANIFEST_TOKEN_BUDGET`` (estimated at ~4 characters per
+    token), the oldest files are dropped and a footer line reports how many
+    were omitted.
+
+    Args:
+        manifest: Mapping with a ``files`` list plus optional ``folderPath``
+            and ``lastScannedAt`` values. Every file entry must carry
+            ``relPath``; ``kind``, ``summary`` and ``mtimeMs`` are optional
+            and may be ``None``.
+
+    Returns:
+        The formatted block, or an empty string when ``manifest`` is ``None``
+        or has no files.
+
+    Raises:
+        KeyError: If a file entry has no ``relPath``.
+    """
+    if not manifest or not manifest.get("files"):
+        return ""
+    # Newest first, so truncation drops the oldest entries. ``or 0`` keeps an
+    # explicit ``None`` timestamp from breaking the sort comparison.
+    files = sorted(
+        manifest["files"],
+        key=lambda f: f.get("mtimeMs") or 0,
+        reverse=True,
+    )
+    total = len(files)
+
+    header = (
+        f"\npath: {manifest.get('folderPath', '?')} "
+        f"({total} files, scanned {manifest.get('lastScannedAt', '?')})\nfiles:\n"
+    )
+    footer_template = "… {} more files omitted, use read_project_folder_file to access by path\n"
+
+    char_budget = MANIFEST_TOKEN_BUDGET * 4  # ~4 chars/token
+    # Reserve room for the widest footer this manifest can produce; sizing the
+    # reservation with a count of 0 would let the final output overshoot.
+    footer_reserve = len(footer_template.format(total))
+    used = len(header)
+    lines: list[str] = []
+    for index, entry in enumerate(files):
+        line = (
+            f"- /{entry['relPath']} [{entry.get('kind') or 'text'}] "
+            f"{entry.get('summary') or ''}\n"
+        )
+        # Nothing follows the last file, so no footer space is needed for it.
+        reserve = 0 if index == total - 1 else footer_reserve
+        if used + len(line) + reserve > char_budget:
+            break
+        lines.append(line)
+        used += len(line)
+
+    omitted = total - len(lines)
+    tail = footer_template.format(omitted) if omitted > 0 else ""
+    return header + "".join(lines) + tail
+
+
 def _datetime_context_injection(context: dict[str, Any]) -> str:
     """Build a comprehensive DATE CONTEXT block with pre-computed ms-epoch boundaries for common ranges."""
     fp = context.get("format_prefs")
diff --git a/tests/test_manifest_injection.py b/tests/test_manifest_injection.py
new file mode 100644
index 0000000..2405b77
--- /dev/null
+++ b/tests/test_manifest_injection.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from app.core.deep_agent import format_folder_manifest, MANIFEST_TOKEN_BUDGET
+
+
+def test_format_folder_manifest_basic():
+    manifest = {
+        "folderPath": "D:\\Acme",
+        "lastScannedAt": "2h ago",
+        "files": [
+            {"relPath": "briefs/kickoff.md", "kind": "text", "summary": "Kickoff notes; scope and deadlines."},
+            {"relPath": "logos/logo-v3.png", "kind": "image", "summary": "Final logo on white."},
+        ],
+    }
+    out = format_folder_manifest(manifest)
+    assert "path: D:\\Acme (2 files, scanned 2h ago)\nfiles:\n" in out
+    assert "- /briefs/kickoff.md [text] Kickoff notes; scope and deadlines.\n" in out
+    assert "- /logos/logo-v3.png [image] Final logo on white.\n" in out
+    assert "more files omitted" not in out
+
+
+def test_format_folder_manifest_truncates_past_budget():
+    files = [
+        {"relPath": f"f{i}.md", "kind": "text", "summary": "x" * 100, "mtimeMs": i}
+        for i in range(2000)
+    ]
+    out = format_folder_manifest({"folderPath": "p", "lastScannedAt": "now", "files": files})
+    assert "more files omitted" in out
+    # Newest files are kept, oldest are dropped.
+    assert "/f1999.md " in out
+    assert "/f0.md " not in out
+    # The footer is counted against the budget (~4 chars/token).
+    assert len(out) <= MANIFEST_TOKEN_BUDGET * 4
+
+
+def test_format_folder_manifest_tolerates_null_fields():
+    manifest = {
+        "files": [
+            {"relPath": "old.md", "kind": None, "summary": None, "mtimeMs": None},
+            {"relPath": "new.md", "mtimeMs": 5},
+        ],
+    }
+    out = format_folder_manifest(manifest)
+    assert out.index("/new.md") < out.index("/old.md")
+    assert "- /old.md [text] \n" in out
+    assert "None" not in out
+
+
+def test_format_folder_manifest_null_returns_empty():
+    assert format_folder_manifest(None) == ""
+    assert format_folder_manifest({"files": []}) == ""