feat(api): manifest formatter with token-budget truncation
This commit is contained in:
@@ -60,6 +60,41 @@ def _language_instruction(context: dict[str, Any]) -> str:
|
||||
f"All your output text must be written in {lang}."
|
||||
)
|
||||
|
||||
MANIFEST_TOKEN_BUDGET = 3000 # rough budget for <linked_folder> block
|
||||
|
||||
|
||||
def format_folder_manifest(manifest: dict | None) -> str:
|
||||
"""Format a folder manifest into the <linked_folder> block.
|
||||
|
||||
Truncates by mtime DESC if estimated tokens exceed MANIFEST_TOKEN_BUDGET.
|
||||
Returns empty string if manifest is None or has no files.
|
||||
"""
|
||||
if not manifest or not manifest.get("files"):
|
||||
return ""
|
||||
files = list(manifest["files"])
|
||||
files.sort(key=lambda f: f.get("mtimeMs", 0), reverse=True)
|
||||
|
||||
header = (
|
||||
f"<linked_folder>\npath: {manifest.get('folderPath', '?')} "
|
||||
f"({len(files)} files, scanned {manifest.get('lastScannedAt', '?')})\nfiles:\n"
|
||||
)
|
||||
footer_template = "… {} more files omitted, use read_project_folder_file to access by path\n</linked_folder>"
|
||||
|
||||
char_budget = MANIFEST_TOKEN_BUDGET * 4 # ~4 chars/token
|
||||
body = ""
|
||||
included = 0
|
||||
for f in files:
|
||||
line = f"- /{f['relPath']} [{f.get('kind','text')}] {f.get('summary','')}\n"
|
||||
if len(header) + len(body) + len(line) + len(footer_template.format(0)) > char_budget:
|
||||
break
|
||||
body += line
|
||||
included += 1
|
||||
omitted = len(files) - included
|
||||
if omitted > 0:
|
||||
return header + body + footer_template.format(omitted)
|
||||
return header + body + "</linked_folder>"
|
||||
|
||||
|
||||
def _datetime_context_injection(context: dict[str, Any]) -> str:
|
||||
"""Build a comprehensive DATE CONTEXT block with pre-computed ms-epoch boundaries for common ranges."""
|
||||
fp = context.get("format_prefs")
|
||||
|
||||
35
tests/test_manifest_injection.py
Normal file
35
tests/test_manifest_injection.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from app.core.deep_agent import format_folder_manifest, MANIFEST_TOKEN_BUDGET
|
||||
|
||||
|
||||
def test_format_folder_manifest_basic():
    """A small manifest renders every file with its path, kind tag and summary."""
    manifest = {
        "folderPath": "D:\\Acme",
        "lastScannedAt": "2h ago",
        "files": [
            {"relPath": "briefs/kickoff.md", "kind": "text", "summary": "Kickoff notes; scope and deadlines."},
            {"relPath": "logos/logo-v3.png", "kind": "image", "summary": "Final logo on white."},
        ],
    }
    out = format_folder_manifest(manifest)
    assert out.startswith("<linked_folder>")
    assert out.endswith("</linked_folder>")
    # Check the full rendered lines: a bare substring test on the relative
    # path would pass even if the leading slash, kind tag or summary were lost.
    assert "- /briefs/kickoff.md [text] Kickoff notes; scope and deadlines." in out
    assert "- /logos/logo-v3.png [image] Final logo on white." in out
    # Two short entries are far below the budget, so nothing may be omitted.
    assert "more files omitted" not in out
|
||||
|
||||
|
||||
def test_format_folder_manifest_truncates_past_budget():
    """An oversized manifest is cut down and reports the omitted files."""
    entries = []
    for idx in range(2000):
        entries.append(
            {"relPath": f"f{idx}.md", "kind": "text", "summary": "x" * 100, "mtimeMs": idx}
        )
    manifest = {"folderPath": "p", "lastScannedAt": "now", "files": entries}

    rendered = format_folder_manifest(manifest)

    assert "more files omitted" in rendered
    # Rough token check: ~4 characters per token, with some slack.
    approx_tokens = len(rendered) // 4
    assert approx_tokens < MANIFEST_TOKEN_BUDGET + 200
|
||||
|
||||
|
||||
def test_format_folder_manifest_null_returns_empty():
    """A missing manifest and a manifest without files both yield an empty string."""
    for empty_input in (None, {"files": []}):
        assert format_folder_manifest(empty_input) == ""
|
||||
Reference in New Issue
Block a user