feat(api): manifest formatter with token-budget truncation
This commit is contained in:
@@ -60,6 +60,41 @@ def _language_instruction(context: dict[str, Any]) -> str:
|
|||||||
f"All your output text must be written in {lang}."
|
f"All your output text must be written in {lang}."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MANIFEST_TOKEN_BUDGET = 3000 # rough budget for <linked_folder> block
|
||||||
|
|
||||||
|
|
||||||
|
def format_folder_manifest(manifest: dict | None) -> str:
|
||||||
|
"""Format a folder manifest into the <linked_folder> block.
|
||||||
|
|
||||||
|
Truncates by mtime DESC if estimated tokens exceed MANIFEST_TOKEN_BUDGET.
|
||||||
|
Returns empty string if manifest is None or has no files.
|
||||||
|
"""
|
||||||
|
if not manifest or not manifest.get("files"):
|
||||||
|
return ""
|
||||||
|
files = list(manifest["files"])
|
||||||
|
files.sort(key=lambda f: f.get("mtimeMs", 0), reverse=True)
|
||||||
|
|
||||||
|
header = (
|
||||||
|
f"<linked_folder>\npath: {manifest.get('folderPath', '?')} "
|
||||||
|
f"({len(files)} files, scanned {manifest.get('lastScannedAt', '?')})\nfiles:\n"
|
||||||
|
)
|
||||||
|
footer_template = "… {} more files omitted, use read_project_folder_file to access by path\n</linked_folder>"
|
||||||
|
|
||||||
|
char_budget = MANIFEST_TOKEN_BUDGET * 4 # ~4 chars/token
|
||||||
|
body = ""
|
||||||
|
included = 0
|
||||||
|
for f in files:
|
||||||
|
line = f"- /{f['relPath']} [{f.get('kind','text')}] {f.get('summary','')}\n"
|
||||||
|
if len(header) + len(body) + len(line) + len(footer_template.format(0)) > char_budget:
|
||||||
|
break
|
||||||
|
body += line
|
||||||
|
included += 1
|
||||||
|
omitted = len(files) - included
|
||||||
|
if omitted > 0:
|
||||||
|
return header + body + footer_template.format(omitted)
|
||||||
|
return header + body + "</linked_folder>"
|
||||||
|
|
||||||
|
|
||||||
def _datetime_context_injection(context: dict[str, Any]) -> str:
|
def _datetime_context_injection(context: dict[str, Any]) -> str:
|
||||||
"""Build a comprehensive DATE CONTEXT block with pre-computed ms-epoch boundaries for common ranges."""
|
"""Build a comprehensive DATE CONTEXT block with pre-computed ms-epoch boundaries for common ranges."""
|
||||||
fp = context.get("format_prefs")
|
fp = context.get("format_prefs")
|
||||||
|
|||||||
35
tests/test_manifest_injection.py
Normal file
35
tests/test_manifest_injection.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from app.core.deep_agent import format_folder_manifest, MANIFEST_TOKEN_BUDGET
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_folder_manifest_basic():
    """A small manifest renders the header, one line per file, and the closing tag."""
    manifest = {
        "folderPath": "D:\\Acme",
        "lastScannedAt": "2h ago",
        "files": [
            {"relPath": "briefs/kickoff.md", "kind": "text", "summary": "Kickoff notes; scope and deadlines."},
            {"relPath": "logos/logo-v3.png", "kind": "image", "summary": "Final logo on white."},
        ],
    }
    out = format_folder_manifest(manifest)
    assert out.startswith("<linked_folder>\n")
    assert out.endswith("</linked_folder>")
    assert "D:\\Acme" in out
    # Pin the full per-file line; the previous `"/x" in out or "x" in out`
    # check reduced to a bare substring match on the relative path.
    assert "- /briefs/kickoff.md [text] Kickoff notes; scope and deadlines.\n" in out
    assert "- /logos/logo-v3.png [image] Final logo on white.\n" in out
    # Nothing is truncated for a manifest this small.
    assert "more files omitted" not in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_folder_manifest_truncates_past_budget():
    """An oversized manifest is cut to the budget, keeping the newest files."""
    files = [
        {"relPath": f"f{i}.md", "kind": "text", "summary": "x" * 100, "mtimeMs": i}
        for i in range(2000)
    ]
    out = format_folder_manifest({"folderPath": "p", "lastScannedAt": "now", "files": files})
    assert "more files omitted" in out
    assert out.endswith("</linked_folder>")
    # Truncation is by mtime descending: the newest entry survives and the
    # oldest one is dropped.
    assert "/f1999.md" in out
    assert "/f0.md" not in out
    # Rough token check
    assert len(out) // 4 < MANIFEST_TOKEN_BUDGET + 200
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_folder_manifest_null_returns_empty():
    """A missing manifest and a manifest with no files both render as an empty string."""
    for empty_manifest in (None, {"files": []}):
        assert format_folder_manifest(empty_manifest) == ""
|
||||||
Reference in New Issue
Block a user