feat(api): folder_indexer.summarize_text via gpt-4o-mini
This commit is contained in:
66
app/core/folder_indexer.py
Normal file
66
app/core/folder_indexer.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
"""Per-file summarisation for project folder integration."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from langchain_core.messages import HumanMessage, SystemMessage
|
||||||
|
|
||||||
|
from app.core.langfuse_client import (
|
||||||
|
compile_prompt,
|
||||||
|
extract_usage,
|
||||||
|
get_prompt_or_fallback,
|
||||||
|
)
|
||||||
|
from app.core.llm import get_llm
|
||||||
|
|
||||||
|
_TEXT_FALLBACK = (
|
||||||
|
"You are summarising a file for an AI assistant that helps the user manage a project.\n"
|
||||||
|
"Produce a single sentence (<=30 words, <=200 chars) that captures the file's purpose "
|
||||||
|
"and most important detail.\nFile extension: {ext}\nFile name: {name}\nContent (truncated if long):\n{content}"
|
||||||
|
)
|
||||||
|
_IMAGE_FALLBACK = (
|
||||||
|
"You are summarising an image attached to a project folder.\n"
|
||||||
|
"Produce a single sentence (<=30 words, <=200 chars) describing what the image shows "
|
||||||
|
"and any obvious purpose (logo, screenshot, diagram, photo of a whiteboard, etc.)."
|
||||||
|
)
|
||||||
|
_MAX_INPUT_CHARS = 6000
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class IndexResult:
|
||||||
|
summary: str
|
||||||
|
tokens_used: int
|
||||||
|
|
||||||
|
|
||||||
|
async def _llm_text(messages: list) -> object:
|
||||||
|
"""Make the LLM call for text summarisation.
|
||||||
|
|
||||||
|
Defined as a standalone async function so tests can patch it cleanly
|
||||||
|
without needing to mock the LLM object itself.
|
||||||
|
"""
|
||||||
|
llm = get_llm(model="gpt-4o-mini", temperature=0.2)
|
||||||
|
return await llm.ainvoke(messages)
|
||||||
|
|
||||||
|
|
||||||
|
async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
|
||||||
|
"""Return a compact summary of a text file.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
content:
|
||||||
|
Raw text content of the file (will be truncated to _MAX_INPUT_CHARS).
|
||||||
|
ext:
|
||||||
|
File extension including the leading dot, e.g. ``".md"``.
|
||||||
|
name:
|
||||||
|
File name, e.g. ``"kickoff.md"``.
|
||||||
|
"""
|
||||||
|
template, prompt_obj = get_prompt_or_fallback("folder_file_summary_text", _TEXT_FALLBACK)
|
||||||
|
truncated = content[:_MAX_INPUT_CHARS]
|
||||||
|
compiled = compile_prompt(template, prompt_obj, ext=ext, name=name, content=truncated)
|
||||||
|
messages = [
|
||||||
|
SystemMessage(content=compiled),
|
||||||
|
HumanMessage(content="Summarise this file."),
|
||||||
|
]
|
||||||
|
response = await _llm_text(messages)
|
||||||
|
usage = extract_usage(response)
|
||||||
|
summary = (response.content or "").strip()[:500]
|
||||||
|
return IndexResult(summary=summary, tokens_used=usage.get("total", 0))
|
||||||
30
tests/test_folder_indexer.py
Normal file
30
tests/test_folder_indexer.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
"""Folder indexer LLM helpers."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.core.folder_indexer import summarize_text, IndexResult
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.asyncio
|
||||||
|
|
||||||
|
|
||||||
|
async def test_summarize_text_returns_summary_and_tokens():
|
||||||
|
mock_resp = AsyncMock()
|
||||||
|
mock_resp.content = "Kickoff notes covering scope and deadlines."
|
||||||
|
mock_resp.usage_metadata = {"input_tokens": 320, "output_tokens": 18, "total_tokens": 338}
|
||||||
|
with patch("app.core.folder_indexer._llm_text", new=AsyncMock(return_value=mock_resp)):
|
||||||
|
result = await summarize_text(content="hello world", ext=".md", name="kickoff.md")
|
||||||
|
assert isinstance(result, IndexResult)
|
||||||
|
assert result.summary == "Kickoff notes covering scope and deadlines."
|
||||||
|
assert result.tokens_used == 338
|
||||||
|
|
||||||
|
|
||||||
|
async def test_summarize_text_truncates_summary_at_500_chars():
|
||||||
|
mock_resp = AsyncMock()
|
||||||
|
mock_resp.content = "x" * 1000
|
||||||
|
mock_resp.usage_metadata = {"total_tokens": 100}
|
||||||
|
with patch("app.core.folder_indexer._llm_text", new=AsyncMock(return_value=mock_resp)):
|
||||||
|
result = await summarize_text(content="x", ext=".md", name="x.md")
|
||||||
|
assert len(result.summary) <= 500
|
||||||
Reference in New Issue
Block a user