feat(api): folder_indexer.summarize_text via gpt-4o-mini
This commit is contained in:
66
app/core/folder_indexer.py
Normal file
66
app/core/folder_indexer.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Per-file summarisation for project folder integration."""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
|
||||
from app.core.langfuse_client import (
|
||||
compile_prompt,
|
||||
extract_usage,
|
||||
get_prompt_or_fallback,
|
||||
)
|
||||
from app.core.llm import get_llm
|
||||
|
||||
# Fallback prompt template for text files; summarize_text() hands it to
# get_prompt_or_fallback() under the prompt name "folder_file_summary_text".
# The {ext}, {name} and {content} placeholders are filled in by compile_prompt().
_TEXT_FALLBACK = (
    "You are summarising a file for an AI assistant that helps the user manage a project.\n"
    "Produce a single sentence (<=30 words, <=200 chars) that captures the file's purpose "
    "and most important detail.\n"
    "File extension: {ext}\n"
    "File name: {name}\n"
    "Content (truncated if long):\n"
    "{content}"
)

# Fallback prompt for image files. It carries no placeholders.
_IMAGE_FALLBACK = (
    "You are summarising an image attached to a project folder.\n"
    "Produce a single sentence (<=30 words, <=200 chars) "
    "describing what the image shows and any obvious purpose "
    "(logo, screenshot, diagram, photo of a whiteboard, etc.)."
)

# Upper bound, in characters, on how much file content summarize_text()
# substitutes into the prompt.
_MAX_INPUT_CHARS = 6000
|
||||
|
||||
|
||||
@dataclass
class IndexResult:
    """Outcome of summarising one file: the summary text plus its token cost."""

    # Summary text returned for the file.
    summary: str
    # Token count recorded for the LLM call that produced the summary.
    tokens_used: int
|
||||
|
||||
|
||||
async def _llm_text(messages: list) -> object:
    """Send *messages* to the text-summarisation model and return its reply.

    Kept as a separate module-level coroutine so that tests can patch this
    one function instead of having to mock the LLM object.
    """
    return await get_llm(model="gpt-4o-mini", temperature=0.2).ainvoke(messages)
|
||||
|
||||
|
||||
def _message_text(content: object) -> str:
    """Flatten a chat-message ``content`` payload into plain text.

    Message content may be a plain string or a list of content blocks
    (bare strings and/or dicts such as ``{"type": "text", "text": ...}``).
    Non-text blocks are ignored; falsy content yields an empty string.
    """
    if not content:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif (
                isinstance(block, dict)
                and block.get("type", "text") == "text"
                and isinstance(block.get("text"), str)
            ):
                pieces.append(block["text"])
        return "".join(pieces)
    return str(content)


async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
    """Return a compact summary of a text file.

    Parameters
    ----------
    content:
        Raw text content of the file (will be truncated to _MAX_INPUT_CHARS).
    ext:
        File extension including the leading dot, e.g. ``".md"``.
    name:
        File name, e.g. ``"kickoff.md"``.

    Returns
    -------
    IndexResult
        The stripped model reply, capped at 500 characters, together with
        the ``"total"`` entry of the usage data extracted from the response
        (``0`` when that entry is absent).
    """
    template, prompt_obj = get_prompt_or_fallback("folder_file_summary_text", _TEXT_FALLBACK)
    # Only the leading slice of the file is substituted into the prompt.
    truncated = content[:_MAX_INPUT_CHARS]
    compiled = compile_prompt(template, prompt_obj, ext=ext, name=name, content=truncated)
    messages = [
        SystemMessage(content=compiled),
        HumanMessage(content="Summarise this file."),
    ]
    response = await _llm_text(messages)
    usage = extract_usage(response)
    # The reply content is not guaranteed to be a str (it can be a list of
    # content blocks), so normalise it before calling str methods on it.
    summary = _message_text(response.content).strip()[:500]
    return IndexResult(summary=summary, tokens_used=usage.get("total", 0))
|
||||
Reference in New Issue
Block a user