"""Per-file summarisation for project folder integration."""

from __future__ import annotations

from dataclasses import dataclass

from langchain_core.messages import HumanMessage, SystemMessage

from app.core.langfuse_client import (
    compile_prompt,
    extract_usage,
    get_prompt_or_fallback,
)
from app.core.llm import get_llm

# Built-in prompt handed to get_prompt_or_fallback() as the fallback for the
# "folder_file_summary_text" prompt. The {ext}, {name} and {content}
# placeholders correspond to the keyword arguments given to compile_prompt().
_TEXT_FALLBACK = (
    "You are summarising a file for an AI assistant that helps the user manage a project.\n"
    "Produce a single sentence (<=30 words, <=200 chars) that captures the file's purpose "
    "and most important detail.\nFile extension: {ext}\nFile name: {name}\nContent (truncated if long):\n{content}"
)

# Built-in prompt text for image files. It is not referenced by the functions
# in this part of the module.
_IMAGE_FALLBACK = (
    "You are summarising an image attached to a project folder.\n"
    "Produce a single sentence (<=30 words, <=200 chars) describing what the image shows "
    "and any obvious purpose (logo, screenshot, diagram, photo of a whiteboard, etc.)."
)

# Maximum number of characters of file content forwarded to the model.
_MAX_INPUT_CHARS = 6000

# Hard cap applied to the returned summary. It is deliberately looser than the
# 200 characters the prompt asks for: it only guards against a model that
# ignores the instruction.
_MAX_SUMMARY_CHARS = 500


@dataclass
class IndexResult:
    """Outcome of summarising a single file."""

    # Summary text produced for the file.
    summary: str
    # Token count reported for the LLM call (0 when none is reported).
    tokens_used: int


def _content_to_text(content: object) -> str:
    """Flatten a chat-message ``content`` value into plain text.

    Message content may be a plain string or a list of content blocks (bare
    strings and/or dicts carrying a ``"text"`` entry). Calling ``str`` methods
    directly on the list form would raise ``AttributeError``, so text blocks
    are concatenated and non-text blocks are skipped.
    """
    if not content:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces: list[str] = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif (
                isinstance(block, dict)
                and block.get("type", "text") == "text"
                and isinstance(block.get("text"), str)
            ):
                pieces.append(block["text"])
        return "".join(pieces)
    # Unknown shape: keep whatever information it carries rather than failing.
    return str(content)


async def _llm_text(messages: list) -> object:
    """Make the LLM call for text summarisation.

    Defined as a standalone async function so tests can patch it cleanly
    without needing to mock the LLM object itself.
    """
    llm = get_llm(model="gpt-4o-mini", temperature=0.2)
    return await llm.ainvoke(messages)


async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
    """Return a compact summary of a text file.

    Parameters
    ----------
    content:
        Raw text content of the file (will be truncated to _MAX_INPUT_CHARS).
    ext:
        File extension including the leading dot, e.g. ``".md"``.
    name:
        File name, e.g. ``"kickoff.md"``.

    Returns
    -------
    IndexResult
        The stripped model output, capped at ``_MAX_SUMMARY_CHARS`` characters,
        together with the ``"total"`` value of the usage mapping returned by
        ``extract_usage`` (0 when it is missing or ``None``).
    """
    template, prompt_obj = get_prompt_or_fallback("folder_file_summary_text", _TEXT_FALLBACK)

    # Only the head of the file is sent to the model.
    truncated = content[:_MAX_INPUT_CHARS]
    compiled = compile_prompt(template, prompt_obj, ext=ext, name=name, content=truncated)

    messages = [
        SystemMessage(content=compiled),
        HumanMessage(content="Summarise this file."),
    ]
    response = await _llm_text(messages)
    usage = extract_usage(response)

    # Content may be a string or a list of blocks; normalise before trimming.
    summary = _content_to_text(response.content).strip()[:_MAX_SUMMARY_CHARS]

    # ``or 0`` keeps tokens_used an int even if "total" is present but None.
    return IndexResult(summary=summary, tokens_used=usage.get("total", 0) or 0)