feat(api): folder_indexer.summarize_image via gpt-4o-mini vision

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 11:09:37 +02:00
parent 822b4cd8b1
commit b7a4edac90
2 changed files with 58 additions and 1 deletions
--- a/app/core/folder_indexer.py
+++ b/app/core/folder_indexer.py
@@ -41,6 +41,40 @@ async def _llm_text(messages: list) -> object:
    return await llm.ainvoke(messages)


+async def _llm_vision(messages: list) -> object:
+    """Run one vision (image) completion on ``gpt-4o-mini`` and return its reply.
+
+    Kept as a thin module-level coroutine, parallel to ``_llm_text``, so that
+    tests can patch this name instead of the underlying client. The message
+    list is forwarded untouched and the raw response is handed back as-is.
+    """
+    return await get_llm(model="gpt-4o-mini", temperature=0.2).ainvoke(messages)
+
+
+async def summarize_image(*, image_b64: str, mime: str) -> IndexResult:
+    """Summarise an image with the vision model (summary capped at 500 chars).
+
+    Parameters
+    ----------
+    image_b64:
+        Base64-encoded image bytes; embedded unchanged in a ``data:`` URL.
+    mime:
+        MIME type of the image, e.g. ``"image/png"``.
+    """
+    template, _ = get_prompt_or_fallback("folder_file_summary_image", _IMAGE_FALLBACK)
+    user_parts = [
+        {"type": "text", "text": "Summarise this image."},
+        {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{image_b64}"}},
+    ]
+    reply = await _llm_vision([SystemMessage(content=template), HumanMessage(content=user_parts)])
+    text = reply.content or ""
+    if not isinstance(text, str):  # content may be a list of text parts, not a str
+        texts = (p.get("text") if isinstance(p, dict) else p for p in text)
+        text = "".join(t for t in texts if isinstance(t, str))
+    usage = extract_usage(reply)
+    return IndexResult(summary=text.strip()[:500], tokens_used=usage.get("total", 0))
+
+
 async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
    """Return a compact summary of a text file.