feat(api): folder_indexer.summarize_image via gpt-4o-mini vision
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,40 @@ async def _llm_text(messages: list) -> object:
|
||||
return await llm.ainvoke(messages)
|
||||
|
||||
|
||||
async def _llm_vision(messages: list) -> object:
    """Send ``messages`` to the vision model and return its raw response.

    Kept as a thin module-level wrapper, parallel to ``_llm_text``, so that
    tests can patch this single name instead of the underlying client.
    """
    vision_llm = get_llm(model="gpt-4o-mini", temperature=0.2)
    reply = await vision_llm.ainvoke(messages)
    return reply
|
||||
|
||||
|
||||
async def summarize_image(*, image_b64: str, mime: str) -> IndexResult:
    """Summarise an image file through the vision LLM call.

    Parameters
    ----------
    image_b64:
        Base64-encoded image bytes.
    mime:
        MIME type of the image, e.g. ``"image/png"``.

    Returns
    -------
    IndexResult
        ``summary`` is the stripped model reply cut to at most 500
        characters; ``tokens_used`` is the ``"total"`` entry of the
        extracted usage, or 0 when that entry is missing.
    """
    # Only the template text is needed here; the prompt object is unused.
    system_prompt, _prompt_obj = get_prompt_or_fallback(
        "folder_file_summary_image", _IMAGE_FALLBACK
    )

    # The image travels inline as a data URL inside an ``image_url`` block.
    data_url = f"data:{mime};base64,{image_b64}"
    user_parts = [
        {"type": "text", "text": "Summarise this image."},
        {"type": "image_url", "image_url": {"url": data_url}},
    ]
    conversation = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=user_parts),
    ]

    response = await _llm_vision(conversation)
    usage = extract_usage(response)
    raw_text = response.content or ""
    return IndexResult(
        summary=raw_text.strip()[:500],
        tokens_used=usage.get("total", 0),
    )
|
||||
|
||||
|
||||
async def summarize_text(*, content: str, ext: str, name: str) -> IndexResult:
|
||||
"""Return a compact summary of a text file.
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.core.folder_indexer import summarize_text, IndexResult
|
||||
from app.core.folder_indexer import summarize_text, summarize_image, IndexResult
|
||||
|
||||
pytestmark = pytest.mark.asyncio
|
||||
|
||||
@@ -28,3 +28,26 @@ async def test_summarize_text_truncates_summary_at_500_chars():
|
||||
with patch("app.core.folder_indexer._llm_text", new=AsyncMock(return_value=mock_resp)):
|
||||
result = await summarize_text(content="x", ext=".md", name="x.md")
|
||||
assert len(result.summary) <= 500
|
||||
|
||||
|
||||
async def test_summarize_image_uses_vision_content_blocks():
    """summarize_image must pass an ``image_url`` content block to the LLM."""
    fake_response = AsyncMock()
    fake_response.content = "Final logo on white background."
    fake_response.usage_metadata = {"total_tokens": 500}
    seen = {}

    async def fake_llm_vision(messages):
        # Record what summarize_image sends so it can be inspected below.
        seen["messages"] = messages
        return fake_response

    with patch("app.core.folder_indexer._llm_vision", new=fake_llm_vision):
        result = await summarize_image(image_b64="iVBORw0KG", mime="image/png")

    assert "Final logo" in result.summary
    assert result.tokens_used == 500

    # The final message must carry an image content block.
    final_content = seen["messages"][-1].content
    parts = final_content if isinstance(final_content, list) else []
    assert any(
        isinstance(part, dict) and part.get("type") == "image_url"
        for part in parts
    )
|
||||
|
||||
Reference in New Issue
Block a user