"""Folder indexer LLM helpers.""" from __future__ import annotations from unittest.mock import AsyncMock, patch import pytest from app.core.folder_indexer import summarize_text, summarize_image, IndexResult pytestmark = pytest.mark.asyncio async def test_summarize_text_returns_summary_and_tokens(): mock_resp = AsyncMock() mock_resp.content = "Kickoff notes covering scope and deadlines." mock_resp.usage_metadata = {"input_tokens": 320, "output_tokens": 18, "total_tokens": 338} with patch("app.core.folder_indexer._llm_text", new=AsyncMock(return_value=mock_resp)): result = await summarize_text(content="hello world", ext=".md", name="kickoff.md") assert isinstance(result, IndexResult) assert result.summary == "Kickoff notes covering scope and deadlines." assert result.tokens_used == 338 async def test_summarize_text_truncates_summary_at_500_chars(): mock_resp = AsyncMock() mock_resp.content = "x" * 1000 mock_resp.usage_metadata = {"total_tokens": 100} with patch("app.core.folder_indexer._llm_text", new=AsyncMock(return_value=mock_resp)): result = await summarize_text(content="x", ext=".md", name="x.md") assert len(result.summary) <= 500 async def test_summarize_image_uses_vision_content_blocks(): mock_resp = AsyncMock() mock_resp.content = "Final logo on white background." mock_resp.usage_metadata = {"total_tokens": 500} captured = {} async def fake_llm_vision(messages): captured["messages"] = messages return mock_resp with patch("app.core.folder_indexer._llm_vision", new=fake_llm_vision): result = await summarize_image(image_b64="iVBORw0KG", mime="image/png") assert "Final logo" in result.summary assert result.tokens_used == 500 # last message contains an image content block last = captured["messages"][-1] assert any( isinstance(p, dict) and p.get("type") == "image_url" for p in (last.content if isinstance(last.content, list) else []) ) async def test_summarize_pdf_extracts_then_summarizes(monkeypatch): # pypdf.PdfReader returns text from pages from app.core import folder_indexer class FakePage: def extract_text(self): return "PDF page content with project info." class FakeReader: pages = [FakePage(), FakePage()] monkeypatch.setattr(folder_indexer, "PdfReader", lambda buf: FakeReader()) mock_resp = AsyncMock(); mock_resp.content = "Project info doc."; mock_resp.usage_metadata = {"total_tokens": 50} async def fake_llm(messages): return mock_resp with patch("app.core.folder_indexer._llm_text", new=fake_llm): result = await folder_indexer.summarize_pdf(pdf_b64="SGVsbG8=", name="doc.pdf") assert "Project info" in result.summary assert result.tokens_used == 50 async def test_summarize_docx_extracts_then_summarizes(monkeypatch): from app.core import folder_indexer class FakePara: def __init__(self, t): self.text = t class FakeDoc: paragraphs = [FakePara("Heading"), FakePara("Body paragraph one.")] monkeypatch.setattr(folder_indexer, "DocxDocument", lambda buf: FakeDoc()) mock_resp = AsyncMock(); mock_resp.content = "Heading and body."; mock_resp.usage_metadata = {"total_tokens": 30} async def fake_llm(messages): return mock_resp with patch("app.core.folder_indexer._llm_text", new=fake_llm): result = await folder_indexer.summarize_docx(docx_b64="UEsDBBQ=", name="doc.docx") assert result.summary == "Heading and body."