# api/tests/test_folder_indexer.py

"""Folder indexer LLM helpers."""
from __future__ import annotations

from unittest.mock import AsyncMock, patch

import pytest

from app.core.folder_indexer import summarize_text, summarize_image, IndexResult

# Apply the ``asyncio`` mark to every test in this module so the coroutine
# tests below are run by the async pytest plugin (presumably pytest-asyncio —
# confirm against the project's test configuration).
pytestmark = pytest.mark.asyncio


async def test_summarize_text_returns_summary_and_tokens():
    """summarize_text wraps the LLM reply text and total token count in an IndexResult."""
    expected_summary = "Kickoff notes covering scope and deadlines."
    reply = AsyncMock(
        content=expected_summary,
        usage_metadata={"input_tokens": 320, "output_tokens": 18, "total_tokens": 338},
    )
    llm_stub = AsyncMock(return_value=reply)

    with patch("app.core.folder_indexer._llm_text", new=llm_stub):
        result = await summarize_text(content="hello world", ext=".md", name="kickoff.md")

    assert isinstance(result, IndexResult)
    assert result.summary == expected_summary
    assert result.tokens_used == 338


async def test_summarize_text_truncates_summary_at_500_chars():
    """A 1000-character LLM reply must yield a summary of at most 500 characters."""
    oversized_reply = AsyncMock(
        content="x" * 1000,
        usage_metadata={"total_tokens": 100},
    )
    llm_stub = AsyncMock(return_value=oversized_reply)

    with patch("app.core.folder_indexer._llm_text", new=llm_stub):
        result = await summarize_text(content="x", ext=".md", name="x.md")

    assert len(result.summary) <= 500


async def test_summarize_image_uses_vision_content_blocks():
    """summarize_image must send the vision LLM a final message holding an image_url block."""
    reply = AsyncMock(
        content="Final logo on white background.",
        usage_metadata={"total_tokens": 500},
    )
    recorded = {}

    async def fake_llm_vision(messages):
        # Keep the outgoing messages so their structure can be inspected below.
        recorded["messages"] = messages
        return reply

    with patch("app.core.folder_indexer._llm_vision", new=fake_llm_vision):
        result = await summarize_image(image_b64="iVBORw0KG", mime="image/png")

    assert "Final logo" in result.summary
    assert result.tokens_used == 500

    # The last message's content must be a list that includes an image block;
    # any non-list content is treated as containing no blocks at all.
    final_message = recorded["messages"][-1]
    parts = final_message.content if isinstance(final_message.content, list) else []
    image_parts = [
        part
        for part in parts
        if isinstance(part, dict) and part.get("type") == "image_url"
    ]
    assert image_parts


async def test_summarize_pdf_extracts_then_summarizes(monkeypatch):
    """summarize_pdf returns the LLM reply and token count when PdfReader is stubbed out."""
    from app.core import folder_indexer

    class FakePage:
        def extract_text(self):
            return "PDF page content with project info."

    class FakeReader:
        # Stand-in for pypdf.PdfReader: exposes two pages of extractable text.
        pages = [FakePage(), FakePage()]

    monkeypatch.setattr(folder_indexer, "PdfReader", lambda buf: FakeReader())

    reply = AsyncMock(
        content="Project info doc.",
        usage_metadata={"total_tokens": 50},
    )

    async def fake_llm(messages):
        return reply

    with patch("app.core.folder_indexer._llm_text", new=fake_llm):
        result = await folder_indexer.summarize_pdf(pdf_b64="SGVsbG8=", name="doc.pdf")

    assert "Project info" in result.summary
    assert result.tokens_used == 50


async def test_summarize_docx_extracts_then_summarizes(monkeypatch):
    """summarize_docx returns the LLM reply and token count when DocxDocument is stubbed out.

    The document loader is replaced with a fake exposing two paragraphs, and
    the text LLM helper is replaced with a coroutine returning a canned reply.
    """
    from app.core import folder_indexer

    class FakePara:
        def __init__(self, t):
            self.text = t

    class FakeDoc:
        # Stand-in for the parsed document: only ``paragraphs`` is provided.
        paragraphs = [FakePara("Heading"), FakePara("Body paragraph one.")]

    monkeypatch.setattr(folder_indexer, "DocxDocument", lambda buf: FakeDoc())

    mock_resp = AsyncMock()
    mock_resp.content = "Heading and body."
    mock_resp.usage_metadata = {"total_tokens": 30}

    async def fake_llm(messages):
        return mock_resp

    with patch("app.core.folder_indexer._llm_text", new=fake_llm):
        result = await folder_indexer.summarize_docx(docx_b64="UEsDBBQ=", name="doc.docx")

    assert result.summary == "Heading and body."
    # The mocked usage metadata was previously set but never checked; assert it
    # so token accounting is covered here like in the sibling summarize_* tests.
    assert result.tokens_used == 30