diff --git a/tests/conftest.py b/tests/conftest.py
index 74244aa..31a3722 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,18 @@ Provides an async SQLite in-memory engine that auto-creates all tables,
 a per-test session, and a FastAPI ``TestClient`` wired to use it.
 """
 
 from __future__ import annotations
 
+
+# NOTE: keep this hook below the ``__future__`` import, which must be the
+# first statement in the module (a definition above it is a SyntaxError).
+def pytest_addoption(parser):
+    """Register ``--preprocess-dir``, an override for the preprocessor fixture folder."""
+    parser.addoption(
+        "--preprocess-dir",
+        default=None,
+        help="Override fixture folder for preprocessor tests (must contain cases.yaml + data/)",
+    )
+
+
 import json
diff --git a/tests/test_preprocessors.py b/tests/test_preprocessors.py
index 95440b1..8f8a0ed 100644
--- a/tests/test_preprocessors.py
+++ b/tests/test_preprocessors.py
@@ -1,9 +1,10 @@
 """Tests for the preprocessor system (Step 1 — Local Agent V2).
 
-Fixtures: tests/fixtures/preprocessors/cases.yaml + data/
-
 Run:
     pytest tests/test_preprocessors.py -v
+    pytest tests/test_preprocessors.py -v --preprocess-dir /path/to/folder
+
+The folder must contain cases.yaml + data/.
 """
 
 from __future__ import annotations
@@ -17,40 +18,59 @@ import yaml
 from app.core.langfuse_client import get_langfuse
 from app.core.preprocessors import detect_content_type, preprocess
 
-_DATA_DIR = Path(__file__).parent / "fixtures" / "preprocessors" / "data"
-_CASES_FILE = Path(__file__).parent / "fixtures" / "preprocessors" / "cases.yaml"
+_DEFAULT_DIR = Path(__file__).parent / "fixtures" / "preprocessors"
 
 _GENERATORS = {
     "binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20,
 }
 
 
-def _cases():
-    return yaml.safe_load(_CASES_FILE.read_text(encoding="utf-8"))
+def _fixtures_dir(config) -> Path:
+    """Return the ``--preprocess-dir`` override if given, else the bundled fixtures folder."""
+    override = config.getoption("--preprocess-dir")
+    return Path(override) if override else _DEFAULT_DIR
 
 
-def _content(case: dict) -> str:
+def _load_cases(config) -> list[dict]:
+    """Parse ``cases.yaml`` from the active fixtures folder."""
+    return yaml.safe_load((_fixtures_dir(config) / "cases.yaml").read_text(encoding="utf-8"))
+
+
+def _content(case: dict, data_dir: Path) -> str:
     if "generate" in case:
         return _GENERATORS[case["generate"]]
-    return (_DATA_DIR / case["file"]).read_text(encoding="utf-8")
+    return (data_dir / case["file"]).read_text(encoding="utf-8")
 
 
-def _lf_score(name: str, value: float, comment: str = "") -> None:
+def _lf_score(name: str, value: float) -> None:
     lf = get_langfuse()
     if lf:
         trace = lf.trace(name=f"eval-{name}")
-        lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC", comment=comment)
+        lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC")
         lf.flush()
 
 
+# ── parametrize at collection time via pytest hook ────────────────────
+
+def pytest_generate_tests(metafunc):
+    """Parametrize ``preprocess_case`` with the YAML cases that apply to the test."""
+    if "preprocess_case" not in metafunc.fixturenames:
+        return
+    cases = _load_cases(metafunc.config)
+    test_name = metafunc.function.__name__
+    if test_name == "test_detect":
+        subset = [c for c in cases if "detect" in c]
+    else:
+        subset = [c for c in cases if "process" in c]
+    metafunc.parametrize("preprocess_case", subset, ids=[c["id"] for c in subset])
+
+
 # ── detect ────────────────────────────────────────────────────────────
 
-_detect = [c for c in _cases() if "detect" in c]
-
-
-@pytest.mark.parametrize("case", _detect, ids=[c["id"] for c in _detect])
-def test_detect(case: dict) -> None:
-    raw = _content(case)
+def test_detect(preprocess_case, pytestconfig) -> None:
+    case = preprocess_case
+    data_dir = _fixtures_dir(pytestconfig) / "data"
+    raw = _content(case, data_dir)
     filename = case.get("filename", case.get("file", ""))
     ct = detect_content_type(filename, raw)
     expected = case["detect"]
@@ -60,12 +80,10 @@ def test_detect(case: dict) -> None:
 
 # ── preprocess ────────────────────────────────────────────────────────
 
-_process = [c for c in _cases() if "process" in c]
-
-
-@pytest.mark.parametrize("case", _process, ids=[c["id"] for c in _process])
-def test_preprocess(case: dict) -> None:
-    raw = _content(case)
+def test_preprocess(preprocess_case, pytestconfig) -> None:
+    case = preprocess_case
+    data_dir = _fixtures_dir(pytestconfig) / "data"
+    raw = _content(case, data_dir)
     result = preprocess(case["process"], raw)
 
     if case.get("no_html"):