feat(tests): add --preprocess-dir CLI option to pytest
- conftest.py: registra --preprocess-dir via pytest_addoption
- test_preprocessors.py: usa pytest_generate_tests per leggere i casi a collection time con accesso a config; _content e _fixtures_dir accettano path dinamico

Usage:
    pytest tests/test_preprocessors.py --preprocess-dir /my/folder

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,14 @@ Provides an async SQLite in-memory engine that auto-creates all tables,
|
|||||||
a per-test session, and a FastAPI ``TestClient`` wired to use it.
|
a per-test session, and a FastAPI ``TestClient`` wired to use it.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import annotations


def pytest_addoption(parser):
    """Register the ``--preprocess-dir`` command-line option.

    This hook is defined *after* ``from __future__ import annotations``:
    Python only permits the module docstring, comments, blank lines and
    other future statements ahead of a ``__future__`` import, so a ``def``
    placed before it makes the whole module fail with a ``SyntaxError``.

    Args:
        parser: Object exposing ``addoption``; the option is registered
            with a default of ``None`` (meaning "no override given").
    """
    parser.addoption(
        "--preprocess-dir",
        default=None,
        help="Override fixture folder for preprocessor tests (must contain cases.yaml + data/)",
    )
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
"""Tests for the preprocessor system (Step 1 — Local Agent V2).
|
"""Tests for the preprocessor system (Step 1 — Local Agent V2).
|
||||||
|
|
||||||
Fixtures: tests/fixtures/preprocessors/cases.yaml + data/
|
|
||||||
|
|
||||||
Run:
|
Run:
|
||||||
pytest tests/test_preprocessors.py -v
|
pytest tests/test_preprocessors.py -v
|
||||||
|
pytest tests/test_preprocessors.py -v --preprocess-dir /path/to/folder
|
||||||
|
|
||||||
|
The folder must contain cases.yaml + data/.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@@ -17,40 +18,56 @@ import yaml
|
|||||||
from app.core.langfuse_client import get_langfuse
|
from app.core.langfuse_client import get_langfuse
|
||||||
from app.core.preprocessors import detect_content_type, preprocess
|
from app.core.preprocessors import detect_content_type, preprocess
|
||||||
|
|
||||||
_DATA_DIR = Path(__file__).parent / "fixtures" / "preprocessors" / "data"
|
_DEFAULT_DIR = Path(__file__).parent / "fixtures" / "preprocessors"
|
||||||
_CASES_FILE = Path(__file__).parent / "fixtures" / "preprocessors" / "cases.yaml"
|
|
||||||
|
|
||||||
_GENERATORS = {
|
_GENERATORS = {
|
||||||
"binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20,
|
"binary_noise": "some\x00\x01\x02\x03\x04\x05content" * 20,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _cases():
|
def _fixtures_dir(config) -> Path:
|
||||||
return yaml.safe_load(_CASES_FILE.read_text(encoding="utf-8"))
|
override = config.getoption("--preprocess-dir")
|
||||||
|
return Path(override) if override else _DEFAULT_DIR
|
||||||
|
|
||||||
|
|
||||||
def _content(case: dict) -> str:
|
def _load_cases(config) -> list[dict]:
|
||||||
|
return yaml.safe_load((_fixtures_dir(config) / "cases.yaml").read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def _content(case: dict, data_dir: Path) -> str:
|
||||||
if "generate" in case:
|
if "generate" in case:
|
||||||
return _GENERATORS[case["generate"]]
|
return _GENERATORS[case["generate"]]
|
||||||
return (_DATA_DIR / case["file"]).read_text(encoding="utf-8")
|
return (data_dir / case["file"]).read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
def _lf_score(name: str, value: float, comment: str = "") -> None:
|
def _lf_score(name: str, value: float) -> None:
|
||||||
lf = get_langfuse()
|
lf = get_langfuse()
|
||||||
if lf:
|
if lf:
|
||||||
trace = lf.trace(name=f"eval-{name}")
|
trace = lf.trace(name=f"eval-{name}")
|
||||||
lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC", comment=comment)
|
lf.score(trace_id=trace.id, name=name, value=value, data_type="NUMERIC")
|
||||||
lf.flush()
|
lf.flush()
|
||||||
|
|
||||||
|
|
||||||
|
# ── parametrize at collection time via pytest hook ────────────────────
|
||||||
|
|
||||||
|
def pytest_generate_tests(metafunc):
    """Parametrize ``preprocess_case`` from ``cases.yaml`` at collection time.

    Tests that do not request the ``preprocess_case`` fixture are left
    untouched. For ``test_detect`` only the cases carrying a ``detect`` key
    are used; every other requesting test receives the cases carrying a
    ``process`` key. Each case's ``id`` value becomes the pytest test id.

    Args:
        metafunc: The pytest ``Metafunc`` object for the test being collected;
            its ``config`` is forwarded to ``_load_cases``.
    """
    if "preprocess_case" not in metafunc.fixturenames:
        return

    # yaml.safe_load yields None for an empty document; fall back to an
    # empty list so an empty cases.yaml produces an empty parameter set
    # instead of a TypeError during collection.
    cases = _load_cases(metafunc.config) or []

    wanted_key = "detect" if metafunc.function.__name__ == "test_detect" else "process"
    subset = [c for c in cases if wanted_key in c]
    metafunc.parametrize("preprocess_case", subset, ids=[c["id"] for c in subset])
|
||||||
|
|
||||||
|
|
||||||
# ── detect ────────────────────────────────────────────────────────────
|
# ── detect ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_detect = [c for c in _cases() if "detect" in c]
|
def test_detect(preprocess_case, pytestconfig) -> None:
|
||||||
|
case = preprocess_case
|
||||||
|
data_dir = _fixtures_dir(pytestconfig) / "data"
|
||||||
@pytest.mark.parametrize("case", _detect, ids=[c["id"] for c in _detect])
|
raw = _content(case, data_dir)
|
||||||
def test_detect(case: dict) -> None:
|
|
||||||
raw = _content(case)
|
|
||||||
filename = case.get("filename", case.get("file", ""))
|
filename = case.get("filename", case.get("file", ""))
|
||||||
ct = detect_content_type(filename, raw)
|
ct = detect_content_type(filename, raw)
|
||||||
expected = case["detect"]
|
expected = case["detect"]
|
||||||
@@ -60,12 +77,10 @@ def test_detect(case: dict) -> None:
|
|||||||
|
|
||||||
# ── preprocess ────────────────────────────────────────────────────────
|
# ── preprocess ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
_process = [c for c in _cases() if "process" in c]
|
def test_preprocess(preprocess_case, pytestconfig) -> None:
|
||||||
|
case = preprocess_case
|
||||||
|
data_dir = _fixtures_dir(pytestconfig) / "data"
|
||||||
@pytest.mark.parametrize("case", _process, ids=[c["id"] for c in _process])
|
raw = _content(case, data_dir)
|
||||||
def test_preprocess(case: dict) -> None:
|
|
||||||
raw = _content(case)
|
|
||||||
result = preprocess(case["process"], raw)
|
result = preprocess(case["process"], raw)
|
||||||
|
|
||||||
if case.get("no_html"):
|
if case.get("no_html"):
|
||||||
|
|||||||
Reference in New Issue
Block a user